[PATCH] drm/amd/pm: fix gpu reset failure by MP1 state setting

2021-03-22 Thread Guchun Chen
Instead of blocking the various unsupported MP1 states at the upper
level, defer the handling to the ASIC-specific code and skip the
unsupported states there.

Signed-off-by: Lijo Lazar 
Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/pm/amdgpu_dpm.c|  3 ---
 .../gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c| 10 +++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c 
b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
index 15e239582a97..0a6bb3311f0f 100644
--- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
+++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c
@@ -1027,9 +1027,6 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev,
int ret = 0;
const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs;
 
-   if (mp1_state == PP_MP1_STATE_NONE)
-   return 0;
-
if (pp_funcs && pp_funcs->set_mp1_state) {
ret = pp_funcs->set_mp1_state(
adev->powerplay.pp_handle,
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 722fe067ac2c..72d9c1be1835 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -3113,14 +3113,18 @@ static int 
sienna_cichlid_system_features_control(struct smu_context *smu,
 static int sienna_cichlid_set_mp1_state(struct smu_context *smu,
enum pp_mp1_state mp1_state)
 {
+   int ret;
+
switch (mp1_state) {
case PP_MP1_STATE_UNLOAD:
-   return smu_cmn_set_mp1_state(smu, mp1_state);
+   ret = smu_cmn_set_mp1_state(smu, mp1_state);
+   break;
default:
-   return -EINVAL;
+   /* Ignore others */
+   ret = 0;
}
 
-   return 0;
+   return ret;
 }
 
 static const struct pptable_funcs sienna_cichlid_ppt_funcs = {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
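
For illustration, a minimal userspace sketch of the dispatch pattern the patch adopts: the upper level forwards every MP1 state and the ASIC-specific handler skips the states it does not support instead of failing. The enum mirrors pp_mp1_state; everything else (function names, the printf standing in for the real SMU message) is simplified and is not the kernel code.

#include <stdio.h>

enum pp_mp1_state {
	PP_MP1_STATE_NONE,
	PP_MP1_STATE_SHUTDOWN,
	PP_MP1_STATE_UNLOAD,
	PP_MP1_STATE_RESET,
};

/* ASIC-specific handler: act only on the states this ASIC supports and
 * quietly skip the rest, mirroring the sienna_cichlid change above. */
static int asic_set_mp1_state(enum pp_mp1_state state)
{
	switch (state) {
	case PP_MP1_STATE_UNLOAD:
		printf("asic: preparing MP1 for unload\n");
		return 0;
	default:
		/* ignore others instead of returning -EINVAL */
		return 0;
	}
}

/* upper level: no filtering any more, every state is simply forwarded */
static int dpm_set_mp1_state(enum pp_mp1_state state)
{
	return asic_set_mp1_state(state);
}

int main(void)
{
	/* a state this ASIC does not act on no longer fails the GPU reset path */
	return dpm_set_mp1_state(PP_MP1_STATE_RESET);
}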


Re: [PATCH] drm/amd/dispaly: fix deadlock issue in amdgpu reset

2021-03-22 Thread Joshua Ashton

Typo in the title:

s/dispaly/display

- Joshie ✨

On 3/22/21 8:11 AM, Lang Yu wrote:

During amdgpu reset, while dm.dc_lock is held by dm_suspend,
handle_hpd_rx_irq tries to acquire it. Deadlock occurred!

Deadlock log:

[  104.528304] amdgpu :03:00.0: amdgpu: GPU reset begin!

[  104.640084] ==
[  104.640092] WARNING: possible circular locking dependency detected
[  104.640099] 5.11.0-custom #1 Tainted: GW   E
[  104.640107] --
[  104.640114] cat/1158 is trying to acquire lock:
[  104.640120] 88810a09ce00 ((work_completion)(>work)){+.+.}-{0:0}, at: 
__flush_work+0x2e3/0x450
[  104.640144]
but task is already holding lock:
[  104.640151] 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: 
dm_suspend+0xb2/0x1d0 [amdgpu]
[  104.640581]
which lock already depends on the new lock.

[  104.640590]
the existing dependency chain (in reverse order) is:
[  104.640598]
-> #2 (>dm.dc_lock){+.+.}-{3:3}:
[  104.640611]lock_acquire+0xca/0x390
[  104.640623]__mutex_lock+0x9b/0x930
[  104.640633]mutex_lock_nested+0x1b/0x20
[  104.640640]handle_hpd_rx_irq+0x9b/0x1c0 [amdgpu]
[  104.640959]dm_irq_work_func+0x4e/0x60 [amdgpu]
[  104.641264]process_one_work+0x2a7/0x5b0
[  104.641275]worker_thread+0x4a/0x3d0
[  104.641283]kthread+0x125/0x160
[  104.641290]ret_from_fork+0x22/0x30
[  104.641300]
-> #1 (>hpd_lock){+.+.}-{3:3}:
[  104.641312]lock_acquire+0xca/0x390
[  104.641321]__mutex_lock+0x9b/0x930
[  104.641328]mutex_lock_nested+0x1b/0x20
[  104.641336]handle_hpd_rx_irq+0x67/0x1c0 [amdgpu]
[  104.641635]dm_irq_work_func+0x4e/0x60 [amdgpu]
[  104.641931]process_one_work+0x2a7/0x5b0
[  104.641940]worker_thread+0x4a/0x3d0
[  104.641948]kthread+0x125/0x160
[  104.641954]ret_from_fork+0x22/0x30
[  104.641963]
-> #0 ((work_completion)(>work)){+.+.}-{0:0}:
[  104.641975]check_prev_add+0x94/0xbf0
[  104.641983]__lock_acquire+0x130d/0x1ce0
[  104.641992]lock_acquire+0xca/0x390
[  104.642000]__flush_work+0x303/0x450
[  104.642008]flush_work+0x10/0x20
[  104.642016]amdgpu_dm_irq_suspend+0x93/0x100 [amdgpu]
[  104.642312]dm_suspend+0x181/0x1d0 [amdgpu]
[  104.642605]amdgpu_device_ip_suspend_phase1+0x8a/0x100 [amdgpu]
[  104.642835]amdgpu_device_ip_suspend+0x21/0x70 [amdgpu]
[  104.643066]amdgpu_device_pre_asic_reset+0x1bd/0x1d2 [amdgpu]
[  104.643403]amdgpu_device_gpu_recover.cold+0x5df/0xa9d [amdgpu]
[  104.643715]gpu_recover_get+0x2e/0x60 [amdgpu]
[  104.643951]simple_attr_read+0x6d/0x110
[  104.643960]debugfs_attr_read+0x49/0x70
[  104.643970]full_proxy_read+0x5f/0x90
[  104.643979]vfs_read+0xa3/0x190
[  104.643986]ksys_read+0x70/0xf0
[  104.643992]__x64_sys_read+0x1a/0x20
[  104.643999]do_syscall_64+0x38/0x90
[  104.644007]entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  104.644017]
other info that might help us debug this:

[  104.644026] Chain exists of:
  (work_completion)(>work) --> >hpd_lock --> 
>dm.dc_lock

[  104.644043]  Possible unsafe locking scenario:

[  104.644049]CPU0CPU1
[  104.644055]
[  104.644060]   lock(>dm.dc_lock);
[  104.644066]lock(>hpd_lock);
[  104.644075]lock(>dm.dc_lock);
[  104.644083]   lock((work_completion)(>work));
[  104.644090]
 *** DEADLOCK ***

[  104.644096] 3 locks held by cat/1158:
[  104.644103]  #0: 88810d0e4eb8 (>mutex){+.+.}-{3:3}, at: 
simple_attr_read+0x4e/0x110
[  104.644119]  #1: 88810a0a1600 (>reset_sem){}-{3:3}, at: 
amdgpu_device_lock_adev+0x42/0x94 [amdgpu]
[  104.644489]  #2: 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: 
dm_suspend+0xb2/0x1d0 [amdgpu]

Signed-off-by: Lang Yu 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e176ea84d75b..8727488df769 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2657,13 +2657,15 @@ static void handle_hpd_rx_irq(void *param)
}
}
  
-	mutex_lock(&adev->dm.dc_lock);

+   if (!amdgpu_in_reset(adev))
+   mutex_lock(&adev->dm.dc_lock);
  #ifdef CONFIG_DRM_AMD_DC_HDCP
result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
  #else
result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
  #endif
-   mutex_unlock(&adev->dm.dc_lock);
+   if 
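
For illustration, a small self-contained pthread sketch (userspace code, not the kernel fix; names simplified) of the pattern the patch introduces: during reset the suspend path already holds dc_lock and then waits for the HPD worker, so the worker skips taking the same lock and the cycle is broken.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t dc_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_bool in_reset;

static void *hpd_worker(void *arg)
{
	(void)arg;
	/* mirrors handle_hpd_rx_irq(): only take dc_lock outside of reset */
	if (!atomic_load(&in_reset))
		pthread_mutex_lock(&dc_lock);
	printf("worker: handled HPD RX irq\n");
	if (!atomic_load(&in_reset))
		pthread_mutex_unlock(&dc_lock);
	return NULL;
}

int main(void)
{
	pthread_t worker;

	atomic_store(&in_reset, 1);	/* GPU reset begins */
	pthread_mutex_lock(&dc_lock);	/* "dm_suspend" holds dc_lock ... */
	pthread_create(&worker, NULL, hpd_worker, NULL);
	pthread_join(&worker, NULL);	/* ... and waits for the worker: no deadlock */
	pthread_mutex_unlock(&dc_lock);
	atomic_store(&in_reset, 0);
	return 0;
}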

RE: [PATCH] drm/amd/dispaly: fix deadlock issue in amdgpu reset

2021-03-22 Thread Yu, Lang
[AMD Official Use Only - Internal Distribution Only]



-Original Message-
From: Grodzovsky, Andrey  
Sent: Monday, March 22, 2021 11:01 PM
To: Yu, Lang ; amd-gfx@lists.freedesktop.org
Cc: Deucher, Alexander ; Huang, Ray 

Subject: Re: [PATCH] drm/amd/dispaly: fix deadlock issue in amdgpu reset



On 2021-03-22 4:11 a.m., Lang Yu wrote:
> During amdgpu reset, while dm.dc_lock is held by dm_suspend, 
> handle_hpd_rx_irq tries to acquire it. Deadlock occurred!
> 
> Deadlock log:
> 
> [  104.528304] amdgpu :03:00.0: amdgpu: GPU reset begin!
> 
> [  104.640084] ==
> [  104.640092] WARNING: possible circular locking dependency detected
> [  104.640099] 5.11.0-custom #1 Tainted: GW   E
> [  104.640107] --
> [  104.640114] cat/1158 is trying to acquire lock:
> [  104.640120] 88810a09ce00 
> ((work_completion)(>work)){+.+.}-{0:0}, at: __flush_work+0x2e3/0x450 [  
> 104.640144]
> but task is already holding lock:
> [  104.640151] 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: 
> dm_suspend+0xb2/0x1d0 [amdgpu] [  104.640581]
> which lock already depends on the new lock.
> 
> [  104.640590]
> the existing dependency chain (in reverse order) is:
> [  104.640598]
> -> #2 (>dm.dc_lock){+.+.}-{3:3}:
> [  104.640611]lock_acquire+0xca/0x390
> [  104.640623]__mutex_lock+0x9b/0x930
> [  104.640633]mutex_lock_nested+0x1b/0x20
> [  104.640640]handle_hpd_rx_irq+0x9b/0x1c0 [amdgpu]
> [  104.640959]dm_irq_work_func+0x4e/0x60 [amdgpu]
> [  104.641264]process_one_work+0x2a7/0x5b0
> [  104.641275]worker_thread+0x4a/0x3d0
> [  104.641283]kthread+0x125/0x160
> [  104.641290]ret_from_fork+0x22/0x30
> [  104.641300]
> -> #1 (>hpd_lock){+.+.}-{3:3}:
> [  104.641312]lock_acquire+0xca/0x390
> [  104.641321]__mutex_lock+0x9b/0x930
> [  104.641328]mutex_lock_nested+0x1b/0x20
> [  104.641336]handle_hpd_rx_irq+0x67/0x1c0 [amdgpu]
> [  104.641635]dm_irq_work_func+0x4e/0x60 [amdgpu]
> [  104.641931]process_one_work+0x2a7/0x5b0
> [  104.641940]worker_thread+0x4a/0x3d0
> [  104.641948]kthread+0x125/0x160
> [  104.641954]ret_from_fork+0x22/0x30
> [  104.641963]
> -> #0 ((work_completion)(>work)){+.+.}-{0:0}:
> [  104.641975]check_prev_add+0x94/0xbf0
> [  104.641983]__lock_acquire+0x130d/0x1ce0
> [  104.641992]lock_acquire+0xca/0x390
> [  104.642000]__flush_work+0x303/0x450
> [  104.642008]flush_work+0x10/0x20
> [  104.642016]amdgpu_dm_irq_suspend+0x93/0x100 [amdgpu]
> [  104.642312]dm_suspend+0x181/0x1d0 [amdgpu]
> [  104.642605]amdgpu_device_ip_suspend_phase1+0x8a/0x100 [amdgpu]
> [  104.642835]amdgpu_device_ip_suspend+0x21/0x70 [amdgpu]
> [  104.643066]amdgpu_device_pre_asic_reset+0x1bd/0x1d2 [amdgpu]
> [  104.643403]amdgpu_device_gpu_recover.cold+0x5df/0xa9d [amdgpu]
> [  104.643715]gpu_recover_get+0x2e/0x60 [amdgpu]
> [  104.643951]simple_attr_read+0x6d/0x110
> [  104.643960]debugfs_attr_read+0x49/0x70
> [  104.643970]full_proxy_read+0x5f/0x90
> [  104.643979]vfs_read+0xa3/0x190
> [  104.643986]ksys_read+0x70/0xf0
> [  104.643992]__x64_sys_read+0x1a/0x20
> [  104.643999]do_syscall_64+0x38/0x90
> [  104.644007]entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [  104.644017]
> other info that might help us debug this:
> 
> [  104.644026] Chain exists of:
>   (work_completion)(>work) --> 
> >hpd_lock --> >dm.dc_lock
> 
> [  104.644043]  Possible unsafe locking scenario:
> 
> [  104.644049]CPU0CPU1
> [  104.644055]
> [  104.644060]   lock(>dm.dc_lock);
> [  104.644066]lock(>hpd_lock);
> [  104.644075]lock(>dm.dc_lock);
> [  104.644083]   lock((work_completion)(>work));
> [  104.644090]
>  *** DEADLOCK ***
> 
> [  104.644096] 3 locks held by cat/1158:
> [  104.644103]  #0: 88810d0e4eb8 (>mutex){+.+.}-{3:3}, at: 
> simple_attr_read+0x4e/0x110 [  104.644119]  #1: 88810a0a1600 
> (>reset_sem){}-{3:3}, at: amdgpu_device_lock_adev+0x42/0x94 
> [amdgpu] [  104.644489]  #2: 88810a09cc70 
> (>dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb2/0x1d0 [amdgpu]
> 
> Signed-off-by: Lang Yu 
> ---
>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 --
>   1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index e176ea84d75b..8727488df769 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ 

[PATCH] drm/amd/display: Use DRM_DEBUG_DP

2021-03-22 Thread Luben Tuikov
Convert IRQ-based prints from DRM_DEBUG_DRIVER to
DRM_DEBUG_DP: the latter is not used in drm/amd
prior to this patch, and the IRQ-based prints drown
out the rest of the driver's DRM_DEBUG_DRIVER
messages.

Cc: Harry Wentland 
Cc: Alex Deucher 
Signed-off-by: Luben Tuikov 
---
 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 +--
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index f455fc3aa561..aabaa652f6dc 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -449,9 +449,9 @@ static void dm_pflip_high_irq(void *interrupt_params)
amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE;
	spin_unlock_irqrestore(&adev_to_drm(adev)->event_lock, flags);
 
-   DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp 
%d\n",
-amdgpu_crtc->crtc_id, amdgpu_crtc,
-vrr_active, (int) !e);
+   DRM_DEBUG_DP("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp 
%d\n",
+amdgpu_crtc->crtc_id, amdgpu_crtc,
+vrr_active, (int) !e);
 }
 
 static void dm_vupdate_high_irq(void *interrupt_params)
@@ -993,8 +993,7 @@ static void event_mall_stutter(struct work_struct *work)
dc_allow_idle_optimizations(
dm->dc, dm->active_vblank_irq_count == 0);
 
-   DRM_DEBUG_DRIVER("Allow idle optimizations (MALL): %d\n", 
dm->active_vblank_irq_count == 0);
-
+   DRM_DEBUG_DP("Allow idle optimizations (MALL): %d\n", 
dm->active_vblank_irq_count == 0);
 
	mutex_unlock(&dm->dc_lock);
 }
@@ -1810,8 +1809,8 @@ static void dm_gpureset_toggle_interrupts(struct 
amdgpu_device *adev,
if (acrtc && state->stream_status[i].plane_count != 0) {
irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst;
rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) 
? 0 : -EBUSY;
-   DRM_DEBUG("crtc %d - vupdate irq %sabling: r=%d\n",
- acrtc->crtc_id, enable ? "en" : "dis", rc);
+   DRM_DEBUG_DP("crtc %d - vupdate irq %sabling: r=%d\n",
+acrtc->crtc_id, enable ? "en" : "dis", rc);
if (rc)
DRM_WARN("Failed to %s pflip interrupts\n",
 enable ? "enable" : "disable");
@@ -4966,8 +4965,8 @@ static void update_stream_scaling_settings(const struct 
drm_display_mode *mode,
stream->src = src;
stream->dst = dst;
 
-   DRM_DEBUG_DRIVER("Destination Rectangle x:%d  y:%d  width:%d  
height:%d\n",
-   dst.x, dst.y, dst.width, dst.height);
+   DRM_DEBUG_DP("Destination Rectangle x:%d  y:%d  width:%d  height:%d\n",
+dst.x, dst.y, dst.width, dst.height);
 
 }
 
@@ -5710,8 +5709,8 @@ static inline int dm_set_vupdate_irq(struct drm_crtc 
*crtc, bool enable)
 
rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
 
-   DRM_DEBUG_DRIVER("crtc %d - vupdate irq %sabling: r=%d\n",
-acrtc->crtc_id, enable ? "en" : "dis", rc);
+   DRM_DEBUG_DP("crtc %d - vupdate irq %sabling: r=%d\n",
+acrtc->crtc_id, enable ? "en" : "dis", rc);
return rc;
 }
 
@@ -6664,7 +6663,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane 
*plane,
int r;
 
if (!new_state->fb) {
-   DRM_DEBUG_DRIVER("No FB bound\n");
+   DRM_DEBUG_DP("No FB bound\n");
return 0;
}
 
@@ -7896,11 +7895,11 @@ static void handle_cursor_update(struct drm_plane 
*plane,
if (!plane->state->fb && !old_plane_state->fb)
return;
 
-   DRM_DEBUG_DRIVER("%s: crtc_id=%d with size %d to %d\n",
-__func__,
-amdgpu_crtc->crtc_id,
-plane->state->crtc_w,
-plane->state->crtc_h);
+   DRM_DEBUG_DP("%s: crtc_id=%d with size %d to %d\n",
+__func__,
+amdgpu_crtc->crtc_id,
+plane->state->crtc_w,
+plane->state->crtc_h);
 
	ret = get_cursor_position(plane, crtc, &position);
if (ret)
@@ -7958,8 +7957,8 @@ static void prepare_flip_isr(struct amdgpu_crtc *acrtc)
/* Mark this event as consumed */
acrtc->base.state->event = NULL;
 
-   DRM_DEBUG_DRIVER("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
-acrtc->crtc_id);
+   DRM_DEBUG_DP("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n",
+acrtc->crtc_id);
 }
 
 static void update_freesync_state_on_stream(
@@ -8265,9 +8264,9 @@ static void amdgpu_dm_commit_planes(struct 
drm_atomic_state *state,
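
For illustration, a tiny userspace sketch of why a separate debug category helps: messages are filtered by a category bitmask, so the noisy IRQ prints, once moved into their own category, no longer drown out the DRIVER-level messages. The names and bit values are illustrative, not the real drm.debug encoding.

#include <stdio.h>

#define DBG_DRIVER (1u << 1)
#define DBG_DP     (1u << 8)

static unsigned int debug_mask = DBG_DRIVER;	/* DP category left disabled */

#define dbg_printf(category, ...)		\
	do {					\
		if (debug_mask & (category))	\
			printf(__VA_ARGS__);	\
	} while (0)

int main(void)
{
	dbg_printf(DBG_DRIVER, "driver: mode set complete\n");	/* printed */
	dbg_printf(DBG_DP, "dp: pflip irq fired\n");		/* filtered out */
	return 0;
}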
   

Re: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

2021-03-22 Thread R, Bindu
[AMD Official Use Only - Internal Distribution Only]

​Hi,
The updated patch has been merged and is available with commit ID
"ef5c594461650de0a18aa0bfd240189991790d7e".
I somehow missed mailing the updated version; the updated patch is attached.
Please review and let me know if any changes are required.

Thanks,
Bindu

 ​

From: Michel Dänzer 
Sent: Monday, March 22, 2021 5:32 AM
To: R, Bindu ; Lakha, Bhawanpreet ; 
amd-gfx@lists.freedesktop.org 
Cc: Deucher, Alexander ; Zhou1, Tao 
; Feng, Kenneth 
Subject: Re: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

On 2021-03-20 1:31 a.m., R, Bindu wrote:
>
> The Update patch has been submitted.

Submitted where? Still can't see it.


--
Earthling Michel Dänzer   |   https://redhat.com
Libre software enthusiast | Mesa and X developer
From ef5c594461650de0a18aa0bfd240189991790d7e Mon Sep 17 00:00:00 2001
From: Bindu Ramamurthy 
Date: Tue, 16 Mar 2021 17:08:47 -0400
Subject: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

[Why]
Idle optimization was being disabled after commit.

[How]
Check the vblank count for display off and enable idle optimization based on this count.
Also add a check to ensure the vblank count does not decrement once it reaches 0.

Signed-off-by: Bindu Ramamurthy 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 6e7a333abbe0..f455fc3aa561 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -987,7 +987,7 @@ static void event_mall_stutter(struct work_struct *work)
 
 	if (vblank_work->enable)
 		dm->active_vblank_irq_count++;
-	else
+	else if(dm->active_vblank_irq_count)
 		dm->active_vblank_irq_count--;
 
 	dc_allow_idle_optimizations(
@@ -8705,9 +8705,14 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 		dm_enable_per_frame_crtc_master_sync(dc_state);
 		mutex_lock(&dm->dc_lock);
 		WARN_ON(!dc_commit_state(dm->dc, dc_state));
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+   /* Allow idle optimization when vblank count is 0 for display off */
+   if (dm->active_vblank_irq_count == 0)
+   dc_allow_idle_optimizations(dm->dc,true);
+#endif
 		mutex_unlock(&dm->dc_lock);
 	}
-
+			   
 	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
 		struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
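
For illustration, a small userspace sketch of the counting scheme in the attached patch: an "active vblank IRQ" count gates idle optimizations, and the decrement is clamped so it can never go below zero. Names are simplified; this is not the DC code.

#include <stdbool.h>
#include <stdio.h>

struct display_manager {
	unsigned int active_vblank_irq_count;
	bool idle_optimizations_allowed;
};

static void allow_idle_optimizations(struct display_manager *dm, bool allow)
{
	dm->idle_optimizations_allowed = allow;
	printf("idle optimizations (MALL): %d\n", allow);
}

static void vblank_event(struct display_manager *dm, bool enable)
{
	if (enable)
		dm->active_vblank_irq_count++;
	else if (dm->active_vblank_irq_count)	/* clamp at zero */
		dm->active_vblank_irq_count--;

	allow_idle_optimizations(dm, dm->active_vblank_irq_count == 0);
}

int main(void)
{
	struct display_manager dm = { 0 };

	vblank_event(&dm, true);   /* display active: optimizations off */
	vblank_event(&dm, false);  /* display off: optimizations back on */
	vblank_event(&dm, false);  /* extra disable: count stays at 0 */
	return 0;
}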


Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-22 Thread Michal Hocko
On Mon 22-03-21 14:05:48, Matthew Wilcox wrote:
> On Mon, Mar 22, 2021 at 02:49:27PM +0100, Daniel Vetter wrote:
> > On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote:
> > > Am 20.03.21 um 14:17 schrieb Daniel Vetter:
> > > > On Sat, Mar 20, 2021 at 10:04 AM Christian König
> > > >  wrote:
> > > > > Am 19.03.21 um 20:06 schrieb Daniel Vetter:
> > > > > > On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote:
> > > > > > > Am 19.03.21 um 18:52 schrieb Daniel Vetter:
> > > > > > > > On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König wrote:
> > > > > > > > > Don't print a warning when we fail to allocate a page for 
> > > > > > > > > swapping things out.
> > > > > > > > > 
> > > > > > > > > Also rely on memalloc_nofs_save/memalloc_nofs_restore instead 
> > > > > > > > > of GFP_NOFS.
> > > > > > > > Uh this part doesn't make sense. Especially since you only do 
> > > > > > > > it for the
> > > > > > > > debugfs file, not in general. Which means you've just 
> > > > > > > > completely broken
> > > > > > > > the shrinker.
> > > > > > > Are you sure? My impression is that GFP_NOFS should now work much 
> > > > > > > more out
> > > > > > > of the box with the memalloc_nofs_save()/memalloc_nofs_restore().
> > > > > > Yeah, if you'd put it in the right place :-)
> > > > > > 
> > > > > > But also -mm folks are very clear that memalloc_no*() family is for 
> > > > > > dire
> > > > > > situation where there's really no other way out. For anything where 
> > > > > > you
> > > > > > know what you're doing, you really should use explicit gfp flags.
> > > > > My impression is just the other way around. You should try to avoid 
> > > > > the
> > > > > NOFS/NOIO flags and use the memalloc_no* approach instead.
> > > > Where did you get that idea?
> > > 
> > > Well from the kernel comment on GFP_NOFS:
> > > 
> > >  * %GFP_NOFS will use direct reclaim but will not use any filesystem
> > > interfaces.
> > >  * Please try to avoid using this flag directly and instead use
> > >  * memalloc_nofs_{save,restore} to mark the whole scope which
> > > cannot/shouldn't
> > >  * recurse into the FS layer with a short explanation why. All allocation
> > >  * requests will inherit GFP_NOFS implicitly.
> > 
> > Huh that's interesting, since iirc Willy or Dave told me the opposite, and
> > the memalloc_no* stuff is for e.g. nfs calling into network layer (needs
> > GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think).
> > 
> > Adding them, maybe I got confused.
> 
> My impression is that the scoped API is preferred these days.
> 
> https://www.kernel.org/doc/html/latest/core-api/gfp_mask-from-fs-io.html
> 
> I'd probably need to spend a few months learning the DRM subsystem to
> have a more detailed opinion on whether passing GFP flags around explicitly
> or using the scope API is the better approach for your situation.

yes, in an ideal world we would have a clearly defined scope of the
reclaim recursion wrt FS/IO associated with it. I've got back to
https://lore.kernel.org/amd-gfx/20210319140857.2262-1-christian.koe...@amd.com/
and there are two things standing out. Why does ttm_tt_debugfs_shrink_show
really require NOFS semantic? And why does it play with
fs_reclaim_acquire?

-- 
Michal Hocko
SUSE Labs
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
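
For readers following the discussion, a schematic kernel-style fragment contrasting the two approaches. It is a sketch based on the scoped-API documentation linked above, not code from the patch under review, and the function names are made up:

#include <linux/slab.h>
#include <linux/sched/mm.h>

/* (a) explicit gfp flags: each call site states "no FS recursion" itself */
static void *alloc_explicit(size_t size)
{
	return kmalloc(size, GFP_NOFS);
}

/* (b) scoped API: mark the whole region that must not recurse into the FS;
 * every allocation inside it, including in callees, inherits GFP_NOFS. */
static void *alloc_scoped(size_t size)
{
	unsigned int nofs_flags;
	void *p;

	nofs_flags = memalloc_nofs_save();
	p = kmalloc(size, GFP_KERNEL);	/* behaves as GFP_NOFS inside the scope */
	memalloc_nofs_restore(nofs_flags);
	return p;
}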


Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-22 Thread Christian König

Am 22.03.21 um 18:02 schrieb Daniel Vetter:

On Mon, Mar 22, 2021 at 5:06 PM Michal Hocko  wrote:

On Mon 22-03-21 14:05:48, Matthew Wilcox wrote:

On Mon, Mar 22, 2021 at 02:49:27PM +0100, Daniel Vetter wrote:

On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote:

Am 20.03.21 um 14:17 schrieb Daniel Vetter:

On Sat, Mar 20, 2021 at 10:04 AM Christian König
 wrote:

Am 19.03.21 um 20:06 schrieb Daniel Vetter:

On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote:

Am 19.03.21 um 18:52 schrieb Daniel Vetter:

On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König wrote:

Don't print a warning when we fail to allocate a page for swapping things out.

Also rely on memalloc_nofs_save/memalloc_nofs_restore instead of GFP_NOFS.

Uh this part doesn't make sense. Especially since you only do it for the
debugfs file, not in general. Which means you've just completely broken
the shrinker.

Are you sure? My impression is that GFP_NOFS should now work much more out
of the box with the memalloc_nofs_save()/memalloc_nofs_restore().

Yeah, if you'd put it in the right place :-)

But also -mm folks are very clear that memalloc_no*() family is for dire
situation where there's really no other way out. For anything where you
know what you're doing, you really should use explicit gfp flags.

My impression is just the other way around. You should try to avoid the
NOFS/NOIO flags and use the memalloc_no* approach instead.

Where did you get that idea?

Well from the kernel comment on GFP_NOFS:

  * %GFP_NOFS will use direct reclaim but will not use any filesystem
interfaces.
  * Please try to avoid using this flag directly and instead use
  * memalloc_nofs_{save,restore} to mark the whole scope which
cannot/shouldn't
  * recurse into the FS layer with a short explanation why. All allocation
  * requests will inherit GFP_NOFS implicitly.

Huh that's interesting, since iirc Willy or Dave told me the opposite, and
the memalloc_no* stuff is for e.g. nfs calling into network layer (needs
GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think).

Adding them, maybe I got confused.

My impression is that the scoped API is preferred these days.

https://www.kernel.org/doc/html/latest/core-api/gfp_mask-from-fs-io.html

I'd probably need to spend a few months learning the DRM subsystem to
have a more detailed opinion on whether passing GFP flags around explicitly
or using the scope API is the better approach for your situation.

yes, in an ideal world we would have a clearly defined scope of the
reclaim recursion wrt FS/IO associated with it. I've got back to
https://lore.kernel.org/amd-gfx/20210319140857.2262-1-christian.koe...@amd.com/
and there are two things standing out. Why does ttm_tt_debugfs_shrink_show
really require NOFS semantic? And why does it play with
fs_reclaim_acquire?

It's our shrinker. shrink_show simply triggers that specific shrinker
asking it to shrink everything it can, which helps a lot with testing
without having to drive the entire system against the OOM wall.
fs_reclaim_acquire is there to make sure lockdep understands that this
is a shrinker and that it checks all the dependencies for us like if
we'd be in real reclaim. There is some drop caches interfaces in proc
iirc, but those drop everything, and they don't have the fs_reclaim
annotations to teach lockdep about what we're doing.


To summarize, the debugfs code is basically there to test whether that stuff 
really works with GFP_NOFS.


My only concern is that if I could rely on memalloc_no* being used we 
could optimize this quite a bit further.


Regards,
Christian.


-Daniel


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Amdgpu kernel oops and freezing on system suspend and hibernate

2021-03-22 Thread Alex Deucher
On Thu, Mar 18, 2021 at 8:19 AM Harvey  wrote:
>
> Alex,
>
> I waited for kernel 5.11.7 to hit our repos yesterday evening and tested
> again:
>
> 1. The suspend issue is gone - suspend and resume now work as expected.
>
> 2. System hibernation seems to be a different beast - still freezing

You need this patch:
https://gitlab.freedesktop.org/agd5f/linux/-/commit/711c13547aad08f2cfe996e0cddc3d56f1233081

Alex
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH 00/44] Add HMM-based SVM memory manager to KFD v2

2021-03-22 Thread Daniel Vetter
On Mon, Mar 22, 2021 at 5:07 PM Felix Kuehling  wrote:
>
> Am 2021-03-22 um 10:15 a.m. schrieb Daniel Vetter:
> > On Mon, Mar 22, 2021 at 06:58:16AM -0400, Felix Kuehling wrote:
> >> Since the last patch series I sent on Jan 6 a lot has changed. Patches 1-33
> >> are the cleaned up, rebased on amd-staging-drm-next 5.11 version from about
> >> a week ago. The remaining 11 patches are current work-in-progress with
> >> further cleanup and fixes.
> >>
> >> MMU notifiers and CPU page faults now can split ranges and update our range
> >> data structures without taking heavy locks by doing some of the critical
> >> work in a deferred work handler. This includes updating MMU notifiers and
> >> the SVM range interval tree. In the mean time, new ranges can live as
> >> children of their parent ranges until the deferred work handler 
> >> consolidates
> >> them in the main interval tree.
> > I'm totally swamped with intel stuff unfortunately, so not really time to
> > dig in. Can you give me the spoiler on how the (gfx10+ iirc) page fault
> > inversion is planned to be handled now? Or that still tbd?
>
> Navi is still TBD. This patch series focuses on GFXv9 because that's the
> IP our data center GPUs are on. The code here has two modes of
> operations, one that relies on page faults and one that relies on
> preemptions. The latter should work on Navi just fine. So that's our
> minimal fallback option.
>
>
> >
> > Other thing I noticed is that amdkfd still uses the mmu_notifier directly,
> > and not the mmu_interval_notifier. But you're talking a lot about managing
> > intervals here, and so I'm wondering whether we shouldn't do this in core
> > code? Everyone will have the same painful locking problems here (well atm
> > everyone = you only I think), sharing this imo would make a ton of
> > sense.
>
> We use mmu_interval_notifiers in all the range-based code, including
> even our legacy userptr code. The only non-interval notifier that's
> still in use in KFD is the one we use for cleanup on process termination.

I guess my git grep got wrong, I thought I've only found it in the
amdgpu userptr code, not on the amdkfd side of things. Sounds all
good.
-Daniel

>
>
> >
> > I think the other one is moving over more generic pasid code, but I think
> > that's going to be less useful here and maybe more a long term project.
>
> Yes, it's unrelated to this work.
>
> Regards,
>   Felix
>
>
> >
> > Cheers, Daniel
> >
> >> We also added proper DMA mapping of system memory pages.
> >>
> >> Current work in progress is cleaning up all the locking, simplifying our
> >> code and data structures and resolving a few known bugs.
> >>
> >> This series and the corresponding ROCm Thunk and KFDTest changes are also
> >> available on github:
> >>   
> >> https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/tree/fxkamd/hmm-wip
> >>   
> >> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip
> >>
> >> An updated Thunk
> >>
> >> Alex Sierra (10):
> >>   drm/amdgpu: replace per_device_list by array
> >>   drm/amdkfd: helper to convert gpu id and idx
> >>   drm/amdkfd: add xnack enabled flag to kfd_process
> >>   drm/amdkfd: add ioctl to configure and query xnack retries
> >>   drm/amdgpu: enable 48-bit IH timestamp counter
> >>   drm/amdkfd: SVM API call to restore page tables
> >>   drm/amdkfd: add svm_bo reference for eviction fence
> >>   drm/amdgpu: add param bit flag to create SVM BOs
> >>   drm/amdgpu: svm bo enable_signal call condition
> >>   drm/amdgpu: add svm_bo eviction to enable_signal cb
> >>
> >> Felix Kuehling (22):
> >>   drm/amdkfd: map svm range to GPUs
> >>   drm/amdkfd: svm range eviction and restore
> >>   drm/amdkfd: validate vram svm range from TTM
> >>   drm/amdkfd: HMM migrate ram to vram
> >>   drm/amdkfd: HMM migrate vram to ram
> >>   drm/amdkfd: invalidate tables on page retry fault
> >>   drm/amdkfd: page table restore through svm API
> >>   drm/amdkfd: add svm_bo eviction mechanism support
> >>   drm/amdkfd: refine migration policy with xnack on
> >>   drm/amdkfd: add svm range validate timestamp
> >>   drm/amdkfd: multiple gpu migrate vram to vram
> >>   drm/amdkfd: Fix dma unmapping
> >>   drm/amdkfd: Call mutex_destroy
> >>   drm/amdkfd: Fix spurious restore failures
> >>   drm/amdkfd: Fix svm_bo_list locking in eviction worker
> >>   drm/amdkfd: Simplify split_by_granularity
> >>   drm/amdkfd: Point out several race conditions
> >>   drm/amdkfd: Return pdd from kfd_process_device_from_gduid
> >>   drm/amdkfd: Remove broken deferred mapping
> >>   drm/amdkfd: Allow invalid pages in migration.src
> >>   drm/amdkfd: Correct locking during migration and mapping
> >>   drm/amdkfd: Nested locking and invalidation of child ranges
> >>
> >> Philip Yang (12):
> >>   drm/amdkfd: add svm ioctl API
> >>   drm/amdkfd: register svm range
> >>   drm/amdkfd: add svm ioctl GET_ATTR op
> >>   drm/amdgpu: add common HMM get pages function
> >>   drm/amdkfd: validate svm range system memory
> >>   

Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-22 Thread Daniel Vetter
On Mon, Mar 22, 2021 at 5:06 PM Michal Hocko  wrote:
>
> On Mon 22-03-21 14:05:48, Matthew Wilcox wrote:
> > On Mon, Mar 22, 2021 at 02:49:27PM +0100, Daniel Vetter wrote:
> > > On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote:
> > > > Am 20.03.21 um 14:17 schrieb Daniel Vetter:
> > > > > On Sat, Mar 20, 2021 at 10:04 AM Christian König
> > > > >  wrote:
> > > > > > Am 19.03.21 um 20:06 schrieb Daniel Vetter:
> > > > > > > On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote:
> > > > > > > > Am 19.03.21 um 18:52 schrieb Daniel Vetter:
> > > > > > > > > On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König 
> > > > > > > > > wrote:
> > > > > > > > > > Don't print a warning when we fail to allocate a page for 
> > > > > > > > > > swapping things out.
> > > > > > > > > >
> > > > > > > > > > Also rely on memalloc_nofs_save/memalloc_nofs_restore 
> > > > > > > > > > instead of GFP_NOFS.
> > > > > > > > > Uh this part doesn't make sense. Especially since you only do 
> > > > > > > > > it for the
> > > > > > > > > debugfs file, not in general. Which means you've just 
> > > > > > > > > completely broken
> > > > > > > > > the shrinker.
> > > > > > > > Are you sure? My impression is that GFP_NOFS should now work 
> > > > > > > > much more out
> > > > > > > > of the box with the 
> > > > > > > > memalloc_nofs_save()/memalloc_nofs_restore().
> > > > > > > Yeah, if you'd put it in the right place :-)
> > > > > > >
> > > > > > > But also -mm folks are very clear that memalloc_no*() family is 
> > > > > > > for dire
> > > > > > > situation where there's really no other way out. For anything 
> > > > > > > where you
> > > > > > > know what you're doing, you really should use explicit gfp flags.
> > > > > > My impression is just the other way around. You should try to avoid 
> > > > > > the
> > > > > > NOFS/NOIO flags and use the memalloc_no* approach instead.
> > > > > Where did you get that idea?
> > > >
> > > > Well from the kernel comment on GFP_NOFS:
> > > >
> > > >  * %GFP_NOFS will use direct reclaim but will not use any filesystem
> > > > interfaces.
> > > >  * Please try to avoid using this flag directly and instead use
> > > >  * memalloc_nofs_{save,restore} to mark the whole scope which
> > > > cannot/shouldn't
> > > >  * recurse into the FS layer with a short explanation why. All 
> > > > allocation
> > > >  * requests will inherit GFP_NOFS implicitly.
> > >
> > > Huh that's interesting, since iirc Willy or Dave told me the opposite, and
> > > the memalloc_no* stuff is for e.g. nfs calling into network layer (needs
> > > GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think).
> > >
> > > Adding them, maybe I got confused.
> >
> > My impression is that the scoped API is preferred these days.
> >
> > https://www.kernel.org/doc/html/latest/core-api/gfp_mask-from-fs-io.html
> >
> > I'd probably need to spend a few months learning the DRM subsystem to
> > have a more detailed opinion on whether passing GFP flags around explicitly
> > or using the scope API is the better approach for your situation.
>
> yes, in an ideal world we would have a clearly defined scope of the
> reclaim recursion wrt FS/IO associated with it. I've got back to
> https://lore.kernel.org/amd-gfx/20210319140857.2262-1-christian.koe...@amd.com/
> and there are two things standing out. Why does ttm_tt_debugfs_shrink_show
> really require NOFS semantic? And why does it play with
> fs_reclaim_acquire?

It's our shrinker. shrink_show simply triggers that specific shrinker
asking it to shrink everything it can, which helps a lot with testing
without having to drive the entire system against the OOM wall.
fs_reclaim_acquire is there to make sure lockdep understands that this
is a shrinker and that it checks all the dependencies for us like if
we'd be in real reclaim. There is some drop caches interfaces in proc
iirc, but those drop everything, and they don't have the fs_reclaim
annotations to teach lockdep about what we're doing.
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: Amdgpu kernel oops and freezing on system suspend and hibernate

2021-03-22 Thread Harvey

Still freezing on 5.11.8 and 5.12-rc4.

Log on 5.12-rc4 looks a little different:


Mär 22 17:40:26 obelix systemd[1]: Reached target Sleep.
Mär 22 17:40:26 obelix systemd[1]: Starting Hibernate...
Mär 22 17:40:26 obelix kernel: PM: hibernation: hibernation entry
Mär 22 17:40:26 obelix systemd-sleep[2380]: Suspending system...
Mär 22 17:40:46 obelix kernel: Filesystems sync: 0.012 seconds
Mär 22 17:40:46 obelix kernel: Freezing user space processes ...
Mär 22 17:40:46 obelix kernel: Freezing of tasks failed after 20.003 
seconds (1 tasks refusing to freeze, wq_busy=0):
Mär 22 17:40:46 obelix kernel: task:Xorgstate:D stack:0 
pid: 1635 ppid:  1633 flags:0x0004

Mär 22 17:40:46 obelix kernel: Call Trace:
Mär 22 17:40:46 obelix kernel:  __schedule+0x2fc/0x8b0
Mär 22 17:40:46 obelix kernel:  schedule+0x5b/0xc0
Mär 22 17:40:46 obelix kernel:  rpm_resume+0x18c/0x810
Mär 22 17:40:46 obelix kernel:  ? wait_woken+0x80/0x80
Mär 22 17:40:46 obelix kernel:  __pm_runtime_resume+0x4a/0x80
Mär 22 17:40:46 obelix kernel:  amdgpu_drm_ioctl+0x33/0x80 [amdgpu]
Mär 22 17:40:46 obelix kernel:  __x64_sys_ioctl+0x83/0xb0
Mär 22 17:40:46 obelix kernel:  do_syscall_64+0x33/0x40
Mär 22 17:40:46 obelix kernel:  entry_SYSCALL_64_after_hwframe+0x44/0xae
Mär 22 17:40:46 obelix kernel: RIP: 0033:0x7f7647d4de6b
Mär 22 17:40:46 obelix kernel: RSP: 002b:7ffec3671e88 EFLAGS: 
0246 ORIG_RAX: 0010
Mär 22 17:40:46 obelix kernel: RAX: ffda RBX: 
7ffec3671ec0 RCX: 7f7647d4de6b
Mär 22 17:40:46 obelix kernel: RDX: 7ffec3671ec0 RSI: 
c06864a2 RDI: 000d
Mär 22 17:40:46 obelix kernel: RBP: c06864a2 R08: 
 R09: 
Mär 22 17:40:46 obelix kernel: R10:  R11: 
0246 R12: 5609594eedf0
Mär 22 17:40:46 obelix kernel: R13: 000d R14: 
 R15: 

Mär 22 17:40:46 obelix kernel:
Mär 22 17:40:46 obelix kernel: OOM killer enabled.
Mär 22 17:40:46 obelix kernel: Restarting tasks ... done.
Mär 22 17:40:46 obelix kernel: thermal thermal_zone1: failed to read out 
thermal zone (-61)
Mär 22 17:40:46 obelix rtkit-daemon[1381]: The canary thread is 
apparently starving. Taking action.

Mär 22 17:40:46 obelix rtkit-daemon[1381]: Demoting known real-time threads.
Mär 22 17:40:46 obelix rtkit-daemon[1381]: Successfully demoted thread 
2346 of process 1780.
Mär 22 17:40:46 obelix rtkit-daemon[1381]: Successfully demoted thread 
1811 of process 1780.
Mär 22 17:40:46 obelix rtkit-daemon[1381]: Successfully demoted thread 
1810 of process 1780.
Mär 22 17:40:46 obelix rtkit-daemon[1381]: Successfully demoted thread 
1780 of process 1780.

Mär 22 17:40:46 obelix rtkit-daemon[1381]: Demoted 4 threads.
Mär 22 17:40:46 obelix systemd-sleep[2380]: Failed to suspend system. 
System resumed again: Device or resource busy
Mär 22 17:40:46 obelix systemd[1]: systemd-hibernate.service: Main 
process exited, code=exited, status=1/FAILURE
Mär 22 17:40:46 obelix systemd[1]: systemd-hibernate.service: Failed 
with result 'exit-code'.

Mär 22 17:40:46 obelix systemd[1]: Failed to start Hibernate.
Mär 22 17:40:46 obelix kernel: PM: hibernation: hibernation exit
Mär 22 17:40:46 obelix systemd[1]: Dependency failed for Hibernate.
Mär 22 17:40:46 obelix audit[1]: SERVICE_START pid=1 uid=0 
auid=4294967295 ses=4294967295 msg='unit=systemd-hibernate 
comm="systemd" exe="/usr/lib/systemd/systemd" hostname=? addr=? 
terminal=? res=failed'
Mär 22 17:40:46 obelix systemd[1]: hibernate.target: Job 
hibernate.target/start failed with result 'dependency'.

Mär 22 17:40:46 obelix systemd-logind[1091]: Operation 'sleep' finished.
Mär 22 17:40:46 obelix systemd[1]: Stopped target Sleep.
Mär 22 17:40:46 obelix NetworkManager[1089]:   [1616431246.8706] 
manager: sleep: wake requested (sleeping: yes  enabled: yes)
Mär 22 17:40:46 obelix kernel: audit: type=1130 
audit(1616431246.867:108): pid=1 uid=0 auid=4294967295 ses=4294967295 
msg='unit=systemd-hibernate comm="systemd" 
exe="/usr/lib/systemd/systemd" hostname=? addr=? terminal=? res=failed'
Mär 22 17:40:46 obelix NetworkManager[1089]:   [1616431246.8708] 
device (wlp4s0): state change: unmanaged -> unavailable (reason 
'managed', sys-iface-state: 'external')
Mär 22 17:40:47 obelix NetworkManager[1089]:   [1616431247.1288] 
device (p2p-dev-wlp4s0): state change: unmanaged -> unavailable (reason 
'managed', sys-iface-state: 'external')
Mär 22 17:40:47 obelix NetworkManager[1089]:   [1616431247.1296] 
manager: NetworkManager state is now DISCONNECTED
Mär 22 17:40:47 obelix NetworkManager[1089]:   [1616431247.2208] 
device (wlp4s0): supplicant interface state: internal-starting -> 
disconnected
Mär 22 17:40:47 obelix NetworkManager[1089]:   [1616431247.2209] 
device (p2p-dev-wlp4s0): state change: unavailable -> unmanaged (reason 
'removed', sys-iface-state: 'removed')
Mär 22 17:40:47 obelix NetworkManager[1089]:   [1616431247.2216] 
Wi-Fi P2P device controlled by 

Re: [PATCH 00/44] Add HMM-based SVM memory manager to KFD v2

2021-03-22 Thread Felix Kuehling
Am 2021-03-22 um 10:15 a.m. schrieb Daniel Vetter:
> On Mon, Mar 22, 2021 at 06:58:16AM -0400, Felix Kuehling wrote:
>> Since the last patch series I sent on Jan 6 a lot has changed. Patches 1-33
>> are the cleaned up, rebased on amd-staging-drm-next 5.11 version from about
>> a week ago. The remaining 11 patches are current work-in-progress with
>> further cleanup and fixes.
>>
>> MMU notifiers and CPU page faults now can split ranges and update our range
>> data structures without taking heavy locks by doing some of the critical
>> work in a deferred work handler. This includes updating MMU notifiers and
>> the SVM range interval tree. In the mean time, new ranges can live as
>> children of their parent ranges until the deferred work handler consolidates
>> them in the main interval tree.
> I'm totally swamped with intel stuff unfortunately, so not really time to
> dig in. Can you give me the spoiler on how the (gfx10+ iirc) page fault
> inversion is planned to be handled now? Or that still tbd?

Navi is still TBD. This patch series focuses on GFXv9 because that's the
IP our data center GPUs are on. The code here has two modes of
operations, one that relies on page faults and one that relies on
preemptions. The latter should work on Navi just fine. So that's our
minimal fallback option.


>
> Other thing I noticed is that amdkfd still uses the mmu_notifier directly,
> and not the mmu_interval_notifier. But you're talking a lot about managing
> intervals here, and so I'm wondering whether we shouldn't do this in core
> code? Everyone will have the same painful locking problems here (well atm
> everyone = you only I think), sharing this imo would make a ton of
> sense.

We use mmu_interval_notifiers in all the range-based code, including
even our legacy userptr code. The only non-interval notifier that's
still in use in KFD is the one we use for cleanup on process termination.


>
> I think the other one is moving over more generic pasid code, but I think
> that's going to be less useful here and maybe more a long term project.

Yes, it's unrelated to this work.

Regards,
  Felix


>
> Cheers, Daniel
>
>> We also added proper DMA mapping of system memory pages.
>>
>> Current work in progress is cleaning up all the locking, simplifying our
>> code and data structures and resolving a few known bugs.
>>
>> This series and the corresponding ROCm Thunk and KFDTest changes are also
>> available on github:
>>   https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/tree/fxkamd/hmm-wip
>>   
>> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip
>>
>> An updated Thunk
>>
>> Alex Sierra (10):
>>   drm/amdgpu: replace per_device_list by array
>>   drm/amdkfd: helper to convert gpu id and idx
>>   drm/amdkfd: add xnack enabled flag to kfd_process
>>   drm/amdkfd: add ioctl to configure and query xnack retries
>>   drm/amdgpu: enable 48-bit IH timestamp counter
>>   drm/amdkfd: SVM API call to restore page tables
>>   drm/amdkfd: add svm_bo reference for eviction fence
>>   drm/amdgpu: add param bit flag to create SVM BOs
>>   drm/amdgpu: svm bo enable_signal call condition
>>   drm/amdgpu: add svm_bo eviction to enable_signal cb
>>
>> Felix Kuehling (22):
>>   drm/amdkfd: map svm range to GPUs
>>   drm/amdkfd: svm range eviction and restore
>>   drm/amdkfd: validate vram svm range from TTM
>>   drm/amdkfd: HMM migrate ram to vram
>>   drm/amdkfd: HMM migrate vram to ram
>>   drm/amdkfd: invalidate tables on page retry fault
>>   drm/amdkfd: page table restore through svm API
>>   drm/amdkfd: add svm_bo eviction mechanism support
>>   drm/amdkfd: refine migration policy with xnack on
>>   drm/amdkfd: add svm range validate timestamp
>>   drm/amdkfd: multiple gpu migrate vram to vram
>>   drm/amdkfd: Fix dma unmapping
>>   drm/amdkfd: Call mutex_destroy
>>   drm/amdkfd: Fix spurious restore failures
>>   drm/amdkfd: Fix svm_bo_list locking in eviction worker
>>   drm/amdkfd: Simplify split_by_granularity
>>   drm/amdkfd: Point out several race conditions
>>   drm/amdkfd: Return pdd from kfd_process_device_from_gduid
>>   drm/amdkfd: Remove broken deferred mapping
>>   drm/amdkfd: Allow invalid pages in migration.src
>>   drm/amdkfd: Correct locking during migration and mapping
>>   drm/amdkfd: Nested locking and invalidation of child ranges
>>
>> Philip Yang (12):
>>   drm/amdkfd: add svm ioctl API
>>   drm/amdkfd: register svm range
>>   drm/amdkfd: add svm ioctl GET_ATTR op
>>   drm/amdgpu: add common HMM get pages function
>>   drm/amdkfd: validate svm range system memory
>>   drm/amdkfd: deregister svm range
>>   drm/amdgpu: export vm update mapping interface
>>   drm/amdkfd: register HMM device private zone
>>   drm/amdkfd: support xgmi same hive mapping
>>   drm/amdkfd: copy memory through gart table
>>   drm/amdgpu: reserve fence slot to update page table
>>   drm/amdkfd: Add SVM API support capability bits
>>
>>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|4 +
>>  

Re: [PATCH] drm/amdgpu: Ensure that the modifier requested is supported by plane.

2021-03-22 Thread Mark Yacoub
"friendly ping"

On Wed, Mar 10, 2021 at 11:14 AM Mark Yacoub 
wrote:

> From: Mark Yacoub 
>
> On initializing the framebuffer, call drm_any_plane_has_format to do a
> check if the modifier is supported. drm_any_plane_has_format calls
> dm_plane_format_mod_supported which is extended to validate that the
> modifier is on the list of the plane's supported modifiers.
>
> The bug was caught using igt-gpu-tools test:
> kms_addfb_basic.addfb25-bad-modifier
>
> Tested on ChromeOS Zork by turning on the display, running an overlay
> test, and running a YT video.
>
> Cc: Alex Deucher 
> Cc: Bas Nieuwenhuizen 
> Signed-off-by: Mark Yacoub 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_display.c   | 13 +
>  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  9 +
>  2 files changed, 22 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> index afa5f8ad0f563..a947b5aa420d2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
> @@ -908,6 +908,19 @@ int amdgpu_display_gem_fb_verify_and_init(
>  &amdgpu_fb_funcs);
> if (ret)
> goto err;
> +   /* Verify that the modifier is supported. */
> +   if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format,
> + mode_cmd->modifier[0])) {
> +   struct drm_format_name_buf format_name;
> +   drm_dbg_kms(dev,
> +   "unsupported pixel format %s / modifier
> 0x%llx\n",
> +   drm_get_format_name(mode_cmd->pixel_format,
>   &format_name),
> +   mode_cmd->modifier[0]);
> +
> +   ret = -EINVAL;
> +   goto err;
> +   }
>
> ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj);
> if (ret)
> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> index 961abf1cf040c..21314024a83ce 100644
> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> @@ -3939,6 +3939,7 @@ static bool dm_plane_format_mod_supported(struct
> drm_plane *plane,
>  {
> struct amdgpu_device *adev = drm_to_adev(plane->dev);
> const struct drm_format_info *info = drm_format_info(format);
> +   int i;
>
> enum dm_micro_swizzle microtile =
> modifier_gfx9_swizzle_mode(modifier) & 3;
>
> @@ -3952,6 +3953,14 @@ static bool dm_plane_format_mod_supported(struct
> drm_plane *plane,
> if (modifier == DRM_FORMAT_MOD_LINEAR)
> return true;
>
> +   /* Check that the modifier is on the list of the plane's supported
> modifiers. */
> +   for (i = 0; i < plane->modifier_count; i++) {
> +   if (modifier == plane->modifiers[i])
> +   break;
> +   }
> +   if (i == plane->modifier_count)
> +   return false;
> +
> /*
>  * The arbitrary tiling support for multiplane formats has not
> been hooked
>  * up.
> --
> 2.30.1.766.gb4fecdf3b7-goog
>
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
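
For illustration, a userspace sketch of the check the patch adds: walk the plane's advertised modifier list and reject anything that is not on it, while always accepting linear. The names and modifier values are illustrative, not the real AMD modifier encoding.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#define MOD_LINEAR 0ULL	/* stand-in for DRM_FORMAT_MOD_LINEAR */

static bool plane_modifier_supported(const uint64_t *modifiers, size_t count,
				     uint64_t modifier)
{
	size_t i;

	if (modifier == MOD_LINEAR)	/* linear is always accepted */
		return true;

	for (i = 0; i < count; i++)
		if (modifiers[i] == modifier)
			return true;

	return false;	/* e.g. igt's addfb25-bad-modifier case */
}

int main(void)
{
	const uint64_t supported[] = { MOD_LINEAR, 0x100ULL, 0x200ULL };
	size_t n = sizeof(supported) / sizeof(supported[0]);

	printf("0x100: %d\n", plane_modifier_supported(supported, n, 0x100ULL));
	printf("0xbad: %d\n", plane_modifier_supported(supported, n, 0xbadULL));
	return 0;
}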


Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-22 Thread Matthew Wilcox
On Mon, Mar 22, 2021 at 02:49:27PM +0100, Daniel Vetter wrote:
> On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote:
> > Am 20.03.21 um 14:17 schrieb Daniel Vetter:
> > > On Sat, Mar 20, 2021 at 10:04 AM Christian König
> > >  wrote:
> > > > Am 19.03.21 um 20:06 schrieb Daniel Vetter:
> > > > > On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote:
> > > > > > Am 19.03.21 um 18:52 schrieb Daniel Vetter:
> > > > > > > On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König wrote:
> > > > > > > > Don't print a warning when we fail to allocate a page for 
> > > > > > > > swapping things out.
> > > > > > > > 
> > > > > > > > Also rely on memalloc_nofs_save/memalloc_nofs_restore instead 
> > > > > > > > of GFP_NOFS.
> > > > > > > Uh this part doesn't make sense. Especially since you only do it 
> > > > > > > for the
> > > > > > > debugfs file, not in general. Which means you've just completely 
> > > > > > > broken
> > > > > > > the shrinker.
> > > > > > Are you sure? My impression is that GFP_NOFS should now work much 
> > > > > > more out
> > > > > > of the box with the memalloc_nofs_save()/memalloc_nofs_restore().
> > > > > Yeah, if you'd put it in the right place :-)
> > > > > 
> > > > > But also -mm folks are very clear that memalloc_no*() family is for 
> > > > > dire
> > > > > situation where there's really no other way out. For anything where 
> > > > > you
> > > > > know what you're doing, you really should use explicit gfp flags.
> > > > My impression is just the other way around. You should try to avoid the
> > > > NOFS/NOIO flags and use the memalloc_no* approach instead.
> > > Where did you get that idea?
> > 
> > Well from the kernel comment on GFP_NOFS:
> > 
> >  * %GFP_NOFS will use direct reclaim but will not use any filesystem
> > interfaces.
> >  * Please try to avoid using this flag directly and instead use
> >  * memalloc_nofs_{save,restore} to mark the whole scope which
> > cannot/shouldn't
> >  * recurse into the FS layer with a short explanation why. All allocation
> >  * requests will inherit GFP_NOFS implicitly.
> 
> Huh that's interesting, since iirc Willy or Dave told me the opposite, and
> the memalloc_no* stuff is for e.g. nfs calling into network layer (needs
> GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think).
> 
> Adding them, maybe I got confused.

My impression is that the scoped API is preferred these days.

https://www.kernel.org/doc/html/latest/core-api/gfp_mask-from-fs-io.html

I'd probably need to spend a few months learning the DRM subsystem to
have a more detailed opinion on whether passing GFP flags around explicitly
or using the scope API is the better approach for your situation.

I usually defer to Michal on these kinds of questions.

> > > The kernel is full of explicit gfp_t flag
> > > passing to make this as explicit as possible. The memalloc_no* stuff
> > > is just for when you go through entire subsystems and really can't
> > > wire it through. I can't find the discussion anymore, but that was the
> > > advice I got from mm/fs people.
> > > 
> > > One reason is that generally a small GFP_KERNEL allocation never
> > > fails. But it absolutely can fail if it's in a memalloc_no* section,
> > > and these kind of non-obvious non-local effects are a real pain in
> > > testing and review. Hence explicit gfp_flag passing as much as
> > > possible.

I agree with this; it's definitely a problem with the scope API.  I wanted
to extend it to include GFP_NOWAIT, but if you do that, your chances of
memory allocation failure go way up, so you really want to set __GFP_NOWARN
too, but now you need to audit all the places that you're calling to be
sure they really handle errors correctly.

So I think I'm giving up on that patch set.

> > > > > > > If this is just to paper over the seq_printf doing the wrong 
> > > > > > > allocations,
> > > > > > > then just move that out from under the fs_reclaim_acquire/release 
> > > > > > > part.
> > > > > > No, that wasn't the problem.
> > > > > > 
> > > > > > We have just seen to many failures to allocate pages for swapout 
> > > > > > and I think
> > > > > > that would improve this because in a lot of cases we can then 
> > > > > > immediately
> > > > > > swap things out instead of having to rely on upper layers.
> > > > > Yeah, you broke it. Now the real shrinker is running with GFP_KERNEL,
> > > > > because your memalloc_no is only around the debugfs function. And ofc 
> > > > > it's
> > > > > much easier to allocate with GFP_KERNEL, right until you deadlock :-)
> > > > The problem here is that for example kswapd calls the shrinker without
> > > > holding a FS lock as far as I can see.
> > > > 
> > > > And it is rather sad that we can't optimize this case directly.
> > > I'm still not clear what you want to optimize? You can check for "is
> > > this kswapd" in pf flags, but that sounds very hairy and fragile.
> > 
> > Well we only need the NOFS flag when the shrinker callback really comes 

Re: [PATCH] amdgpu: avoid incorrect %hu format string

2021-03-22 Thread Tom Rix


On 3/22/21 4:54 AM, Arnd Bergmann wrote:
> From: Arnd Bergmann 
>
> clang points out that the %hu format string does not match the type
> of the variables here:
>
> drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c:263:7: warning: format specifies type 
> 'unsigned short' but the argument has type 'unsigned int' [-Wformat]
>   version_major, version_minor);
>   ^
> include/drm/drm_print.h:498:19: note: expanded from macro 'DRM_ERROR'
> __drm_err(fmt, ##__VA_ARGS__)
>   ~~~^~~
>
> Change it to a regular %u, the same way a previous patch did for
> another instance of the same warning.

It would be good to explicitly call out the change.

ex/ do you mean mine ?

0b437e64e0af ("drm/amdgpu: remove h from printk format specifier")

This was for a different reason.

imo, you do not need to include what another patch did.

so you could also just remove this bit from the commit log.


The change itself looks good.

Reviewed-by: Tom Rix 

>
> Fixes: 0b437e64e0af ("drm/amdgpu: remove h from printk format specifier")
> Signed-off-by: Arnd Bergmann 
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index e2ed4689118a..c6dbc0801604 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -259,7 +259,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
>   if ((adev->asic_type == CHIP_POLARIS10 ||
>adev->asic_type == CHIP_POLARIS11) &&
>   (adev->uvd.fw_version < FW_1_66_16))
> - DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is 
> too old.\n",
> + DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is 
> too old.\n",
> version_major, version_minor);
>   } else {
>   unsigned int enc_major, enc_minor, dec_minor;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
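
A small userspace example of the mismatch being fixed: %hu expects a (promoted) unsigned short, while the version fields here are unsigned int, so %u is the matching specifier. The values are made up; clang's -Wformat flags the first printf just like in the quoted warning.

#include <stdio.h>

int main(void)
{
	unsigned int version_major = 1, version_minor = 66;

	/* mismatched: -Wformat warns that %hu expects unsigned short */
	printf("UVD firmware version %hu.%hu is too old.\n",
	       version_major, version_minor);

	/* matching specifier for unsigned int */
	printf("UVD firmware version %u.%u is too old.\n",
	       version_major, version_minor);
	return 0;
}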


Re: [PATCH][next] drm/amd/display: Fix sizeof arguments in bw_calcs_init()

2021-03-22 Thread Gustavo A. R. Silva



On 3/22/21 09:04, Chen, Guchun wrote:
> [AMD Public Use]
> 
> Thanks for your patch, Silva. The issue has been fixed by " a5c6007e20e1 
> drm/amd/display: fix modprobe failure on vega series".

Great. :)
Good to know this is already fixed.

Thanks!
--
Gustavo
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: 16 bpc fixed point (RGBA16) framebuffer support for core and AMD.

2021-03-22 Thread Ville Syrjälä
On Fri, Mar 19, 2021 at 10:03:12PM +0100, Mario Kleiner wrote:
> Hi,
> 
> this patch series adds the fourcc's for 16 bit fixed point unorm
> framebuffers to the core, and then an implementation for AMD gpu's
> with DisplayCore.
> 
> This is intended to allow for pageflipping to, and direct scanout of,
> Vulkan swapchain images in the format VK_FORMAT_R16G16B16A16_UNORM.
> I have patched AMD's GPUOpen amdvlk OSS driver to enable this format
> for swapchains, mapping to DRM_FORMAT_XBGR16161616:
> Link: 
> https://github.com/kleinerm/pal/commit/a25d4802074b13a8d5f7edc96ae45469ecbac3c4

We should also add support for these formats into igt. Should
be semi-easy by just adding the suitable float<->uint16
conversion stuff.

-- 
Ville Syrjälä
Intel
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
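
A sketch of the float<->uint16 unorm conversion helpers Ville suggests adding to igt: round to nearest on the way in, a simple scale on the way out. This is an illustration, not igt code.

#include <stdint.h>
#include <stdio.h>

static uint16_t f32_to_unorm16(float v)
{
	if (v <= 0.0f)
		return 0;
	if (v >= 1.0f)
		return 0xffff;
	return (uint16_t)(v * 65535.0f + 0.5f);	/* round to nearest */
}

static float unorm16_to_f32(uint16_t v)
{
	return (float)v / 65535.0f;
}

int main(void)
{
	uint16_t half = f32_to_unorm16(0.5f);

	printf("0.5 -> %u -> %f\n", half, unorm16_to_f32(half));
	return 0;
}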


Re: [PATCH] drm/amd/pm: drop redundant and unneeded BACO APIs V2

2021-03-22 Thread Deucher, Alexander
[AMD Official Use Only - Internal Distribution Only]

Reviewed-by: Alex Deucher 

From: amd-gfx  on behalf of Evan Quan 

Sent: Monday, March 22, 2021 2:11 AM
To: amd-gfx@lists.freedesktop.org 
Cc: Quan, Evan 
Subject: [PATCH] drm/amd/pm: drop redundant and unneeded BACO APIs V2

Use other APIs which provide the same functionality but are much
cleaner.

V2: drop unneeded intermediate interface

Change-Id: I5e9e0ab5d39b49b02434f18e12392b13931396be
Signed-off-by: Evan Quan 
---
 drivers/gpu/drm/amd/amdgpu/nv.c   | 25 +-
 drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h   |  9 ---
 drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 95 ---
 3 files changed, 3 insertions(+), 126 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index e9cc3201054f..46d4bbabce75 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -515,21 +515,9 @@ static int nv_asic_mode2_reset(struct amdgpu_device *adev)
 return ret;
 }

-static bool nv_asic_supports_baco(struct amdgpu_device *adev)
-{
-   struct smu_context *smu = &adev->smu;
-
-   if (smu_baco_is_support(smu))
-   return true;
-   else
-   return false;
-}
-
 static enum amd_reset_method
 nv_asic_reset_method(struct amdgpu_device *adev)
 {
-   struct smu_context *smu = &adev->smu;
-
 if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 ||
 amdgpu_reset_method == AMD_RESET_METHOD_MODE2 ||
 amdgpu_reset_method == AMD_RESET_METHOD_BACO ||
@@ -548,7 +536,7 @@ nv_asic_reset_method(struct amdgpu_device *adev)
 case CHIP_DIMGREY_CAVEFISH:
 return AMD_RESET_METHOD_MODE1;
 default:
-   if (smu_baco_is_support(smu))
+   if (amdgpu_dpm_is_baco_supported(adev))
 return AMD_RESET_METHOD_BACO;
 else
 return AMD_RESET_METHOD_MODE1;
@@ -558,7 +546,6 @@ nv_asic_reset_method(struct amdgpu_device *adev)
 static int nv_asic_reset(struct amdgpu_device *adev)
 {
 int ret = 0;
-   struct smu_context *smu = &adev->smu;

 switch (nv_asic_reset_method(adev)) {
 case AMD_RESET_METHOD_PCI:
@@ -567,13 +554,7 @@ static int nv_asic_reset(struct amdgpu_device *adev)
 break;
 case AMD_RESET_METHOD_BACO:
 dev_info(adev->dev, "BACO reset\n");
-
-   ret = smu_baco_enter(smu);
-   if (ret)
-   return ret;
-   ret = smu_baco_exit(smu);
-   if (ret)
-   return ret;
+   ret = amdgpu_dpm_baco_reset(adev);
 break;
 case AMD_RESET_METHOD_MODE2:
 dev_info(adev->dev, "MODE2 reset\n");
@@ -981,7 +962,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs =
 .need_full_reset = &nv_need_full_reset,
 .need_reset_on_init = &nv_need_reset_on_init,
 .get_pcie_replay_count = &nv_get_pcie_replay_count,
-   .supports_baco = &nv_asic_supports_baco,
+   .supports_baco = &amdgpu_dpm_is_baco_supported,
 .pre_asic_init = &nv_pre_asic_init,
 .update_umd_stable_pstate = &nv_update_umd_stable_pstate,
 .query_video_codecs = &nv_query_video_codecs,
diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h 
b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
index 517f333fbc4b..02675155028d 100644
--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
@@ -1285,15 +1285,6 @@ int smu_get_power_limit(struct smu_context *smu,
 uint32_t *limit,
 enum smu_ppt_limit_level limit_level);

-int smu_set_azalia_d3_pme(struct smu_context *smu);
-
-bool smu_baco_is_support(struct smu_context *smu);
-
-int smu_baco_get_state(struct smu_context *smu, enum smu_baco_state *state);
-
-int smu_baco_enter(struct smu_context *smu);
-int smu_baco_exit(struct smu_context *smu);
-
 bool smu_mode1_reset_is_support(struct smu_context *smu);
 bool smu_mode2_reset_is_support(struct smu_context *smu);
 int smu_mode1_reset(struct smu_context *smu);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c 
b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 4120d28f782b..1bb0c0966e3d 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -2682,48 +2682,6 @@ static int smu_set_xgmi_pstate(void *handle,
 return ret;
 }

-int smu_set_azalia_d3_pme(struct smu_context *smu)
-{
-   int ret = 0;
-
-   if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
-   return -EOPNOTSUPP;
-
-   mutex_lock(&smu->mutex);
-
-   if (smu->ppt_funcs->set_azalia_d3_pme)
-   ret = smu->ppt_funcs->set_azalia_d3_pme(smu);
-
-   mutex_unlock(&smu->mutex);
-
-   return ret;
-}
-
-/*
- * On system suspending or resetting, the dpm_enabled
- * flag will be cleared. So that those SMU services which
- * are not supported 

Re: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

2021-03-22 Thread Steven Price

On 15/03/2021 05:23, Zhang, Jack (Jian) wrote:

[AMD Public Use]

Hi, Rob/Tomeu/Steven,

Would you please help to review this patch for panfrost driver?

Thanks,
Jack Zhang

-Original Message-
From: Jack Zhang 
Sent: Monday, March 15, 2021 1:21 PM
To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Koenig, Christian 
; Grodzovsky, Andrey ; Liu, Monk 
; Deng, Emily 
Cc: Zhang, Jack (Jian) 
Subject: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak

re-insert Bailing jobs to avoid memory leak.

V2: move re-insert step to drm/scheduler logic
V3: add panfrost's return value for bailing jobs
in case it hits the memleak issue.


This commit message could do with some work - it's really hard to 
decipher what the actual problem you're solving is.




Signed-off-by: Jack Zhang 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 8 ++--
  drivers/gpu/drm/panfrost/panfrost_job.c| 4 ++--
  drivers/gpu/drm/scheduler/sched_main.c | 8 +++-
  include/drm/gpu_scheduler.h| 1 +
  5 files changed, 19 insertions(+), 6 deletions(-)


[...]

diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c 
b/drivers/gpu/drm/panfrost/panfrost_job.c
index 6003cfeb1322..e2cb4f32dae1 100644
--- a/drivers/gpu/drm/panfrost/panfrost_job.c
+++ b/drivers/gpu/drm/panfrost/panfrost_job.c
@@ -444,7 +444,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct 
drm_sched_job
 * spurious. Bail out.
 */
if (dma_fence_is_signaled(job->done_fence))
-   return DRM_GPU_SCHED_STAT_NOMINAL;
+   return DRM_GPU_SCHED_STAT_BAILING;
  
  	dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",

js,
@@ -456,7 +456,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct 
drm_sched_job
  
  	/* Scheduler is already stopped, nothing to do. */

	if (!panfrost_scheduler_stop(&pfdev->js->queue[js], sched_job))
-   return DRM_GPU_SCHED_STAT_NOMINAL;
+   return DRM_GPU_SCHED_STAT_BAILING;
  
  	/* Schedule a reset if there's no reset in progress. */

	if (!atomic_xchg(&pfdev->reset.pending, 1))


This looks correct to me - in these two cases drm_sched_stop() is not 
called on the sched_job, so it looks like currently the job will be leaked.



diff --git a/drivers/gpu/drm/scheduler/sched_main.c 
b/drivers/gpu/drm/scheduler/sched_main.c
index 92d8de24d0a1..a44f621fb5c4 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -314,6 +314,7 @@ static void drm_sched_job_timedout(struct work_struct *work)
  {
struct drm_gpu_scheduler *sched;
struct drm_sched_job *job;
+   int ret;
  
  	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
  
@@ -331,8 +332,13 @@ static void drm_sched_job_timedout(struct work_struct *work)

		list_del_init(&job->list);
		spin_unlock(&sched->job_list_lock);
  
-		job->sched->ops->timedout_job(job);

+   ret = job->sched->ops->timedout_job(job);
  
+		if (ret == DRM_GPU_SCHED_STAT_BAILING) {

+   spin_lock(&sched->job_list_lock);
+   list_add(&job->node, &sched->ring_mirror_list);
+   spin_unlock(&sched->job_list_lock);
+   }


I think we could really do with a comment somewhere explaining what 
"bailing" means in this context. For the Panfrost case we have two cases:


 * The GPU job actually finished while the timeout code was running 
(done_fence is signalled).


 * The GPU is already in the process of being reset (Panfrost has 
multiple queues, so mostly like a bad job in another queue).
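
If we go this way, a kernel-doc blurb on the new enum value covering those
two cases might be enough, something along these lines (the wording is only
a suggestion):

    /**
     * DRM_GPU_SCHED_STAT_BAILING - the timeout handler bailed out early
     *
     * Returned from &drm_sched_backend_ops.timedout_job when the handler did
     * not stop the scheduler (e.g. the job completed while the timeout ran,
     * or a reset is already in flight), so the core must not assume the job
     * list was cleaned up by drm_sched_stop().
     */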


I'm also not convinced that (for Panfrost) it makes sense to be adding 
the jobs back to the list. For the first case above clearly the job 
could just be freed (it's complete). The second case is more interesting 
and Panfrost currently doesn't handle this well. In theory the driver 
could try to rescue the job ('soft stop' in Mali language) so that it 
could be resubmitted. Panfrost doesn't currently support that, so 
attempting to resubmit the job is almost certainly going to fail.


It's on my TODO list to look at improving Panfrost in this regard, but 
sadly still quite far down.


Steve


/*
 * Guilty job did complete and hence needs to be manually 
removed
 * See drm_sched_stop doc.
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index 4ea8606d91fe..8093ac2427ef 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -210,6 +210,7 @@ enum drm_gpu_sched_stat {
DRM_GPU_SCHED_STAT_NONE, /* Reserve 0 */
DRM_GPU_SCHED_STAT_NOMINAL,
DRM_GPU_SCHED_STAT_ENODEV,
+   DRM_GPU_SCHED_STAT_BAILING,
  };
  
  /**





Re: [PATCH] drm/amd/dispaly: fix deadlock issue in amdgpu reset

2021-03-22 Thread Andrey Grodzovsky




On 2021-03-22 4:11 a.m., Lang Yu wrote:

In amdggpu reset, while dm.dc_lock is held by dm_suspend,
handle_hpd_rx_irq tries to acquire it. Deadlock occurred!

Deadlock log:

[  104.528304] amdgpu :03:00.0: amdgpu: GPU reset begin!

[  104.640084] ==
[  104.640092] WARNING: possible circular locking dependency detected
[  104.640099] 5.11.0-custom #1 Tainted: GW   E
[  104.640107] --
[  104.640114] cat/1158 is trying to acquire lock:
[  104.640120] 88810a09ce00 ((work_completion)(>work)){+.+.}-{0:0}, at: 
__flush_work+0x2e3/0x450
[  104.640144]
but task is already holding lock:
[  104.640151] 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: 
dm_suspend+0xb2/0x1d0 [amdgpu]
[  104.640581]
which lock already depends on the new lock.

[  104.640590]
the existing dependency chain (in reverse order) is:
[  104.640598]
-> #2 (>dm.dc_lock){+.+.}-{3:3}:
[  104.640611]lock_acquire+0xca/0x390
[  104.640623]__mutex_lock+0x9b/0x930
[  104.640633]mutex_lock_nested+0x1b/0x20
[  104.640640]handle_hpd_rx_irq+0x9b/0x1c0 [amdgpu]
[  104.640959]dm_irq_work_func+0x4e/0x60 [amdgpu]
[  104.641264]process_one_work+0x2a7/0x5b0
[  104.641275]worker_thread+0x4a/0x3d0
[  104.641283]kthread+0x125/0x160
[  104.641290]ret_from_fork+0x22/0x30
[  104.641300]
-> #1 (>hpd_lock){+.+.}-{3:3}:
[  104.641312]lock_acquire+0xca/0x390
[  104.641321]__mutex_lock+0x9b/0x930
[  104.641328]mutex_lock_nested+0x1b/0x20
[  104.641336]handle_hpd_rx_irq+0x67/0x1c0 [amdgpu]
[  104.641635]dm_irq_work_func+0x4e/0x60 [amdgpu]
[  104.641931]process_one_work+0x2a7/0x5b0
[  104.641940]worker_thread+0x4a/0x3d0
[  104.641948]kthread+0x125/0x160
[  104.641954]ret_from_fork+0x22/0x30
[  104.641963]
-> #0 ((work_completion)(>work)){+.+.}-{0:0}:
[  104.641975]check_prev_add+0x94/0xbf0
[  104.641983]__lock_acquire+0x130d/0x1ce0
[  104.641992]lock_acquire+0xca/0x390
[  104.642000]__flush_work+0x303/0x450
[  104.642008]flush_work+0x10/0x20
[  104.642016]amdgpu_dm_irq_suspend+0x93/0x100 [amdgpu]
[  104.642312]dm_suspend+0x181/0x1d0 [amdgpu]
[  104.642605]amdgpu_device_ip_suspend_phase1+0x8a/0x100 [amdgpu]
[  104.642835]amdgpu_device_ip_suspend+0x21/0x70 [amdgpu]
[  104.643066]amdgpu_device_pre_asic_reset+0x1bd/0x1d2 [amdgpu]
[  104.643403]amdgpu_device_gpu_recover.cold+0x5df/0xa9d [amdgpu]
[  104.643715]gpu_recover_get+0x2e/0x60 [amdgpu]
[  104.643951]simple_attr_read+0x6d/0x110
[  104.643960]debugfs_attr_read+0x49/0x70
[  104.643970]full_proxy_read+0x5f/0x90
[  104.643979]vfs_read+0xa3/0x190
[  104.643986]ksys_read+0x70/0xf0
[  104.643992]__x64_sys_read+0x1a/0x20
[  104.643999]do_syscall_64+0x38/0x90
[  104.644007]entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  104.644017]
other info that might help us debug this:

[  104.644026] Chain exists of:
  (work_completion)(>work) --> >hpd_lock --> 
>dm.dc_lock

[  104.644043]  Possible unsafe locking scenario:

[  104.644049]CPU0CPU1
[  104.644055]
[  104.644060]   lock(>dm.dc_lock);
[  104.644066]lock(>hpd_lock);
[  104.644075]lock(>dm.dc_lock);
[  104.644083]   lock((work_completion)(>work));
[  104.644090]
 *** DEADLOCK ***

[  104.644096] 3 locks held by cat/1158:
[  104.644103]  #0: 88810d0e4eb8 (>mutex){+.+.}-{3:3}, at: 
simple_attr_read+0x4e/0x110
[  104.644119]  #1: 88810a0a1600 (>reset_sem){}-{3:3}, at: 
amdgpu_device_lock_adev+0x42/0x94 [amdgpu]
[  104.644489]  #2: 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: 
dm_suspend+0xb2/0x1d0 [amdgpu]

Signed-off-by: Lang Yu 
---
  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 --
  1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e176ea84d75b..8727488df769 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2657,13 +2657,15 @@ static void handle_hpd_rx_irq(void *param)
}
}
  
-	mutex_lock(&adev->dm.dc_lock);

+   if (!amdgpu_in_reset(adev))
+   mutex_lock(&adev->dm.dc_lock);
  #ifdef CONFIG_DRM_AMD_DC_HDCP
	result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
  #else
	result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
  #endif
-   mutex_unlock(&adev->dm.dc_lock);
+   if (!amdgpu_in_reset(adev))
+   

Re: [PATCH] drm/amd/display: Set AMDGPU_DM_DEFAULT_MIN_BACKLIGHT to 0

2021-03-22 Thread Alex Deucher
On Sun, Mar 21, 2021 at 8:12 PM Evan Benn  wrote:
>
> On Sat, Mar 20, 2021 at 8:36 AM Alex Deucher  wrote:
> >
> > On Fri, Mar 19, 2021 at 5:31 PM Evan Benn  wrote:
> > >
> > > On Sat, 20 Mar 2021 at 02:10, Harry Wentland  
> > > wrote:
> > > > On 2021-03-19 10:22 a.m., Alex Deucher wrote:
> > > > > On Fri, Mar 19, 2021 at 3:23 AM Evan Benn  
> > > > > wrote:
> > > > >>
> > > > >> AMDGPU_DM_DEFAULT_MIN_BACKLIGHT was set to the value of 12
> > > > >> to ensure no display backlight will flicker at low user brightness
> > > > >> settings. However this value is quite bright, so for devices that do 
> > > > >> not
> > > > >> implement the ACPI ATIF
> > > > >> ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS
> > > > >> functionality the user cannot set the brightness to a low level even 
> > > > >> if
> > > > >> the display would support such a low PWM.
> > > > >>
> > > > >> This ATIF feature is not implemented on for example AMD grunt 
> > > > >> chromebooks.
> > > > >>
> > > > >> Signed-off-by: Evan Benn 
> > > > >>
> > > > >> ---
> > > > >> I could not find a justification for the reason for the value. It has
> > > > >> caused some noticable regression for users: 
> > > > >> https://bugzilla.kernel.org/show_bug.cgi?id=203439>>>
> > > > >> Maybe this can be either user controlled or userspace configured, but
> > > > >> preventing users from turning their backlight dim seems wrong.
> > > > >
> > > > > My understanding is that some panels flicker if you set the min to a
> > > > > value too low.  This was a safe minimum if the platform didn't specify
> > > > > it's own safe minimum.  I think we'd just be trading one bug for
> > > > > another (flickering vs not dim enough).  Maybe a whitelist or
> > > > > blacklist would be a better solution?
> > > > >
> > > >
> > > > Yeah, this is a NACK from me as-is for the reasons Alex described.
> > >
> > > Thanks Harry + Alex,
> > >
> > > I agree this solution is not the best.
> > >
> > > >
> > > > I agree a whitelist approach might be best.
> > >
> > > Do you have any idea what an allowlist could be keyed on?
> > > Is the flickering you observed here a function of the panel or the gpu
> > > or some other component?
> > > Maybe we could move the minimum level into the logic for that hardware.
> > >
> >
> > Maybe the panel string from the EDID?  Either that or something from
> > dmi data?  Harry would probably have a better idea.
>
> One problem with keying from panel EDID is that for example the grunt 
> chromebook
> platform has more than 100 different panels already shipped. Add to that that
> repair centers or people repairing their own device will use 'compatible'
> panels. I'm sure the AMD windows laptops have even more variety!
>

Do all of those "compatible" panels work with the min backlight level
of 0?  If so, maybe something platform specific like a DMI string
would make more sense.

Alex
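
For illustration, a DMI-keyed quirk could look roughly like the sketch
below; the match strings and table name are made up, not a real platform
list:

    #include <linux/dmi.h>

    /* hypothetical quirk table: platforms known to be flicker-free at 0 */
    static const struct dmi_system_id amdgpu_dm_min_backlight_quirks[] = {
        {
            .matches = {
                DMI_MATCH(DMI_SYS_VENDOR, "Google"),
                DMI_MATCH(DMI_PRODUCT_NAME, "Grunt"),
            },
        },
        { }
    };

    /* in amdgpu_dm_mode_config_init() or wherever the default is applied:
     * min = dmi_check_system(amdgpu_dm_min_backlight_quirks) ?
     *       0 : AMDGPU_DM_DEFAULT_MIN_BACKLIGHT;
     */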


> >
> > Alex
> >
> > > >
> > > > Is this fix perhaps for OLED panels? If so we could use a different
> > > > min-value for OLED panels that don't do PWM, but use 12 for everything 
> > > > else.
> > >
> > > All the chromebooks I have worked with LCD + LED backlight have been
> > > fine with a backlight set to 0.
> > > We do have OLED panels too, but I'm not aware of what they do.
> > >
> > > > Harry
> > > >
> > > > > Alex
> > > > >
> > > > >
> > > > >>
> > > > >> Also reviewed here: 
> > > > >> https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2748377>>>
> > > > >>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
> > > > >>   1 file changed, 1 insertion(+), 1 deletion(-)
> > > > >>
> > > > >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> > > > >> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > >> index 573cf17262da..0129bd69b94e 100644
> > > > >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > > >> @@ -3151,7 +3151,7 @@ static int amdgpu_dm_mode_config_init(struct 
> > > > >> amdgpu_device *adev)
> > > > >>  return 0;
> > > > >>   }
> > > > >>
> > > > >> -#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12
> > > > >> +#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 0
> > > > >>   #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255
> > > > >>   #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50
> > > > >>
> > > > >> --
> > > > >> 2.31.0.291.g576ba9dcdaf-goog
> > > > >>
> > > > >> ___
> > > > >> dri-devel mailing list
> > > > >> dri-de...@lists.freedesktop.org
> > > > >> https://lists.freedesktop.org/mailman/listinfo/dri-devel>> 
> > > > >> ___
> > > > > dri-devel mailing list
> > > > > dri-de...@lists.freedesktop.org
> > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel>>
> > > >


Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-22 Thread Daniel Vetter
On Mon, Mar 22, 2021 at 02:05:48PM +, Matthew Wilcox wrote:
> On Mon, Mar 22, 2021 at 02:49:27PM +0100, Daniel Vetter wrote:
> > On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote:
> > > Am 20.03.21 um 14:17 schrieb Daniel Vetter:
> > > > On Sat, Mar 20, 2021 at 10:04 AM Christian König
> > > >  wrote:
> > > > > Am 19.03.21 um 20:06 schrieb Daniel Vetter:
> > > > > > On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote:
> > > > > > > Am 19.03.21 um 18:52 schrieb Daniel Vetter:
> > > > > > > > On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König wrote:
> > > > > > > > > Don't print a warning when we fail to allocate a page for 
> > > > > > > > > swapping things out.
> > > > > > > > > 
> > > > > > > > > Also rely on memalloc_nofs_save/memalloc_nofs_restore instead 
> > > > > > > > > of GFP_NOFS.
> > > > > > > > Uh this part doesn't make sense. Especially since you only do 
> > > > > > > > it for the
> > > > > > > > debugfs file, not in general. Which means you've just 
> > > > > > > > completely broken
> > > > > > > > the shrinker.
> > > > > > > Are you sure? My impression is that GFP_NOFS should now work much 
> > > > > > > more out
> > > > > > > of the box with the memalloc_nofs_save()/memalloc_nofs_restore().
> > > > > > Yeah, if you'd put it in the right place :-)
> > > > > > 
> > > > > > But also -mm folks are very clear that memalloc_no*() family is for 
> > > > > > dire
> > > > > > situation where there's really no other way out. For anything where 
> > > > > > you
> > > > > > know what you're doing, you really should use explicit gfp flags.
> > > > > My impression is just the other way around. You should try to avoid 
> > > > > the
> > > > > NOFS/NOIO flags and use the memalloc_no* approach instead.
> > > > Where did you get that idea?
> > > 
> > > Well from the kernel comment on GFP_NOFS:
> > > 
> > >  * %GFP_NOFS will use direct reclaim but will not use any filesystem
> > > interfaces.
> > >  * Please try to avoid using this flag directly and instead use
> > >  * memalloc_nofs_{save,restore} to mark the whole scope which
> > > cannot/shouldn't
> > >  * recurse into the FS layer with a short explanation why. All allocation
> > >  * requests will inherit GFP_NOFS implicitly.
> > 
> > Huh that's interesting, since iirc Willy or Dave told me the opposite, and
> > the memalloc_no* stuff is for e.g. nfs calling into network layer (needs
> > GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think).
> > 
> > Adding them, maybe I got confused.
> 
> My impression is that the scoped API is preferred these days.
> 
> https://www.kernel.org/doc/html/latest/core-api/gfp_mask-from-fs-io.html
> 
> I'd probably need to spend a few months learning the DRM subsystem to
> have a more detailed opinion on whether passing GFP flags around explicitly
> or using the scope API is the better approach for your situation.

Atm it's a single allocation in the ttm shrinker that's already explicitly
using GFP_NOFS that we're talking about here.

The scoped api might make sense for gpu scheduler, where we really operate
under GFP_NOWAIT for somewhat awkward reasons. But also I thought at least
for GFP_NOIO you generally need a mempool and think about how you
guarantee forward progress anyway. Is that also a bit outdated thinking,
and nowadays we could operate under the assumption that this Just Works?
Given that GFP_NOFS seems to fall over already for us I'm not super sure
about that ...
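
For reference, the scoped pattern that kernel comment points at boils down
to the fragment below (illustrative only, not a proposal for ttm):

    #include <linux/sched/mm.h>
    #include <linux/slab.h>

    static void *alloc_in_nofs_scope(size_t size)
    {
        unsigned int nofs_flags;
        void *p;

        nofs_flags = memalloc_nofs_save();
        /* every allocation in this scope implicitly behaves as GFP_NOFS,
         * including GFP_KERNEL requests made by callees */
        p = kmalloc(size, GFP_KERNEL);
        memalloc_nofs_restore(nofs_flags);
        return p;
    }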

> I usually defer to Michal on these kinds of questions.
> 
> > > > The kernel is full of explicit gfp_t flag
> > > > passing to make this as explicit as possible. The memalloc_no* stuff
> > > > is just for when you go through entire subsystems and really can't
> > > > wire it through. I can't find the discussion anymore, but that was the
> > > > advice I got from mm/fs people.
> > > > 
> > > > One reason is that generally a small GFP_KERNEL allocation never
> > > > fails. But it absolutely can fail if it's in a memalloc_no* section,
> > > > and these kind of non-obvious non-local effects are a real pain in
> > > > testing and review. Hence explicit gfp_flag passing as much as
> > > > possible.
> 
> I agree with this; it's definitely a problem with the scope API.  I wanted
> to extend it to include GFP_NOWAIT, but if you do that, your chances of
> memory allocation failure go way up, so you really want to set __GFP_NOWARN
> too, but now you need to audit all the places that you're calling to be
> sure they really handle errors correctly.
> 
> So I think I'm giving up on that patch set.

Yeah the auditing is what scares me, and why at least personally I prefer
explicit gfp flags. It's much easier to debug a lockdep splat involving
fs_reclaim than memory allocation failures leading to very strange bugs
because we're not handling the allocation failure properly (or maybe not
even at all).
-Daniel

> 
> > > > > > > > If this is just to paper over the seq_printf doing the wrong 
> > > > > > > > 

Re: [PATCH 00/44] Add HMM-based SVM memory manager to KFD v2

2021-03-22 Thread Daniel Vetter
On Mon, Mar 22, 2021 at 06:58:16AM -0400, Felix Kuehling wrote:
> Since the last patch series I sent on Jan 6 a lot has changed. Patches 1-33
> are the cleaned up, rebased on amd-staging-drm-next 5.11 version from about
> a week ago. The remaining 11 patches are current work-in-progress with
> further cleanup and fixes.
> 
> MMU notifiers and CPU page faults now can split ranges and update our range
> data structures without taking heavy locks by doing some of the critical
> work in a deferred work handler. This includes updating MMU notifiers and
> the SVM range interval tree. In the mean time, new ranges can live as
> children of their parent ranges until the deferred work handler consolidates
> them in the main interval tree.

I'm totally swamped with intel stuff unfortunately, so not really time to
dig in. Can you give me the spoiler on how the (gfx10+ iirc) page fault
inversion is planned to be handled now? Or that still tbd?

Other thing I noticed is that amdkfd still uses the mmu_notifier directly,
and not the mmu_interval_notifier. But you're talking a lot about managing
intervals here, and so I'm wondering whether we shouldn't do this in core
code? Everyone will have the same painful locking problems here (well atm
everyone = you only I think), sharing this imo would make a ton of
sense.
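
For reference, the mmu_interval_notifier flavour looks roughly like the
sketch below; the svm_range naming is an assumption, not actual KFD code:

    #include <linux/mmu_notifier.h>

    /* one interval notifier per range, invalidate bumps the sequence count */
    static bool svm_range_invalidate(struct mmu_interval_notifier *mni,
                                     const struct mmu_notifier_range *range,
                                     unsigned long cur_seq)
    {
        mmu_interval_set_seq(mni, cur_seq);
        /* ... unmap/evict the overlapping part of the range ... */
        return true;
    }

    static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
        .invalidate = svm_range_invalidate,
    };

    /* registration, covering [start, start + size):
     * mmu_interval_notifier_insert(&prange->notifier, mm, start, size,
     *                              &svm_range_mn_ops);
     */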

I think the other one is moving over more generic pasid code, but I think
that's going to be less useful here and maybe more a long term project.

Cheers, Daniel

> 
> We also added proper DMA mapping of system memory pages.
> 
> Current work in progress is cleaning up all the locking, simplifying our
> code and data structures and resolving a few known bugs.
> 
> This series and the corresponding ROCm Thunk and KFDTest changes are also
> available on gitub:
>   https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/tree/fxkamd/hmm-wip
>   
> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip
> 
> An updated Thunk
> 
> Alex Sierra (10):
>   drm/amdgpu: replace per_device_list by array
>   drm/amdkfd: helper to convert gpu id and idx
>   drm/amdkfd: add xnack enabled flag to kfd_process
>   drm/amdkfd: add ioctl to configure and query xnack retries
>   drm/amdgpu: enable 48-bit IH timestamp counter
>   drm/amdkfd: SVM API call to restore page tables
>   drm/amdkfd: add svm_bo reference for eviction fence
>   drm/amdgpu: add param bit flag to create SVM BOs
>   drm/amdgpu: svm bo enable_signal call condition
>   drm/amdgpu: add svm_bo eviction to enable_signal cb
> 
> Felix Kuehling (22):
>   drm/amdkfd: map svm range to GPUs
>   drm/amdkfd: svm range eviction and restore
>   drm/amdkfd: validate vram svm range from TTM
>   drm/amdkfd: HMM migrate ram to vram
>   drm/amdkfd: HMM migrate vram to ram
>   drm/amdkfd: invalidate tables on page retry fault
>   drm/amdkfd: page table restore through svm API
>   drm/amdkfd: add svm_bo eviction mechanism support
>   drm/amdkfd: refine migration policy with xnack on
>   drm/amdkfd: add svm range validate timestamp
>   drm/amdkfd: multiple gpu migrate vram to vram
>   drm/amdkfd: Fix dma unmapping
>   drm/amdkfd: Call mutex_destroy
>   drm/amdkfd: Fix spurious restore failures
>   drm/amdkfd: Fix svm_bo_list locking in eviction worker
>   drm/amdkfd: Simplify split_by_granularity
>   drm/amdkfd: Point out several race conditions
>   drm/amdkfd: Return pdd from kfd_process_device_from_gduid
>   drm/amdkfd: Remove broken deferred mapping
>   drm/amdkfd: Allow invalid pages in migration.src
>   drm/amdkfd: Correct locking during migration and mapping
>   drm/amdkfd: Nested locking and invalidation of child ranges
> 
> Philip Yang (12):
>   drm/amdkfd: add svm ioctl API
>   drm/amdkfd: register svm range
>   drm/amdkfd: add svm ioctl GET_ATTR op
>   drm/amdgpu: add common HMM get pages function
>   drm/amdkfd: validate svm range system memory
>   drm/amdkfd: deregister svm range
>   drm/amdgpu: export vm update mapping interface
>   drm/amdkfd: register HMM device private zone
>   drm/amdkfd: support xgmi same hive mapping
>   drm/amdkfd: copy memory through gart table
>   drm/amdgpu: reserve fence slot to update page table
>   drm/amdkfd: Add SVM API support capability bits
> 
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|4 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|4 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c  |   16 +-
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   13 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c|   83 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|7 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|4 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   90 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   48 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   11 +
>  drivers/gpu/drm/amd/amdgpu/vega10_ih.c|1 +
>  drivers/gpu/drm/amd/amdkfd/Kconfig|1 +
>  drivers/gpu/drm/amd/amdkfd/Makefile   |4 +-
>  

RE: [PATCH][next] drm/amd/display: Fix sizeof arguments in bw_calcs_init()

2021-03-22 Thread Chen, Guchun
[AMD Public Use]

Thanks for your patch, Silva. The issue has been fixed by " a5c6007e20e1 
drm/amd/display: fix modprobe failure on vega series".

Regards,
Guchun

-Original Message-
From: amd-gfx  On Behalf Of Gustavo A. 
R. Silva
Sent: Monday, March 22, 2021 8:51 PM
To: Lee Jones ; Wentland, Harry ; 
Li, Sun peng (Leo) ; Deucher, Alexander 
; Koenig, Christian ; 
David Airlie ; Daniel Vetter 
Cc: Gustavo A. R. Silva ; 
dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; 
linux-ker...@vger.kernel.org
Subject: [PATCH][next] drm/amd/display: Fix sizeof arguments in bw_calcs_init()

The wrong sizeof values are currently being used as arguments to kzalloc().

Fix this by using the right arguments *dceip and *vbios, correspondingly.

Addresses-Coverity-ID: 1502901 ("Wrong sizeof argument")
Fixes: fca1e079055e ("drm/amd/display/dc/calcs/dce_calcs: Remove some large 
variables from the stack")
Signed-off-by: Gustavo A. R. Silva 
---
 drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c 
b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
index 556ecfabc8d2..1244fcb0f446 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
@@ -2051,11 +2051,11 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip,
 
enum bw_calcs_version version = bw_calcs_version_from_asic_id(asic_id);
 
-   dceip = kzalloc(sizeof(dceip), GFP_KERNEL);
+   dceip = kzalloc(sizeof(*dceip), GFP_KERNEL);
if (!dceip)
return;
 
-   vbios = kzalloc(sizeof(vbios), GFP_KERNEL);
+   vbios = kzalloc(sizeof(*vbios), GFP_KERNEL);
if (!vbios) {
kfree(dceip);
return;
--
2.27.0



Re: [PATCH 2/2] drm/amdgpu: Introduce new SETUP_TMR interface

2021-03-22 Thread Zeng, Oak
[AMD Official Use Only - Internal Distribution Only]

Hello all,

Can someone help to review below patches? We verified with firmware team and 
want to check-in together with psp firmware

Regards,
Oak



On 2021-03-12, 4:24 PM, "Zeng, Oak"  wrote:

This new interface passes both virtual and physical address
to PSP. It is backward compatible with the old interface.

Signed-off-by: Oak Zeng 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 13 ++---
 drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 11 ++-
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index cd3eda9..99e1a3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -328,8 +328,13 @@ psp_cmd_submit_buf(struct psp_context *psp,

 static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
  struct psp_gfx_cmd_resp *cmd,
- uint64_t tmr_mc, uint32_t size)
+ uint64_t tmr_mc, struct amdgpu_bo *tmr_bo)
 {
+struct amdgpu_device *adev = psp->adev;
+uint32_t size = amdgpu_bo_size(tmr_bo);
+uint64_t tmr_pa = amdgpu_bo_gpu_offset(tmr_bo) +
+adev->vm_manager.vram_base_offset - adev->gmc.vram_start;
+
 if (amdgpu_sriov_vf(psp->adev))
 cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
 else
@@ -337,6 +342,9 @@ static void psp_prep_tmr_cmd_buf(struct psp_context 
*psp,
 cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
 cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
 cmd->cmd.cmd_setup_tmr.buf_size = size;
+cmd->cmd.cmd_setup_tmr.bitfield.virt_phy_addr = 1;
+cmd->cmd.cmd_setup_tmr.system_phy_addr_lo = lower_32_bits(tmr_pa);
+cmd->cmd.cmd_setup_tmr.system_phy_addr_hi = upper_32_bits(tmr_pa);
 }

 static void psp_prep_load_toc_cmd_buf(struct psp_gfx_cmd_resp *cmd,
@@ -456,8 +464,7 @@ static int psp_tmr_load(struct psp_context *psp)
 if (!cmd)
 return -ENOMEM;

-psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr,
- amdgpu_bo_size(psp->tmr_bo));
+psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo);
 DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n",
  amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr);

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h 
b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index a41b054..604a1c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -170,10 +170,19 @@ struct psp_gfx_cmd_setup_tmr
 uint32_t        buf_phy_addr_lo;   /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */
 uint32_t        buf_phy_addr_hi;   /* bits [63:32] of GPU Virtual address of TMR buffer */
 uint32_t        buf_size;          /* buffer size in bytes (must be multiple of 4 KB) */
+union {
+    struct {
+        uint32_t    sriov_enabled:1; /* whether the device runs under SR-IOV */
+        uint32_t    virt_phy_addr:1; /* driver passes both virtual and physical address to PSP */
+        uint32_t    reserved:30;
+    } bitfield;
+    uint32_t    tmr_flags;
+};
+uint32_t        system_phy_addr_lo;/* bits [31:0] of system physical address of TMR buffer (must be 4 KB aligned) */
+uint32_t        system_phy_addr_hi;/* bits [63:32] of system physical address of TMR buffer */

 };

-
 /* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. */
 enum psp_gfx_fw_type {
 GFX_FW_TYPE_NONE= 0,/* */
--
2.7.4




Re: [PATCH V2] drm/amdgpu: Fix a typo

2021-03-22 Thread Alex Deucher
On Sat, Mar 20, 2021 at 3:52 AM Randy Dunlap
 wrote:
>
>
>
> On Fri, 19 Mar 2021, Bhaskar Chowdhury wrote:
>
> > s/traing/training/
> >
> > ...Plus the entire sentence construction for better readability.
> >
> > Signed-off-by: Bhaskar Chowdhury 
> > ---
> > Changes from V1:
> >  Alex and Randy's suggestions incorporated.
> >
> > drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 8 
> > 1 file changed, 4 insertions(+), 4 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
> > b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> > index c325d6f53a71..bf3857867f51 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
> > @@ -661,10 +661,10 @@ static int psp_v11_0_memory_training(struct 
> > psp_context *psp, uint32_t ops)
> >
> >   if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
> >   /*
> > -  * Long traing will encroach certain mount of bottom VRAM,
> > -  * saving the content of this bottom VRAM to system memory
> > -  * before training, and restoring it after training to avoid
> > -  * VRAM corruption.
> > +  * Long training will encroach a certain amount on the bottom 
> > of VRAM;
> > + * save the content from the bottom VRAM to system memory
> > + * before training, and restore it after training to avoid
> > + * VRAM corruption.
>
> These 3 new lines are indented with spaces instead of tabs. Oops.  :(
>
> (I may be too late with this comment -- sorry about that.)

I fixed that up when I applied it.

Thanks,

Alex


>
> >*/
> >   sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;
> >
> > --
> > 2.26.2
> >
> >
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel


[PATCH][next] drm/amd/display: Fix sizeof arguments in bw_calcs_init()

2021-03-22 Thread Gustavo A. R. Silva
The wrong sizeof values are currently being used as arguments to
kzalloc().

Fix this by using the right arguments *dceip and *vbios,
correspondingly.

Addresses-Coverity-ID: 1502901 ("Wrong sizeof argument")
Fixes: fca1e079055e ("drm/amd/display/dc/calcs/dce_calcs: Remove some large 
variables from the stack")
Signed-off-by: Gustavo A. R. Silva 
---
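
For illustration (standalone example, not driver code), the difference
between the two forms:

    #include <stdio.h>
    #include <stdlib.h>

    struct bw_calcs_dceip { char payload[1000]; }; /* stand-in; real struct is large */

    int main(void)
    {
        struct bw_calcs_dceip *dceip;

        printf("sizeof(dceip)  = %zu\n", sizeof(dceip));  /* pointer size, e.g. 8 */
        printf("sizeof(*dceip) = %zu\n", sizeof(*dceip)); /* whole struct, 1000 here */

        dceip = calloc(1, sizeof(*dceip)); /* what the fix allocates */
        free(dceip);
        return 0;
    }
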
 drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c 
b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
index 556ecfabc8d2..1244fcb0f446 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
@@ -2051,11 +2051,11 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip,
 
enum bw_calcs_version version = bw_calcs_version_from_asic_id(asic_id);
 
-   dceip = kzalloc(sizeof(dceip), GFP_KERNEL);
+   dceip = kzalloc(sizeof(*dceip), GFP_KERNEL);
if (!dceip)
return;
 
-   vbios = kzalloc(sizeof(vbios), GFP_KERNEL);
+   vbios = kzalloc(sizeof(*vbios), GFP_KERNEL);
if (!vbios) {
kfree(dceip);
return;
-- 
2.27.0



Re: [PATCH] drm/ttm: stop warning on TT shrinker failure

2021-03-22 Thread Daniel Vetter
On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote:
> Am 20.03.21 um 14:17 schrieb Daniel Vetter:
> > On Sat, Mar 20, 2021 at 10:04 AM Christian König
> >  wrote:
> > > Am 19.03.21 um 20:06 schrieb Daniel Vetter:
> > > > On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote:
> > > > > Am 19.03.21 um 18:52 schrieb Daniel Vetter:
> > > > > > On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König wrote:
> > > > > > > Don't print a warning when we fail to allocate a page for 
> > > > > > > swapping things out.
> > > > > > > 
> > > > > > > Also rely on memalloc_nofs_save/memalloc_nofs_restore instead of 
> > > > > > > GFP_NOFS.
> > > > > > Uh this part doesn't make sense. Especially since you only do it 
> > > > > > for the
> > > > > > debugfs file, not in general. Which means you've just completely 
> > > > > > broken
> > > > > > the shrinker.
> > > > > Are you sure? My impression is that GFP_NOFS should now work much 
> > > > > more out
> > > > > of the box with the memalloc_nofs_save()/memalloc_nofs_restore().
> > > > Yeah, if you'd put it in the right place :-)
> > > > 
> > > > But also -mm folks are very clear that memalloc_no*() family is for dire
> > > > situation where there's really no other way out. For anything where you
> > > > know what you're doing, you really should use explicit gfp flags.
> > > My impression is just the other way around. You should try to avoid the
> > > NOFS/NOIO flags and use the memalloc_no* approach instead.
> > Where did you get that idea?
> 
> Well from the kernel comment on GFP_NOFS:
> 
>  * %GFP_NOFS will use direct reclaim but will not use any filesystem
> interfaces.
>  * Please try to avoid using this flag directly and instead use
>  * memalloc_nofs_{save,restore} to mark the whole scope which
> cannot/shouldn't
>  * recurse into the FS layer with a short explanation why. All allocation
>  * requests will inherit GFP_NOFS implicitly.

Huh that's interesting, since iirc Willy or Dave told me the opposite, and
the memalloc_no* stuff is for e.g. nfs calling into network layer (needs
GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think).

Adding them, maybe I got confused.

> > The kernel is full of explicit gfp_t flag
> > passing to make this as explicit as possible. The memalloc_no* stuff
> > is just for when you go through entire subsystems and really can't
> > wire it through. I can't find the discussion anymore, but that was the
> > advice I got from mm/fs people.
> > 
> > One reason is that generally a small GFP_KERNEL allocation never
> > fails. But it absolutely can fail if it's in a memalloc_no* section,
> > and these kind of non-obvious non-local effects are a real pain in
> > testing and review. Hence explicit gfp_flag passing as much as
> > possible.
> > 
> > > > > > If this is just to paper over the seq_printf doing the wrong 
> > > > > > allocations,
> > > > > > then just move that out from under the fs_reclaim_acquire/release 
> > > > > > part.
> > > > > No, that wasn't the problem.
> > > > > 
> > > > > We have just seen to many failures to allocate pages for swapout and 
> > > > > I think
> > > > > that would improve this because in a lot of cases we can then 
> > > > > immediately
> > > > > swap things out instead of having to rely on upper layers.
> > > > Yeah, you broke it. Now the real shrinker is running with GFP_KERNEL,
> > > > because your memalloc_no is only around the debugfs function. And ofc 
> > > > it's
> > > > much easier to allocate with GFP_KERNEL, right until you deadlock :-)
> > > The problem here is that for example kswapd calls the shrinker without
> > > holding a FS lock as far as I can see.
> > > 
> > > And it is rather sad that we can't optimize this case directly.
> > I'm still not clear what you want to optimize? You can check for "is
> > this kswapd" in pf flags, but that sounds very hairy and fragile.
> 
> Well we only need the NOFS flag when the shrinker callback really comes from
> a memory shortage in the FS subsystem, and that is rather unlikely.
> 
> When we would allow all other cases to be able to directly IO the freed up
> pages to swap it would certainly help.

tbh I'm not sure. i915-gem code has played tricks with special casing the
kswapd path, and they do kinda scare me at least. I'm not sure whether
there's not some hidden dependencies there that would make this a bad
idea. Like afaik direct reclaim can sometimes stall for kswapd to catch up
a bit, or at least did in the past (I think, really not much clue about
this)

The other thing is that the fs_reclaim_acquire/release annotation really
only works well if you use it outside of the direct reclaim path too.
Otherwise it's not much better than just lots of testing. That pretty much
means you have to annotate the kswapd path.
-Daniel



> 
> Christian.
> 
> > -Daniel
> > 
> > > Anyway you are right if some caller doesn't use the memalloc_no*()
> > > approach we are busted.
> > > 
> > > Going to change the patch to only not 

Re: [PATCH] drm/ttm: stop warning on TT shrinker failure v2

2021-03-22 Thread Daniel Vetter
On Mon, Mar 22, 2021 at 12:22 PM Christian König
 wrote:
>
> Don't print a warning when we fail to allocate a page for swapping things out.
>
> v2: only stop the warning
>
> Signed-off-by: Christian König 

Reviewed-by: Daniel Vetter 

It is kinda surprising that page allocator warns here even though we
explicitly asked for non-GFP_KERNEL (which is the only one where you
pretty much can assume you will get memory no matter what, since worst
case the OOM killer makes space for you).

But then with memalloc_no* and friends these failures could happen in
unexpected places, and I think the code that warns isn't aware of the
original gfp flags, so makes some sense from an implementation pov.
-Daniel


> ---
>  drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
> index 2f0833c98d2c..48b9a650630b 100644
> --- a/drivers/gpu/drm/ttm/ttm_tt.c
> +++ b/drivers/gpu/drm/ttm/ttm_tt.c
> @@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker 
> *shrink,
> };
> int ret;
>
> -   ret = ttm_bo_swapout(&ctx, GFP_NOFS);
> +   ret = ttm_bo_swapout(&ctx, GFP_NOFS | __GFP_NOWARN);
> return ret < 0 ? SHRINK_EMPTY : ret;
>  }
>
> --
> 2.25.1
>


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


Re: [PATCH 29/44] drm/amdgpu: reserve fence slot to update page table

2021-03-22 Thread Christian König



Am 22.03.21 um 11:58 schrieb Felix Kuehling:

From: Philip Yang 

Forgot to reserve a fence slot before using sdma to update the page table,
causing the kernel BUG backtrace below when handling a vm retry fault while
the application is exiting.

[  133.048143] kernel BUG at 
/home/yangp/git/compute_staging/kernel/drivers/dma-buf/dma-resv.c:281!
[  133.048487] Workqueue: events amdgpu_irq_handle_ih1 [amdgpu]
[  133.048506] RIP: 0010:dma_resv_add_shared_fence+0x204/0x280
[  133.048672]  amdgpu_vm_sdma_commit+0x134/0x220 [amdgpu]
[  133.048788]  amdgpu_vm_bo_update_range+0x220/0x250 [amdgpu]
[  133.048905]  amdgpu_vm_handle_fault+0x202/0x370 [amdgpu]
[  133.049031]  gmc_v9_0_process_interrupt+0x1ab/0x310 [amdgpu]
[  133.049165]  ? kgd2kfd_interrupt+0x9a/0x180 [amdgpu]
[  133.049289]  ? amdgpu_irq_dispatch+0xb6/0x240 [amdgpu]
[  133.049408]  amdgpu_irq_dispatch+0xb6/0x240 [amdgpu]
[  133.049534]  amdgpu_ih_process+0x9b/0x1c0 [amdgpu]
[  133.049657]  amdgpu_irq_handle_ih1+0x21/0x60 [amdgpu]
[  133.049669]  process_one_work+0x29f/0x640
[  133.049678]  worker_thread+0x39/0x3f0
[  133.049685]  ? process_one_work+0x640/0x640

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 


Reviewed-by: Christian König 

I'm going to push this to amd-staging-drm-next since it is really an 
independent bug fix.


Christian.
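
For context, the dma-resv rule the BUG_ON enforces is that a shared slot
must be reserved, while the reservation object is locked, before a shared
fence may be added; roughly (fragment only, bo/fence names assumed):

    int r;

    r = dma_resv_reserve_shared(bo->tbo.base.resv, 1);
    if (r)
        return r;
    /* ... submit the SDMA page table update ... */
    dma_resv_add_shared_fence(bo->tbo.base.resv, fence);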


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 --
  1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a61df234f012..3e32f76cd7bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3302,7 +3302,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
struct amdgpu_bo *root;
uint64_t value, flags;
struct amdgpu_vm *vm;
-   long r;
+   int r;
bool is_compute_context = false;
  
	spin_lock(&adev->vm_manager.pasid_lock);

@@ -3360,6 +3360,12 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
value = 0;
}
  
+	r = dma_resv_reserve_shared(root->tbo.base.resv, 1);

+   if (r) {
+   pr_debug("failed %d to reserve fence slot\n", r);
+   goto error_unlock;
+   }
+
r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
addr, flags, value, NULL, NULL,
NULL);
@@ -3371,7 +3377,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
  error_unlock:
amdgpu_bo_unreserve(root);
if (r < 0)
-   DRM_ERROR("Can't handle page fault (%ld)\n", r);
+   DRM_ERROR("Can't handle page fault (%d)\n", r);
  
  error_unref:

	amdgpu_bo_unref(&root);




RE: [PATCH 00/14] DC Patches March 22, 2021

2021-03-22 Thread Wheeler, Daniel
[AMD Public Use]

Hi all,

This week this patchset was tested on an HP Envy 360, with Ryzen 5 4500U, on the 
following display types (via usb-c to dp/dvi/hdmi/vga):
4k 60hz, 1440p 144hz, 1680*1050 60hz, internal eDP 1080p 60hz

Tested on a Sapphire Pulse RX5700XT on the following display types (via DP):
4k60 60hz, 1440p 144hz, 1680x1050 60hz.

Also tested on a Reference AMD RX6800 on the following display types (via DP):
4k60 60hz, 1440p 144hz. 

Also used a MST hub at 2x 4k 30hz on all systems.

Did not see a visual impact from the patchset tested.

Thank you,

Dan Wheeler
Technologist  |  AMD
SW Display
O +(1) 905-882-2600 ext. 74665
--
1 Commerce Valley Dr E, Thornhill, ON L3T 7X6
Facebook |  Twitter |  amd.com  


-Original Message-
From: amd-gfx  On Behalf Of Solomon Chiu
Sent: March 19, 2021 9:47 PM
To: amd-gfx@lists.freedesktop.org
Cc: Brol, Eryk ; Chiu, Solomon ; Li, 
Sun peng (Leo) ; Wentland, Harry ; 
Zhuo, Qingqing ; Siqueira, Rodrigo 
; Jacob, Anson ; Pillai, 
Aurabindo ; Lakha, Bhawanpreet 
; R, Bindu 
Subject: [PATCH 00/14] DC Patches March 22, 2021 

This DC patchset brings improvements in multiple areas. In summary, we 
highlight:
 
* Populate socclk entries for dcn2.1
* hide VGH asic specific structs
* Add kernel doc to crc_rd_wrk field
* revert max lb lines change
* Log DMCUB trace buffer events
* Fix debugfs link_settings entry
* revert max lb use by default for n10
* Deallocate IRQ handlers on amdgpu_dm_irq_fini
* Fixed Clock Recovery Sequence
* Fix UBSAN: shift-out-of-bounds warning
* [FW Promotion] Release 0.0.57
* Change input parameter for set_drr
* Use pwrseq instance to determine eDP instance


Alvin Lee (1):
  drm/amd/display: Change input parameter for set_drr

Anson Jacob (1):
  drm/amd/display: Fix UBSAN: shift-out-of-bounds warning

Anthony Koo (1):
  drm/amd/display: [FW Promotion] Release 0.0.57

Aric Cyr (1):
  drm/amd/display: 3.2.128

David Galiffi (1):
  drm/amd/display: Fixed Clock Recovery Sequence

Dmytro Laktyushkin (3):
  drm/amd/display: hide VGH asic specific structs
  drm/amd/display: revert max lb lines change
  drm/amd/display: revert max lb use by default for n10

Fangzhi Zuo (1):
  drm/amd/display: Fix debugfs link_settings entry

Jake Wang (1):
  drm/amd/display: Use pwrseq instance to determine eDP instance

Leo (Hanghong) Ma (1):
  drm/amd/display: Log DMCUB trace buffer events

Roman Li (1):
  drm/amd/display: Populate socclk entries for dcn2.1

Victor Lu (1):
  drm/amd/display: Deallocate IRQ handlers on amdgpu_dm_irq_fini

Wayne Lin (1):
  drm/amd/display: Add kernel doc to crc_rd_wrk field

 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c |  48   
.../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h |  14 +++  
.../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c |  15 +--  
.../amd/display/amdgpu_dm/amdgpu_dm_helpers.c |  12 +-  
.../drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c |  71 +++
 .../amd/display/amdgpu_dm/amdgpu_dm_trace.h   |  21 
 .../gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c  | 116 +-  
.../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c |  13 ++
 .../display/dc/clk_mgr/dcn301/vg_clk_mgr.c| 101 +++
 .../display/dc/clk_mgr/dcn301/vg_clk_mgr.h|  28 ++---
 drivers/gpu/drm/amd/display/dc/core/dc.c  |  10 +-
 drivers/gpu/drm/amd/display/dc/core/dc_link.c |   8 +-
 .../gpu/drm/amd/display/dc/core/dc_link_dp.c  |  10 +-
 drivers/gpu/drm/amd/display/dc/dc.h   |   2 +-
 .../display/dc/dce110/dce110_hw_sequencer.c   |   9 +-
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.c |  14 +--
 .../amd/display/dc/dcn10/dcn10_hw_sequencer.h |   3 +-
 .../drm/amd/display/dc/dcn10/dcn10_resource.c |   4 +-
 .../drm/amd/display/dc/dcn20/dcn20_resource.c |   6 +-
 .../drm/amd/display/dc/dcn21/dcn21_resource.c |   3 +-
 .../drm/amd/display/dc/dcn30/dcn30_resource.c |   2 +-
 .../amd/display/dc/dcn301/dcn301_resource.c   |   2 +-
 .../amd/display/dc/dcn302/dcn302_resource.c   |   2 +-
 .../dc/dml/dcn20/display_rq_dlg_calc_20.c |  28 -
 .../dc/dml/dcn20/display_rq_dlg_calc_20v2.c   |  28 -
 .../dc/dml/dcn21/display_rq_dlg_calc_21.c |  28 -
 .../dc/dml/dcn30/display_rq_dlg_calc_30.c |  28 -
 .../display/dc/dml/dml1_display_rq_dlg_calc.c |  28 -
 .../gpu/drm/amd/display/dc/inc/hw/clk_mgr.h   |   9 --
 .../gpu/drm/amd/display/dc/inc/hw_sequencer.h |   3 +-
 .../display/dc/irq/dcn21/irq_service_dcn21.c  |  32 -  
.../display/dc/irq/dcn30/irq_service_dcn30.c  |  32 -
 .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h   |   4 +-
 .../amd/display/modules/freesync/freesync.c   |  37 --
 .../amd/display/modules/inc/mod_freesync.h|   7 +-
 35 files changed, 581 insertions(+), 197 deletions(-)

--
2.29.0


Re: [RESEND 00/53] Rid GPU from W=1 warnings

2021-03-22 Thread Lee Jones
On Fri, 19 Mar 2021, Daniel Vetter wrote:

> On Fri, Mar 19, 2021 at 08:24:07AM +, Lee Jones wrote:
> > On Thu, 18 Mar 2021, Daniel Vetter wrote:
> > 
> > > On Wed, Mar 17, 2021 at 9:32 PM Daniel Vetter  wrote:
> > > >
> > > > On Wed, Mar 17, 2021 at 9:17 AM Lee Jones  wrote:
> > > > >
> > > > > On Thu, 11 Mar 2021, Lee Jones wrote:
> > > > >
> > > > > > On Thu, 11 Mar 2021, Daniel Vetter wrote:
> > > > > >
> > > > > > > On Mon, Mar 08, 2021 at 09:19:32AM +, Lee Jones wrote:
> > > > > > > > On Fri, 05 Mar 2021, Roland Scheidegger wrote:
> > > > > > > >
> > > > > > > > > The vmwgfx ones look all good to me, so for
> > > > > > > > > 23-53: Reviewed-by: Roland Scheidegger 
> > > > > > > > > That said, they were already signed off by Zack, so not sure 
> > > > > > > > > what
> > > > > > > > > happened here.
> > > > > > > >
> > > > > > > > Yes, they were accepted at one point, then dropped without a 
> > > > > > > > reason.
> > > > > > > >
> > > > > > > > Since I rebased onto the latest -next, I had to pluck them back 
> > > > > > > > out of
> > > > > > > > a previous one.
> > > > > > >
> > > > > > > They should show up in linux-next again. We merge patches for 
> > > > > > > next merge
> > > > > > > window even during the current merge window, but need to make 
> > > > > > > sure they
> > > > > > > don't pollute linux-next. Occasionally the cut off is wrong so 
> > > > > > > patches
> > > > > > > show up, and then get pulled again.
> > > > > > >
> > > > > > > Unfortunately especially the 5.12 merge cycle was very wobbly due 
> > > > > > > to some
> > > > > > > confusion here. But your patches should all be in linux-next 
> > > > > > > again (they
> > > > > > > are queued up for 5.13 in drm-misc-next, I checked that).
> > > > > > >
> > > > > > > Sorry for the confusion here.
> > > > > >
> > > > > > Oh, I see.  Well so long as they don't get dropped, I'll be happy.
> > > > > >
> > > > > > Thanks for the explanation Daniel
> > > > >
> > > > > After rebasing today, all of my GPU patches have remained.  Would
> > > > > someone be kind enough to check that everything is still in order
> > > > > please?
> > > >
> > > > It's still broken somehow. I've kiced Maxime and Maarten again,
> > > > they're also on this thread.
> > > 
> > > You're patches have made it into drm-next meanwhile, so they should
> > > show up in linux-next through that tree at least. Except if that one
> > > also has some trouble.
> > 
> > Thanks for letting me know.
> > 
> > I see some patches made it back in, others didn't.
> > 
> > I'll resend the stragglers - bear with.
> 
> The vmwgfx ones should all be back, the others I guess just werent ever
> applied. I'll vacuum them all up if you resend. Apologies for the wobbly
> ride.

NP, it happens.

-- 
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog


Re: [PATCH] drm/amd/display: fix modprobe failure on vega series

2021-03-22 Thread Lee Jones
On Mon, 22 Mar 2021, Guchun Chen wrote:

> Fixes: d88b34caee83 ("Remove some large variables from the stack")
> 
> [   41.232097] Call Trace:
> [   41.232105]  kvasprintf+0x66/0xd0
> [   41.232122]  kasprintf+0x49/0x70
> [   41.232136]  __drm_crtc_init_with_planes+0x2e1/0x340 [drm]
> [   41.232219]  ? create_object+0x263/0x3b0
> [   41.232231]  drm_crtc_init_with_planes+0x46/0x60 [drm]
> [   41.232303]  amdgpu_dm_init+0x69c/0x1750 [amdgpu]
> [   41.232998]  ? phm_wait_for_register_unequal.part.1+0x58/0x90 [amdgpu]
> [   41.233662]  ? smu9_wait_for_response+0x7d/0xa0 [amdgpu]
> [   41.234294]  ? smu9_send_msg_to_smc_with_parameter+0x77/0xd0 [amdgpu]
> [   41.234912]  ? smum_send_msg_to_smc_with_parameter+0x96/0x100 [amdgpu]
> [   41.235520]  ? psm_set_states+0x5c/0x60 [amdgpu]
> [   41.236165]  dm_hw_init+0x12/0x20 [amdgpu]
> [   41.236834]  amdgpu_device_init+0x1402/0x1df0 [amdgpu]
> [   41.237314]  amdgpu_driver_load_kms+0x65/0x320 [amdgpu]
> [   41.237780]  amdgpu_pci_probe+0x150/0x250 [amdgpu]
> [   41.238240]  local_pci_probe+0x47/0xa0
> [   41.238253]  pci_device_probe+0x10b/0x1c0
> [   41.238265]  really_probe+0xf5/0x4c0
> [   41.238275]  driver_probe_device+0xe8/0x150
> [   41.238284]  device_driver_attach+0x58/0x60
> [   41.238293]  __driver_attach+0xa3/0x140
> [   41.238301]  ? device_driver_attach+0x60/0x60
> [   41.238309]  ? device_driver_attach+0x60/0x60
> [   41.238317]  bus_for_each_dev+0x74/0xb0
> [   41.238330]  ? kmem_cache_alloc_trace+0x31a/0x470
> [   41.238341]  driver_attach+0x1e/0x20
> [   41.238348]  bus_add_driver+0x14a/0x220
> [   41.238357]  ? 0xc0f09000
> [   41.238364]  driver_register+0x60/0x100
> [   41.238373]  ? 0xc0f09000
> [   41.238379]  __pci_register_driver+0x54/0x60
> [   41.238389]  amdgpu_init+0x68/0x1000 [amdgpu]
> [   41.238836]  do_one_initcall+0x48/0x1e0
> [   41.238850]  ? kmem_cache_alloc_trace+0x31a/0x470
> [   41.238862]  do_init_module+0x5f/0x224
> [   41.238876]  load_module+0x266b/0x2ec0
> [   41.238887]  ? security_kernel_post_read_file+0x5c/0x70
> [   41.238905]  __do_sys_finit_module+0xc1/0x120
> [   41.238913]  ? __do_sys_finit_module+0xc1/0x120
> [   41.238924]  __x64_sys_finit_module+0x1a/0x20
> [   41.238932]  do_syscall_64+0x37/0x80
> [   41.238942]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> 
> Signed-off-by: Guchun Chen 
> ---
>  drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c 
> b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
> index 556ecfabc8d2..1244fcb0f446 100644
> --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
> +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
> @@ -2051,11 +2051,11 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip,
>  
>   enum bw_calcs_version version = bw_calcs_version_from_asic_id(asic_id);
>  
> - dceip = kzalloc(sizeof(dceip), GFP_KERNEL);
> + dceip = kzalloc(sizeof(*dceip), GFP_KERNEL);
>   if (!dceip)
>   return;
>  
> - vbios = kzalloc(sizeof(vbios), GFP_KERNEL);
> + vbios = kzalloc(sizeof(*vbios), GFP_KERNEL);
>   if (!vbios) {
>   kfree(dceip);
>   return;

Oh yes, of course.

Looks like it was missed in review too.

Sorry about the school boy error!

Reviewed-by: Lee Jones 
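
For anyone skimming the archive, the root cause is the classic sizeof-on-a-pointer
mistake. A minimal sketch using the struct name from the diff above (illustrative,
not the driver code verbatim):

	struct bw_calcs_dceip *dceip;

	dceip = kzalloc(sizeof(dceip), GFP_KERNEL);  /* allocates only sizeof(a pointer), typically 8 bytes */
	dceip = kzalloc(sizeof(*dceip), GFP_KERNEL); /* allocates the full bw_calcs_dceip structure */

The first form returns a buffer of pointer size, so later writes through dceip
run past the end of the allocation.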

-- 
Lee Jones [李琼斯]
Senior Technical Lead - Developer Services
Linaro.org │ Open source software for Arm SoCs
Follow Linaro: Facebook | Twitter | Blog
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drivers: gpu: Remove duplicate include of amdgpu_hdp.h

2021-03-22 Thread Wan Jiabing
amdgpu_hdp.h has been included at line 91, so remove
the duplicate include.

Signed-off-by: Wan Jiabing 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 49267eb64302..68836c22ef25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -107,7 +107,6 @@
 #include "amdgpu_gfxhub.h"
 #include "amdgpu_df.h"
 #include "amdgpu_smuio.h"
-#include "amdgpu_hdp.h"
 
 #define MAX_GPU_INSTANCE   16
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/pm/powerplay/smumgr/smu7_smumgr: Fix some typo error

2021-03-22 Thread samirweng1979
From: wengjianfeng 

change 'addres' to 'address'

Signed-off-by: wengjianfeng 
---
 drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c 
b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c
index aae2524..0de9e0e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c
@@ -97,7 +97,7 @@ int smu7_copy_bytes_to_smc(struct pp_hwmgr *hwmgr, uint32_t 
smc_start_address,
addr = smc_start_address;
 
while (byte_count >= 4) {
-   /* Bytes are written into the SMC addres space with the MSB first. */
+   /* Bytes are written into the SMC address space with the MSB first. */
data = src[0] * 0x1000000 + src[1] * 0x10000 + src[2] * 0x100 + src[3];
 
result = smu7_set_smc_sram_address(hwmgr, addr, limit);
@@ -127,7 +127,7 @@ int smu7_copy_bytes_to_smc(struct pp_hwmgr *hwmgr, uint32_t 
smc_start_address,
extra_shift = 8 * (4 - byte_count);
 
while (byte_count > 0) {
-   /* Bytes are written into the SMC addres space with the 
MSB first. */
+   /* Bytes are written into the SMC address space with 
the MSB first. */
data = (0x100 * data) + *src++;
byte_count--;
}
-- 
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


RE: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""

2021-03-22 Thread Chen, Guchun
[AMD Public Use]

Hi Christian,

I will conduct one stress test for this tomorrow. Would you mind waiting for my 
ack before submitting?

Regards,
Guchun

-Original Message-
From: Christian König  
Sent: Monday, March 22, 2021 8:41 PM
To: amd-gfx@lists.freedesktop.org
Cc: Chen, Guchun ; Das, Nirmoy 
Subject: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""

Now that we found the underlying problem we can re-apply this patch.

This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c.

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +-
 1 file changed, 18 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9268db1172bd..bc3951b71079 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -37,6 +37,7 @@
 #include "amdgpu_gmc.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_dma_buf.h"
+#include "amdgpu_res_cursor.h"
 
 /**
  * DOC: GPUVM
@@ -1583,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_vm_update_params *params,
  * @last: last mapped entry
  * @flags: flags for the entries
  * @offset: offset into nodes and pages_addr
- * @nodes: array of drm_mm_nodes with the MC addresses
+ * @res: ttm_resource to map
  * @pages_addr: DMA addresses to use for mapping
  * @fence: optional resulting fence
  *
@@ -1598,13 +1599,13 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
   bool unlocked, struct dma_resv *resv,
   uint64_t start, uint64_t last,
   uint64_t flags, uint64_t offset,
-  struct drm_mm_node *nodes,
+  struct ttm_resource *res,
   dma_addr_t *pages_addr,
   struct dma_fence **fence)
 {
struct amdgpu_vm_update_params params;
+   struct amdgpu_res_cursor cursor;
enum amdgpu_sync_mode sync_mode;
-   uint64_t pfn;
int r;
 
memset(&params, 0, sizeof(params));
@@ -1622,14 +1623,6 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
else
sync_mode = AMDGPU_SYNC_EXPLICIT;
 
-   pfn = offset >> PAGE_SHIFT;
-   if (nodes) {
-   while (pfn >= nodes->size) {
-   pfn -= nodes->size;
-   ++nodes;
-   }
-   }
-
amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
@@ -1648,23 +1641,17 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (r)
goto error_unlock;
 
-   do {
+   amdgpu_res_first(res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE,
+&cursor);
+   while (cursor.remaining) {
uint64_t tmp, num_entries, addr;
 
-
-   num_entries = last - start + 1;
-   if (nodes) {
-   addr = nodes->start << PAGE_SHIFT;
-   num_entries = min((nodes->size - pfn) *
-   AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
-   } else {
-   addr = 0;
-   }
-
+   num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
if (pages_addr) {
bool contiguous = true;
 
if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
+   uint64_t pfn = cursor.start >> PAGE_SHIFT;
uint64_t count;
 
contiguous = pages_addr[pfn + 1] ==
@@ -1684,16 +1671,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
}
 
if (!contiguous) {
-   addr = pfn << PAGE_SHIFT;
+   addr = cursor.start;
params.pages_addr = pages_addr;
} else {
-   addr = pages_addr[pfn];
+   addr = pages_addr[cursor.start >> PAGE_SHIFT];
params.pages_addr = NULL;
}
 
} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
-   addr += bo_adev->vm_manager.vram_base_offset;
-   addr += pfn << PAGE_SHIFT;
+   addr = bo_adev->vm_manager.vram_base_offset +
+   cursor.start;
+   } else {
+   addr = 0;
}
 
tmp = start + num_entries;
@@ -1701,14 +1690,9 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (r)
goto error_unlock;
 
-  

[PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""

2021-03-22 Thread Christian König
Now that we found the underlying problem we can re-apply this patch.

This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c.
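
For readers who have not seen the cursor API, a minimal sketch of the walk this
patch restores, using only the helpers visible in the hunks below (map_chunk() is
a placeholder, not a real function):

	struct amdgpu_res_cursor cursor;

	/* start a walk over 'size' bytes of the resource at byte 'offset' */
	amdgpu_res_first(res, offset, size, &cursor);
	while (cursor.remaining) {
		/* cursor.start / cursor.size describe the current contiguous chunk */
		map_chunk(cursor.start, cursor.size);
		amdgpu_res_next(&cursor, cursor.size); /* advance past the chunk */
	}

This replaces the open-coded drm_mm_node/pfn bookkeeping that the hunks below delete.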

Signed-off-by: Christian König 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +-
 1 file changed, 18 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9268db1172bd..bc3951b71079 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -37,6 +37,7 @@
 #include "amdgpu_gmc.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_dma_buf.h"
+#include "amdgpu_res_cursor.h"
 
 /**
  * DOC: GPUVM
@@ -1583,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_vm_update_params *params,
  * @last: last mapped entry
  * @flags: flags for the entries
  * @offset: offset into nodes and pages_addr
- * @nodes: array of drm_mm_nodes with the MC addresses
+ * @res: ttm_resource to map
  * @pages_addr: DMA addresses to use for mapping
  * @fence: optional resulting fence
  *
@@ -1598,13 +1599,13 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
   bool unlocked, struct dma_resv *resv,
   uint64_t start, uint64_t last,
   uint64_t flags, uint64_t offset,
-  struct drm_mm_node *nodes,
+  struct ttm_resource *res,
   dma_addr_t *pages_addr,
   struct dma_fence **fence)
 {
struct amdgpu_vm_update_params params;
+   struct amdgpu_res_cursor cursor;
enum amdgpu_sync_mode sync_mode;
-   uint64_t pfn;
int r;
 
memset(&params, 0, sizeof(params));
@@ -1622,14 +1623,6 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
else
sync_mode = AMDGPU_SYNC_EXPLICIT;
 
-   pfn = offset >> PAGE_SHIFT;
-   if (nodes) {
-   while (pfn >= nodes->size) {
-   pfn -= nodes->size;
-   ++nodes;
-   }
-   }
-
amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
@@ -1648,23 +1641,17 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (r)
goto error_unlock;
 
-   do {
+   amdgpu_res_first(res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE,
+&cursor);
+   while (cursor.remaining) {
uint64_t tmp, num_entries, addr;
 
-
-   num_entries = last - start + 1;
-   if (nodes) {
-   addr = nodes->start << PAGE_SHIFT;
-   num_entries = min((nodes->size - pfn) *
-   AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
-   } else {
-   addr = 0;
-   }
-
+   num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT;
if (pages_addr) {
bool contiguous = true;
 
if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
+   uint64_t pfn = cursor.start >> PAGE_SHIFT;
uint64_t count;
 
contiguous = pages_addr[pfn + 1] ==
@@ -1684,16 +1671,18 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
}
 
if (!contiguous) {
-   addr = pfn << PAGE_SHIFT;
+   addr = cursor.start;
params.pages_addr = pages_addr;
} else {
-   addr = pages_addr[pfn];
+   addr = pages_addr[cursor.start >> PAGE_SHIFT];
params.pages_addr = NULL;
}
 
} else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
-   addr += bo_adev->vm_manager.vram_base_offset;
-   addr += pfn << PAGE_SHIFT;
+   addr = bo_adev->vm_manager.vram_base_offset +
+   cursor.start;
+   } else {
+   addr = 0;
}
 
tmp = start + num_entries;
@@ -1701,14 +1690,9 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (r)
goto error_unlock;
 
-   pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
-   if (nodes && nodes->size == pfn) {
-   pfn = 0;
-   ++nodes;
-   }
+   amdgpu_res_next(&cursor, num_entries * AMDGPU_GPU_PAGE_SIZE);
start = tmp;
-
-   } while (unlikely(start != last + 1));
+   };
 
r = 

Re: [PATCH] drivers: gpu: Remove duplicate include of amdgpu_hdp.h

2021-03-22 Thread Christian König



Am 22.03.21 um 13:02 schrieb Wan Jiabing:

amdgpu_hdp.h has been included at line 91, so remove
the duplicate include.

Signed-off-by: Wan Jiabing 


Acked-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 -
  1 file changed, 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 49267eb64302..68836c22ef25 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -107,7 +107,6 @@
  #include "amdgpu_gfxhub.h"
  #include "amdgpu_df.h"
  #include "amdgpu_smuio.h"
-#include "amdgpu_hdp.h"
  
  #define MAX_GPU_INSTANCE		16
  


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] amdgpu: avoid incorrect %hu format string

2021-03-22 Thread Christian König

Am 22.03.21 um 12:54 schrieb Arnd Bergmann:

From: Arnd Bergmann 

clang points out that the %hu format string does not match the type
of the variables here:

drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c:263:7: warning: format specifies type 
'unsigned short' but the argument has type 'unsigned int' [-Wformat]
   version_major, version_minor);
   ^
include/drm/drm_print.h:498:19: note: expanded from macro 'DRM_ERROR'
 __drm_err(fmt, ##__VA_ARGS__)
   ~~~^~~

Change it to a regular %u, the same way a previous patch did for
another instance of the same warning.

Fixes: 0b437e64e0af ("drm/amdgpu: remove h from printk format specifier")
Signed-off-by: Arnd Bergmann 


Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e2ed4689118a..c6dbc0801604 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -259,7 +259,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
if ((adev->asic_type == CHIP_POLARIS10 ||
 adev->asic_type == CHIP_POLARIS11) &&
(adev->uvd.fw_version < FW_1_66_16))
-   DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too 
old.\n",
+   DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is too 
old.\n",
  version_major, version_minor);
} else {
unsigned int enc_major, enc_minor, dec_minor;


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] amdgpu: avoid incorrect %hu format string

2021-03-22 Thread Arnd Bergmann
From: Arnd Bergmann 

clang points out that the %hu format string does not match the type
of the variables here:

drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c:263:7: warning: format specifies type 
'unsigned short' but the argument has type 'unsigned int' [-Wformat]
  version_major, version_minor);
  ^
include/drm/drm_print.h:498:19: note: expanded from macro 'DRM_ERROR'
__drm_err(fmt, ##__VA_ARGS__)
  ~~~^~~

Change it to a regular %u, the same way a previous patch did for
another instance of the same warning.
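
A standalone illustration of the warning, assuming plain unsigned int variables
as in the UVD code (not part of the patch):

	#include <stdio.h>

	int main(void)
	{
		unsigned int version_major = 1, version_minor = 66;

		/* -Wformat: %hu promises an unsigned short argument */
		printf("version %hu.%hu\n", version_major, version_minor);
		/* %u matches the unsigned int arguments */
		printf("version %u.%u\n", version_major, version_minor);
		return 0;
	}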

Fixes: 0b437e64e0af ("drm/amdgpu: remove h from printk format specifier")
Signed-off-by: Arnd Bergmann 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index e2ed4689118a..c6dbc0801604 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -259,7 +259,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
if ((adev->asic_type == CHIP_POLARIS10 ||
 adev->asic_type == CHIP_POLARIS11) &&
(adev->uvd.fw_version < FW_1_66_16))
-   DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is 
too old.\n",
+   DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is 
too old.\n",
  version_major, version_minor);
} else {
unsigned int enc_major, enc_minor, dec_minor;
-- 
2.29.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/ttm: stop warning on TT shrinker failure v2

2021-03-22 Thread Christian König
Don't print a warning when we fail to allocate a page for swapping things out.

v2: only stop the warning

Signed-off-by: Christian König 
---
 drivers/gpu/drm/ttm/ttm_tt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 2f0833c98d2c..48b9a650630b 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker 
*shrink,
};
int ret;
 
-   ret = ttm_bo_swapout(&ctx, GFP_NOFS);
+   ret = ttm_bo_swapout(&ctx, GFP_NOFS | __GFP_NOWARN);
return ret < 0 ? SHRINK_EMPTY : ret;
 }
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 38/44] drm/amdkfd: Simplify split_by_granularity

2021-03-22 Thread Felix Kuehling
svm_range_split_by_granularity always added the parent range and only
the parent range to the update list for the caller to add it to the
deferred work list. So just do that in the caller unconditionally and
eliminate the update_list parameter.

Split the range so that the original prange is always the one that
will be migrated. That way we can eliminate the pmigrate parameter
and simplify the code further.

Update the outdated documentation.
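
A sketch of the trimming arithmetic this implies, assuming granularity is the
log2 number of pages of a migration granule as documented in kfd_svm.h (values
in pages; illustrative only, not the exact implementation):

	unsigned long size  = 1UL << prange->granularity;
	unsigned long start = max(ALIGN_DOWN(addr, size), parent->start);
	unsigned long last  = min(ALIGN_DOWN(addr, size) + size - 1, parent->last);

	/* prange keeps [start, last] and is what gets migrated; any head or
	 * tail pieces split off become children of parent. */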

Change-Id: Ifdc8d29b2abda67478e0d41daf5b46b861802ae7
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 29 --
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 73 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  4 +-
 3 files changed, 30 insertions(+), 76 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index da2ff655812e..5c8b32873086 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -780,12 +780,10 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t 
best_loc,
 static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
 {
unsigned long addr = vmf->address;
-   struct list_head update_list;
-   struct svm_range *pmigrate;
struct vm_area_struct *vma;
+   enum svm_work_list_ops op;
struct svm_range *parent;
struct svm_range *prange;
-   struct svm_range *next;
struct kfd_process *p;
struct mm_struct *mm;
int r = 0;
@@ -816,31 +814,24 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
goto out_unlock_prange;
 
svm_range_lock(parent);
-   r = svm_range_split_by_granularity(p, mm, addr, parent, prange,
-  &pmigrate, &update_list);
+   r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
svm_range_unlock(parent);
if (r) {
pr_debug("failed %d to split range by granularity\n", r);
goto out_unlock_prange;
}
 
-   r = svm_migrate_vram_to_ram(pmigrate, mm);
+   r = svm_migrate_vram_to_ram(prange, mm);
if (r)
pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r,
-pmigrate, pmigrate->start, pmigrate->last);
-
-   list_for_each_entry_safe(prange, next, &update_list, update_list) {
-   enum svm_work_list_ops op;
-
-   /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
-   if (p->xnack_enabled && prange == pmigrate)
-   op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
-   else
-   op = SVM_OP_UPDATE_RANGE_NOTIFIER;
+prange, prange->start, prange->last);
 
-   svm_range_add_list_work(&p->svms, prange, mm, op);
-   list_del_init(&prange->update_list);
-   }
+   /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+   if (p->xnack_enabled && parent == prange)
+   op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
+   else
+   op = SVM_OP_UPDATE_RANGE_NOTIFIER;
+   svm_range_add_list_work(&p->svms, parent, mm, op);
 schedule_deferred_list_work(&p->svms);
 
 out_unlock_prange:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3a7030d9f331..fbcb1491e987 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1005,16 +1005,14 @@ void svm_range_add_child(struct svm_range *prange, 
struct mm_struct *mm,
  *
  * @p: the process with svms list
  * @mm: mm structure
+ * @addr: the vm fault address in pages, to split the prange
  * @parent: parent range if prange is from child list
  * @prange: prange to split
- * @addr: the vm fault address in pages, to split the prange
- * @pmigrate: output, the range to be migrated to ram
- * @update_list: output, the ranges to update notifier
  *
- * Collects small ranges that make up one migration granule and splits the 
first
- * and the last range at the granularity boundary
+ * Trims @prange to be a single aligned block of prange->granularity if
+ * possible. The head and tail are added to the child_list in @parent.
  *
- * Context: caller hold svms lock
+ * Context: caller must hold mmap_read_lock and prange->lock
  *
  * Return:
  * 0 - OK, otherwise error code
@@ -1022,75 +1020,42 @@ void svm_range_add_child(struct svm_range *prange, 
struct mm_struct *mm,
 int
 svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm,
   unsigned long addr, struct svm_range *parent,
-  struct svm_range *prange,
-  struct svm_range **pmigrate,
-  struct list_head *update_list)
+  struct svm_range *prange)
 {
-   struct svm_range *tail;
-   struct svm_range *new;
-   unsigned long start;
-   unsigned long 

[PATCH 40/44] drm/amdkfd: Return pdd from kfd_process_device_from_gduid

2021-03-22 Thread Felix Kuehling
This saves callers from looking up the pdd with a linear search later.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  8 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 -
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 51 +++-
 3 files changed, 29 insertions(+), 40 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 2ccfdb218198..ca44547c46a0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -865,11 +865,15 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct 
mm_struct *mm);
 int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
uint32_t gpu_idx, uint32_t *gpuid);
 int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
-int kfd_process_device_from_gpuidx(struct kfd_process *p,
-   uint32_t gpu_idx, struct kfd_dev **gpu);
 int kfd_process_gpuid_from_kgd(struct kfd_process *p,
   struct amdgpu_device *adev, uint32_t *gpuid,
   uint32_t *gpuidx);
+
+static inline struct kfd_process_device *kfd_process_device_from_gpuidx(
+   struct kfd_process *p, uint32_t gpuidx) {
+   return gpuidx < p->n_pdds ? p->pdds[gpuidx] : NULL;
+}
+
 void kfd_unref_process(struct kfd_process *p);
 int kfd_process_evict_queues(struct kfd_process *p);
 int kfd_process_restore_queues(struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 48ea6f393353..cc988bf6057d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1674,16 +1674,6 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, 
uint32_t gpu_id)
return -EINVAL;
 }
 
-int kfd_process_device_from_gpuidx(struct kfd_process *p,
-   uint32_t gpu_idx, struct kfd_dev **gpu)
-{
-   if (gpu_idx < p->n_pdds) {
-   *gpu = p->pdds[gpu_idx]->dev;
-   return 0;
-   }
-   return -EINVAL;
-}
-
 int
 kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
   uint32_t *gpuid, uint32_t *gpuidx)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c48fe2f276b9..081d6bb75b09 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -175,12 +175,11 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t 
*dma_addr,
 
 void svm_range_free_dma_mappings(struct svm_range *prange)
 {
-   struct kfd_dev *kfd_dev;
+   struct kfd_process_device *pdd;
dma_addr_t *dma_addr;
struct device *dev;
struct kfd_process *p;
uint32_t gpuidx;
-   int r;
 
p = container_of(prange->svms, struct kfd_process, svms);
 
@@ -189,12 +188,12 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
if (!dma_addr)
continue;
 
-   r = kfd_process_device_from_gpuidx(p, gpuidx, &kfd_dev);
-   if (r) {
+   pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+   if (!pdd) {
pr_debug("failed to find device idx %d\n", gpuidx);
-   return;
+   continue;
}
-   dev = &kfd_dev->pdev->dev;
+   dev = &pdd->dev->pdev->dev;
svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
kvfree(dma_addr);
prange->dma_addr[gpuidx] = NULL;
@@ -549,10 +548,9 @@ void svm_range_vram_node_free(struct svm_range *prange)
 struct amdgpu_device *
 svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id)
 {
+   struct kfd_process_device *pdd;
struct kfd_process *p;
-   struct kfd_dev *dev;
int32_t gpu_idx;
-   int r;
 
p = container_of(prange->svms, struct kfd_process, svms);
 
@@ -561,13 +559,13 @@ svm_range_get_adev_by_id(struct svm_range *prange, 
uint32_t gpu_id)
pr_debug("failed to get device by id 0x%x\n", gpu_id);
return NULL;
}
-   r = kfd_process_device_from_gpuidx(p, gpu_idx, &dev);
-   if (r < 0) {
+   pdd = kfd_process_device_from_gpuidx(p, gpu_idx);
+   if (!pdd) {
pr_debug("failed to get device by idx 0x%x\n", gpu_idx);
return NULL;
}
 
-   return (struct amdgpu_device *)dev->kgd;
+   return (struct amdgpu_device *)pdd->dev->kgd;
 }
 
 static int svm_range_validate_vram(struct svm_range *prange)
@@ -1120,7 +1118,6 @@ svm_range_unmap_from_gpus(struct svm_range *prange, 
unsigned long start,
struct dma_fence *fence = NULL;
struct amdgpu_device *adev;
struct kfd_process *p;
-   struct kfd_dev *dev;
uint32_t 

[PATCH 42/44] drm/amdkfd: Allow invalid pages in migration.src

2021-03-22 Thread Felix Kuehling
This can happen when system memory pages were never allocated. Skip them
during the migration. Zero-initialize the BO.
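
A reduced sketch of the new control flow in the hunk below (the bookkeeping
that flushes the partially built GART copy before skipping is left out here):

	for (i = 0; i < npages; i++) {
		if (!(migrate->src[i] & MIGRATE_PFN_VALID)) {
			/* never-allocated system page: nothing to copy; the
			 * zero-initialized VRAM BO already holds zeros */
			continue;
		}
		spage  = migrate_pfn_to_page(migrate->src[i]);
		src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_TO_DEVICE);
	}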

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 50 ++--
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 6748c5db64f5..87561b907543 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -310,7 +310,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
src = scratch;
dst = (uint64_t *)(scratch + npages);
 
-   r = svm_range_vram_node_new(adev, prange, false);
+   r = svm_range_vram_node_new(adev, prange, true);
if (r) {
pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
goto out;
@@ -328,17 +328,6 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange,
for (i = j = 0; i < npages; i++) {
struct page *spage;
 
-   spage = migrate_pfn_to_page(migrate->src[i]);
-   src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_TO_DEVICE);
-   r = dma_mapping_error(dev, src[i]);
-   if (r) {
-   pr_debug("failed %d dma_map_page\n", r);
-   goto out_free_vram_pages;
-   }
-
-   pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
-src[i] >> PAGE_SHIFT, page_to_pfn(spage));
-
dst[i] = vram_addr + (j << PAGE_SHIFT);
migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
svm_migrate_get_vram_page(prange, migrate->dst[i]);
@@ -346,6 +335,43 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange,
migrate->dst[i] = migrate_pfn(migrate->dst[i]);
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
 
+   if (migrate->src[i] & MIGRATE_PFN_VALID) {
+   spage = migrate_pfn_to_page(migrate->src[i]);
+   src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+   r = dma_mapping_error(dev, src[i]);
+   if (r) {
+   pr_debug("failed %d dma_map_page\n", r);
+   goto out_free_vram_pages;
+   }
+   } else {
+   if (j) {
+   j--;
+   r = svm_migrate_copy_memory_gart(
+   adev, src + i - j,
+   dst + i - j, j + 1,
+   FROM_RAM_TO_VRAM,
+   mfence);
+   if (r)
+   goto out_free_vram_pages;
+   offset = j;
+   vram_addr = (node->start + offset) << 
PAGE_SHIFT;
+   j = 0;
+   }
+   offset++;
+   vram_addr += PAGE_SIZE;
+   if (offset >= node->size) {
+   node++;
+   pr_debug("next node size 0x%llx\n", node->size);
+   vram_addr = node->start << PAGE_SHIFT;
+   offset = 0;
+   }
+   continue;
+   }
+
+   pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
+src[i] >> PAGE_SHIFT, page_to_pfn(spage));
+
if (j + offset >= node->size - 1 && i < npages - 1) {
r = svm_migrate_copy_memory_gart(adev, src + i - j,
 dst + i - j, j + 1,
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 41/44] drm/amdkfd: Remove broken deferred mapping

2021-03-22 Thread Felix Kuehling
Mapping without validation is broken. Also remove the saving of pages from
the last migration. They may be invalidated without an MMU notifier to
catch it, so let the next proper validation take care of it.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 14 --
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 13 ++---
 2 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 101d1f71db84..6748c5db64f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -539,18 +539,6 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
src = (uint64_t *)(scratch + npages);
dst = scratch;
 
-   /* FIXME: Is it legal to hold on to this page array? We don't have
-* proper references to the pages and we may not have an MMU notifier
-* set up for the range at this point that could invalidate it (if
-* it's a child range).
-*/
-   prange->pages_addr = kvmalloc_array(npages, sizeof(*prange->pages_addr),
-   GFP_KERNEL | __GFP_ZERO);
-   if (!prange->pages_addr) {
-   r = -ENOMEM;
-   goto out_oom;
-   }
-
for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
struct page *spage;
 
@@ -590,8 +578,6 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n",
  dst[i] >> PAGE_SHIFT, page_to_pfn(dpage));
 
-   prange->pages_addr[i] = page_to_pfn(dpage);
-
migrate->dst[i] = migrate_pfn(page_to_pfn(dpage));
migrate->dst[i] |= MIGRATE_PFN_LOCKED;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 081d6bb75b09..aedb2c84131e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1695,7 +1695,6 @@ static void
 svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
 {
struct mm_struct *mm = prange->work_item.mm;
-   int r;
 
switch (prange->work_item.op) {
case SVM_OP_NULL:
@@ -1718,11 +1717,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, 
struct svm_range *prange)
pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
 svms, prange, prange->start, prange->last);
svm_range_update_notifier_and_interval_tree(mm, prange);
-   /* FIXME: need to validate somewhere */
-   r = svm_range_map_to_gpus(prange, true);
-   if (r)
-   pr_debug("failed %d map 0x%p [0x%lx 0x%lx]\n",
-r, svms, prange->start, prange->last);
+   /* TODO: implement deferred validation and mapping */
break;
case SVM_OP_ADD_RANGE:
pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange,
@@ -1735,11 +1730,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, 
struct svm_range *prange)
 prange, prange->start, prange->last);
svm_range_add_to_svms(prange);
svm_range_add_notifier_locked(mm, prange);
-   /* FIXME: need to validate somewhere */
-   r = svm_range_map_to_gpus(prange, true);
-   if (r)
-   pr_debug("failed %d map 0x%p [0x%lx 0x%lx]\n",
-r, svms, prange->start, prange->last);
+   /* TODO: implement deferred validation and mapping */
break;
default:
WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange,
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 43/44] drm/amdkfd: Correct locking during migration and mapping

2021-03-22 Thread Felix Kuehling
This fixes potential race conditions between any code that validates and
maps SVM ranges and MMU notifiers. The whole sequence is encapsulated in
svm_range_validate_and_map. The page_addr and hmm_range structures are
not useful outside that function, so they were removed from
struct svm_range.

Validation of system memory pages before migration to VRAM is maintained
as an explicit workaround. It should not be needed, but without it there
are still some page locking deadlocks to be investigated.
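
As a rough illustration of the pattern being centralized (a sketch, not the
driver code): validate while the relevant locks are held, and only commit the
GPU mapping if no MMU notifier invalidation raced with the page walk, using the
generic mmu_interval_notifier sequence counters:

	unsigned long seq;
	int r;

retry:
	seq = mmu_interval_read_begin(&prange->notifier);
	/* ... hmm_range_fault() walk and BO validation happen here ... */

	svm_range_lock(prange);
	if (mmu_interval_read_retry(&prange->notifier, seq)) {
		/* an invalidation raced with the walk, start over */
		svm_range_unlock(prange);
		goto retry;
	}
	/* safe to update GPU page tables now */
	r = svm_range_map_to_gpus(prange, true);
	svm_range_unlock(prange);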

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |   6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 573 ---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   9 +-
 3 files changed, 310 insertions(+), 278 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 87561b907543..4d79d69d8aac 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -466,9 +466,6 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
migrate_vma_finalize(&migrate);
}
 
-   kvfree(prange->pages_addr);
-   prange->pages_addr = NULL;
-
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
svm_range_free_dma_mappings(prange);
 
@@ -513,6 +510,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t 
best_loc,
pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms,
 prange->start, prange->last, best_loc);
 
+   /* FIXME: workaround for page locking bug with invalid pages */
+   svm_range_prefault(prange, mm);
+
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index aedb2c84131e..0a6e28f0dcaf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -39,6 +39,16 @@
  */
 #define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING   2000
 
+struct svm_validate_context {
+   struct kfd_process *process;
+   struct svm_range *prange;
+   bool intr;
+   unsigned long bitmap[MAX_GPU_INSTANCE];
+   struct ttm_validate_buffer tv[MAX_GPU_INSTANCE+1];
+   struct list_head validate_list;
+   struct ww_acquire_ctx ticket;
+};
+
 static void svm_range_evict_svm_bo_worker(struct work_struct *work);
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
@@ -118,17 +128,14 @@ static void svm_range_remove_notifier(struct svm_range 
*prange)
 }
 
 static int
-svm_range_dma_map(struct device *dev, dma_addr_t **dma_addr,
- unsigned long *pages_addr, uint64_t npages)
+svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr,
+ unsigned long *hmm_pfns, uint64_t npages)
 {
enum dma_data_direction dir = DMA_BIDIRECTIONAL;
dma_addr_t *addr = *dma_addr;
struct page *page;
int i, r;
 
-   if (!pages_addr)
-   return 0;
-
if (!addr) {
addr = kvmalloc_array(npages, sizeof(*addr),
  GFP_KERNEL | __GFP_ZERO);
@@ -142,7 +149,7 @@ svm_range_dma_map(struct device *dev, dma_addr_t **dma_addr,
  "leaking dma mapping\n"))
dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
 
-   page = hmm_pfn_to_page(pages_addr[i]);
+   page = hmm_pfn_to_page(hmm_pfns[i]);
addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
r = dma_mapping_error(dev, addr[i]);
if (r) {
@@ -155,6 +162,37 @@ svm_range_dma_map(struct device *dev, dma_addr_t 
**dma_addr,
return 0;
 }
 
+static int
+svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
+ unsigned long *hmm_pfns)
+{
+   struct kfd_process *p;
+   uint32_t gpuidx;
+   int r;
+
+   p = container_of(prange->svms, struct kfd_process, svms);
+
+   for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+   struct kfd_process_device *pdd;
+   struct amdgpu_device *adev;
+
+   pr_debug("mapping to gpu idx 0x%x\n", gpuidx);
+   pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+   if (!pdd) {
+   pr_debug("failed to find device idx %d\n", gpuidx);
+   return -EINVAL;
+   }
+   adev = (struct amdgpu_device *)pdd->dev->kgd;
+
+   r = svm_range_dma_map_dev(adev->dev, &prange->dma_addr[gpuidx],
+ hmm_pfns, prange->npages);
+   if (r)
+   break;
+   }
+
+   return r;
+}
+
 void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
 unsigned long offset, unsigned long npages)
 {
@@ -207,7 +245,6 @@ static void svm_range_free(struct 

[PATCH 34/44] drm/amdkfd: Fix dma unmapping

2021-03-22 Thread Felix Kuehling
Don't dma_unmap in unmap_from_gpu. The dma_addr arrays are protected
by the migrate_mutex, which we cannot hold when unmapping in MMU
notifiers.

Instead dma_unmap and free dma_addr arrays whenever the pages_array
is invalidated: when migrating to VRAM and when re-validating RAM.

Freeing dma_addr arrays in svm_migrate_vma_to_vram fixes a bug where
the stale system memory pages were mapped instead of VRAM after a
migration.

When freeing dma_addr arrays, ignore the access bitmasks. Those masks
may have changed since the dma_addr arrays were allocated and mapped.

Change-Id: I01a6121c4c9908c1da4f303e87dcafd509fabc86
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  1 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 16 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  1 +
 3 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index cf71b98303c2..da2ff655812e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -444,6 +444,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
prange->pages_addr = NULL;
 
svm_range_dma_unmap(adev->dev, scratch, 0, npages);
+   svm_range_free_dma_mappings(prange);
 
 out_free:
kvfree(buf);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 6c46d43bf613..3eea8f87724d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -173,9 +173,8 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t 
*dma_addr,
}
 }
 
-static void svm_range_free_dma_mappings(struct svm_range *prange)
+void svm_range_free_dma_mappings(struct svm_range *prange)
 {
-   DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct kfd_dev *kfd_dev;
dma_addr_t *dma_addr;
struct device *dev;
@@ -184,13 +183,8 @@ static void svm_range_free_dma_mappings(struct svm_range 
*prange)
int r;
 
p = container_of(prange->svms, struct kfd_process, svms);
-   if (p->xnack_enabled)
-   bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
-   else
-   bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
- MAX_GPU_INSTANCE);
 
-   for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+   for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
dma_addr = prange->dma_addr[gpuidx];
if (!dma_addr)
continue;
@@ -292,6 +286,7 @@ svm_range_validate_ram(struct mm_struct *mm, struct 
svm_range *prange)
}
 
kvfree(prange->pages_addr);
+   svm_range_free_dma_mappings(prange);
prange->pages_addr = prange->hmm_range->hmm_pfns;
prange->hmm_range->hmm_pfns = NULL;
 
@@ -1192,11 +1187,6 @@ svm_range_unmap_from_gpus(struct svm_range *prange, 
unsigned long start,
if (r)
break;
}
-
-   svm_range_dma_unmap(adev->dev, prange->dma_addr[gpuidx],
-   start - prange->start,
-   last - start + 1);
-
amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev,
  p->pasid);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index fea9c63b5f95..b2ab920ab884 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -175,5 +175,6 @@ void svm_range_add_list_work(struct svm_range_list *svms,
 void schedule_deferred_list_work(struct svm_range_list *svms);
 void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
 unsigned long offset, unsigned long npages);
+void svm_range_free_dma_mappings(struct svm_range *prange);
 
 #endif /* KFD_SVM_H_ */
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 44/44] drm/amdkfd: Nested locking and invalidation of child ranges

2021-03-22 Thread Felix Kuehling
This allows validation of child ranges, so the GPU page fault handler
can be more lightweight.

Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  8 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 40 +---
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 4d79d69d8aac..cc8bf6438383 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -826,12 +826,18 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
}
 
mutex_lock(&parent->migrate_mutex);
+   if (prange != parent)
+   mutex_lock_nested(&prange->migrate_mutex, 1);
 
if (!prange->actual_loc)
goto out_unlock_prange;
 
svm_range_lock(parent);
+   if (prange != parent)
+   mutex_lock_nested(&prange->lock, 1);
r = svm_range_split_by_granularity(p, mm, addr, parent, prange);
+   if (prange != parent)
+   mutex_unlock(&prange->lock);
svm_range_unlock(parent);
if (r) {
pr_debug("failed %d to split range by granularity\n", r);
@@ -852,6 +858,8 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
schedule_deferred_list_work(&p->svms);
 
 out_unlock_prange:
+   if (prange != parent)
+   mutex_unlock(&prange->migrate_mutex);
 mutex_unlock(&parent->migrate_mutex);
 out:
 mutex_unlock(&p->svms.lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 0a6e28f0dcaf..bc1a9e9ba722 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1281,10 +1281,6 @@ void svm_range_unreserve_bos(struct svm_validate_context 
*ctx)
  * serialize concurrent migrations or validations of the same range, the
  * prange->migrate_mutex must be held.
  *
- * The range must be in the inverval tree and have an MMU notifier to catch
- * concurrent invalidations of the virtual address range. This means it cannot
- * be a child range.
- *
  * For VRAM ranges, the SVM BO must be allocated and valid (protected by its
  * eviction fence.
  *
@@ -1568,10 +1564,24 @@ svm_range_evict(struct svm_range *prange, struct 
mm_struct *mm,
schedule_delayed_work(&svms->restore_work,
msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
} else {
+   struct svm_range *pchild;
+   unsigned long s, l;
+
pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
 prange->svms, start, last);
svm_range_lock(prange);
-   svm_range_unmap_from_gpus(prange, start, last);
+   list_for_each_entry(pchild, &prange->child_list, child_list) {
+   mutex_lock_nested(&pchild->lock, 1);
+   s = max(start, pchild->start);
+   l = min(last, pchild->last);
+   if (l >= s)
+   svm_range_unmap_from_gpus(pchild, s, l);
+   mutex_unlock(&pchild->lock);
+   }
+   s = max(start, prange->start);
+   l = min(last, prange->last);
+   if (l >= s)
+   svm_range_unmap_from_gpus(prange, s, l);
svm_range_unlock(prange);
}
 
@@ -1927,6 +1937,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct 
svm_range *prange,
struct svm_range_list *svms;
struct svm_range *pchild;
struct kfd_process *p;
+   unsigned long s, l;
bool unmap_parent;
 
p = kfd_lookup_process_by_mm(mm);
@@ -1937,14 +1948,23 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct 
svm_range *prange,
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms,
 prange, prange->start, prange->last, start, last);
 
-   svm_range_unmap_from_gpus(prange, start, last);
-
svm_range_lock(prange);
 
unmap_parent = start <= prange->start && last >= prange->last;
 
-   list_for_each_entry(pchild, &prange->child_list, child_list)
+   list_for_each_entry(pchild, &prange->child_list, child_list) {
+   mutex_lock_nested(&pchild->lock, 1);
+   s = max(start, pchild->start);
+   l = min(last, pchild->last);
+   if (l >= s)
+   svm_range_unmap_from_gpus(pchild, s, l);
svm_range_unmap_split(mm, prange, pchild, start, last);
+   mutex_unlock(&pchild->lock);
+   }
+   s = max(start, prange->start);
+   l = min(last, prange->last);
+   if (l >= s)
+   svm_range_unmap_from_gpus(prange, s, l);
svm_range_unmap_split(mm, prange, prange, start, last);
 
svm_range_unlock(prange);
@@ -2142,12 +2162,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
goto out;
}
 
-   

[PATCH 32/44] drm/amdkfd: multiple gpu migrate vram to vram

2021-03-22 Thread Felix Kuehling
If a range is prefetched to a GPU while its actual location is another GPU,
or a GPU retry fault restores pages of a range whose actual location is a
different GPU, migrate the range from one GPU to the other.

Use system memory as a bridge, because the SDMA engine may not be able to
access another GPU's VRAM: use the SDMA of the source GPU to migrate to
system memory, then use the SDMA of the destination GPU to migrate from
system memory to the destination GPU.

Print out gpuid or gpuidx in debug messages.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 47 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |  4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 69 +---
 3 files changed, 96 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index cd89b38e3d9b..cf71b98303c2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -462,8 +462,9 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct 
svm_range *prange,
  * Return:
  * 0 - OK, otherwise error code
  */
-int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
-   struct mm_struct *mm)
+static int
+svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   struct mm_struct *mm)
 {
unsigned long addr, start, end;
struct vm_area_struct *vma;
@@ -723,6 +724,48 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, 
struct mm_struct *mm)
return r;
 }
 
+/**
+ * svm_migrate_vram_to_vram - migrate svm range from device to device
+ * @prange: range structure
+ * @best_loc: the device to migrate to
+ * @mm: process mm, use current->mm if NULL
+ *
+ * Context: Process context, caller hold mmap read lock, svms lock, prange lock
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc,
+struct mm_struct *mm)
+{
+   int r;
+
+   /*
+* TODO: for both devices with PCIe large bar or on same xgmi hive, skip
+* system memory as migration bridge
+*/
+
+   pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc);
+
+   r = svm_migrate_vram_to_ram(prange, mm);
+   if (r)
+   return r;
+
+   return svm_migrate_ram_to_vram(prange, best_loc, mm);
+}
+
+int
+svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc,
+   struct mm_struct *mm)
+{
+   if  (!prange->actual_loc)
+   return svm_migrate_ram_to_vram(prange, best_loc, mm);
+   else
+   return svm_migrate_vram_to_vram(prange, best_loc, mm);
+
+}
+
 /**
  * svm_migrate_to_ram - CPU page fault handler
  * @vmf: CPU vm fault vma, address
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 9949b55d3b6a..bc680619d135 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -37,8 +37,8 @@ enum MIGRATION_COPY_DIR {
FROM_VRAM_TO_RAM
 };
 
-int svm_migrate_ram_to_vram(struct svm_range *prange,  uint32_t best_loc,
-   struct mm_struct *mm);
+int svm_migrate_to_vram(struct svm_range *prange,  uint32_t best_loc,
+   struct mm_struct *mm);
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index f4b4fea06ac9..6c46d43bf613 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -363,8 +363,11 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
kref_put(&svm_bo->kref, svm_range_bo_release);
 }
 
-static bool svm_range_validate_svm_bo(struct svm_range *prange)
+static bool
+svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange)
 {
+   struct amdgpu_device *bo_adev;
+
mutex_lock(&prange->lock);
if (!prange->svm_bo) {
mutex_unlock(&prange->lock);
@@ -376,6 +379,22 @@ static bool svm_range_validate_svm_bo(struct svm_range 
*prange)
return true;
}
if (svm_bo_ref_unless_zero(prange->svm_bo)) {
+   /*
+* Migrate from GPU to GPU, remove range from source bo_adev
+* svm_bo range list, and return false to allocate svm_bo from
+* destination adev.
+*/
+   bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+   if (bo_adev != adev) {
+   mutex_unlock(&prange->lock);
+
+   spin_lock(&prange->svm_bo->list_lock);
+   list_del_init(&prange->svm_bo_list);
+   spin_unlock(&prange->svm_bo->list_lock);
+
+   

[PATCH 37/44] drm/amdkfd: Fix svm_bo_list locking in eviction worker

2021-03-22 Thread Felix Kuehling
Take the svm_bo_list spin lock when iterating over the range list during
eviction.
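
The fix uses the usual pop-one-entry-under-the-lock scheme; a cleaned-up sketch
of the loop added below (names as in the hunk):

	spin_lock(&svm_bo->list_lock);
	while (!list_empty(&svm_bo->range_list)) {
		struct svm_range *prange = list_first_entry(&svm_bo->range_list,
						struct svm_range, svm_bo_list);

		list_del_init(&prange->svm_bo_list);
		spin_unlock(&svm_bo->list_lock); /* drop it before sleeping work */

		/* ... migrate prange back to system memory (may sleep) ... */

		spin_lock(&svm_bo->list_lock);   /* retake before touching the list */
	}
	spin_unlock(&svm_bo->list_lock);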

Change-Id: I979d959e06c32e114cea8d151933b8ee7455627e
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 19 +--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 49aca4664411..3a7030d9f331 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2494,7 +2494,6 @@ int svm_range_schedule_evict_svm_bo(struct 
amdgpu_amdkfd_fence *fence)
 static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 {
struct svm_range_bo *svm_bo;
-   struct svm_range *prange;
struct kfd_process *p;
struct mm_struct *mm;
 
@@ -2511,13 +2510,29 @@ static void svm_range_evict_svm_bo_worker(struct 
work_struct *work)
return;
 
mmap_read_lock(mm);
-   list_for_each_entry(prange, _bo->range_list, svm_bo_list) {
+   spin_lock(_bo->list_lock);
+   while (!list_empty(_bo->range_list)) {
+   struct svm_range *prange =
+   list_first_entry(_bo->range_list,
+   struct svm_range, svm_bo_list);
+   list_del_init(>svm_bo_list);
+   spin_unlock(_bo->list_lock);
+
pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
 prange->start, prange->last);
+
mutex_lock(>migrate_mutex);
svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm);
+
+   mutex_lock(>lock);
+   prange->svm_bo = NULL;
+   mutex_unlock(>lock);
+
mutex_unlock(>migrate_mutex);
+
+   spin_lock(_bo->list_lock);
}
+   spin_unlock(_bo->list_lock);
mmap_read_unlock(mm);
 
dma_fence_signal(_bo->eviction_fence->base);
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 33/44] drm/amdkfd: Add SVM API support capability bits

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

The SVMAPISupported property is added to HSA_CAPABILITY; the value matches
HSA_CAPABILITY as defined in the Thunk spec:

SVMAPISupported: it will not be supported on older kernels that don't
have HMM or on systems with GFXv8 or older GPUs without support for
48-bit virtual addresses.

The CoherentHostAccess property is added to HSA_MEMORYPROPERTY; the value
matches HSA_MEMORYPROPERTY as defined in the Thunk spec:

CoherentHostAccess: whether or not device memory can be coherently
accessed by the host CPU.
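
For a userspace consumer, checking the new bit is a plain mask test. A hedged
example: the HSA_CAP_SVMAPI_SUPPORTED value is the bit freed from
HSA_CAP_RESERVED in this patch (0xf80f8000 to 0xf00f8000, i.e. 0x08000000),
how the capability word is obtained is outside this patch, and
node_supports_svm_api is a hypothetical helper:

	#define HSA_CAP_SVMAPI_SUPPORTED 0x08000000

	static int node_supports_svm_api(unsigned int capability)
	{
		return (capability & HSA_CAP_SVMAPI_SUPPORTED) != 0;
	}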

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |  6 ++
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 10 ++
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index cdef608db4f4..ab9fe854b4d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1419,6 +1419,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
dev->node_props.capability |= (adev->ras_features != 0) ?
HSA_CAP_RASEVENTNOTIFY : 0;
 
+   /* SVM API and HMM page migration work together, device memory type
+* is initalized to not 0 when page migration register device memory.
+*/
+   if (adev->kfd.dev->pgmap.type != 0)
+   dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
+
kfd_debug_print_topology();
 
if (!res)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index b8b68087bd7a..6bd6380b0ee0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -53,8 +53,9 @@
 #define HSA_CAP_ASIC_REVISION_MASK 0x03c00000
 #define HSA_CAP_ASIC_REVISION_SHIFT    22
 #define HSA_CAP_SRAM_EDCSUPPORTED  0x04000000
+#define HSA_CAP_SVMAPI_SUPPORTED   0x08000000
 
-#define HSA_CAP_RESERVED   0xf80f8000
+#define HSA_CAP_RESERVED   0xf00f8000
 
 struct kfd_node_properties {
uint64_t hive_id;
@@ -98,9 +99,10 @@ struct kfd_node_properties {
 #define HSA_MEM_HEAP_TYPE_GPU_LDS  4
 #define HSA_MEM_HEAP_TYPE_GPU_SCRATCH  5
 
-#define HSA_MEM_FLAGS_HOT_PLUGGABLE    0x00000001
-#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
-#define HSA_MEM_FLAGS_RESERVED 0xfffffffc
+#define HSA_MEM_FLAGS_HOT_PLUGGABLE    0x00000001
+#define HSA_MEM_FLAGS_NON_VOLATILE 0x00000002
+#define HSA_MEM_FLAGS_COHERENTHOSTACCESS   0x00000004
+#define HSA_MEM_FLAGS_RESERVED 0xfffffff8
 
 struct kfd_mem_properties {
struct list_headlist;
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 35/44] drm/amdkfd: Call mutex_destroy

2021-03-22 Thread Felix Kuehling
Destroy SVM-related mutexes correctly.

Change-Id: I85da30b1b0dce72433e6d3b507cb0b55b83b433c
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3eea8f87724d..0fbc037b06e3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -209,6 +209,8 @@ static void svm_range_free(struct svm_range *prange)
svm_range_vram_node_free(prange);
svm_range_free_dma_mappings(prange);
kvfree(prange->pages_addr);
+   mutex_destroy(&prange->lock);
+   mutex_destroy(&prange->migrate_mutex);
kfree(prange);
 }
 
@@ -2220,6 +2222,8 @@ void svm_range_list_fini(struct kfd_process *p)
 list_for_each_entry_safe(prange, next, &p->svms.list, list)
 svm_range_free(prange);
 
+   mutex_destroy(&p->svms.lock);
+
 pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, &p->svms);
 }
 
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 36/44] drm/amdkfd: Fix spurious restore failures

2021-03-22 Thread Felix Kuehling
Restore can appear to fail if the svms->evicted counter changes before
the function can acquire the necessary locks. Re-read the counter after
acquiring the lock to minimize the chances of having to reschedule the
worker.
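
The pattern is the usual cheap check outside the lock followed by an
authoritative re-read inside it; a generic sketch (names follow the hunk below):

	evicted_ranges = atomic_read(&svms->evicted_ranges); /* may be stale */
	if (!evicted_ranges)
		return;

	svm_range_list_lock_and_flush_work(svms, mm);
	mutex_lock(&svms->lock);

	/* re-read under the lock: this is the value the worker acts on */
	evicted_ranges = atomic_read(&svms->evicted_ranges);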

Change-Id: I236b912bddf106583be264abde2f6bd1a5d5a083
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 0fbc037b06e3..49aca4664411 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1429,6 +1429,8 @@ static void svm_range_restore_work(struct work_struct 
*work)
svm_range_list_lock_and_flush_work(svms, mm);
mutex_lock(&svms->lock);
 
+   evicted_ranges = atomic_read(&svms->evicted_ranges);
+
 list_for_each_entry(prange, &svms->list, list) {
 invalid = atomic_read(&prange->invalid);
if (!invalid)
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 31/44] drm/amdkfd: add svm range validate timestamp

2021-03-22 Thread Felix Kuehling
With xnack on, add a validate timestamp in order to handle GPU VM faults
from multiple GPUs.

If a GPU retry fault needs to migrate the range to the best restore
location, use the range's validate timestamp to record the system time
after the range has been restored and the GPU page table updated.

Because multiple pages of the same range can trigger multiple retry
faults, define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING as a period long
enough that pending retry faults may still arrive after the page table
update, and use it to skip duplicate retry faults for the same range.

If the difference between the current system timestamp and the range's
last validate timestamp is larger than AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING,
the retry fault came from another GPU, so continue with retry fault
recovery.
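
In simplified form, the check added below amounts to (times in microseconds,
sketch only):

	uint64_t now = ktime_to_us(ktime_get());

	if (now - prange->validate_timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
		/* the range was restored very recently; this is a stale retry
		 * fault for another page of the same range, so skip it */
		goto out_unlock_range;
	}
	/* otherwise handle the fault; validate_timestamp is refreshed after a
	 * successful svm_range_validate() */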

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  2 ++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 98c049dc3a63..f4b4fea06ac9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -34,6 +34,11 @@
 
 #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
 
+/* Long enough to ensure no retry fault comes after svm range is restored and
+ * page table is updated.
+ */
+#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING   2000
+
 static void svm_range_evict_svm_bo_worker(struct work_struct *work);
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
@@ -246,6 +251,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(&prange->deferred_list);
INIT_LIST_HEAD(&prange->child_list);
atomic_set(&prange->invalid, 0);
+   prange->validate_timestamp = ktime_to_us(ktime_get());
mutex_init(&prange->migrate_mutex);
mutex_init(&prange->lock);
svm_range_set_default_attributes(&prange->preferred_loc,
@@ -578,19 +584,25 @@ static int svm_range_validate_vram(struct svm_range 
*prange)
 static int
 svm_range_validate(struct mm_struct *mm, struct svm_range *prange)
 {
+   struct kfd_process *p;
int r;
 
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] actual loc 0x%x\n",
 prange->svms, prange, prange->start, prange->last,
 prange->actual_loc);
 
+   p = container_of(prange->svms, struct kfd_process, svms);
+
if (!prange->actual_loc)
r = svm_range_validate_ram(mm, prange);
else
r = svm_range_validate_vram(prange);
 
-   pr_debug("svms 0x%p [0x%lx 0x%lx] ret %d invalid %d\n", prange->svms,
-prange->start, prange->last, r, atomic_read(&prange->invalid));
+   if (!r)
+   prange->validate_timestamp = ktime_to_us(ktime_get());
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx] ret %d\n", prange->svms,
+prange->start, prange->last, r);
 
return r;
 }
@@ -2086,6 +2098,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
struct svm_range_list *svms;
struct svm_range *prange;
struct kfd_process *p;
+   uint64_t timestamp;
int32_t best_loc;
int r = 0;
 
@@ -2119,6 +2132,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, 
unsigned int pasid,
}
 
	mutex_lock(&prange->migrate_mutex);
+   timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp;
+   /* skip duplicate vm fault on different pages of same range */
+   if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) {
+   pr_debug("svms 0x%p [0x%lx %lx] already restored\n",
+svms, prange->start, prange->last);
+   goto out_unlock_range;
+   }
 
best_loc = svm_range_best_restore_location(prange, adev);
if (best_loc == -1) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index fed28e487878..fea9c63b5f95 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -87,6 +87,7 @@ struct svm_work_list_item {
  * @actual_loc: the actual location, 0 for CPU, or GPU id
  * @granularity:migration granularity, log2 num pages
  * @invalid:not 0 means cpu page table is invalidated
+ * @validate_timestamp: system timestamp when range is validated
  * @notifier:   register mmu interval notifier
  * @work_item:  deferred work item information
  * @deferred_list: list header used to add range to deferred list
@@ -125,6 +126,7 @@ struct svm_range {
	uint32_t			actual_loc;
	uint8_t				granularity;
	atomic_t			invalid;
+	uint64_t			validate_timestamp;
	struct mmu_interval_notifier	notifier;
	struct svm_work_list_item	work_item;
	struct list_head		deferred_list;
-- 
2.31.0


[PATCH 39/44] drm/amdkfd: Point out several race conditions

2021-03-22 Thread Felix Kuehling
There are several race conditions with XNACK enabled. For now, just add
some FIXME comments with ideas on how to fix them.

Change-Id: If0abab6dcb8f4e95c9d8820f6c569263eda29a89
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  5 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 21 -
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 5c8b32873086..101d1f71db84 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -539,6 +539,11 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct 
svm_range *prange,
src = (uint64_t *)(scratch + npages);
dst = scratch;
 
+   /* FIXME: Is it legal to hold on to this page array? We don't have
+* proper references to the pages and we may not have an MMU notifier
+* set up for the range at this point that could invalidate it (if
+* it's a child range).
+*/
prange->pages_addr = kvmalloc_array(npages, sizeof(*prange->pages_addr),
GFP_KERNEL | __GFP_ZERO);
if (!prange->pages_addr) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index fbcb1491e987..c48fe2f276b9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1727,7 +1727,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, 
struct svm_range *prange)
pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n",
 svms, prange, prange->start, prange->last);
svm_range_update_notifier_and_interval_tree(mm, prange);
-
+   /* FIXME: need to validate somewhere */
r = svm_range_map_to_gpus(prange, true);
if (r)
pr_debug("failed %d map 0x%p [0x%lx 0x%lx]\n",
@@ -1744,6 +1744,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, 
struct svm_range *prange)
 prange, prange->start, prange->last);
svm_range_add_to_svms(prange);
svm_range_add_notifier_locked(mm, prange);
+   /* FIXME: need to validate somewhere */
r = svm_range_map_to_gpus(prange, true);
if (r)
pr_debug("failed %d map 0x%p [0x%lx 0x%lx]\n",
@@ -2068,6 +2069,14 @@ svm_range_best_restore_location(struct svm_range *prange,
return -1;
 }
 
+/* FIXME: This function can race with MMU notifiers. MMU notifiers can
+ * invalidate the page addresses concurrently, so we may end up mapping
+ * invalid addresses here. We cannot hold the prange->lock (held in MMU
+ * notifier) while updating page tables because of lock dependencies,
+ * as SDMA page table updates need reservation locks. Only unmapping
+ * works without reservations. May need to hold the mmap_write_lock to
+ * prevent concurrent MMU notifiers.
+ */
 int
 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
uint64_t addr)
@@ -2592,6 +2601,16 @@ svm_range_set_attr(struct kfd_process *p, uint64_t 
start, uint64_t size,
continue;
}
 
+   /* FIXME: With xnack on, this can race with MMU notifiers.
+* They may invalidate page addresses before we map them.
+* Then we end up mapping invalid addresses in the GPU page
+* table. May need to find a way to still hold the mmap write
+* for map_to_gpus but drop it for validate to allow
+* concurrent evictions. This will lead to some retry logic
+* and the need to protect the update list differently.
+* Maybe factor migration and validation into a common helper
+* function shared with the GPU page fault handler.
+*/
r = svm_range_validate(mm, prange);
if (r) {
pr_debug("failed %d to validate svm range\n", r);
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 30/44] drm/amdkfd: refine migration policy with xnack on

2021-03-22 Thread Felix Kuehling
With xnack on, the GPU VM fault handler decides the best restore
location, migrates the range to that location and updates the GPU
mapping to recover from the GPU VM fault.
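
Roughly, the recovery flow looks like the sketch below (illustrative
only; function names are from this series, error handling and the
VRAM-to-RAM direction via svm_migrate_vram_to_ram are omitted):

	/* GPU retry fault recovery with xnack on */
	best_loc = svm_range_best_restore_location(prange, adev);
	if (best_loc && best_loc != prange->actual_loc)
		/* move the data to the chosen GPU first */
		svm_migrate_ram_to_vram(prange, best_loc, mm);

	svm_range_validate(mm, prange);      /* get valid pages at best_loc */
	svm_range_map_to_gpus(prange, true); /* then fix the GPU page table */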

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |   7 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |  16 +++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 150 ---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   1 +
 6 files changed, 157 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 8ce3ff56a0ce..cd89b38e3d9b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -455,18 +455,19 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange,
  * svm_migrate_ram_to_vram - migrate svm range from system to device
  * @prange: range structure
  * @best_loc: the device to migrate to
+ * @mm: the process mm structure
  *
  * Context: Process context, caller hold mmap read lock, svms lock, prange lock
  *
  * Return:
  * 0 - OK, otherwise error code
  */
-int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc)
+int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
+   struct mm_struct *mm)
 {
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
-   struct mm_struct *mm;
int r = 0;
 
if (prange->actual_loc == best_loc) {
@@ -487,8 +488,6 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc)
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
 
-   mm = current->mm;
-
for (addr = start; addr < end;) {
unsigned long next;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
index 95fd7b21791f..9949b55d3b6a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
@@ -37,7 +37,8 @@ enum MIGRATION_COPY_DIR {
FROM_VRAM_TO_RAM
 };
 
-int svm_migrate_ram_to_vram(struct svm_range *prange,  uint32_t best_loc);
+int svm_migrate_ram_to_vram(struct svm_range *prange,  uint32_t best_loc,
+   struct mm_struct *mm);
 int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm);
 unsigned long
 svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 755c0517867d..2ccfdb218198 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -867,6 +867,9 @@ int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
 int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
 int kfd_process_device_from_gpuidx(struct kfd_process *p,
uint32_t gpu_idx, struct kfd_dev **gpu);
+int kfd_process_gpuid_from_kgd(struct kfd_process *p,
+  struct amdgpu_device *adev, uint32_t *gpuid,
+  uint32_t *gpuidx);
 void kfd_unref_process(struct kfd_process *p);
 int kfd_process_evict_queues(struct kfd_process *p);
 int kfd_process_restore_queues(struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index c8479f6bd68c..48ea6f393353 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1684,6 +1684,22 @@ int kfd_process_device_from_gpuidx(struct kfd_process *p,
return -EINVAL;
 }
 
+int
+kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev,
+  uint32_t *gpuid, uint32_t *gpuidx)
+{
+   struct kgd_dev *kgd = (struct kgd_dev *)adev;
+   int i;
+
+   for (i = 0; i < p->n_pdds; i++)
+   if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) {
+   *gpuid = p->pdds[i]->dev->id;
+   *gpuidx = i;
+   return 0;
+   }
+   return -EINVAL;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
int ret;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 3a7b842b362c..98c049dc3a63 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -179,8 +179,11 @@ static void svm_range_free_dma_mappings(struct svm_range 
*prange)
int r;
 
p = container_of(prange->svms, struct kfd_process, svms);
-   bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
- MAX_GPU_INSTANCE);
+   if (p->xnack_enabled)
+   

[PATCH 12/44] drm/amdkfd: add xnack enabled flag to kfd_process

2021-03-22 Thread Felix Kuehling
From: Alex Sierra 

This flag is used when deciding how to handle a CPU page table
invalidation: either evict the user queues or rely on GPU page faults.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  4 +++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 36 
 2 files changed, 40 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7d70af26b5c7..d66430740e52 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -824,6 +824,8 @@ struct kfd_process {
/* shared virtual memory registered by this process */
struct svm_range_list svms;
bool svm_disabled;
+
+   bool xnack_enabled;
 };
 
 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
@@ -877,6 +879,8 @@ struct kfd_process_device 
*kfd_get_process_device_data(struct kfd_dev *dev,
 struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
struct kfd_process *p);
 
+bool kfd_process_xnack_supported(struct kfd_process *p);
+
 int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
  struct vm_area_struct *vma);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 321895d7555a..c8479f6bd68c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1193,6 +1193,39 @@ void kfd_process_set_trap_handler(struct 
qcm_process_device *qpd,
}
 }
 
+bool kfd_process_xnack_supported(struct kfd_process *p)
+{
+   int i;
+
+   /* On most GFXv9 GPUs, the retry mode in the SQ must match the
+* boot time retry setting. Mixing processes with different
+* XNACK/retry settings can hang the GPU.
+*
+* Different GPUs can have different noretry settings depending
+* on HW bugs or limitations. We need to find at least one
+* XNACK mode for this process that's compatible with all GPUs.
+* Fortunately GPUs with retry enabled (noretry=0) can run code
+* built for XNACK-off. On GFXv9 it may perform slower.
+*
+* Therefore applications built for XNACK-off can always be
+* supported and will be our fallback if any GPU does not
+* support retry.
+*/
+   for (i = 0; i < p->n_pdds; i++) {
+   struct kfd_dev *dev = p->pdds[i]->dev;
+
+   /* Only consider GFXv9 and higher GPUs. Older GPUs don't
+* support the SVM APIs and don't need to be considered
+* for the XNACK mode selection.
+*/
+   if (dev->device_info->asic_family >= CHIP_VEGA10 &&
+   dev->noretry)
+   return false;
+   }
+
+   return true;
+}
+
 /*
  * On return the kfd_process is fully operational and will be freed when the
  * mm is released
@@ -1232,6 +1265,9 @@ static struct kfd_process *create_process(const struct 
task_struct *thread)
if (err != 0)
goto err_init_apertures;
 
+   /* Check XNACK support after PDDs are created in kfd_init_apertures */
+   process->xnack_enabled = kfd_process_xnack_supported(process);
+
err = svm_range_list_init(process);
if (err)
goto err_init_svm_range_list;
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 24/44] drm/amdkfd: add svm_bo reference for eviction fence

2021-03-22 Thread Felix Kuehling
From: Alex Sierra 

[why]
As part of the SVM functionality, the eviction mechanism used for
SVM_BOs is different. This mechanism uses one eviction fence per prange,
instead of one fence per kfd_process.

[how]
Add an svm_bo reference to amdgpu_amdkfd_fence to allow differentiating
between SVM BO and regular BO evictions. This also includes the changes
to set the reference at the fence creation call.
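
For example (a sketch, not part of the diff), a per-prange SVM BO
eviction fence would pass the new parameter, while process eviction
fences keep passing NULL; the eviction_fence member shown here is
illustrative:

	/* per-prange eviction fence, tagged with the svm_bo it protects */
	svm_bo->eviction_fence =
		amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
					   mm, svm_bo);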

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h   | 4 +++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 5 +++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 --
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 14f68c028126..beb2ef070a0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence {
struct mm_struct *mm;
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
+   struct svm_range_bo *svm_bo;
 };
 
 struct amdgpu_kfd_dev {
@@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct 
amdgpu_device *adev,
int queue_bit);
 
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-   struct 
mm_struct *mm);
+   struct mm_struct *mm,
+   struct svm_range_bo *svm_bo);
 #if IS_ENABLED(CONFIG_HSA_AMD)
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 5af464933976..53559643c712 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -60,7 +60,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
  */
 
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
-  struct mm_struct *mm)
+   struct mm_struct *mm,
+   struct svm_range_bo *svm_bo)
 {
struct amdgpu_amdkfd_fence *fence;
 
@@ -73,7 +74,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 
context,
fence->mm = mm;
get_task_comm(fence->timeline_name, current);
	spin_lock_init(&fence->lock);
-
+   fence->svm_bo = svm_bo;
	dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
		       context, atomic_inc_return(&fence_seq));
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e93850f2f3b1..9af644f256e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -970,7 +970,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void 
**process_info,
 
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
-  current->mm);
+  current->mm,
+  NULL);
if (!info->eviction_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
@@ -2188,7 +2189,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, 
struct dma_fence **ef)
 */
new_fence = amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
-   process_info->eviction_fence->mm);
+   process_info->eviction_fence->mm,
+   NULL);
if (!new_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 13/44] drm/amdkfd: add ioctl to configure and query xnack retries

2021-03-22 Thread Felix Kuehling
From: Alex Sierra 

XNACK retries are used for page fault recovery. Some AMD chip
families support continuously retrying while page table entries are
invalid. The driver must handle the page fault interrupt and fill in a
valid entry for the GPU to continue.

This ioctl allows enabling/disabling XNACK retries per KFD process.
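
A hedged userspace sketch (assuming the usual /dev/kfd device node;
error handling trimmed) showing how a process could query and then
enable the mode with the new ioctl:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	int main(void)
	{
		struct kfd_ioctl_set_xnack_mode_args args;
		int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);

		if (fd < 0)
			return 1;

		args.xnack_enabled = -1;	/* negative value: query only */
		if (!ioctl(fd, AMDKFD_IOC_SET_XNACK_MODE, &args))
			printf("current xnack mode: %d\n", args.xnack_enabled);

		args.xnack_enabled = 1;		/* try to enable retry faults */
		if (ioctl(fd, AMDKFD_IOC_SET_XNACK_MODE, &args))
			perror("enabling xnack (unsupported GPU or queues exist?)");

		close(fd);
		return 0;
	}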

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 28 +++
 include/uapi/linux/kfd_ioctl.h   | 43 +++-
 2 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 9511826ac8ae..63eee7ef3355 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1743,6 +1743,31 @@ static int kfd_ioctl_smi_events(struct file *filep,
	return kfd_smi_event_open(dev, &args->anon_fd);
 }
 
+static int kfd_ioctl_set_xnack_mode(struct file *filep,
+   struct kfd_process *p, void *data)
+{
+   struct kfd_ioctl_set_xnack_mode_args *args = data;
+   int r = 0;
+
+   mutex_lock(&p->mutex);
+   if (args->xnack_enabled >= 0) {
+   if (!list_empty(&p->pqm.queues)) {
+   pr_debug("Process has user queues running\n");
+   mutex_unlock(&p->mutex);
+   return -EBUSY;
+   }
+   if (args->xnack_enabled && !kfd_process_xnack_supported(p))
+   r = -EPERM;
+   else
+   p->xnack_enabled = args->xnack_enabled;
+   } else {
+   args->xnack_enabled = p->xnack_enabled;
+   }
+   mutex_unlock(&p->mutex);
+
+   return r;
+}
+
 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 {
struct kfd_ioctl_svm_args *args = data;
@@ -1869,6 +1894,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
kfd_ioctl_smi_events, 0),
 
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
+
+   AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE,
+   kfd_ioctl_set_xnack_mode, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 247b57baa94f..3cb5b5dd9f77 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -597,6 +597,44 @@ struct kfd_ioctl_svm_args {
struct kfd_ioctl_svm_attribute attrs[0];
 };
 
+/**
+ * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode
+ *
+ * @xnack_enabled:   [in/out] Whether to enable XNACK mode for this process
+ *
+ * @xnack_enabled indicates whether recoverable page faults should be
+ * enabled for the current process. 0 means disabled, positive means
+ * enabled, negative means leave unchanged. If enabled, virtual address
+ * translations on GFXv9 and later AMD GPUs can return XNACK and retry
+ * the access until a valid PTE is available. This is used to implement
+ * device page faults.
+ *
+ * On output, @xnack_enabled returns the (new) current mode (0 or
+ * positive). Therefore, a negative input value can be used to query
+ * the current mode without changing it.
+ *
+ * The XNACK mode fundamentally changes the way SVM managed memory works
+ * in the driver, with subtle effects on application performance and
+ * functionality.
+ *
+ * Enabling XNACK mode requires shader programs to be compiled
+ * differently. Furthermore, not all GPUs support changing the mode
+ * per-process. Therefore changing the mode is only allowed while no
+ * user mode queues exist in the process. This ensure that no shader
+ * code is running that may be compiled for the wrong mode. And GPUs
+ * that cannot change to the requested mode will prevent the XNACK
+ * mode from occurring. All GPUs used by the process must be in the
+ * same XNACK mode.
+ *
+ * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM.
+ * Therefore those GPUs are not considered for the XNACK mode switch.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+struct kfd_ioctl_set_xnack_mode_args {
+   __s32 xnack_enabled;
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)  _IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)   _IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -699,7 +737,10 @@ struct kfd_ioctl_svm_args {
 
 #define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args)
 
+#define AMDKFD_IOC_SET_XNACK_MODE  \
+   AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args)
+
 #define AMDKFD_COMMAND_START   0x01
-#define AMDKFD_COMMAND_END 0x21
+#define AMDKFD_COMMAND_END 0x22
 
 #endif
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 15/44] drm/amdkfd: validate vram svm range from TTM

2021-03-22 Thread Felix Kuehling
If the svm range prefetch location is not zero, use TTM to allocate
amdgpu_bo VRAM nodes to validate the svm range, then map the VRAM nodes
to the GPUs.

Use an offset to sub-allocate from the same amdgpu_bo, to handle
overlapping VRAM ranges while adding a new range or unmapping a range.

svm_bo has a reference count to track the shared ranges. If all ranges
sharing the amdgpu_bo are migrated back to RAM, the reference count
drops to 0, the amdgpu_bo is released, and svm_bo is set to NULL in all
ranges.

To migrate a range from RAM back to VRAM, allocate the same amdgpu_bo
with the previous offset if the range still has an svm_bo.

If a prange is migrated to VRAM, no CPU mapping exists, so process exit
will not get an unmap callback for this prange to free the prange and
the svm_bo. Free outstanding pranges from the svms list before the
process is freed in svm_range_list_fini.
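
The reference-count lifecycle, sketched with the helpers added by this
patch (simplified; locking and the surrounding allocation code are
omitted, and the exact call sites are in the diff below):

	/* first range backed by the BO takes the initial reference */
	prange->svm_bo = svm_range_bo_new();

	/* an overlapping range re-uses the same BO at a different offset */
	new_prange->svm_bo = svm_range_bo_ref(prange->svm_bo);

	/* each range migrated back to RAM drops its reference; when the
	 * last one is gone, svm_range_bo_release() frees the amdgpu_bo
	 * and clears svm_bo in any ranges still on its list
	 */
	svm_range_bo_unref(prange->svm_bo);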

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 338 +--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  36 +++
 2 files changed, 355 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 55828d6fbea9..9b1c5aa86f4a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -44,7 +44,8 @@ static const struct mmu_interval_notifier_ops 
svm_range_mn_ops = {
  * svm_range_unlink - unlink svm_range from lists and interval tree
  * @prange: svm range structure to be removed
  *
- * Remove the svm range from svms interval tree and link list
+ * Remove the svm_range from the svms and svm_bo lists and the svms
+ * interval tree.
  *
  * Context: The caller must hold svms->lock
  */
@@ -53,6 +54,12 @@ static void svm_range_unlink(struct svm_range *prange)
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
 prange, prange->start, prange->last);
 
+   if (prange->svm_bo) {
+   spin_lock(&prange->svm_bo->list_lock);
+   list_del(&prange->svm_bo_list);
+   spin_unlock(&prange->svm_bo->list_lock);
+   }
+
	list_del(&prange->list);
	if (prange->it_node.start != 0 && prange->it_node.last != 0)
		interval_tree_remove(&prange->it_node, &prange->svms->objects);
@@ -193,6 +200,7 @@ static void svm_range_free(struct svm_range *prange)
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 prange->start, prange->last);
 
+   svm_range_vram_node_free(prange);
svm_range_free_dma_mappings(prange);
kvfree(prange->pages_addr);
kfree(prange);
@@ -227,9 +235,11 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
	INIT_LIST_HEAD(&prange->update_list);
	INIT_LIST_HEAD(&prange->remove_list);
	INIT_LIST_HEAD(&prange->insert_list);
+   INIT_LIST_HEAD(&prange->svm_bo_list);
	INIT_LIST_HEAD(&prange->deferred_list);
	INIT_LIST_HEAD(&prange->child_list);
	atomic_set(&prange->invalid, 0);
+   mutex_init(&prange->lock);
	svm_range_set_default_attributes(&prange->preferred_loc,
					 &prange->prefetch_loc,
					 &prange->granularity, &prange->flags);
@@ -277,14 +287,244 @@ svm_range_validate_ram(struct mm_struct *mm, struct 
svm_range *prange)
return 0;
 }
 
+static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo)
+{
+   if (!svm_bo || !kref_get_unless_zero(&svm_bo->kref))
+   return false;
+
+   return true;
+}
+
+static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo)
+{
+   if (svm_bo)
+   kref_get(&svm_bo->kref);
+
+   return svm_bo;
+}
+
+static void svm_range_bo_release(struct kref *kref)
+{
+   struct svm_range_bo *svm_bo;
+
+   svm_bo = container_of(kref, struct svm_range_bo, kref);
+   spin_lock(&svm_bo->list_lock);
+   while (!list_empty(&svm_bo->range_list)) {
+   struct svm_range *prange =
+   list_first_entry(&svm_bo->range_list,
+   struct svm_range, svm_bo_list);
+   /* list_del_init tells a concurrent svm_range_vram_node_new when
+* it's safe to reuse the svm_bo pointer and svm_bo_list head.
+*/
+   list_del_init(&prange->svm_bo_list);
+   spin_unlock(&svm_bo->list_lock);
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms,
+prange->start, prange->last);
+   mutex_lock(&prange->lock);
+   prange->svm_bo = NULL;
+   mutex_unlock(&prange->lock);
+
+   spin_lock(&svm_bo->list_lock);
+   }
+   spin_unlock(&svm_bo->list_lock);
+
+   amdgpu_bo_unref(&svm_bo->bo);
+   kfree(svm_bo);
+}
+
+static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
+{
+   if (!svm_bo)
+   return;
+
+   kref_put(&svm_bo->kref, svm_range_bo_release);
+}
+
+static struct svm_range_bo *svm_range_bo_new(void)
+{
+   struct svm_range_bo *svm_bo;
+
+   svm_bo = kzalloc(sizeof(*svm_bo), 

[PATCH 25/44] drm/amdgpu: add param bit flag to create SVM BOs

2021-03-22 Thread Felix Kuehling
From: Alex Sierra 

Add CREATE_SVM_BO define bit for SVM BOs.
Another define flag was moved to concentrate these
KFD type flags in one include file.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h   | 4 
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 9af644f256e9..bc38de8c5c38 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -33,9 +33,6 @@
 #include 
 #include "amdgpu_xgmi.h"
 
-/* BO flag to indicate a KFD userptr BO */
-#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
-
 /* Userptr restore delay, just long enough to allow consecutive VM
  * changes to accumulate
  */
@@ -217,7 +214,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo 
*bo)
u32 domain = bo->preferred_domains;
bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
 
-   if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+   if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
domain = AMDGPU_GEM_DOMAIN_CPU;
sg = false;
}
@@ -1278,7 +1275,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
bo->kfd_bo = *mem;
(*mem)->bo = bo;
if (user_addr)
-   bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
+   bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
 
(*mem)->va = va;
(*mem)->domain = domain;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 25411b2c4dd9..b07903d317e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -37,6 +37,10 @@
 #define AMDGPU_BO_INVALID_OFFSET   LONG_MAX
 #define AMDGPU_BO_MAX_PLACEMENTS   3
 
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_CREATE_USERPTR_BO(1ULL << 63)
+#define AMDGPU_AMDKFD_CREATE_SVM_BO(1ULL << 62)
+
 #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
 
 struct amdgpu_bo_param {
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 27/44] drm/amdgpu: svm bo enable_signal call condition

2021-03-22 Thread Felix Kuehling
From: Alex Sierra 

[why]
To support the svm_bo eviction mechanism.

[how]
If the BO was created with the AMDGPU_AMDKFD_CREATE_SVM_BO flag set,
the enable_signal callback will be called inside amdgpu_evict_flags.
This also causes gutting of the BO by removing all placements,
so that TTM won't actually do an eviction. Instead it will discard
the memory held by the BO. This is needed for HMM migration to user
mode system memory pages.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index aca5a29f6d2a..2d80eb3fa571 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -111,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object 
*bo,
}
 
abo = ttm_to_amdgpu_bo(bo);
+   if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
+   struct dma_fence *fence;
+   struct dma_resv *resv = &bo->base._resv;
+
+   rcu_read_lock();
+   fence = rcu_dereference(resv->fence_excl);
+   if (fence && !fence->ops->signaled)
+   dma_fence_enable_sw_signaling(fence);
+
+   placement->num_placement = 0;
+   placement->num_busy_placement = 0;
+   rcu_read_unlock();
+   return;
+   }
switch (bo->mem.mem_type) {
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 28/44] drm/amdgpu: add svm_bo eviction to enable_signal cb

2021-03-22 Thread Felix Kuehling
From: Alex Sierra 

Add support for svm_bo fence eviction to the
amdgpu_amdkfd_fence.enable_signal callback.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 53559643c712..1fe233cddb20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include "amdgpu_amdkfd.h"
+#include "kfd_svm.h"
 
 static const struct dma_fence_ops amdkfd_fence_ops;
 static atomic_t fence_seq = ATOMIC_INIT(0);
@@ -123,9 +124,13 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence 
*f)
if (dma_fence_is_signaled(f))
return true;
 
-   if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
-   return true;
-
+   if (!fence->svm_bo) {
+   if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
+   return true;
+   } else {
+   if (!svm_range_schedule_evict_svm_bo(fence))
+   return true;
+   }
return false;
 }
 
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 20/44] drm/amdkfd: invalidate tables on page retry fault

2021-03-22 Thread Felix Kuehling
GPU page tables are invalidated by unmapping the prange directly at
the MMU notifier when page fault retry is enabled through the
amdgpu_noretry global parameter. The page table restore is performed
in the page fault handler.

If xnack is on, we update GPU mappings after migration to avoid
unnecessary GPUVM faults.
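
A condensed sketch of the resulting split in svm_range_evict (the full
version is in the diff below):

	if (!p->xnack_enabled) {
		/* no retry faults: stop the queues and schedule a
		 * delayed restore, as before
		 */
		kgd2kfd_quiesce_mm(mm);
		schedule_delayed_work(&svms->restore_work,
			msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
	} else {
		/* retry faults: just unmap; the page fault handler
		 * restores the mapping on the next access
		 */
		svm_range_unmap_from_gpus(prange, start, last);
	}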

Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c |  6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 71 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  4 +-
 3 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 1243cf02f872..8ce3ff56a0ce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -789,7 +789,11 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
list_for_each_entry_safe(prange, next, _list, update_list) {
enum svm_work_list_ops op;
 
-   op = SVM_OP_UPDATE_RANGE_NOTIFIER;
+   /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+   if (p->xnack_enabled && prange == pmigrate)
+   op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP;
+   else
+   op = SVM_OP_UPDATE_RANGE_NOTIFIER;
 
	svm_range_add_list_work(&p->svms, prange, mm, op);
	list_del_init(&prange->update_list);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 69241ed4a377..fb8ca844d9bd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1006,6 +1006,13 @@ svm_range_split_by_granularity(struct kfd_process *p, 
struct mm_struct *mm,
 
*pmigrate = new;
 
+   /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */
+   if (p->xnack_enabled && (*pmigrate)->work_item.op == SVM_OP_ADD_RANGE) {
+   (*pmigrate)->work_item.op = SVM_OP_ADD_RANGE_AND_MAP;
+   pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n",
+*pmigrate, (*pmigrate)->start, (*pmigrate)->last,
+SVM_OP_ADD_RANGE_AND_MAP);
+   }
return 0;
 }
 
@@ -1407,25 +1414,38 @@ svm_range_evict(struct svm_range *prange, struct 
mm_struct *mm,
unsigned long start, unsigned long last)
 {
struct svm_range_list *svms = prange->svms;
-   int invalid, evicted_ranges;
+   struct kfd_process *p;
int r = 0;
 
-   invalid = atomic_inc_return(&prange->invalid);
-   evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
-   if (evicted_ranges != 1)
-   return r;
+   p = container_of(svms, struct kfd_process, svms);
 
-   pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
-prange->svms, prange->start, prange->last);
+   pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+svms, prange->start, prange->last, start, last);
 
-   /* First eviction, stop the queues */
-   r = kgd2kfd_quiesce_mm(mm);
-   if (r)
-   pr_debug("failed to quiesce KFD\n");
+   if (!p->xnack_enabled) {
+   int invalid, evicted_ranges;
+
+   invalid = atomic_inc_return(&prange->invalid);
+   evicted_ranges = atomic_inc_return(&svms->evicted_ranges);
+   if (evicted_ranges != 1)
+   return r;
 
-   pr_debug("schedule to restore svm %p ranges\n", svms);
-   schedule_delayed_work(>restore_work,
-   msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+   pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+
+   /* First eviction, stop the queues */
+   r = kgd2kfd_quiesce_mm(mm);
+   if (r)
+   pr_debug("failed to quiesce KFD\n");
+
+   pr_debug("schedule to restore svm %p ranges\n", svms);
+   schedule_delayed_work(&svms->restore_work,
+   msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
+   } else {
+   pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n",
+prange->svms, start, last);
+   svm_range_unmap_from_gpus(prange, start, last);
+   }
 
return r;
 }
@@ -1621,6 +1641,7 @@ static void
 svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
 {
struct mm_struct *mm = prange->work_item.mm;
+   int r;
 
switch (prange->work_item.op) {
case SVM_OP_NULL:
@@ -1639,12 +1660,32 @@ svm_range_handle_list_op(struct svm_range_list *svms, 
struct svm_range *prange)
 svms, prange, prange->start, prange->last);
svm_range_update_notifier_and_interval_tree(mm, prange);
break;
+   case 

[PATCH 18/44] drm/amdkfd: HMM migrate ram to vram

2021-03-22 Thread Felix Kuehling
Registering an svm range with the same address and size, but with the
preferred_location changed from CPU to GPU or from GPU to CPU, triggers
migration of the svm range from RAM to VRAM or from VRAM to RAM.

If the svm range prefetch location is a GPU and the
KFD_IOCTL_SVM_FLAG_HOST_ACCESS flag is set, validate the svm range on
RAM first, then migrate it from RAM to VRAM.

After migration to VRAM is done, a CPU access will take a CPU page
fault, and the page fault handler migrates the range back to RAM and
resumes the CPU access.

Migration steps:

1. migrate_vma_pages gets the svm range's RAM pages, notifies that the
interval is invalidated and unmaps them from the CPU page table; the
HMM interval notifier callback evicts the process queues
2. Allocate new pages in VRAM using TTM
3. Use svm copy memory to SDMA-copy the data from RAM to VRAM
4. migrate_vma_pages copies the RAM page structures to VRAM page
structures
5. migrate_vma_finalize puts the RAM pages to free the RAM pages and
memory
6. The restore work waits for the migration to finish, then updates the
GPU page table mapping to the new VRAM pages and resumes the process
queues

If migrate_vma_setup fails to collect all RAM pages of the range, retry
up to 3 times; start the migration once it succeeds.
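
The RAM-to-VRAM path follows the standard migrate_vma flow. A condensed
sketch (the src/dst pfn arrays, pgmap_owner and all error handling are
omitted; function names are from this patch):

	struct migrate_vma migrate = {
		.vma	= vma,
		.start	= addr,
		.end	= next,
		.flags	= MIGRATE_VMA_SELECT_SYSTEM, /* collect system RAM pages */
	};

	migrate_vma_setup(&migrate);         /* unmap CPU PTEs, gather src pages */
	svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
	migrate_vma_pages(&migrate);         /* switch struct pages over to VRAM */
	svm_migrate_copy_done(adev, mfence); /* wait for the SDMA copy */
	migrate_vma_finalize(&migrate);      /* release the old RAM pages */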

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 278 +++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 187 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   4 +
 4 files changed, 460 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 2a6824ddae88..668c360be0bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -204,6 +204,284 @@ svm_migrate_copy_done(struct amdgpu_device *adev, struct 
dma_fence *mfence)
return r;
 }
 
+static uint64_t
+svm_migrate_node_physical_addr(struct amdgpu_device *adev,
+  struct drm_mm_node **mm_node, uint64_t *offset)
+{
+   struct drm_mm_node *node = *mm_node;
+   uint64_t pos = *offset;
+
+   if (node->start == AMDGPU_BO_INVALID_OFFSET) {
+   pr_debug("drm node is not validated\n");
+   return 0;
+   }
+
+   pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start,
+node->size);
+
+   if (pos >= node->size) {
+   do  {
+   pos -= node->size;
+   node++;
+   } while (pos >= node->size);
+
+   *mm_node = node;
+   *offset = pos;
+   }
+
+   return (node->start + pos) << PAGE_SHIFT;
+}
+
+unsigned long
+svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
+{
+   return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
+}
+
+static void
+svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
+{
+   struct page *page;
+
+   page = pfn_to_page(pfn);
+   page->zone_device_data = prange;
+   get_page(page);
+   lock_page(page);
+}
+
+static void
+svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr)
+{
+   struct page *page;
+
+   page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr));
+   unlock_page(page);
+   put_page(page);
+}
+
+
+static int
+svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
+struct migrate_vma *migrate, struct dma_fence **mfence,
+dma_addr_t *scratch)
+{
+   uint64_t npages = migrate->cpages;
+   struct device *dev = adev->dev;
+   struct drm_mm_node *node;
+   dma_addr_t *src;
+   uint64_t *dst;
+   uint64_t vram_addr;
+   uint64_t offset;
+   uint64_t i, j;
+   int r = -ENOMEM;
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+prange->last);
+
+   src = scratch;
+   dst = (uint64_t *)(scratch + npages);
+
+   r = svm_range_vram_node_new(adev, prange, false);
+   if (r) {
+   pr_debug("failed %d get 0x%llx pages from vram\n", r, npages);
+   goto out;
+   }
+
+   node = prange->ttm_res->mm_node;
+   offset = prange->offset;
+   vram_addr = svm_migrate_node_physical_addr(adev, &node, &offset);
+   if (!vram_addr) {
+   WARN_ONCE(1, "vram node address is 0\n");
+   r = -ENOMEM;
+   goto out;
+   }
+
+   for (i = j = 0; i < npages; i++) {
+   struct page *spage;
+
+   spage = migrate_pfn_to_page(migrate->src[i]);
+   src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_TO_DEVICE);
+   r = dma_mapping_error(dev, src[i]);
+   if (r) {
+   pr_debug("failed %d dma_map_page\n", r);
+   goto out_free_vram_pages;
+   }
+
+   pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n",
+src[i] >> 

[PATCH 03/44] drm/amdkfd: add svm ioctl API

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

Add svm (shared virtual memory) ioctl data structure and API definition.

The svm ioctl API is designed to be extensible in the future. All
operations are provided by a single IOCTL to preserve ioctl number
space. The arguments structure ends with a variable size array of
attributes that can be used to set or get one or multiple attributes.
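
A hedged sketch of how a caller would build the variable-size argument
(kfd_fd and nattr are placeholders; the fixed members and the attribute
types come from the full kfd_ioctl_svm_args definition added by this
patch):

	size_t sz = sizeof(struct kfd_ioctl_svm_args) +
		    nattr * sizeof(struct kfd_ioctl_svm_attribute);
	struct kfd_ioctl_svm_args *args = calloc(1, sz);

	/* fill in the fixed members and args->attrs[0..nattr-1] here */
	ioctl(kfd_fd, AMDKFD_IOC_SVM, args);
	free(args);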

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  12 ++
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c |   4 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   1 +
 include/uapi/linux/kfd_ioctl.h   | 130 ++-
 5 files changed, 147 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 43de260b2230..dbc824cc6b32 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1742,6 +1742,16 @@ static int kfd_ioctl_smi_events(struct file *filep,
	return kfd_smi_event_open(dev, &args->anon_fd);
 }
 
+static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
+{
+   int r = 0;
+
+   if (p->svm_disabled)
+   return -EPERM;
+
+   return r;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
.cmd_drv = 0, .name = #ioctl}
@@ -1840,6 +1850,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS,
kfd_ioctl_smi_events, 0),
+
+   AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index a2c9063076cc..52da1a3b2c7a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
@@ -405,6 +405,10 @@ int kfd_init_apertures(struct kfd_process *process)
case CHIP_POLARIS12:
case CHIP_VEGAM:
kfd_init_apertures_vi(pdd, id);
+   /* VI GPUs cannot support SVM with only
+* 40 bits of virtual address space.
+*/
+   process->svm_disabled |= true;
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 99b4624ef4c7..18fc2ccd1a77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -809,6 +809,8 @@ struct kfd_process {
struct kobject *kobj;
struct kobject *kobj_queues;
struct attribute attr_pasid;
+
+   bool svm_disabled;
 };
 
 #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 028ebb0deddd..89e7c125d334 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1208,6 +1208,7 @@ static struct kfd_process *create_process(const struct 
task_struct *thread)
process->mm = thread->mm;
process->lead_thread = thread->group_leader;
process->n_pdds = 0;
+   process->svm_disabled = false;
INIT_DELAYED_WORK(>eviction_work, evict_process_worker);
INIT_DELAYED_WORK(>restore_work, restore_process_worker);
process->last_restore_timestamp = get_jiffies_64();
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index bf5e7d7846dd..247b57baa94f 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -30,9 +30,10 @@
  * - 1.1 - initial version
  * - 1.3 - Add SMI events support
  * - 1.4 - Indicate new SRAM EDC bit in device properties
+ * - 1.5 - Add SVM API
  */
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 4
+#define KFD_IOCTL_MINOR_VERSION 5
 
 struct kfd_ioctl_get_version_args {
__u32 major_version;/* from KFD */
@@ -473,6 +474,129 @@ enum kfd_mmio_remap {
KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
 };
 
+/* Guarantee host access to memory */
+#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x0001
+/* Fine grained coherency between all devices with access */
+#define KFD_IOCTL_SVM_FLAG_COHERENT0x0002
+/* Use any GPU in same hive as preferred device */
+#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL  0x0004
+/* GPUs only read, allows replication */
+#define KFD_IOCTL_SVM_FLAG_GPU_RO  0x0008
+/* Allow execution on GPU */
+#define KFD_IOCTL_SVM_FLAG_GPU_EXEC0x0010
+/* GPUs mostly read, may allow similar optimizations 

[PATCH 19/44] drm/amdkfd: HMM migrate vram to ram

2021-03-22 Thread Felix Kuehling
If a CPU page fault happens, the HMM pgmap_ops callback migrate_to_ram
starts migrating memory from VRAM to RAM in these steps:

1. migrate_vma_pages gets the VRAM pages and notifies HMM to invalidate
the pages; the HMM interval notifier callback evicts the process queues
2. Allocate system memory pages
3. Use svm copy memory to migrate the data from VRAM to RAM
4. migrate_vma_pages copies the page structures from VRAM pages to RAM
pages
5. Return VM_FAULT_SIGBUS if the migration failed, to notify the
application
6. migrate_vma_finalize puts the VRAM pages; the page_free callback
frees the VRAM pages and VRAM nodes
7. The restore work waits for the migration to finish, then updates the
GPU page table mapping to system memory and resumes the process queues
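
For context, a sketch of how the callback is wired up and what the CPU
fault path expects back (dev_pagemap_ops is the core kernel interface;
the ops structure name below is illustrative):

	static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
		.page_free	= svm_migrate_page_free,
		.migrate_to_ram	= svm_migrate_to_ram, /* CPU fault on a VRAM page */
	};

	/* at the end of svm_migrate_to_ram(): */
	if (r)
		return VM_FAULT_SIGBUS;	/* tell the faulting application */
	return 0;			/* CPU PTE now points at the new RAM page */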

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 310 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h |   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 158 +++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  12 +
 4 files changed, 473 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 668c360be0bb..1243cf02f872 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -259,6 +259,35 @@ svm_migrate_put_vram_page(struct amdgpu_device *adev, 
unsigned long addr)
put_page(page);
 }
 
+static unsigned long
+svm_migrate_addr(struct amdgpu_device *adev, struct page *page)
+{
+   unsigned long addr;
+
+   addr = page_to_pfn(page) << PAGE_SHIFT;
+   return (addr - adev->kfd.dev->pgmap.range.start);
+}
+
+static struct page *
+svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr)
+{
+   struct page *page;
+
+   page = alloc_page_vma(GFP_HIGHUSER, vma, addr);
+   if (page)
+   lock_page(page);
+
+   return page;
+}
+
+void svm_migrate_put_sys_page(unsigned long addr)
+{
+   struct page *page;
+
+   page = pfn_to_page(addr >> PAGE_SHIFT);
+   unlock_page(page);
+   put_page(page);
+}
 
 static int
 svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
@@ -484,13 +513,222 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, 
uint32_t best_loc)
 
 static void svm_migrate_page_free(struct page *page)
 {
+   /* Keep this function to avoid warning */
+}
+
+static int
+svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange,
+   struct migrate_vma *migrate, struct dma_fence **mfence,
+   dma_addr_t *scratch)
+{
+   uint64_t npages = migrate->cpages;
+   struct device *dev = adev->dev;
+   uint64_t *src;
+   dma_addr_t *dst;
+   struct page *dpage;
+   uint64_t i = 0, j;
+   uint64_t addr;
+   int r = 0;
+
+   pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
+prange->last);
+
+   addr = prange->start << PAGE_SHIFT;
+
+   src = (uint64_t *)(scratch + npages);
+   dst = scratch;
+
+   prange->pages_addr = kvmalloc_array(npages, sizeof(*prange->pages_addr),
+   GFP_KERNEL | __GFP_ZERO);
+   if (!prange->pages_addr) {
+   r = -ENOMEM;
+   goto out_oom;
+   }
+
+   for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) {
+   struct page *spage;
+
+   spage = migrate_pfn_to_page(migrate->src[i]);
+   if (!spage) {
+   pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+   r = -ENOMEM;
+   goto out_oom;
+   }
+   src[i] = svm_migrate_addr(adev, spage);
+   if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
+   r = svm_migrate_copy_memory_gart(adev, dst + i - j,
+src + i - j, j,
+FROM_VRAM_TO_RAM,
+mfence);
+   if (r)
+   goto out_oom;
+   j = 0;
+   }
+
+   dpage = svm_migrate_get_sys_page(migrate->vma, addr);
+   if (!dpage) {
+   pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+   r = -ENOMEM;
+   goto out_oom;
+   }
+
+   dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, 
DMA_FROM_DEVICE);
+   r = dma_mapping_error(dev, dst[i]);
+   if (r) {
+   pr_debug("failed %d dma_map_page\n", r);
+   goto out_oom;
+   }
+
+   pr_debug("dma mapping dst to 0x%llx, page_to_pfn 

[PATCH 22/44] drm/amdkfd: page table restore through svm API

2021-03-22 Thread Felix Kuehling
Implement page table restore through the SVM API. This is called from
the fault handler in amdgpu_vm to update page tables through the page
fault retry IH.

Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 69 
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  2 +
 2 files changed, 71 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index fb8ca844d9bd..c791d91cb45d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -1946,6 +1946,75 @@ svm_range_from_addr(struct svm_range_list *svms, 
unsigned long addr,
return NULL;
 }
 
+int
+svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
+   uint64_t addr)
+{
+   int r = 0;
+   struct mm_struct *mm = NULL;
+   struct svm_range *prange;
+   struct svm_range_list *svms;
+   struct kfd_process *p;
+
+   p = kfd_lookup_process_by_pasid(pasid);
+   if (!p) {
+   pr_debug("kfd process not founded pasid 0x%x\n", pasid);
+   return -ESRCH;
+   }
+   svms = &p->svms;
+
+   pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr);
+
+   mm = get_task_mm(p->lead_thread);
+   if (!mm) {
+   pr_debug("svms 0x%p failed to get mm\n", svms);
+   r = -ESRCH;
+   goto out;
+   }
+
+   svm_range_list_lock_and_flush_work(svms, mm);
+   mutex_lock(&svms->lock);
+   prange = svm_range_from_addr(svms, addr, NULL);
+
+   mmap_write_downgrade(mm);
+
+   if (!prange) {
+   pr_debug("failed to find prange svms 0x%p address [0x%llx]\n",
+svms, addr);
+   r = -EFAULT;
+   goto out_unlock_svms;
+   }
+
+   mutex_lock(&prange->migrate_mutex);
+
+   r = svm_range_validate(mm, prange);
+   if (r) {
+   pr_debug("failed %d to validate svms 0x%p [0x%lx 0x%lx]\n", r,
+svms, prange->start, prange->last);
+
+   goto out_unlock_range;
+   }
+
+   pr_debug("restoring svms 0x%p [0x%lx %lx] mapping\n",
+svms, prange->start, prange->last);
+
+   r = svm_range_map_to_gpus(prange, true);
+   if (r)
+   pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpu\n", r,
+svms, prange->start, prange->last);
+
+out_unlock_range:
+   mutex_unlock(&prange->migrate_mutex);
+out_unlock_svms:
+   mutex_unlock(&svms->lock);
+   mmap_read_unlock(mm);
+   mmput(mm);
+out:
+   kfd_unref_process(p);
+
+   return r;
+}
+
 void svm_range_list_fini(struct kfd_process *p)
 {
struct svm_range *prange;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
index 3f945a601546..3aa6f6b97481 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
@@ -159,6 +159,8 @@ int svm_range_split_by_granularity(struct kfd_process *p, 
struct mm_struct *mm,
   struct svm_range *prange,
   struct svm_range **pmigrate,
   struct list_head *deferred_update_list);
+int svm_range_restore_pages(struct amdgpu_device *adev,
+   unsigned int pasid, uint64_t addr);
 void svm_range_add_list_work(struct svm_range_list *svms,
 struct svm_range *prange, struct mm_struct *mm,
 enum svm_work_list_ops op);
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 21/44] drm/amdgpu: enable 48-bit IH timestamp counter

2021-03-22 Thread Felix Kuehling
From: Alex Sierra 

By default this timestamp is a 32-bit counter, which
overflows in around 10 minutes.

Change-Id: I7c46604b0272dcfd1ce24351437c16fe53dca0ab
Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
---
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c 
b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index ca8efa5c6978..2f17c8a57015 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct 
amdgpu_device *adev,
 
tmp = RREG32(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+   tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
/* enable_intr field is only valid in ring0 */
	if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 
0));
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 23/44] drm/amdkfd: SVM API call to restore page tables

2021-03-22 Thread Felix Kuehling
From: Alex Sierra 

Use the SVM API to restore page tables when retry faults are enabled
and the VM belongs to a compute context.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0e9ae5f91c7c..a61df234f012 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -37,6 +37,7 @@
 #include "amdgpu_gmc.h"
 #include "amdgpu_xgmi.h"
 #include "amdgpu_dma_buf.h"
+#include "kfd_svm.h"
 
 /**
  * DOC: GPUVM
@@ -3302,18 +3303,29 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
uint64_t value, flags;
struct amdgpu_vm *vm;
long r;
+   bool is_compute_context = false;
 
	spin_lock(&adev->vm_manager.pasid_lock);
	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
-   if (vm)
+   if (vm) {
root = amdgpu_bo_ref(vm->root.base.bo);
-   else
+   is_compute_context = vm->is_compute_context;
+   } else {
root = NULL;
+   }
	spin_unlock(&adev->vm_manager.pasid_lock);
 
if (!root)
return false;
 
+   addr /= AMDGPU_GPU_PAGE_SIZE;
+
+   if (!amdgpu_noretry && is_compute_context &&
+   !svm_range_restore_pages(adev, pasid, addr)) {
+   amdgpu_bo_unref(&root);
+   return true;
+   }
+
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_unref;
@@ -3327,18 +3339,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
if (!vm)
goto error_unlock;
 
-   addr /= AMDGPU_GPU_PAGE_SIZE;
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
AMDGPU_PTE_SYSTEM;
 
-   if (vm->is_compute_context) {
+   if (is_compute_context) {
/* Intentionally setting invalid PTE flag
 * combination to force a no-retry-fault
 */
flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
AMDGPU_PTE_TF;
value = 0;
-
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
value = adev->dummy_page_addr;
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 06/44] drm/amdgpu: add common HMM get pages function

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

Move the HMM get pages function from amdgpu_ttm to amdgpu_mn. This
common function will be used by the new SVM APIs.
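
Typical usage of the new helper, as a sketch (simplified from the
userptr path; the exact call site is in the amdgpu_ttm.c hunk below):

	struct hmm_range *range;
	int r;

	/* fault in the user pages and fill the pages[] array */
	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
				       ttm->num_pages, &range, readonly,
				       false);
	if (r)
		return r;

	/* ... program the GPU mapping from pages[] ... */

	/* non-zero means the range was invalidated in the meantime and
	 * the pages must be faulted in again
	 */
	r = amdgpu_hmm_range_get_pages_done(range);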

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c  | 83 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h  |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 76 +++---
 3 files changed, 100 insertions(+), 66 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 828b5167ff12..997da4237a10 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -155,3 +155,86 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
	mmu_interval_notifier_remove(&bo->notifier);
bo->notifier.mm = NULL;
 }
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+  struct mm_struct *mm, struct page **pages,
+  uint64_t start, uint64_t npages,
+  struct hmm_range **phmm_range, bool readonly,
+  bool mmap_locked)
+{
+   struct hmm_range *hmm_range;
+   unsigned long timeout;
+   unsigned long i;
+   unsigned long *pfns;
+   int r = 0;
+
+   hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
+   if (unlikely(!hmm_range))
+   return -ENOMEM;
+
+   pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+   if (unlikely(!pfns)) {
+   r = -ENOMEM;
+   goto out_free_range;
+   }
+
+   hmm_range->notifier = notifier;
+   hmm_range->default_flags = HMM_PFN_REQ_FAULT;
+   if (!readonly)
+   hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
+   hmm_range->hmm_pfns = pfns;
+   hmm_range->start = start;
+   hmm_range->end = start + npages * PAGE_SIZE;
+   timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+
+retry:
+   hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+
+   if (likely(!mmap_locked))
+   mmap_read_lock(mm);
+
+   r = hmm_range_fault(hmm_range);
+
+   if (likely(!mmap_locked))
+   mmap_read_unlock(mm);
+   if (unlikely(r)) {
+   /*
+* FIXME: This timeout should encompass the retry from
+* mmu_interval_read_retry() as well.
+*/
+   if (r == -EBUSY && !time_after(jiffies, timeout))
+   goto retry;
+   goto out_free_pfns;
+   }
+
+   /*
+* Due to default_flags, all pages are HMM_PFN_VALID or
+* hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+* the notifier_lock, and mmu_interval_read_retry() must be done first.
+*/
+   for (i = 0; pages && i < npages; i++)
+   pages[i] = hmm_pfn_to_page(pfns[i]);
+
+   *phmm_range = hmm_range;
+
+   return 0;
+
+out_free_pfns:
+   kvfree(pfns);
+out_free_range:
+   kfree(hmm_range);
+
+   return r;
+}
+
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+{
+   int r;
+
+   r = mmu_interval_read_retry(hmm_range->notifier,
+   hmm_range->notifier_seq);
+   kvfree(hmm_range->hmm_pfns);
+   kfree(hmm_range);
+
+   return r;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index a292238f75eb..7f7d37a457c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,6 +30,13 @@
 #include 
 #include 
 
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+  struct mm_struct *mm, struct page **pages,
+  uint64_t start, uint64_t npages,
+  struct hmm_range **phmm_range, bool readonly,
+  bool mmap_locked);
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+
 #if defined(CONFIG_HMM_MIRROR)
 int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
 void amdgpu_mn_unregister(struct amdgpu_bo *bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2bafbd78ba4b..aca5a29f6d2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -32,7 +32,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -692,10 +691,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, 
struct page **pages)
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
-   struct hmm_range *range;
-   unsigned long timeout;
struct mm_struct *mm;
-   unsigned long i;
+   bool readonly;
int r = 0;
 
mm = bo->notifier.mm;
@@ -711,76 +708,26 @@ int 
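
A rough sketch of how a caller might use the new helper pair (assumptions:
the caller already registered an mmu_interval_notifier for the address
range and does its mapping work under the notifier lock; error handling is
abbreviated):

/* Sketch only: fault in npages of user memory and retry if the snapshot
 * was invalidated before the caller finished using it.
 */
static int get_user_pages_sketch(struct mmu_interval_notifier *notifier,
                                 struct mm_struct *mm, struct page **pages,
                                 uint64_t start, uint64_t npages)
{
        struct hmm_range *range;
        int r;

retry:
        r = amdgpu_hmm_range_get_pages(notifier, mm, pages, start, npages,
                                       &range, false /* readonly */,
                                       false /* mmap lock not held */);
        if (r)
                return r;

        /* ... set up GPU mappings under the notifier lock ... */

        /* non-zero return means a concurrent invalidation: start over */
        if (amdgpu_hmm_range_get_pages_done(range))
                goto retry;

        return 0;
}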

[PATCH 29/44] drm/amdgpu: reserve fence slot to update page table

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

A fence slot was not reserved before using SDMA to update the page table,
causing the kernel BUG backtrace below when handling a VM retry fault
while the application is exiting.

[  133.048143] kernel BUG at 
/home/yangp/git/compute_staging/kernel/drivers/dma-buf/dma-resv.c:281!
[  133.048487] Workqueue: events amdgpu_irq_handle_ih1 [amdgpu]
[  133.048506] RIP: 0010:dma_resv_add_shared_fence+0x204/0x280
[  133.048672]  amdgpu_vm_sdma_commit+0x134/0x220 [amdgpu]
[  133.048788]  amdgpu_vm_bo_update_range+0x220/0x250 [amdgpu]
[  133.048905]  amdgpu_vm_handle_fault+0x202/0x370 [amdgpu]
[  133.049031]  gmc_v9_0_process_interrupt+0x1ab/0x310 [amdgpu]
[  133.049165]  ? kgd2kfd_interrupt+0x9a/0x180 [amdgpu]
[  133.049289]  ? amdgpu_irq_dispatch+0xb6/0x240 [amdgpu]
[  133.049408]  amdgpu_irq_dispatch+0xb6/0x240 [amdgpu]
[  133.049534]  amdgpu_ih_process+0x9b/0x1c0 [amdgpu]
[  133.049657]  amdgpu_irq_handle_ih1+0x21/0x60 [amdgpu]
[  133.049669]  process_one_work+0x29f/0x640
[  133.049678]  worker_thread+0x39/0x3f0
[  133.049685]  ? process_one_work+0x640/0x640

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index a61df234f012..3e32f76cd7bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -3302,7 +3302,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
struct amdgpu_bo *root;
uint64_t value, flags;
struct amdgpu_vm *vm;
-   long r;
+   int r;
bool is_compute_context = false;
 
spin_lock(>vm_manager.pasid_lock);
@@ -3360,6 +3360,12 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
value = 0;
}
 
+   r = dma_resv_reserve_shared(root->tbo.base.resv, 1);
+   if (r) {
+   pr_debug("failed %d to reserve fence slot\n", r);
+   goto error_unlock;
+   }
+
r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
addr, flags, value, NULL, NULL,
NULL);
@@ -3371,7 +3377,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, 
u32 pasid,
 error_unlock:
amdgpu_bo_unreserve(root);
if (r < 0)
-   DRM_ERROR("Can't handle page fault (%ld)\n", r);
+   DRM_ERROR("Can't handle page fault (%d)\n", r);
 
 error_unref:
amdgpu_bo_unref();
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
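
The rule the BUG_ON enforces is that a shared fence slot must be reserved,
with the reservation object locked, before a fence is added. A minimal
sketch of that contract (illustrative, not tied to a particular call path):

/* Sketch: dma_resv requires the slot to exist before the fence is added. */
static int add_fence_sketch(struct dma_resv *resv, struct dma_fence *fence)
{
        int r;

        /* resv must already be held, e.g. via the reserved root BO */
        r = dma_resv_reserve_shared(resv, 1);
        if (r)
                return r;

        /* safe now: dma_resv_add_shared_fence() will find a free slot */
        dma_resv_add_shared_fence(resv, fence);
        return 0;
}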


[PATCH 26/44] drm/amdkfd: add svm_bo eviction mechanism support

2021-03-22 Thread Felix Kuehling
The svm_bo eviction mechanism is different from that of regular BOs.
Every SVM_BO created contains one eviction fence and one worker item for
the eviction process. SVM_BOs can be attached to one or more pranges.
During eviction, TTM calls the enable_signaling callback for every SVM_BO
until enough VRAM space is available. All of the ttm_evict calls here are
synchronous, which guarantees that each eviction has completed and the
fence has signaled before the call returns.

Signed-off-by: Alex Sierra 
Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 186 +--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  13 +-
 2 files changed, 153 insertions(+), 46 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index c791d91cb45d..3a7b842b362c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -34,6 +34,7 @@
 
 #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
 
+static void svm_range_evict_svm_bo_worker(struct work_struct *work);
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
@@ -332,7 +333,15 @@ static void svm_range_bo_release(struct kref *kref)
spin_lock(_bo->list_lock);
}
spin_unlock(_bo->list_lock);
-
+   if (!dma_fence_is_signaled(_bo->eviction_fence->base)) {
+   /* We're not in the eviction worker.
+* Signal the fence and synchronize with any
+* pending eviction work.
+*/
+   dma_fence_signal(_bo->eviction_fence->base);
+   cancel_work_sync(_bo->eviction_work);
+   }
+   dma_fence_put(_bo->eviction_fence->base);
amdgpu_bo_unref(_bo->bo);
kfree(svm_bo);
 }
@@ -345,6 +354,61 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo)
kref_put(_bo->kref, svm_range_bo_release);
 }
 
+static bool svm_range_validate_svm_bo(struct svm_range *prange)
+{
+   mutex_lock(>lock);
+   if (!prange->svm_bo) {
+   mutex_unlock(>lock);
+   return false;
+   }
+   if (prange->ttm_res) {
+   /* We still have a reference, all is well */
+   mutex_unlock(>lock);
+   return true;
+   }
+   if (svm_bo_ref_unless_zero(prange->svm_bo)) {
+   if (READ_ONCE(prange->svm_bo->evicting)) {
+   struct dma_fence *f;
+   struct svm_range_bo *svm_bo;
+   /* The BO is getting evicted,
+* we need to get a new one
+*/
+   mutex_unlock(>lock);
+   svm_bo = prange->svm_bo;
+   f = dma_fence_get(_bo->eviction_fence->base);
+   svm_range_bo_unref(prange->svm_bo);
+   /* wait for the fence to avoid long spin-loop
+* at list_empty_careful
+*/
+   dma_fence_wait(f, false);
+   dma_fence_put(f);
+   } else {
+   /* The BO was still around and we got
+* a new reference to it
+*/
+   mutex_unlock(>lock);
+   pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange->start, prange->last);
+
+   prange->ttm_res = >svm_bo->bo->tbo.mem;
+   return true;
+   }
+
+   } else {
+   mutex_unlock(>lock);
+   }
+
+   /* We need a new svm_bo. Spin-loop to wait for concurrent
+* svm_range_bo_release to finish removing this range from
+* its range list. After this, it is safe to reuse the
+* svm_bo pointer and svm_bo_list head.
+*/
+   while (!list_empty_careful(>svm_bo_list))
+   ;
+
+   return false;
+}
+
 static struct svm_range_bo *svm_range_bo_new(void)
 {
struct svm_range_bo *svm_bo;
@@ -364,72 +428,56 @@ int
 svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange,
bool clear)
 {
-   struct amdkfd_process_info *process_info;
struct amdgpu_bo_param bp;
struct svm_range_bo *svm_bo;
struct amdgpu_bo_user *ubo;
struct amdgpu_bo *bo;
struct kfd_process *p;
+   struct mm_struct *mm;
int r;
 
-   pr_debug("[0x%lx 0x%lx]\n", prange->start, prange->last);
-   mutex_lock(>lock);
-   if (prange->svm_bo) {
-   if (prange->ttm_res) {
-   /* We still have a reference, all is well */
-   mutex_unlock(>lock);
-   return 0;
-   }
-   if (svm_bo_ref_unless_zero(prange->svm_bo)) {
-
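
Conceptually, each svm_bo carries an eviction fence plus a work item, and
TTM's enable_signaling hook is what kicks off the asynchronous eviction. A
very rough sketch of that pattern (the structure and function names below
are hypothetical simplifications, not the in-tree amdkfd fence code):

struct svm_bo_sketch {
        struct dma_fence fence;        /* eviction fence attached to the BO */
        struct work_struct evict_work; /* migrates the pranges back to RAM */
};

/* dma_fence_ops.enable_signaling: TTM calls this when it wants the VRAM */
static bool svm_bo_enable_signaling_sketch(struct dma_fence *f)
{
        struct svm_bo_sketch *sbo = container_of(f, struct svm_bo_sketch, fence);

        schedule_work(&sbo->evict_work); /* evict asynchronously ... */
        return true;                     /* ... the fence signals later */
}

static void svm_bo_evict_work_sketch(struct work_struct *w)
{
        struct svm_bo_sketch *sbo = container_of(w, struct svm_bo_sketch,
                                                 evict_work);

        /* migrate all attached pranges out of VRAM, then signal */
        dma_fence_signal(&sbo->fence);
}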

[PATCH 16/44] drm/amdkfd: support xgmi same hive mapping

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

amdgpu_gmc_get_vm_pte uses the bo_va->is_xgmi same-hive information to
set PTE flags when updating the GPU mapping. Add a local structure
variable bo_va, set bo_va.is_xgmi accordingly, and pass it through
mapping->bo_va while mapping to the GPU.

This assumes the XGMI pstate is high after boot.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 28 +---
 1 file changed, 25 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 9b1c5aa86f4a..de5777330d23 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -26,6 +26,8 @@
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
 #include "amdgpu_mn.h"
+#include "amdgpu.h"
+#include "amdgpu_xgmi.h"
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
@@ -1026,10 +1028,12 @@ static int svm_range_bo_validate(void *param, struct 
amdgpu_bo *bo)
 static int
 svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 struct svm_range *prange, dma_addr_t *pages_addr,
-bool reserve_vm, struct dma_fence **fence)
+bool reserve_vm, struct amdgpu_device *bo_adev,
+struct dma_fence **fence)
 {
struct ttm_validate_buffer tv[2];
struct ww_acquire_ctx ticket;
+   struct amdgpu_bo_va bo_va;
struct list_head list;
uint64_t pte_flags;
int r = 0;
@@ -1062,13 +1066,18 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
}
}
 
+   if (prange->svm_bo && prange->ttm_res) {
+   bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev);
+   prange->mapping.bo_va = _va;
+   }
+
prange->mapping.start = prange->start;
prange->mapping.last = prange->last;
prange->mapping.offset = prange->offset;
pte_flags = svm_range_get_pte_flags(adev, prange);
prange->mapping.flags = pte_flags;
 
-   r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL,
+   r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL,
prange->mapping.start,
prange->mapping.last, pte_flags,
prange->mapping.offset,
@@ -1092,6 +1101,7 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
*fence = dma_fence_get(vm->last_update);
 
 unreserve_out:
+   prange->mapping.bo_va = NULL;
if (reserve_vm)
ttm_eu_backoff_reservation(, );
 out:
@@ -1102,6 +1112,7 @@ static int svm_range_map_to_gpus(struct svm_range 
*prange, bool reserve_vm)
 {
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
struct kfd_process_device *pdd;
+   struct amdgpu_device *bo_adev;
struct amdgpu_device *adev;
struct kfd_process *p;
struct kfd_dev *dev;
@@ -1109,6 +1120,11 @@ static int svm_range_map_to_gpus(struct svm_range 
*prange, bool reserve_vm)
uint32_t gpuidx;
int r = 0;
 
+   if (prange->svm_bo && prange->ttm_res)
+   bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev);
+   else
+   bo_adev = NULL;
+
bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
  MAX_GPU_INSTANCE);
p = container_of(prange->svms, struct kfd_process, svms);
@@ -1125,6 +1141,12 @@ static int svm_range_map_to_gpus(struct svm_range 
*prange, bool reserve_vm)
return -EINVAL;
adev = (struct amdgpu_device *)dev->kgd;
 
+   if (bo_adev && adev != bo_adev &&
+   !amdgpu_xgmi_same_hive(adev, bo_adev)) {
+   pr_debug("cannot map to device idx %d\n", gpuidx);
+   continue;
+   }
+
r = svm_range_dma_map(adev->dev, >dma_addr[gpuidx],
  prange->pages_addr, prange->npages);
if (r)
@@ -1132,7 +1154,7 @@ static int svm_range_map_to_gpus(struct svm_range 
*prange, bool reserve_vm)
 
r = svm_range_map_to_gpu(adev, pdd->vm, prange,
 prange->dma_addr[gpuidx], reserve_vm,
-);
+bo_adev, );
if (r)
break;
 
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 00/44] Add HMM-based SVM memory manager to KFD v2

2021-03-22 Thread Felix Kuehling
Since the last patch series I sent on Jan 6 a lot has changed. Patches 1-33
are the cleaned-up version, rebased on amd-staging-drm-next 5.11 from about
a week ago. The remaining 11 patches are current work in progress with
further cleanups and fixes.

MMU notifiers and CPU page faults now can split ranges and update our range
data structures without taking heavy locks by doing some of the critical
work in a deferred work handler. This includes updating MMU notifiers and
the SVM range interval tree. In the meantime, new ranges can live as
children of their parent ranges until the deferred work handler consolidates
them in the main interval tree.

We also added proper DMA mapping of system memory pages.

Current work in progress is cleaning up all the locking, simplifying our
code and data structures and resolving a few known bugs.

This series and the corresponding ROCm Thunk and KFDTest changes are also
available on GitHub:
  https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/tree/fxkamd/hmm-wip
  https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip

An updated Thunk

Alex Sierra (10):
  drm/amdgpu: replace per_device_list by array
  drm/amdkfd: helper to convert gpu id and idx
  drm/amdkfd: add xnack enabled flag to kfd_process
  drm/amdkfd: add ioctl to configure and query xnack retries
  drm/amdgpu: enable 48-bit IH timestamp counter
  drm/amdkfd: SVM API call to restore page tables
  drm/amdkfd: add svm_bo reference for eviction fence
  drm/amdgpu: add param bit flag to create SVM BOs
  drm/amdgpu: svm bo enable_signal call condition
  drm/amdgpu: add svm_bo eviction to enable_signal cb

Felix Kuehling (22):
  drm/amdkfd: map svm range to GPUs
  drm/amdkfd: svm range eviction and restore
  drm/amdkfd: validate vram svm range from TTM
  drm/amdkfd: HMM migrate ram to vram
  drm/amdkfd: HMM migrate vram to ram
  drm/amdkfd: invalidate tables on page retry fault
  drm/amdkfd: page table restore through svm API
  drm/amdkfd: add svm_bo eviction mechanism support
  drm/amdkfd: refine migration policy with xnack on
  drm/amdkfd: add svm range validate timestamp
  drm/amdkfd: multiple gpu migrate vram to vram
  drm/amdkfd: Fix dma unmapping
  drm/amdkfd: Call mutex_destroy
  drm/amdkfd: Fix spurious restore failures
  drm/amdkfd: Fix svm_bo_list locking in eviction worker
  drm/amdkfd: Simplify split_by_granularity
  drm/amdkfd: Point out several race conditions
  drm/amdkfd: Return pdd from kfd_process_device_from_gduid
  drm/amdkfd: Remove broken deferred mapping
  drm/amdkfd: Allow invalid pages in migration.src
  drm/amdkfd: Correct locking during migration and mapping
  drm/amdkfd: Nested locking and invalidation of child ranges

Philip Yang (12):
  drm/amdkfd: add svm ioctl API
  drm/amdkfd: register svm range
  drm/amdkfd: add svm ioctl GET_ATTR op
  drm/amdgpu: add common HMM get pages function
  drm/amdkfd: validate svm range system memory
  drm/amdkfd: deregister svm range
  drm/amdgpu: export vm update mapping interface
  drm/amdkfd: register HMM device private zone
  drm/amdkfd: support xgmi same hive mapping
  drm/amdkfd: copy memory through gart table
  drm/amdgpu: reserve fence slot to update page table
  drm/amdkfd: Add SVM API support capability bits

 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|4 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c  |   16 +-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  |   13 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c|   83 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|7 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|4 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |   90 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|   48 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|   11 +
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c|1 +
 drivers/gpu/drm/amd/amdkfd/Kconfig|1 +
 drivers/gpu/drm/amd/amdkfd/Makefile   |4 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  173 +-
 drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c  |4 +
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|8 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c  |  922 ++
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h  |   59 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   54 +-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  |  191 +-
 .../amd/amdkfd/kfd_process_queue_manager.c|6 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 2865 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h  |  175 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c |6 +
 drivers/gpu/drm/amd/amdkfd/kfd_topology.h |   10 +-
 include/uapi/linux/kfd_ioctl.h|  171 +-
 26 files changed, 4681 insertions(+), 249 deletions(-)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.c
 create mode 100644 

[PATCH 05/44] drm/amdkfd: add svm ioctl GET_ATTR op

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

Get the intersection of attributes over all memory in the given
range.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 164 +++
 1 file changed, 164 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 22f942bb2b0c..e57103a9025e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -769,6 +769,167 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, 
uint64_t size,
return r;
 }
 
+static int
+svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
+  uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
+{
+   DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE);
+   DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE);
+   bool get_preferred_loc = false;
+   bool get_prefetch_loc = false;
+   bool get_granularity = false;
+   bool get_accessible = false;
+   bool get_flags = false;
+   uint64_t last = start + size - 1UL;
+   struct mm_struct *mm = current->mm;
+   uint8_t granularity = 0xff;
+   struct interval_tree_node *node;
+   struct svm_range_list *svms;
+   struct svm_range *prange;
+   uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   uint32_t flags = 0x;
+   int gpuidx;
+   uint32_t i;
+
+   pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", >svms, start,
+start + size - 1, nattr);
+
+   mmap_read_lock(mm);
+   if (!svm_range_is_valid(mm, start, size)) {
+   pr_debug("invalid range\n");
+   mmap_read_unlock(mm);
+   return -EINVAL;
+   }
+   mmap_read_unlock(mm);
+
+   for (i = 0; i < nattr; i++) {
+   switch (attrs[i].type) {
+   case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
+   get_preferred_loc = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
+   get_prefetch_loc = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_ACCESS:
+   get_accessible = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
+   get_flags = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_GRANULARITY:
+   get_granularity = true;
+   break;
+   case KFD_IOCTL_SVM_ATTR_CLR_FLAGS:
+   case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
+   case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
+   fallthrough;
+   default:
+   pr_debug("get invalid attr type 0x%x\n", attrs[i].type);
+   return -EINVAL;
+   }
+   }
+
+   svms = >svms;
+
+   mutex_lock(>lock);
+
+   node = interval_tree_iter_first(>objects, start, last);
+   if (!node) {
+   pr_debug("range attrs not found return default values\n");
+   svm_range_set_default_attributes(, _loc,
+, );
+   /* TODO: Automatically create SVM ranges and map them on
+* GPU page faults
+   if (p->xnack_enabled)
+   bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
+*/
+
+   goto fill_values;
+   }
+   bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
+   bitmap_fill(bitmap_aip, MAX_GPU_INSTANCE);
+
+   while (node) {
+   struct interval_tree_node *next;
+
+   prange = container_of(node, struct svm_range, it_node);
+   next = interval_tree_iter_next(node, start, last);
+
+   if (get_preferred_loc) {
+   if (prange->preferred_loc ==
+   KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+   (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+location != prange->preferred_loc)) {
+   location = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   get_preferred_loc = false;
+   } else {
+   location = prange->preferred_loc;
+   }
+   }
+   if (get_prefetch_loc) {
+   if (prange->prefetch_loc ==
+   KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
+   (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
+prefetch_loc != prange->prefetch_loc)) {
+   prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
+   get_prefetch_loc = false;
+   } else {
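
The "intersection" works attribute by attribute: a value is only reported
if every range inside the queried interval agrees on it, otherwise the
UNDEFINED marker is returned. A simplified sketch of that reduction for one
attribute (illustrative; the real loop handles all attributes at once):

/* Sketch: reduce the preferred location over all ranges in [start, last]. */
static uint32_t intersect_preferred_loc_sketch(struct svm_range_list *svms,
                                               unsigned long start,
                                               unsigned long last)
{
        struct interval_tree_node *node;
        uint32_t loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED;
        bool first = true;

        for (node = interval_tree_iter_first(&svms->objects, start, last);
             node; node = interval_tree_iter_next(node, start, last)) {
                struct svm_range *prange =
                        container_of(node, struct svm_range, it_node);

                if (first) {
                        loc = prange->preferred_loc;
                        first = false;
                } else if (loc != prange->preferred_loc) {
                        /* ranges disagree: the intersection is undefined */
                        return KFD_IOCTL_SVM_LOCATION_UNDEFINED;
                }
        }
        return loc;
}
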

[PATCH 08/44] drm/amdkfd: deregister svm range

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

When the application explicitly calls unmap, or the mappings are torn
down from mmput when the application exits, the driver receives an
MMU_NOTIFY_UNMAP event. Remove the svm range from the process svms object
tree and list first, then unmap it from the GPUs (in the following patch).

Split the svm ranges to handle partial unmapping of svm ranges. To
avoid deadlocks, updating MMU notifiers, range lists and interval trees
is done in a deferred worker. New child ranges are attached to their
parent range's child_list until the worker can update the
svm_range_list. svm_range_set_attr flushes deferred work and takes the
mmap_write_lock to guarantee that it has an up-to-date svm_range_list.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |   3 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c  | 285 +-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h  |  18 ++
 3 files changed, 305 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index f547e1282d69..4101f5341ec9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -735,6 +735,9 @@ struct svm_range_list {
struct mutexlock;
struct rb_root_cached   objects;
struct list_headlist;
+   struct work_struct  deferred_list_work;
+   struct list_headdeferred_range_list;
+   spinlock_t  deferred_list_lock;
 };
 
 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 6024caf7373f..e23171ac866a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -136,6 +136,8 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>update_list);
INIT_LIST_HEAD(>remove_list);
INIT_LIST_HEAD(>insert_list);
+   INIT_LIST_HEAD(>deferred_list);
+   INIT_LIST_HEAD(>child_list);
svm_range_set_default_attributes(>preferred_loc,
 >prefetch_loc,
 >granularity, >flags);
@@ -512,6 +514,41 @@ svm_range_split_head(struct svm_range *prange, struct 
svm_range *new,
return r;
 }
 
+void svm_range_add_child(struct svm_range *prange, struct mm_struct *mm,
+struct svm_range *pchild, enum svm_work_list_ops op)
+{
+   pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n",
+pchild, pchild->start, pchild->last, prange, op);
+
+   pchild->work_item.mm = mm;
+   pchild->work_item.op = op;
+   list_add_tail(>child_list, >child_list);
+}
+
+/**
+ * svm_range_list_lock_and_flush_work - flush pending deferred work
+ *
+ * @svms: the svm range list
+ * @mm: the mm structure
+ *
+ * Context: Returns with mmap write lock held, pending deferred work flushed
+ *
+ */
+static void
+svm_range_list_lock_and_flush_work(struct svm_range_list *svms,
+  struct mm_struct *mm)
+{
+retry_flush_work:
+   flush_work(>deferred_list_work);
+   mmap_write_lock(mm);
+
+   if (list_empty(>deferred_range_list))
+   return;
+   mmap_write_unlock(mm);
+   pr_debug("retry flush\n");
+   goto retry_flush_work;
+}
+
 struct svm_range *svm_range_clone(struct svm_range *old)
 {
struct svm_range *new;
@@ -664,21 +701,264 @@ svm_range_handle_overlap(struct svm_range_list *svms, 
struct svm_range *new,
return r;
 }
 
+static void
+svm_range_update_notifier_and_interval_tree(struct mm_struct *mm,
+   struct svm_range *prange)
+{
+   unsigned long start;
+   unsigned long last;
+
+   start = prange->notifier.interval_tree.start >> PAGE_SHIFT;
+   last = prange->notifier.interval_tree.last >> PAGE_SHIFT;
+
+   if (prange->start == start && prange->last == last)
+   return;
+
+   pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
+ prange->svms, prange, start, last, prange->start,
+ prange->last);
+
+   if (start != 0 && last != 0) {
+   interval_tree_remove(>it_node, >svms->objects);
+   svm_range_remove_notifier(prange);
+   }
+   prange->it_node.start = prange->start;
+   prange->it_node.last = prange->last;
+
+   interval_tree_insert(>it_node, >svms->objects);
+   svm_range_add_notifier_locked(mm, prange);
+}
+
+static void
+svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange)
+{
+   struct mm_struct *mm = prange->work_item.mm;
+
+   switch (prange->work_item.op) {
+   case SVM_OP_NULL:
+   pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+svms, prange, prange->start, prange->last);
+   
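
The deferred-update scheme boils down to: the notifier paths only queue
work (attaching child ranges and an op to the parent), and a single worker
later replays those ops under the heavy locks. A condensed sketch of that
worker loop (illustrative; it only uses the fields introduced by this patch
and omits most error handling):

static void deferred_list_work_sketch(struct work_struct *work)
{
        struct svm_range_list *svms =
                container_of(work, struct svm_range_list, deferred_list_work);
        struct svm_range *prange, *pchild;

        spin_lock(&svms->deferred_list_lock);
        while (!list_empty(&svms->deferred_range_list)) {
                struct mm_struct *mm;

                prange = list_first_entry(&svms->deferred_range_list,
                                          struct svm_range, deferred_list);
                list_del_init(&prange->deferred_list);
                mm = prange->work_item.mm;
                spin_unlock(&svms->deferred_list_lock);

                /* replay the queued op on the parent and on each child */
                mmap_write_lock(mm);
                mutex_lock(&svms->lock);
                list_for_each_entry(pchild, &prange->child_list, child_list)
                        svm_range_handle_list_op(svms, pchild);
                svm_range_handle_list_op(svms, prange);
                mutex_unlock(&svms->lock);
                mmap_write_unlock(mm);

                spin_lock(&svms->deferred_list_lock);
        }
        spin_unlock(&svms->deferred_list_lock);
}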

[PATCH 09/44] drm/amdgpu: export vm update mapping interface

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

It will be used by KFD to map svm ranges to the GPU. Because an svm range
has no amdgpu_bo or bo_va, it cannot use the amdgpu_bo_update interface
and must use the amdgpu VM update interface directly.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9268db1172bd..0e9ae5f91c7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1592,15 +1592,15 @@ static int amdgpu_vm_update_ptes(struct 
amdgpu_vm_update_params *params,
  * Returns:
  * 0 for success, -EINVAL for failure.
  */
-static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
-  struct amdgpu_device *bo_adev,
-  struct amdgpu_vm *vm, bool immediate,
-  bool unlocked, struct dma_resv *resv,
-  uint64_t start, uint64_t last,
-  uint64_t flags, uint64_t offset,
-  struct drm_mm_node *nodes,
-  dma_addr_t *pages_addr,
-  struct dma_fence **fence)
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+   struct amdgpu_device *bo_adev,
+   struct amdgpu_vm *vm, bool immediate,
+   bool unlocked, struct dma_resv *resv,
+   uint64_t start, uint64_t last,
+   uint64_t flags, uint64_t offset,
+   struct drm_mm_node *nodes,
+   dma_addr_t *pages_addr,
+   struct dma_fence **fence)
 {
struct amdgpu_vm_update_params params;
enum amdgpu_sync_mode sync_mode;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 976a12e5a8b9..848e175e99ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -366,6 +366,8 @@ struct amdgpu_vm_manager {
spinlock_t  pasid_lock;
 };
 
+struct amdgpu_bo_va_mapping;
+
 #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) 
((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
 #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) 
((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), 
(incr)))
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) 
((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), 
(incr), (flags)))
@@ -397,6 +399,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
  struct dma_fence **fence);
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
   struct amdgpu_vm *vm);
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+   struct amdgpu_device *bo_adev,
+   struct amdgpu_vm *vm, bool immediate,
+   bool unlocked, struct dma_resv *resv,
+   uint64_t start, uint64_t last,
+   uint64_t flags, uint64_t offset,
+   struct drm_mm_node *nodes,
+   dma_addr_t *pages_addr,
+   struct dma_fence **fence);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
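
As a usage illustration, this is roughly how a caller that has no bo_va
(such as the SVM code in the later patches) can drive the exported
interface directly (sketch only; the flag combination and the pages_addr
array are placeholders):

/* Sketch: map system pages covering [start, last] without an amdgpu_bo. */
static int map_range_sketch(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                            uint64_t start, uint64_t last,
                            dma_addr_t *pages_addr, struct dma_fence **fence)
{
        uint64_t flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SYSTEM |
                         AMDGPU_PTE_SNOOPED | AMDGPU_PTE_READABLE |
                         AMDGPU_PTE_WRITEABLE;

        /* no resv, no drm_mm nodes: the addresses come from pages_addr[] */
        return amdgpu_vm_bo_update_mapping(adev, adev, vm,
                                           false /* immediate */,
                                           false /* unlocked */,
                                           NULL  /* resv */,
                                           start, last, flags,
                                           0     /* offset */,
                                           NULL  /* nodes */,
                                           pages_addr, fence);
}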


[PATCH 11/44] drm/amdkfd: svm range eviction and restore

2021-03-22 Thread Felix Kuehling
The HMM interval notifier callback signals that the CPU page table is
about to be updated. Stop the process queues if the updated address
belongs to an svm range registered in the process svms object tree, and
schedule restore work to update the GPU page table with the new page
addresses of the updated svm range.

The restore worker flushes any deferred work to make sure it restores
an up-to-date svm_range_list.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   2 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   1 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 145 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   2 +
 4 files changed, 149 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 4101f5341ec9..7d70af26b5c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -738,6 +738,8 @@ struct svm_range_list {
struct work_struct  deferred_list_work;
struct list_headdeferred_range_list;
spinlock_t  deferred_list_lock;
+   atomic_tevicted_ranges;
+   struct delayed_work restore_work;
 };
 
 /* Process data */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 4d7a67141190..321895d7555a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1064,6 +1064,7 @@ static void kfd_process_notifier_release(struct 
mmu_notifier *mn,
 
cancel_delayed_work_sync(>eviction_work);
cancel_delayed_work_sync(>restore_work);
+   cancel_delayed_work_sync(>svms.restore_work);
 
mutex_lock(>mutex);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 1244ba380292..55828d6fbea9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -21,6 +21,7 @@
  */
 
 #include 
+#include 
 #include "amdgpu_sync.h"
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -28,6 +29,8 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
+#define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1
+
 static bool
 svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
@@ -226,6 +229,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
INIT_LIST_HEAD(>insert_list);
INIT_LIST_HEAD(>deferred_list);
INIT_LIST_HEAD(>child_list);
+   atomic_set(>invalid, 0);
svm_range_set_default_attributes(>preferred_loc,
 >prefetch_loc,
 >granularity, >flags);
@@ -282,6 +286,9 @@ svm_range_validate(struct mm_struct *mm, struct svm_range 
*prange)
 
r = svm_range_validate_ram(mm, prange);
 
+   pr_debug("svms 0x%p [0x%lx 0x%lx] ret %d invalid %d\n", prange->svms,
+prange->start, prange->last, r, atomic_read(>invalid));
+
return r;
 }
 
@@ -886,6 +893,134 @@ svm_range_list_lock_and_flush_work(struct svm_range_list 
*svms,
goto retry_flush_work;
 }
 
+static void svm_range_restore_work(struct work_struct *work)
+{
+   struct delayed_work *dwork = to_delayed_work(work);
+   struct amdkfd_process_info *process_info;
+   struct svm_range_list *svms;
+   struct svm_range *prange;
+   struct kfd_process *p;
+   struct mm_struct *mm;
+   int evicted_ranges;
+   int invalid;
+   int r;
+
+   svms = container_of(dwork, struct svm_range_list, restore_work);
+   evicted_ranges = atomic_read(>evicted_ranges);
+   if (!evicted_ranges)
+   return;
+
+   pr_debug("restore svm ranges\n");
+
+   /* kfd_process_notifier_release destroys this worker thread. So during
+* the lifetime of this thread, kfd_process and mm will be valid.
+*/
+   p = container_of(svms, struct kfd_process, svms);
+   process_info = p->kgd_process_info;
+   mm = p->mm;
+   if (!mm)
+   return;
+
+   mutex_lock(_info->lock);
+   svm_range_list_lock_and_flush_work(svms, mm);
+   mutex_lock(>lock);
+
+   list_for_each_entry(prange, >list, list) {
+   invalid = atomic_read(>invalid);
+   if (!invalid)
+   continue;
+
+   pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n",
+prange->svms, prange, prange->start, prange->last,
+invalid);
+
+   r = svm_range_validate(mm, prange);
+   if (r) {
+   pr_debug("failed %d to validate [0x%lx 0x%lx]\n", r,
+prange->start, prange->last);
+
+   goto unlock_out;
+   }
+
+   r = 
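
For completeness, a rough sketch of the eviction side that arms this
restore worker (this is an assumption about how the pieces fit together;
only the struct fields and kfd_process_evict_queues() come from the
patches, the function itself is hypothetical):

static int svm_range_evict_sketch(struct kfd_process *p,
                                  struct svm_range *prange)
{
        struct svm_range_list *svms = &p->svms;

        atomic_inc(&prange->invalid);        /* mark range for revalidation */

        if (atomic_inc_return(&svms->evicted_ranges) != 1)
                return 0;                    /* queues are already stopped */

        /* first evicted range: stop the queues and arm the restore worker */
        kfd_process_evict_queues(p);
        schedule_delayed_work(&svms->restore_work,
                msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS));
        return 0;
}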

[PATCH 07/44] drm/amdkfd: validate svm range system memory

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

Use HMM to get the system memory page addresses, which will be used to
map the range to GPUs or migrate it to VRAM.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 103 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   4 ++
 2 files changed, 106 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index e57103a9025e..6024caf7373f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -28,6 +28,15 @@
 #include "kfd_priv.h"
 #include "kfd_svm.h"
 
+static bool
+svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+   const struct mmu_notifier_range *range,
+   unsigned long cur_seq);
+
+static const struct mmu_interval_notifier_ops svm_range_mn_ops = {
+   .invalidate = svm_range_cpu_invalidate_pagetables,
+};
+
 /**
  * svm_range_unlink - unlink svm_range from lists and interval tree
  * @prange: svm range structure to be removed
@@ -46,6 +55,18 @@ static void svm_range_unlink(struct svm_range *prange)
interval_tree_remove(>it_node, >svms->objects);
 }
 
+static void
+svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange)
+{
+   pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms,
+prange, prange->start, prange->last);
+
+   mmu_interval_notifier_insert_locked(>notifier, mm,
+prange->start << PAGE_SHIFT,
+prange->npages << PAGE_SHIFT,
+_range_mn_ops);
+}
+
 /**
  * svm_range_add_to_svms - add svm range to svms
  * @prange: svm range structure to be added
@@ -65,6 +86,18 @@ static void svm_range_add_to_svms(struct svm_range *prange)
interval_tree_insert(>it_node, >svms->objects);
 }
 
+static void svm_range_remove_notifier(struct svm_range *prange)
+{
+   pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n",
+prange->svms, prange,
+prange->notifier.interval_tree.start >> PAGE_SHIFT,
+prange->notifier.interval_tree.last >> PAGE_SHIFT);
+
+   if (prange->notifier.interval_tree.start != 0 &&
+   prange->notifier.interval_tree.last != 0)
+   mmu_interval_notifier_remove(>notifier);
+}
+
 static void svm_range_free(struct svm_range *prange)
 {
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
@@ -112,6 +145,56 @@ svm_range *svm_range_new(struct svm_range_list *svms, 
uint64_t start,
return prange;
 }
 
+/**
+ * svm_range_validate_ram - get system memory pages of svm range
+ *
+ * @mm: the mm_struct of process
+ * @prange: the range struct
+ *
+ * After mapping system memory to GPU, system memory maybe invalidated anytime
+ * during application running, we use HMM callback to sync GPU with CPU page
+ * table update, so we don't need use lock to prevent CPU invalidation and 
check
+ * hmm_range_get_pages_done return value.
+ *
+ * Return:
+ * 0 - OK, otherwise error code
+ */
+static int
+svm_range_validate_ram(struct mm_struct *mm, struct svm_range *prange)
+{
+   int r;
+
+   r = amdgpu_hmm_range_get_pages(>notifier, mm, NULL,
+  prange->start << PAGE_SHIFT,
+  prange->npages, >hmm_range,
+  false, true);
+   if (r) {
+   pr_debug("failed %d to get svm range pages\n", r);
+   return r;
+   }
+
+   kvfree(prange->pages_addr);
+   prange->pages_addr = prange->hmm_range->hmm_pfns;
+   prange->hmm_range->hmm_pfns = NULL;
+
+   amdgpu_hmm_range_get_pages_done(prange->hmm_range);
+   prange->hmm_range = NULL;
+
+   return 0;
+}
+
+static int
+svm_range_validate(struct mm_struct *mm, struct svm_range *prange)
+{
+   int r = 0;
+
+   pr_debug("actual loc 0x%x\n", prange->actual_loc);
+
+   r = svm_range_validate_ram(mm, prange);
+
+   return r;
+}
+
 static int
 svm_range_check_attr(struct kfd_process *p,
 uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs)
@@ -581,6 +664,18 @@ svm_range_handle_overlap(struct svm_range_list *svms, 
struct svm_range *new,
return r;
 }
 
+/**
+ * svm_range_cpu_invalidate_pagetables - interval notifier callback
+ *
+ */
+static bool
+svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni,
+   const struct mmu_notifier_range *range,
+   unsigned long cur_seq)
+{
+   return true;
+}
+
 void svm_range_list_fini(struct kfd_process *p)
 {
pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, >svms);
@@ -732,6 +827,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, 
uint64_t size,
/* Apply changes as a 

[PATCH 10/44] drm/amdkfd: map svm range to GPUs

2021-03-22 Thread Felix Kuehling
Use amdgpu_vm_bo_update_mapping to update the GPU page tables, mapping or
unmapping the system memory page addresses of an svm range on the GPUs.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 395 +--
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |   4 +
 2 files changed, 374 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index e23171ac866a..1244ba380292 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -98,11 +98,99 @@ static void svm_range_remove_notifier(struct svm_range 
*prange)
mmu_interval_notifier_remove(>notifier);
 }
 
+static int
+svm_range_dma_map(struct device *dev, dma_addr_t **dma_addr,
+ unsigned long *pages_addr, uint64_t npages)
+{
+   enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+   dma_addr_t *addr = *dma_addr;
+   struct page *page;
+   int i, r;
+
+   if (!pages_addr)
+   return 0;
+
+   if (!addr) {
+   addr = kvmalloc_array(npages, sizeof(*addr),
+ GFP_KERNEL | __GFP_ZERO);
+   if (!addr)
+   return -ENOMEM;
+   *dma_addr = addr;
+   }
+
+   for (i = 0; i < npages; i++) {
+   if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]),
+ "leaking dma mapping\n"))
+   dma_unmap_page(dev, addr[i], PAGE_SIZE, dir);
+
+   page = hmm_pfn_to_page(pages_addr[i]);
+   addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir);
+   r = dma_mapping_error(dev, addr[i]);
+   if (r) {
+   pr_debug("failed %d dma_map_page\n", r);
+   return r;
+   }
+   pr_debug("dma mapping 0x%llx for page addr 0x%lx\n",
+addr[i] >> PAGE_SHIFT, page_to_pfn(page));
+   }
+   return 0;
+}
+
+void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+unsigned long offset, unsigned long npages)
+{
+   enum dma_data_direction dir = DMA_BIDIRECTIONAL;
+   int i;
+
+   if (!dma_addr)
+   return;
+
+   for (i = offset; i < offset + npages; i++) {
+   if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i]))
+   continue;
+   pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> PAGE_SHIFT);
+   dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir);
+   dma_addr[i] = 0;
+   }
+}
+
+static void svm_range_free_dma_mappings(struct svm_range *prange)
+{
+   DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
+   struct kfd_dev *kfd_dev;
+   dma_addr_t *dma_addr;
+   struct device *dev;
+   struct kfd_process *p;
+   uint32_t gpuidx;
+   int r;
+
+   p = container_of(prange->svms, struct kfd_process, svms);
+   bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+ MAX_GPU_INSTANCE);
+
+   for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
+   dma_addr = prange->dma_addr[gpuidx];
+   if (!dma_addr)
+   continue;
+
+   r = kfd_process_device_from_gpuidx(p, gpuidx, _dev);
+   if (r) {
+   pr_debug("failed to find device idx %d\n", gpuidx);
+   return;
+   }
+   dev = _dev->pdev->dev;
+   svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
+   kvfree(dma_addr);
+   prange->dma_addr[gpuidx] = NULL;
+   }
+}
+
 static void svm_range_free(struct svm_range *prange)
 {
pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 prange->start, prange->last);
 
+   svm_range_free_dma_mappings(prange);
kvfree(prange->pages_addr);
kfree(prange);
 }
@@ -342,41 +430,62 @@ svm_range_is_same_attrs(struct svm_range *old, struct 
svm_range *new)
 }
 
 static int
-svm_range_split_pages(struct svm_range *new, struct svm_range *old,
- uint64_t start, uint64_t last)
+svm_range_split_array(void *ppnew, void *ppold, size_t size,
+ uint64_t old_start, uint64_t old_n,
+ uint64_t new_start, uint64_t new_n)
 {
-   unsigned long old_start;
-   unsigned long *pages_addr;
+   unsigned char *new, *old, *pold;
uint64_t d;
 
-   old_start = old->start;
-   new->pages_addr = kvmalloc_array(new->npages,
-sizeof(*new->pages_addr),
-GFP_KERNEL | __GFP_ZERO);
-   if (!new->pages_addr)
-   return -ENOMEM;
+   if (!ppold)
+   return 0;
+   pold = *(unsigned char **)ppold;
+   if (!pold)
+   

[PATCH 14/44] drm/amdkfd: register HMM device private zone

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

Register VRAM as a MEMORY_DEVICE_PRIVATE type resource, so that VRAM
backing pages can be allocated for page migration.

Signed-off-by: Philip Yang 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c |   4 +
 drivers/gpu/drm/amd/amdkfd/Kconfig |   1 +
 drivers/gpu/drm/amd/amdkfd/Makefile|   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c   | 103 +
 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h   |  48 ++
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |   3 +
 6 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 5f6696a3c778..eb40b3879a0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -30,6 +30,7 @@
 #include 
 #include "amdgpu_xgmi.h"
 #include 
+#include "kfd_migrate.h"
 
 /* Total memory size in system memory and all GPU VRAM. Used to
  * estimate worst case amount of memory to reserve for page tables
@@ -167,12 +168,15 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 
adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
adev_to_drm(adev), 
_resources);
+   if (adev->kfd.init_complete)
+   svm_migrate_init(adev);
}
 }
 
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
if (adev->kfd.dev) {
+   svm_migrate_fini(adev);
kgd2kfd_device_exit(adev->kfd.dev);
adev->kfd.dev = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig 
b/drivers/gpu/drm/amd/amdkfd/Kconfig
index f02c938f75da..7880fc101a3b 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -8,6 +8,7 @@ config HSA_AMD
depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64)
imply AMD_IOMMU_V2 if X86_64
select HMM_MIRROR
+   select DEVICE_PRIVATE
select MMU_NOTIFIER
select DRM_AMDGPU_USERPTR
help
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index 387ce0217d35..a93301dbc464 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -55,7 +55,8 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
$(AMDKFD_PATH)/kfd_crat.o \
-   $(AMDKFD_PATH)/kfd_svm.o
+   $(AMDKFD_PATH)/kfd_svm.o \
+   $(AMDKFD_PATH)/kfd_migrate.o
 
 ifneq ($(CONFIG_AMD_IOMMU_V2),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
new file mode 100644
index ..4bb39c562665
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2020 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include "amdgpu_sync.h"
+#include "amdgpu_object.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_mn.h"
+#include "kfd_priv.h"
+#include "kfd_svm.h"
+#include "kfd_migrate.h"
+
+static void svm_migrate_page_free(struct page *page)
+{
+}
+
+/**
+ * svm_migrate_to_ram - CPU page fault handler
+ * @vmf: CPU vm fault vma, address
+ *
+ * Context: vm fault handler, mm->mmap_sem is taken
+ *
+ * Return:
+ * 0 - OK
+ * VM_FAULT_SIGBUS - notice application to have SIGBUS page fault
+ */
+static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf)
+{
+   return VM_FAULT_SIGBUS;
+}
+
+static const struct dev_pagemap_ops svm_migrate_pgmap_ops = {
+   .page_free  = svm_migrate_page_free,
+   
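
A condensed sketch of what registering the device-private zone involves
(assumptions: svm_migrate_init() follows the standard devm_memremap_pages()
pattern and kfd_dev gains an embedded struct dev_pagemap; field names and
error handling are placeholders):

/* Sketch: create device-private struct pages that cover the GPU's VRAM. */
static int svm_migrate_init_sketch(struct amdgpu_device *adev)
{
        struct dev_pagemap *pgmap = &adev->kfd.dev->pgmap; /* assumed field */
        struct resource *res;
        void *addr;

        /* reserve a physical address range to back the VRAM pages */
        res = devm_request_free_mem_region(adev->dev, &iomem_resource,
                                           adev->gmc.real_vram_size);
        if (IS_ERR(res))
                return PTR_ERR(res);

        pgmap->type = MEMORY_DEVICE_PRIVATE;
        pgmap->range.start = res->start;
        pgmap->range.end = res->end;
        pgmap->nr_range = 1;
        pgmap->ops = &svm_migrate_pgmap_ops;
        pgmap->owner = adev;

        /* creates the struct pages; migrate_vma can now target this zone */
        addr = devm_memremap_pages(adev->dev, pgmap);
        return PTR_ERR_OR_ZERO(addr);
}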

[PATCH 04/44] drm/amdkfd: register svm range

2021-03-22 Thread Felix Kuehling
From: Philip Yang 

The svm range structure stores the range start address, size, attributes,
flags, prefetch location and a GPU bitmap which indicates which GPUs this
range maps to. The same virtual address is shared by the CPU and the GPUs.

Each process has an svm range list which uses both an interval tree and a
list to store all svm ranges registered by the process. The interval tree
is used by the GPU VM fault handler and the CPU page fault handler to look
up the svm range structure for a given address. The list is used to scan
all ranges in the eviction restore work.

No overlapping intervals [start, last] exist in the svms object interval
tree. If a process registers a new range which overlaps an old range, the
old range is split into two ranges, depending on whether the overlap is at
the head or the tail of the old range.

Apply the preferred location, prefetch location, mapping flags and
migration granularity attributes to the svm range, and store the indices
of the mapped GPUs in the bitmap.

Signed-off-by: Philip Yang 
Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/Makefile  |   3 +-
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  17 +
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|   8 +
 drivers/gpu/drm/amd/amdkfd/kfd_process.c |   9 +
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 791 +++
 drivers/gpu/drm/amd/amdkfd/kfd_svm.h |  87 +++
 6 files changed, 914 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.c
 create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.h

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index e1e4115dcf78..387ce0217d35 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -54,7 +54,8 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_dbgdev.o \
$(AMDKFD_PATH)/kfd_dbgmgr.o \
$(AMDKFD_PATH)/kfd_smi_events.o \
-   $(AMDKFD_PATH)/kfd_crat.o
+   $(AMDKFD_PATH)/kfd_crat.o \
+   $(AMDKFD_PATH)/kfd_svm.o
 
 ifneq ($(CONFIG_AMD_IOMMU_V2),)
 AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index dbc824cc6b32..9511826ac8ae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -38,6 +38,7 @@
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
+#include "kfd_svm.h"
 #include "amdgpu_amdkfd.h"
 #include "kfd_smi_events.h"
 
@@ -1744,11 +1745,27 @@ static int kfd_ioctl_smi_events(struct file *filep,
 
 static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
 {
+   struct kfd_ioctl_svm_args *args = data;
int r = 0;
 
if (p->svm_disabled)
return -EPERM;
 
+   pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
+args->start_addr, args->size, args->op, args->nattr);
+
+   if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK))
+   return -EINVAL;
+   if (!args->start_addr || !args->size)
+   return -EINVAL;
+
+   mutex_lock(>mutex);
+
+   r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr,
+ args->attrs);
+
+   mutex_unlock(>mutex);
+
return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 18fc2ccd1a77..f547e1282d69 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -731,6 +731,12 @@ struct kfd_process_device {
 
 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
 
+struct svm_range_list {
+   struct mutexlock;
+   struct rb_root_cached   objects;
+   struct list_headlist;
+};
+
 /* Process data */
 struct kfd_process {
/*
@@ -810,6 +816,8 @@ struct kfd_process {
struct kobject *kobj_queues;
struct attribute attr_pasid;
 
+   /* shared virtual memory registered by this process */
+   struct svm_range_list svms;
bool svm_disabled;
 };
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 89e7c125d334..4d7a67141190 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -35,6 +35,7 @@
 #include 
 #include "amdgpu_amdkfd.h"
 #include "amdgpu.h"
+#include "kfd_svm.h"
 
 struct mm_struct;
 
@@ -42,6 +43,7 @@ struct mm_struct;
 #include "kfd_device_queue_manager.h"
 #include "kfd_dbgmgr.h"
 #include "kfd_iommu.h"
+#include "kfd_svm.h"
 
 /*
  * List of struct kfd_process (field kfd_process).
@@ -1003,6 +1005,7 @@ static void kfd_process_wq_release(struct work_struct 
*work)
kfd_iommu_unbind_process(p);
 
kfd_process_free_outstanding_kfd_bos(p);
+   svm_range_list_fini(p);
 
kfd_process_destroy_pdds(p);
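
The overlap rule from the commit message (split the old range at the head
or the tail so that intervals never overlap) can be illustrated with a
short sketch over the svms interval tree (simplified; split_head_sketch()
and split_tail_sketch() are hypothetical helpers, and the real code also
clones attributes and defers the tree update to a worker):

/* Sketch: make room for a new range [start, last] by trimming old ranges. */
static void handle_overlap_sketch(struct svm_range_list *svms,
                                  unsigned long start, unsigned long last)
{
        struct interval_tree_node *node;

        node = interval_tree_iter_first(&svms->objects, start, last);
        while (node) {
                struct interval_tree_node *next =
                        interval_tree_iter_next(node, start, last);
                struct svm_range *old =
                        container_of(node, struct svm_range, it_node);

                if (old->start < start)
                        split_tail_sketch(old, start - 1); /* keep the piece
                                                            * before the new
                                                            * range */
                if (old->last > last)
                        split_head_sketch(old, last + 1);  /* keep the piece
                                                            * after the new
                                                            * range */
                /* any part fully inside [start, last] is taken over by the
                 * new range and its attributes
                 */
                node = next;
        }
}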

[PATCH 02/44] drm/amdkfd: helper to convert gpu id and idx

2021-03-22 Thread Felix Kuehling
From: Alex Sierra 

An svm range uses a GPU bitmap to store which GPUs the range maps to.
The application passes a driver GPU id to specify a GPU, so a helper is
needed to convert the GPU id to a GPU bitmap index.

Access goes through the kfd_process_device pointers array in kfd_process.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  5 
 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 30 
 2 files changed, 35 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 33e56db14327..99b4624ef4c7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -842,6 +842,11 @@ struct kfd_process *kfd_create_process(struct file *filep);
 struct kfd_process *kfd_get_process(const struct task_struct *);
 struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid);
 struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
+int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
+   uint32_t gpu_idx, uint32_t *gpuid);
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id);
+int kfd_process_device_from_gpuidx(struct kfd_process *p,
+   uint32_t gpu_idx, struct kfd_dev **gpu);
 void kfd_unref_process(struct kfd_process *p);
 int kfd_process_evict_queues(struct kfd_process *p);
 int kfd_process_restore_queues(struct kfd_process *p);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index d4241d29ea94..028ebb0deddd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -1607,6 +1607,36 @@ int kfd_process_restore_queues(struct kfd_process *p)
return ret;
 }
 
+int kfd_process_gpuid_from_gpuidx(struct kfd_process *p,
+   uint32_t gpu_idx, uint32_t *gpuid)
+{
+   if (gpu_idx < p->n_pdds) {
+   *gpuid = p->pdds[gpu_idx]->dev->id;
+   return 0;
+   }
+   return -EINVAL;
+}
+
+int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id)
+{
+   int i;
+
+   for (i = 0; i < p->n_pdds; i++)
+   if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id)
+   return i;
+   return -EINVAL;
+}
+
+int kfd_process_device_from_gpuidx(struct kfd_process *p,
+   uint32_t gpu_idx, struct kfd_dev **gpu)
+{
+   if (gpu_idx < p->n_pdds) {
+   *gpu = p->pdds[gpu_idx]->dev;
+   return 0;
+   }
+   return -EINVAL;
+}
+
 static void evict_process_worker(struct work_struct *work)
 {
int ret;
-- 
2.31.0

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH 01/44] drm/amdgpu: replace per_device_list by array

2021-03-22 Thread Felix Kuehling
From: Alex Sierra 

Remove per_device_list from kfd_process and replace it with an array of
kfd_process_device pointers of MAX_GPU_INSTANCE size. This helps to manage
the kfd_process_devices bound to a specific kfd_process. The functions used
by kfd_chardev to iterate over the list were also removed, since they are
no longer valid; they are replaced by a local loop that iterates over the
array.

Signed-off-by: Alex Sierra 
Signed-off-by: Felix Kuehling 
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  | 116 --
 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|   8 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  20 +--
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 108 
 .../amd/amdkfd/kfd_process_queue_manager.c|   6 +-
 5 files changed, 111 insertions(+), 147 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 6802c616e10e..43de260b2230 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -870,52 +870,47 @@ static int kfd_ioctl_get_process_apertures(struct file 
*filp,
 {
struct kfd_ioctl_get_process_apertures_args *args = data;
struct kfd_process_device_apertures *pAperture;
-   struct kfd_process_device *pdd;
+   int i;
 
dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid);
 
args->num_of_nodes = 0;
 
mutex_lock(>mutex);
+   /* Run over all pdd of the process */
+   for (i = 0; i < p->n_pdds; i++) {
+   struct kfd_process_device *pdd = p->pdds[i];
+
+   pAperture =
+   >process_apertures[args->num_of_nodes];
+   pAperture->gpu_id = pdd->dev->id;
+   pAperture->lds_base = pdd->lds_base;
+   pAperture->lds_limit = pdd->lds_limit;
+   pAperture->gpuvm_base = pdd->gpuvm_base;
+   pAperture->gpuvm_limit = pdd->gpuvm_limit;
+   pAperture->scratch_base = pdd->scratch_base;
+   pAperture->scratch_limit = pdd->scratch_limit;
 
-   /*if the process-device list isn't empty*/
-   if (kfd_has_process_device_data(p)) {
-   /* Run over all pdd of the process */
-   pdd = kfd_get_first_process_device_data(p);
-   do {
-   pAperture =
-   >process_apertures[args->num_of_nodes];
-   pAperture->gpu_id = pdd->dev->id;
-   pAperture->lds_base = pdd->lds_base;
-   pAperture->lds_limit = pdd->lds_limit;
-   pAperture->gpuvm_base = pdd->gpuvm_base;
-   pAperture->gpuvm_limit = pdd->gpuvm_limit;
-   pAperture->scratch_base = pdd->scratch_base;
-   pAperture->scratch_limit = pdd->scratch_limit;
-
-   dev_dbg(kfd_device,
-   "node id %u\n", args->num_of_nodes);
-   dev_dbg(kfd_device,
-   "gpu id %u\n", pdd->dev->id);
-   dev_dbg(kfd_device,
-   "lds_base %llX\n", pdd->lds_base);
-   dev_dbg(kfd_device,
-   "lds_limit %llX\n", pdd->lds_limit);
-   dev_dbg(kfd_device,
-   "gpuvm_base %llX\n", pdd->gpuvm_base);
-   dev_dbg(kfd_device,
-   "gpuvm_limit %llX\n", pdd->gpuvm_limit);
-   dev_dbg(kfd_device,
-   "scratch_base %llX\n", pdd->scratch_base);
-   dev_dbg(kfd_device,
-   "scratch_limit %llX\n", pdd->scratch_limit);
-
-   args->num_of_nodes++;
-
-   pdd = kfd_get_next_process_device_data(p, pdd);
-   } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
-   }
+   dev_dbg(kfd_device,
+   "node id %u\n", args->num_of_nodes);
+   dev_dbg(kfd_device,
+   "gpu id %u\n", pdd->dev->id);
+   dev_dbg(kfd_device,
+   "lds_base %llX\n", pdd->lds_base);
+   dev_dbg(kfd_device,
+   "lds_limit %llX\n", pdd->lds_limit);
+   dev_dbg(kfd_device,
+   "gpuvm_base %llX\n", pdd->gpuvm_base);
+   dev_dbg(kfd_device,
+   "gpuvm_limit %llX\n", pdd->gpuvm_limit);
+   dev_dbg(kfd_device,
+   "scratch_base %llX\n", pdd->scratch_base);
+   dev_dbg(kfd_device,
+   "scratch_limit %llX\n", pdd->scratch_limit);
 
+   if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS)
+   break;
+   }
	mutex_unlock(&p->mutex);
 
return 0;
@@ -926,9 +921,8 @@ static 

Re: [PATCH] drm/radeon/ttm: Fix memory leak userptr pages

2021-03-22 Thread Daniel Gomez
On Mon, 22 Mar 2021 at 11:34, Christian König
 wrote:
>
> Hi Daniel,
>
> On 22.03.21 at 10:38, Daniel Gomez wrote:
> > On Fri, 19 Mar 2021 at 21:29, Felix Kuehling  wrote:
> >> This caused a regression in kfdtest in a large-buffer stress test after
> >> memory allocation for user pages fails:
> > I'm sorry to hear that. BTW, I guess you meant amdgpu leak patch and
> > not this one.
> > Just some background for the mem leak patch, if it helps to understand this:
> > the leak was introduced here:
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0b988ca1c7c4c73983b4ea96ef7c2af2263c87eb
> > where the bound status was introduced for all drm drivers including
> > radeon and amdgpu. So this patch just reverts the logic to the
> > original code while keeping the bound status. In my case, the binding
> > code allocates the user pages memory and returns without binding (at
> > amdgpu_gtt_mgr_has_gart_addr). So,
> > when the unbinding happens, the memory needs to be cleared to prevent the 
> > leak.
>
> Ah, now I understand what's happening here. Daniel, your patch is not
> really correct.
>
> The problem is rather that we don't set the tt object to bound if it
> doesn't have a GTT address.
Okay, I understand.
>
> Going to provide a patch for this.
Looking forward to your patch. Thanks Christian!
>
> Regards,
> Christian.
>
> >
> >> [17359.536303] amdgpu: init_user_pages: Failed to get user pages: -16
> >> [17359.543746] BUG: kernel NULL pointer dereference, address: 
> >> 
> >> [17359.551494] #PF: supervisor read access in kernel mode
> >> [17359.557375] #PF: error_code(0x) - not-present page
> >> [17359.563247] PGD 0 P4D 0
> >> [17359.566514] Oops:  [#1] SMP PTI
> >> [17359.570728] CPU: 8 PID: 5944 Comm: kfdtest Not tainted 
> >> 5.11.0-kfd-fkuehlin #193
> >> [17359.578760] Hardware name: ASUS All Series/X99-E WS/USB 3.1, BIOS 3201 
> >> 06/17/2016
> >> [17359.586971] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu]
> >> [17359.594075] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 
> >> 00 00 85 c0 0f 85 ab 00 00 00 c6 43 54 00 5b 5d c3 48 8b 46 10 8b 4e 50 
> >> <48> 8b 30 48 85 f6 74 ba 8b 50 0c 48 8b bf 80 a1 ff ff 83 e1 01 45
> >> [17359.614340] RSP: 0018:a4764971fc98 EFLAGS: 00010206
> >> [17359.620315] RAX:  RBX: 950e8d4edf00 RCX: 
> >> 
> >> [17359.628204] RDX:  RSI: 950e8d4edf00 RDI: 
> >> 950eadec5e80
> >> [17359.636084] RBP: 950eadec5e80 R08:  R09: 
> >> 
> >> [17359.643958] R10: 0246 R11: 0001 R12: 
> >> 950c03377800
> >> [17359.651833] R13: 950eadec5e80 R14: 950c03377858 R15: 
> >> 
> >> [17359.659701] FS:  7febb20cb740() GS:950ebfc0() 
> >> knlGS:
> >> [17359.668528] CS:  0010 DS:  ES:  CR0: 80050033
> >> [17359.675012] CR2:  CR3: 0006d700e005 CR4: 
> >> 001706e0
> >> [17359.682883] Call Trace:
> >> [17359.686063]  amdgpu_ttm_backend_destroy+0x12/0x70 [amdgpu]
> >> [17359.692349]  ttm_bo_cleanup_memtype_use+0x37/0x60 [ttm]
> >> [17359.698307]  ttm_bo_release+0x278/0x5e0 [ttm]
> >> [17359.703385]  amdgpu_bo_unref+0x1a/0x30 [amdgpu]
> >> [17359.708701]  amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x7e5/0x910 
> >> [amdgpu]
> >> [17359.716307]  kfd_ioctl_alloc_memory_of_gpu+0x11a/0x220 [amdgpu]
> >> [17359.723036]  kfd_ioctl+0x223/0x400 [amdgpu]
> >> [17359.728017]  ? kfd_dev_is_large_bar+0x90/0x90 [amdgpu]
> >> [17359.734152]  __x64_sys_ioctl+0x8b/0xd0
> >> [17359.738796]  do_syscall_64+0x2d/0x40
> >> [17359.743259]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> >> [17359.749205] RIP: 0033:0x7febb083b6d7
> >> [17359.753681] Code: b3 66 90 48 8b 05 b1 47 2d 00 64 c7 00 26 00 00 00 48 
> >> c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 
> >> <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 47 2d 00 f7 d8 64 89 01 48
> >> [17359.774340] RSP: 002b:7ffdb5522cd8 EFLAGS: 0202 ORIG_RAX: 
> >> 0010
> >> [17359.782668] RAX: ffda RBX: 0001 RCX: 
> >> 7febb083b6d7
> >> [17359.790566] RDX: 7ffdb5522d60 RSI: c0284b16 RDI: 
> >> 0003
> >> [17359.798459] RBP: 7ffdb5522d10 R08: 7ffdb5522dd0 R09: 
> >> c404
> >> [17359.806352] R10:  R11: 0202 R12: 
> >> 559416e4e2aa
> >> [17359.814251] R13:  R14: 0021 R15: 
> >> 
> >> [17359.822140] Modules linked in: ip6table_filter ip6_tables 
> >> iptable_filter amdgpu x86_pkg_temp_thermal drm_ttm_helper ttm iommu_v2 
> >> gpu_sched ip_tables x_tables
> >> [17359.837776] CR2: 
> >> [17359.841888] ---[ end trace a6f27d64475b28c8 ]---
> >> [17359.847318] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu]
> >> [17359.854479] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 
> >> 00 

Re: [PATCH] drm/radeon/ttm: Fix memory leak userptr pages

2021-03-22 Thread Christian König

Hi Daniel,

On 22.03.21 at 10:38, Daniel Gomez wrote:

On Fri, 19 Mar 2021 at 21:29, Felix Kuehling  wrote:

This caused a regression in kfdtest in a large-buffer stress test after
memory allocation for user pages fails:

I'm sorry to hear that. BTW, I guess you meant amdgpu leak patch and
not this one.
Just some background for the mem leak patch, if it helps to understand this:
the leak was introduced here:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0b988ca1c7c4c73983b4ea96ef7c2af2263c87eb
where the bound status was introduced for all drm drivers including
radeon and amdgpu. So this patch just reverts the logic to the
original code while keeping the bound status. In my case, the binding
code allocates the user pages memory and returns without binding (at
amdgpu_gtt_mgr_has_gart_addr). So,
when the unbinding happens, the memory needs to be cleared to prevent the leak.


Ah, now I understand what's happening here. Daniel, your patch is not
really correct.


The problem is rather that we don't set the tt object to bound if it 
doesn't have a GTT address.


Going to provide a patch for this.
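
As a rough illustration of that direction (this is not the actual fix; the
simplified signature and the gtt->bound / AMDGPU_BO_INVALID_OFFSET names are
assumptions for the sketch):

static int backend_bind_sketch(struct amdgpu_ttm_tt *gtt,
			       struct ttm_resource *bo_mem)
{
	if (!amdgpu_gtt_mgr_has_gart_addr(bo_mem)) {
		/* GART binding is deferred, but the tt is still marked
		 * bound so the unbind path runs later and releases the
		 * user pages instead of leaking them. */
		gtt->offset = AMDGPU_BO_INVALID_OFFSET;
		gtt->bound = true;
		return 0;
	}

	/* ... real GART binding would happen here ... */
	gtt->bound = true;
	return 0;
}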

Regards,
Christian.




[17359.536303] amdgpu: init_user_pages: Failed to get user pages: -16
[17359.543746] BUG: kernel NULL pointer dereference, address: 
[17359.551494] #PF: supervisor read access in kernel mode
[17359.557375] #PF: error_code(0x) - not-present page
[17359.563247] PGD 0 P4D 0
[17359.566514] Oops:  [#1] SMP PTI
[17359.570728] CPU: 8 PID: 5944 Comm: kfdtest Not tainted 5.11.0-kfd-fkuehlin 
#193
[17359.578760] Hardware name: ASUS All Series/X99-E WS/USB 3.1, BIOS 3201 
06/17/2016
[17359.586971] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu]
[17359.594075] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 00 00 85 
c0 0f 85 ab 00 00 00 c6 43 54 00 5b 5d c3 48 8b 46 10 8b 4e 50 <48> 8b 30 48 85 
f6 74 ba 8b 50 0c 48 8b bf 80 a1 ff ff 83 e1 01 45
[17359.614340] RSP: 0018:a4764971fc98 EFLAGS: 00010206
[17359.620315] RAX:  RBX: 950e8d4edf00 RCX: 
[17359.628204] RDX:  RSI: 950e8d4edf00 RDI: 950eadec5e80
[17359.636084] RBP: 950eadec5e80 R08:  R09: 
[17359.643958] R10: 0246 R11: 0001 R12: 950c03377800
[17359.651833] R13: 950eadec5e80 R14: 950c03377858 R15: 
[17359.659701] FS:  7febb20cb740() GS:950ebfc0() 
knlGS:
[17359.668528] CS:  0010 DS:  ES:  CR0: 80050033
[17359.675012] CR2:  CR3: 0006d700e005 CR4: 001706e0
[17359.682883] Call Trace:
[17359.686063]  amdgpu_ttm_backend_destroy+0x12/0x70 [amdgpu]
[17359.692349]  ttm_bo_cleanup_memtype_use+0x37/0x60 [ttm]
[17359.698307]  ttm_bo_release+0x278/0x5e0 [ttm]
[17359.703385]  amdgpu_bo_unref+0x1a/0x30 [amdgpu]
[17359.708701]  amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x7e5/0x910 [amdgpu]
[17359.716307]  kfd_ioctl_alloc_memory_of_gpu+0x11a/0x220 [amdgpu]
[17359.723036]  kfd_ioctl+0x223/0x400 [amdgpu]
[17359.728017]  ? kfd_dev_is_large_bar+0x90/0x90 [amdgpu]
[17359.734152]  __x64_sys_ioctl+0x8b/0xd0
[17359.738796]  do_syscall_64+0x2d/0x40
[17359.743259]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
[17359.749205] RIP: 0033:0x7febb083b6d7
[17359.753681] Code: b3 66 90 48 8b 05 b1 47 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff 
ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff 
ff 73 01 c3 48 8b 0d 81 47 2d 00 f7 d8 64 89 01 48
[17359.774340] RSP: 002b:7ffdb5522cd8 EFLAGS: 0202 ORIG_RAX: 
0010
[17359.782668] RAX: ffda RBX: 0001 RCX: 7febb083b6d7
[17359.790566] RDX: 7ffdb5522d60 RSI: c0284b16 RDI: 0003
[17359.798459] RBP: 7ffdb5522d10 R08: 7ffdb5522dd0 R09: c404
[17359.806352] R10:  R11: 0202 R12: 559416e4e2aa
[17359.814251] R13:  R14: 0021 R15: 
[17359.822140] Modules linked in: ip6table_filter ip6_tables iptable_filter 
amdgpu x86_pkg_temp_thermal drm_ttm_helper ttm iommu_v2 gpu_sched ip_tables 
x_tables
[17359.837776] CR2: 
[17359.841888] ---[ end trace a6f27d64475b28c8 ]---
[17359.847318] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu]
[17359.854479] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 00 00 85 
c0 0f 85 ab 00 00 00 c6 43 54 00 5b 5d c3 48 8b 46 10 8b 4e 50 <48> 8b 30 48 85 
f6 74 ba 8b 50 0c 48 8b bf 80 a1 ff ff 83 e1 01 45
[17359.874929] RSP: 0018:a4764971fc98 EFLAGS: 00010206
[17359.881014] RAX:  RBX: 950e8d4edf00 RCX: 
[17359.889007] RDX:  RSI: 950e8d4edf00 RDI: 950eadec5e80
[17359.897008] RBP: 950eadec5e80 R08:  R09: 
[17359.905020] R10: 0246 R11: 0001 R12: 950c03377800
[17359.913034] R13: 950eadec5e80 

Re: [PATCH] drm/radeon/ttm: Fix memory leak userptr pages

2021-03-22 Thread Daniel Gomez
On Fri, 19 Mar 2021 at 21:29, Felix Kuehling  wrote:
>
> This caused a regression in kfdtest in a large-buffer stress test after
> memory allocation for user pages fails:

I'm sorry to hear that. BTW, I guess you meant amdgpu leak patch and
not this one.
Just some background for the mem leak patch, if it helps to understand this:
the leak was introduced here:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0b988ca1c7c4c73983b4ea96ef7c2af2263c87eb
where the bound status was introduced for all drm drivers including
radeon and amdgpu. So this patch just reverts the logic to the
original code while keeping the bound status. In my case, the binding
code allocates the user pages memory and returns without binding (at
amdgpu_gtt_mgr_has_gart_addr). So,
when the unbinding happens, the memory needs to be cleared to prevent the leak.

>
> [17359.536303] amdgpu: init_user_pages: Failed to get user pages: -16
> [17359.543746] BUG: kernel NULL pointer dereference, address: 
> [17359.551494] #PF: supervisor read access in kernel mode
> [17359.557375] #PF: error_code(0x) - not-present page
> [17359.563247] PGD 0 P4D 0
> [17359.566514] Oops:  [#1] SMP PTI
> [17359.570728] CPU: 8 PID: 5944 Comm: kfdtest Not tainted 5.11.0-kfd-fkuehlin 
> #193
> [17359.578760] Hardware name: ASUS All Series/X99-E WS/USB 3.1, BIOS 3201 
> 06/17/2016
> [17359.586971] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu]
> [17359.594075] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 00 
> 00 85 c0 0f 85 ab 00 00 00 c6 43 54 00 5b 5d c3 48 8b 46 10 8b 4e 50 <48> 8b 
> 30 48 85 f6 74 ba 8b 50 0c 48 8b bf 80 a1 ff ff 83 e1 01 45
> [17359.614340] RSP: 0018:a4764971fc98 EFLAGS: 00010206
> [17359.620315] RAX:  RBX: 950e8d4edf00 RCX: 
> 
> [17359.628204] RDX:  RSI: 950e8d4edf00 RDI: 
> 950eadec5e80
> [17359.636084] RBP: 950eadec5e80 R08:  R09: 
> 
> [17359.643958] R10: 0246 R11: 0001 R12: 
> 950c03377800
> [17359.651833] R13: 950eadec5e80 R14: 950c03377858 R15: 
> 
> [17359.659701] FS:  7febb20cb740() GS:950ebfc0() 
> knlGS:
> [17359.668528] CS:  0010 DS:  ES:  CR0: 80050033
> [17359.675012] CR2:  CR3: 0006d700e005 CR4: 
> 001706e0
> [17359.682883] Call Trace:
> [17359.686063]  amdgpu_ttm_backend_destroy+0x12/0x70 [amdgpu]
> [17359.692349]  ttm_bo_cleanup_memtype_use+0x37/0x60 [ttm]
> [17359.698307]  ttm_bo_release+0x278/0x5e0 [ttm]
> [17359.703385]  amdgpu_bo_unref+0x1a/0x30 [amdgpu]
> [17359.708701]  amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x7e5/0x910 [amdgpu]
> [17359.716307]  kfd_ioctl_alloc_memory_of_gpu+0x11a/0x220 [amdgpu]
> [17359.723036]  kfd_ioctl+0x223/0x400 [amdgpu]
> [17359.728017]  ? kfd_dev_is_large_bar+0x90/0x90 [amdgpu]
> [17359.734152]  __x64_sys_ioctl+0x8b/0xd0
> [17359.738796]  do_syscall_64+0x2d/0x40
> [17359.743259]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
> [17359.749205] RIP: 0033:0x7febb083b6d7
> [17359.753681] Code: b3 66 90 48 8b 05 b1 47 2d 00 64 c7 00 26 00 00 00 48 c7 
> c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 
> 01 f0 ff ff 73 01 c3 48 8b 0d 81 47 2d 00 f7 d8 64 89 01 48
> [17359.774340] RSP: 002b:7ffdb5522cd8 EFLAGS: 0202 ORIG_RAX: 
> 0010
> [17359.782668] RAX: ffda RBX: 0001 RCX: 
> 7febb083b6d7
> [17359.790566] RDX: 7ffdb5522d60 RSI: c0284b16 RDI: 
> 0003
> [17359.798459] RBP: 7ffdb5522d10 R08: 7ffdb5522dd0 R09: 
> c404
> [17359.806352] R10:  R11: 0202 R12: 
> 559416e4e2aa
> [17359.814251] R13:  R14: 0021 R15: 
> 
> [17359.822140] Modules linked in: ip6table_filter ip6_tables iptable_filter 
> amdgpu x86_pkg_temp_thermal drm_ttm_helper ttm iommu_v2 gpu_sched ip_tables 
> x_tables
> [17359.837776] CR2: 
> [17359.841888] ---[ end trace a6f27d64475b28c8 ]---
> [17359.847318] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu]
> [17359.854479] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 00 
> 00 85 c0 0f 85 ab 00 00 00 c6 43 54 00 5b 5d c3 48 8b 46 10 8b 4e 50 <48> 8b 
> 30 48 85 f6 74 ba 8b 50 0c 48 8b bf 80 a1 ff ff 83 e1 01 45
> [17359.874929] RSP: 0018:a4764971fc98 EFLAGS: 00010206
> [17359.881014] RAX:  RBX: 950e8d4edf00 RCX: 
> 
> [17359.889007] RDX:  RSI: 950e8d4edf00 RDI: 
> 950eadec5e80
> [17359.897008] RBP: 950eadec5e80 R08:  R09: 
> 
> [17359.905020] R10: 0246 R11: 0001 R12: 
> 950c03377800
> [17359.913034] R13: 950eadec5e80 R14: 950c03377858 R15: 
> 
> [17359.921050] FS:  7febb20cb740() GS:950ebfc0() 
> 

Re: [PATCH] drm/amd/display: Allow idle optimization based on vblank.

2021-03-22 Thread Michel Dänzer
On 2021-03-20 1:31 a.m., R, Bindu wrote:
> 
> The Update patch has been submitted.

Submitted where? Still can't see it.


-- 
Earthling Michel Dänzer   |   https://redhat.com
Libre software enthusiast | Mesa and X developer
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: fix modprobe failure on vega series

2021-03-22 Thread Guchun Chen
Fixes: d88b34caee83 ("Remove some large variables from the stack")

[   41.232097] Call Trace:
[   41.232105]  kvasprintf+0x66/0xd0
[   41.232122]  kasprintf+0x49/0x70
[   41.232136]  __drm_crtc_init_with_planes+0x2e1/0x340 [drm]
[   41.232219]  ? create_object+0x263/0x3b0
[   41.232231]  drm_crtc_init_with_planes+0x46/0x60 [drm]
[   41.232303]  amdgpu_dm_init+0x69c/0x1750 [amdgpu]
[   41.232998]  ? phm_wait_for_register_unequal.part.1+0x58/0x90 [amdgpu]
[   41.233662]  ? smu9_wait_for_response+0x7d/0xa0 [amdgpu]
[   41.234294]  ? smu9_send_msg_to_smc_with_parameter+0x77/0xd0 [amdgpu]
[   41.234912]  ? smum_send_msg_to_smc_with_parameter+0x96/0x100 [amdgpu]
[   41.235520]  ? psm_set_states+0x5c/0x60 [amdgpu]
[   41.236165]  dm_hw_init+0x12/0x20 [amdgpu]
[   41.236834]  amdgpu_device_init+0x1402/0x1df0 [amdgpu]
[   41.237314]  amdgpu_driver_load_kms+0x65/0x320 [amdgpu]
[   41.237780]  amdgpu_pci_probe+0x150/0x250 [amdgpu]
[   41.238240]  local_pci_probe+0x47/0xa0
[   41.238253]  pci_device_probe+0x10b/0x1c0
[   41.238265]  really_probe+0xf5/0x4c0
[   41.238275]  driver_probe_device+0xe8/0x150
[   41.238284]  device_driver_attach+0x58/0x60
[   41.238293]  __driver_attach+0xa3/0x140
[   41.238301]  ? device_driver_attach+0x60/0x60
[   41.238309]  ? device_driver_attach+0x60/0x60
[   41.238317]  bus_for_each_dev+0x74/0xb0
[   41.238330]  ? kmem_cache_alloc_trace+0x31a/0x470
[   41.238341]  driver_attach+0x1e/0x20
[   41.238348]  bus_add_driver+0x14a/0x220
[   41.238357]  ? 0xc0f09000
[   41.238364]  driver_register+0x60/0x100
[   41.238373]  ? 0xc0f09000
[   41.238379]  __pci_register_driver+0x54/0x60
[   41.238389]  amdgpu_init+0x68/0x1000 [amdgpu]
[   41.238836]  do_one_initcall+0x48/0x1e0
[   41.238850]  ? kmem_cache_alloc_trace+0x31a/0x470
[   41.238862]  do_init_module+0x5f/0x224
[   41.238876]  load_module+0x266b/0x2ec0
[   41.238887]  ? security_kernel_post_read_file+0x5c/0x70
[   41.238905]  __do_sys_finit_module+0xc1/0x120
[   41.238913]  ? __do_sys_finit_module+0xc1/0x120
[   41.238924]  __x64_sys_finit_module+0x1a/0x20
[   41.238932]  do_syscall_64+0x37/0x80
[   41.238942]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
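
For context, the failure comes from allocating only sizeof(pointer) bytes for
the dceip and vbios structures instead of their full size, so later writes run
past the allocation and corrupt memory. A minimal, generic userspace
illustration of the pattern (names are made up, not from the driver):

#include <stdlib.h>
#include <string.h>

struct big_state { char data[4096]; };

int main(void)
{
	struct big_state *s;

	/* BUG: sizeof(s) is the size of the pointer (8 bytes on x86-64),
	 * so any write into *s would land outside the allocation. */
	s = calloc(1, sizeof(s));
	free(s);

	/* FIX: size the allocation from the pointed-to object. */
	s = calloc(1, sizeof(*s));
	memset(s->data, 0, sizeof(s->data));
	free(s);
	return 0;
}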

Signed-off-by: Guchun Chen 
---
 drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c 
b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
index 556ecfabc8d2..1244fcb0f446 100644
--- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
+++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c
@@ -2051,11 +2051,11 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip,
 
enum bw_calcs_version version = bw_calcs_version_from_asic_id(asic_id);
 
-   dceip = kzalloc(sizeof(dceip), GFP_KERNEL);
+   dceip = kzalloc(sizeof(*dceip), GFP_KERNEL);
if (!dceip)
return;
 
-   vbios = kzalloc(sizeof(vbios), GFP_KERNEL);
+   vbios = kzalloc(sizeof(*vbios), GFP_KERNEL);
if (!vbios) {
kfree(dceip);
return;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdgpu/swsmu: fix typo (memlk -> memclk)

2021-03-22 Thread Tobias Jakobi
- no functional changes

Signed-off-by: Tobias Jakobi 
---
 drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 4 ++--
 drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
index 6e641f1513d8..66d69c13f915 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
@@ -1433,7 +1433,7 @@ static int navi10_get_power_profile_mode(struct 
smu_context *smu, char *buf)
size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d 
%7d %7d %7d %7d\n",
" ",
2,
-   "MEMLK",
+   "MEMCLK",
activity_monitor.Mem_FPS,
activity_monitor.Mem_MinFreqStep,
activity_monitor.Mem_MinActiveFreqType,
@@ -1493,7 +1493,7 @@ static int navi10_set_power_profile_mode(struct 
smu_context *smu, long *input, u
activity_monitor.Soc_PD_Data_error_coeff = input[8];
activity_monitor.Soc_PD_Data_error_rate_coeff = 
input[9];
break;
-   case 2: /* Memlk */
+   case 2: /* Memclk */
activity_monitor.Mem_FPS = input[1];
activity_monitor.Mem_MinFreqStep = input[2];
activity_monitor.Mem_MinActiveFreqType = input[3];
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c 
b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index af73e1430af5..f21679acd9af 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -1366,7 +1366,7 @@ static int sienna_cichlid_get_power_profile_mode(struct 
smu_context *smu, char *
size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d 
%7d %7d %7d %7d\n",
" ",
2,
-   "MEMLK",
+   "MEMCLK",
activity_monitor->Mem_FPS,
activity_monitor->Mem_MinFreqStep,
activity_monitor->Mem_MinActiveFreqType,
@@ -1429,7 +1429,7 @@ static int sienna_cichlid_set_power_profile_mode(struct 
smu_context *smu, long *
activity_monitor->Fclk_PD_Data_error_coeff = input[8];
activity_monitor->Fclk_PD_Data_error_rate_coeff = 
input[9];
break;
-   case 2: /* Memlk */
+   case 2: /* Memclk */
activity_monitor->Mem_FPS = input[1];
activity_monitor->Mem_MinFreqStep = input[2];
activity_monitor->Mem_MinActiveFreqType = input[3];
-- 
2.26.2

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/radeon: don't evict if not initialized

2021-03-22 Thread Tong Zhang
Thanks,
Fixed as suggested and sent as v2.
- Tong

On Sun, Mar 21, 2021 at 9:26 AM Christian König
 wrote:
>
>
>
> On 20.03.21 at 21:10, Tong Zhang wrote:
> > TTM_PL_VRAM may not initialized at all when calling
> > radeon_bo_evict_vram(). We need to check before doing eviction.
> >
> > [2.160837] BUG: kernel NULL pointer dereference, address: 
> > 0020
> > [2.161212] #PF: supervisor read access in kernel mode
> > [2.161490] #PF: error_code(0x) - not-present page
> > [2.161767] PGD 0 P4D 0
> > [2.163088] RIP: 0010:ttm_resource_manager_evict_all+0x70/0x1c0 [ttm]
> > [2.168506] Call Trace:
> > [2.168641]  radeon_bo_evict_vram+0x1c/0x20 [radeon]
> > [2.168936]  radeon_device_fini+0x28/0xf9 [radeon]
> > [2.169224]  radeon_driver_unload_kms+0x44/0xa0 [radeon]
> > [2.169534]  radeon_driver_load_kms+0x174/0x210 [radeon]
> > [2.169843]  drm_dev_register+0xd9/0x1c0 [drm]
> > [2.170104]  radeon_pci_probe+0x117/0x1a0 [radeon]
> >
> > Signed-off-by: Tong Zhang 
> > ---
> >   drivers/gpu/drm/radeon/radeon_object.c | 4 +++-
> >   1 file changed, 3 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
> > b/drivers/gpu/drm/radeon/radeon_object.c
> > index 9b81786782de..04e9a8118b0e 100644
> > --- a/drivers/gpu/drm/radeon/radeon_object.c
> > +++ b/drivers/gpu/drm/radeon/radeon_object.c
> > @@ -384,7 +384,9 @@ int radeon_bo_evict_vram(struct radeon_device *rdev)
> >   }
> >   #endif
> >   man = ttm_manager_type(bdev, TTM_PL_VRAM);
> > - return ttm_resource_manager_evict_all(bdev, man);
> > + if (man)
> > + return ttm_resource_manager_evict_all(bdev, man);
> > + return 0;
>
> You should probably code this the other way around, e.g.
>
> If (!man)
>  return 0;
> ...
>
> Apart from that looks good to me.
>
> Christian.
>
> >   }
> >
> >   void radeon_bo_force_delete(struct radeon_device *rdev)
>
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amdkfd: Fix cat debugfs hang_hws file causes system crash bug

2021-03-22 Thread Qu Huang
Reading the hang_hws debugfs file crashes the kernel: the file is created
with NULL private data, so kfd_debugfs_open() hands a NULL show callback
to single_open() and seq_read() ends up jumping to a NULL pointer. Give
the file a real read handler. Here is the system crash log:
[ 1272.884438] BUG: unable to handle kernel NULL pointer dereference at
(null)
[ 1272.88] IP: [<  (null)>]   (null)
[ 1272.884447] PGD 825b09067 PUD 8267c8067 PMD 0
[ 1272.884452] Oops: 0010 [#1] SMP
[ 1272.884509] CPU: 13 PID: 3485 Comm: cat Kdump: loaded Tainted: G
[ 1272.884515] task: 9a38dbd4d140 ti: 9a37cd3b8000 task.ti:
9a37cd3b8000
[ 1272.884517] RIP: 0010:[<>]  [<  (null)>]
(null)
[ 1272.884520] RSP: 0018:9a37cd3bbe68  EFLAGS: 00010203
[ 1272.884522] RAX:  RBX:  RCX:
00014d5f
[ 1272.884524] RDX: fff4 RSI: 0001 RDI:
9a38aca4d200
[ 1272.884526] RBP: 9a37cd3bbed0 R08: 9a38dcd5f1a0 R09:
9a31ffc07300
[ 1272.884527] R10: 9a31ffc07300 R11: addd5e9d R12:
9a38b4e0fb00
[ 1272.884529] R13: 0001 R14: 9a37cd3bbf18 R15:
9a38aca4d200
[ 1272.884532] FS:  7feccaa67740() GS:9a38dcd4()
knlGS:
[ 1272.884534] CS:  0010 DS:  ES:  CR0: 80050033
[ 1272.884536] CR2:  CR3: 0008267c CR4:
003407e0
[ 1272.884537] Call Trace:
[ 1272.884544]  [] ? seq_read+0x130/0x440
[ 1272.884548]  [] vfs_read+0x9f/0x170
[ 1272.884552]  [] SyS_read+0x7f/0xf0
[ 1272.884557]  [] system_call_fastpath+0x22/0x27
[ 1272.884558] Code:  Bad RIP value.
[ 1272.884562] RIP  [<  (null)>]   (null)
[ 1272.884564]  RSP 
[ 1272.884566] CR2: 

Signed-off-by: Qu Huang 
---
 drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
index 511712c..673d5e3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c
@@ -33,6 +33,11 @@ static int kfd_debugfs_open(struct inode *inode, struct file 
*file)

return single_open(file, show, NULL);
 }
+static int kfd_debugfs_hang_hws_read(struct seq_file *m, void *data)
+{
+   seq_printf(m, "echo gpu_id > hang_hws\n");
+   return 0;
+}

 static ssize_t kfd_debugfs_hang_hws_write(struct file *file,
const char __user *user_buf, size_t size, loff_t *ppos)
@@ -94,7 +99,7 @@ void kfd_debugfs_init(void)
debugfs_create_file("rls", S_IFREG | 0444, debugfs_root,
kfd_debugfs_rls_by_device, &kfd_debugfs_fops);
debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root,
-   NULL, &kfd_debugfs_hang_hws_fops);
+   kfd_debugfs_hang_hws_read, &kfd_debugfs_hang_hws_fops);
 }

 void kfd_debugfs_fini(void)
--
1.8.3.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH v2] drm/radeon: don't evict if not initialized

2021-03-22 Thread Tong Zhang
TTM_PL_VRAM may not initialized at all when calling
radeon_bo_evict_vram(). We need to check before doing eviction.

[2.160837] BUG: kernel NULL pointer dereference, address: 0020
[2.161212] #PF: supervisor read access in kernel mode
[2.161490] #PF: error_code(0x) - not-present page
[2.161767] PGD 0 P4D 0
[2.163088] RIP: 0010:ttm_resource_manager_evict_all+0x70/0x1c0 [ttm]
[2.168506] Call Trace:
[2.168641]  radeon_bo_evict_vram+0x1c/0x20 [radeon]
[2.168936]  radeon_device_fini+0x28/0xf9 [radeon]
[2.169224]  radeon_driver_unload_kms+0x44/0xa0 [radeon]
[2.169534]  radeon_driver_load_kms+0x174/0x210 [radeon]
[2.169843]  drm_dev_register+0xd9/0x1c0 [drm]
[2.170104]  radeon_pci_probe+0x117/0x1a0 [radeon]

Suggested-by: Christian König 
Signed-off-by: Tong Zhang 
---
v2: coding style fix

 drivers/gpu/drm/radeon/radeon_object.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index 9b81786782de..499ce55e34cc 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -384,6 +384,8 @@ int radeon_bo_evict_vram(struct radeon_device *rdev)
}
 #endif
man = ttm_manager_type(bdev, TTM_PL_VRAM);
+   if (!man)
+   return 0;
return ttm_resource_manager_evict_all(bdev, man);
 }
 
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH V2] drm/amdgpu: Fix a typo

2021-03-22 Thread Randy Dunlap




On Fri, 19 Mar 2021, Bhaskar Chowdhury wrote:


s/traing/training/

...plus the entire sentence construction was reworked for better readability.

Signed-off-by: Bhaskar Chowdhury 
---
Changes from V1:
 Alex and Randy's suggestions incorporated.

drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 8 
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index c325d6f53a71..bf3857867f51 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -661,10 +661,10 @@ static int psp_v11_0_memory_training(struct psp_context 
*psp, uint32_t ops)

if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) {
/*
-* Long traing will encroach certain mount of bottom VRAM,
-* saving the content of this bottom VRAM to system memory
-* before training, and restoring it after training to avoid
-* VRAM corruption.
+* Long training will encroach a certain amount on the bottom 
of VRAM;
+ * save the content from the bottom VRAM to system memory
+ * before training, and restore it after training to avoid
+ * VRAM corruption.


These 3 new lines are indented with spaces instead of tabs. Oops.  :(

(I may be too late with this comment -- sorry about that.)


 */
sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE;

--
2.26.2



___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/radeon: don't evict if not initialized

2021-03-22 Thread Tong Zhang
TTM_PL_VRAM may not initialized at all when calling
radeon_bo_evict_vram(). We need to check before doing eviction.

[2.160837] BUG: kernel NULL pointer dereference, address: 0020
[2.161212] #PF: supervisor read access in kernel mode
[2.161490] #PF: error_code(0x) - not-present page
[2.161767] PGD 0 P4D 0
[2.163088] RIP: 0010:ttm_resource_manager_evict_all+0x70/0x1c0 [ttm]
[2.168506] Call Trace:
[2.168641]  radeon_bo_evict_vram+0x1c/0x20 [radeon]
[2.168936]  radeon_device_fini+0x28/0xf9 [radeon]
[2.169224]  radeon_driver_unload_kms+0x44/0xa0 [radeon]
[2.169534]  radeon_driver_load_kms+0x174/0x210 [radeon]
[2.169843]  drm_dev_register+0xd9/0x1c0 [drm]
[2.170104]  radeon_pci_probe+0x117/0x1a0 [radeon]

Signed-off-by: Tong Zhang 
---
 drivers/gpu/drm/radeon/radeon_object.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/radeon_object.c 
b/drivers/gpu/drm/radeon/radeon_object.c
index 9b81786782de..04e9a8118b0e 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -384,7 +384,9 @@ int radeon_bo_evict_vram(struct radeon_device *rdev)
}
 #endif
man = ttm_manager_type(bdev, TTM_PL_VRAM);
-   return ttm_resource_manager_evict_all(bdev, man);
+   if (man)
+   return ttm_resource_manager_evict_all(bdev, man);
+   return 0;
 }
 
 void radeon_bo_force_delete(struct radeon_device *rdev)
-- 
2.25.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/display: Remove unnecessary conversion to bool

2021-03-22 Thread Jiapeng Chong
Fix the following coccicheck warnings:

./drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c:875:62-67: WARNING:
conversion to bool not needed here.
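
A comparison already evaluates to bool, so the "? true : false" is redundant;
a quick standalone check (the values are stand-ins, not the driver's enums):

#include <stdbool.h>
#include <stdio.h>

int main(void)
{
	int next_mode = 1, lut_ram_a = 1;
	bool with_ternary = (next_mode == lut_ram_a) ? true : false;
	bool without = (next_mode == lut_ram_a);

	/* Prints "1 1": both forms yield the same value. */
	printf("%d %d\n", with_ternary, without);
	return 0;
}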

Reported-by: Abaci Robot 
Signed-off-by: Jiapeng Chong 
---
 drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c 
b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
index 3e6f760..5692a8b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c
@@ -872,7 +872,7 @@ bool mpc3_program_shaper(
else
next_mode = LUT_RAM_A;
 
-   mpc3_configure_shaper_lut(mpc, next_mode == LUT_RAM_A ? true:false, 
rmu_idx);
+   mpc3_configure_shaper_lut(mpc, next_mode == LUT_RAM_A, rmu_idx);
 
if (next_mode == LUT_RAM_A)
mpc3_program_shaper_luta_settings(mpc, params, rmu_idx);
-- 
1.8.3.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


Re: [PATCH] drm/amd/display: Set AMDGPU_DM_DEFAULT_MIN_BACKLIGHT to 0

2021-03-22 Thread Evan Benn
On Sat, Mar 20, 2021 at 8:36 AM Alex Deucher  wrote:
>
> On Fri, Mar 19, 2021 at 5:31 PM Evan Benn  wrote:
> >
> > On Sat, 20 Mar 2021 at 02:10, Harry Wentland  wrote:
> > > On 2021-03-19 10:22 a.m., Alex Deucher wrote:
> > > > On Fri, Mar 19, 2021 at 3:23 AM Evan Benn  wrote:
> > > >>
> > > >> AMDGPU_DM_DEFAULT_MIN_BACKLIGHT was set to the value of 12
> > > >> to ensure no display backlight will flicker at low user brightness
> > > >> settings. However this value is quite bright, so for devices that do 
> > > >> not
> > > >> implement the ACPI ATIF
> > > >> ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS
> > > >> functionality the user cannot set the brightness to a low level even if
> > > >> the display would support such a low PWM.
> > > >>
> > > >> This ATIF feature is not implemented on for example AMD grunt 
> > > >> chromebooks.
> > > >>
> > > >> Signed-off-by: Evan Benn 
> > > >>
> > > >> ---
> > > >> I could not find a justification for the reason for the value. It has
> > > >> caused some noticeable regression for users: 
> > > >> https://bugzilla.kernel.org/show_bug.cgi?id=203439
> > > >> Maybe this can be either user controlled or userspace configured, but
> > > >> preventing users from turning their backlight dim seems wrong.
> > > >
> > > > My understanding is that some panels flicker if you set the min to a
> > > > value too low.  This was a safe minimum if the platform didn't specify
> > > > it's own safe minimum.  I think we'd just be trading one bug for
> > > > another (flickering vs not dim enough).  Maybe a whitelist or
> > > > blacklist would be a better solution?
> > > >
> > >
> > > Yeah, this is a NACK from me as-is for the reasons Alex described.
> >
> > Thanks Harry + Alex,
> >
> > I agree this solution is not the best.
> >
> > >
> > > I agree a whitelist approach might be best.
> >
> > Do you have any idea what an allowlist could be keyed on?
> > Is the flickering you observed here a function of the panel or the gpu
> > or some other component?
> > Maybe we could move the minimum level into the logic for that hardware.
> >
>
> Maybe the panel string from the EDID?  Either that or something from
> dmi data?  Harry would probably have a better idea.

One problem with keying from panel EDID is that for example the grunt chromebook
platform has more than 100 different panels already shipped. Add to that that
repair centers or people repairing their own device will use 'compatible'
panels. I'm sure the AMD windows laptops have even more variety!

>
> Alex
>
> > >
> > > Is this fix perhaps for OLED panels? If so we could use a different
> > > min-value for OLED panels that don't do PWM, but use 12 for everything 
> > > else.
> >
> > All the chromebooks I have worked with LCD + LED backlight have been
> > fine with a backlight set to 0.
> > We do have OLED panels too, but I'm not aware of what they do.
> >
> > > Harry
> > >
> > > > Alex
> > > >
> > > >
> > > >>
> > > >> Also reviewed here: 
> > > >> https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2748377
> > > >>   drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +-
> > > >>   1 file changed, 1 insertion(+), 1 deletion(-)
> > > >>
> > > >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
> > > >> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > >> index 573cf17262da..0129bd69b94e 100644
> > > >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
> > > >> @@ -3151,7 +3151,7 @@ static int amdgpu_dm_mode_config_init(struct 
> > > >> amdgpu_device *adev)
> > > >>  return 0;
> > > >>   }
> > > >>
> > > >> -#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12
> > > >> +#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 0
> > > >>   #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255
> > > >>   #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50
> > > >>
> > > >> --
> > > >> 2.31.0.291.g576ba9dcdaf-goog
> > > >>
> > > >> ___
> > > >> dri-devel mailing list
> > > >> dri-de...@lists.freedesktop.org
> > > >> https://lists.freedesktop.org/mailman/listinfo/dri-devel>> 
> > > >> ___
> > > > dri-devel mailing list
> > > > dri-de...@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel>>
> > >
___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[PATCH] drm/amd/dispaly: fix deadlock issue in amdgpu reset

2021-03-22 Thread Lang Yu
In amdgpu reset, while dm.dc_lock is held by dm_suspend,
handle_hpd_rx_irq tries to acquire it. Deadlock occurred!
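
In short, the cycle is (schematic, simplified names):

/*
 * reset path (dm_suspend):             hpd offload work:
 *
 *   mutex_lock(&adev->dm.dc_lock);       handle_hpd_rx_irq()
 *   amdgpu_dm_irq_suspend()                mutex_lock(&adev->dm.dc_lock);
 *     flush_work(hpd work)  --waits-->       ...blocked on the reset path
 *
 * The reset path waits for the work item to finish, the work item waits
 * for dc_lock, and dc_lock is held by the reset path: neither side can
 * make progress.
 */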

Deadlock log:

[  104.528304] amdgpu :03:00.0: amdgpu: GPU reset begin!

[  104.640084] ==
[  104.640092] WARNING: possible circular locking dependency detected
[  104.640099] 5.11.0-custom #1 Tainted: GW   E
[  104.640107] --
[  104.640114] cat/1158 is trying to acquire lock:
[  104.640120] 88810a09ce00 ((work_completion)(>work)){+.+.}-{0:0}, at: 
__flush_work+0x2e3/0x450
[  104.640144]
   but task is already holding lock:
[  104.640151] 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: 
dm_suspend+0xb2/0x1d0 [amdgpu]
[  104.640581]
   which lock already depends on the new lock.

[  104.640590]
   the existing dependency chain (in reverse order) is:
[  104.640598]
   -> #2 (>dm.dc_lock){+.+.}-{3:3}:
[  104.640611]lock_acquire+0xca/0x390
[  104.640623]__mutex_lock+0x9b/0x930
[  104.640633]mutex_lock_nested+0x1b/0x20
[  104.640640]handle_hpd_rx_irq+0x9b/0x1c0 [amdgpu]
[  104.640959]dm_irq_work_func+0x4e/0x60 [amdgpu]
[  104.641264]process_one_work+0x2a7/0x5b0
[  104.641275]worker_thread+0x4a/0x3d0
[  104.641283]kthread+0x125/0x160
[  104.641290]ret_from_fork+0x22/0x30
[  104.641300]
   -> #1 (>hpd_lock){+.+.}-{3:3}:
[  104.641312]lock_acquire+0xca/0x390
[  104.641321]__mutex_lock+0x9b/0x930
[  104.641328]mutex_lock_nested+0x1b/0x20
[  104.641336]handle_hpd_rx_irq+0x67/0x1c0 [amdgpu]
[  104.641635]dm_irq_work_func+0x4e/0x60 [amdgpu]
[  104.641931]process_one_work+0x2a7/0x5b0
[  104.641940]worker_thread+0x4a/0x3d0
[  104.641948]kthread+0x125/0x160
[  104.641954]ret_from_fork+0x22/0x30
[  104.641963]
   -> #0 ((work_completion)(>work)){+.+.}-{0:0}:
[  104.641975]check_prev_add+0x94/0xbf0
[  104.641983]__lock_acquire+0x130d/0x1ce0
[  104.641992]lock_acquire+0xca/0x390
[  104.642000]__flush_work+0x303/0x450
[  104.642008]flush_work+0x10/0x20
[  104.642016]amdgpu_dm_irq_suspend+0x93/0x100 [amdgpu]
[  104.642312]dm_suspend+0x181/0x1d0 [amdgpu]
[  104.642605]amdgpu_device_ip_suspend_phase1+0x8a/0x100 [amdgpu]
[  104.642835]amdgpu_device_ip_suspend+0x21/0x70 [amdgpu]
[  104.643066]amdgpu_device_pre_asic_reset+0x1bd/0x1d2 [amdgpu]
[  104.643403]amdgpu_device_gpu_recover.cold+0x5df/0xa9d [amdgpu]
[  104.643715]gpu_recover_get+0x2e/0x60 [amdgpu]
[  104.643951]simple_attr_read+0x6d/0x110
[  104.643960]debugfs_attr_read+0x49/0x70
[  104.643970]full_proxy_read+0x5f/0x90
[  104.643979]vfs_read+0xa3/0x190
[  104.643986]ksys_read+0x70/0xf0
[  104.643992]__x64_sys_read+0x1a/0x20
[  104.643999]do_syscall_64+0x38/0x90
[  104.644007]entry_SYSCALL_64_after_hwframe+0x44/0xa9
[  104.644017]
   other info that might help us debug this:

[  104.644026] Chain exists of:
 (work_completion)(>work) --> >hpd_lock --> 
>dm.dc_lock

[  104.644043]  Possible unsafe locking scenario:

[  104.644049]CPU0CPU1
[  104.644055]
[  104.644060]   lock(>dm.dc_lock);
[  104.644066]lock(>hpd_lock);
[  104.644075]lock(>dm.dc_lock);
[  104.644083]   lock((work_completion)(>work));
[  104.644090]
*** DEADLOCK ***

[  104.644096] 3 locks held by cat/1158:
[  104.644103]  #0: 88810d0e4eb8 (>mutex){+.+.}-{3:3}, at: 
simple_attr_read+0x4e/0x110
[  104.644119]  #1: 88810a0a1600 (>reset_sem){}-{3:3}, at: 
amdgpu_device_lock_adev+0x42/0x94 [amdgpu]
[  104.644489]  #2: 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: 
dm_suspend+0xb2/0x1d0 [amdgpu]

Signed-off-by: Lang Yu 
---
 drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c 
b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index e176ea84d75b..8727488df769 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -2657,13 +2657,15 @@ static void handle_hpd_rx_irq(void *param)
}
}
 
-   mutex_lock(&adev->dm.dc_lock);
+   if (!amdgpu_in_reset(adev))
+   mutex_lock(&adev->dm.dc_lock);
#ifdef CONFIG_DRM_AMD_DC_HDCP
	result = dc_link_handle_hpd_rx_irq(dc_link, &hpd_irq_data, NULL);
#else
	result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL);
#endif
-   mutex_unlock(&adev->dm.dc_lock);
+   if (!amdgpu_in_reset(adev))
+   mutex_unlock(&adev->dm.dc_lock);
 
 out:
if (result && 

Re: [PATCH] drm/amdgpu: Use correct size when access vram

2021-03-22 Thread Christian König

On 22.03.21 at 01:53, xinhui pan wrote:

Make the size 4-byte aligned: use & ~0x3ULL instead of & 0x3ULL.

Signed-off-by: xinhui pan 


Good catch. Patch is Reviewed-by: Christian König 


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 6d5cf0525325..41a4c456961c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1503,7 +1503,7 @@ static int amdgpu_ttm_access_memory(struct 
ttm_buffer_object *bo,
memcpy(buf, , bytes);
}
} else {
-   bytes = cursor.size & 0x3ull;
+   bytes = cursor.size & ~0x3ULL;
amdgpu_device_vram_access(adev, cursor.start,
  (uint32_t *)buf, bytes,
  write);
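
For reference, size & 0x3ULL keeps only the low two bits (the unaligned
remainder, 0..3 bytes), while size & ~0x3ULL clears them (the 4-byte-aligned
bulk that the VRAM access path should copy). A quick standalone check:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t size = 4099;

	/* 3: leftover bytes that need the unaligned path */
	printf("size & 0x3  = %llu\n", (unsigned long long)(size & 0x3ULL));
	/* 4096: aligned portion for the bulk access */
	printf("size & ~0x3 = %llu\n", (unsigned long long)(size & ~0x3ULL));
	return 0;
}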


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

