[PATCH] drm/amd/pm: fix gpu reset failure by MP1 state setting
Instead of blocking varied unsupported MP1 state in upper level, defer and skip such MP1 state handling in specific ASIC. Signed-off-by: Lijo Lazar Signed-off-by: Guchun Chen --- drivers/gpu/drm/amd/pm/amdgpu_dpm.c| 3 --- .../gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c| 10 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index 15e239582a97..0a6bb3311f0f 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -1027,9 +1027,6 @@ int amdgpu_dpm_set_mp1_state(struct amdgpu_device *adev, int ret = 0; const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; - if (mp1_state == PP_MP1_STATE_NONE) - return 0; - if (pp_funcs && pp_funcs->set_mp1_state) { ret = pp_funcs->set_mp1_state( adev->powerplay.pp_handle, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 722fe067ac2c..72d9c1be1835 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -3113,14 +3113,18 @@ static int sienna_cichlid_system_features_control(struct smu_context *smu, static int sienna_cichlid_set_mp1_state(struct smu_context *smu, enum pp_mp1_state mp1_state) { + int ret; + switch (mp1_state) { case PP_MP1_STATE_UNLOAD: - return smu_cmn_set_mp1_state(smu, mp1_state); + ret = smu_cmn_set_mp1_state(smu, mp1_state); + break; default: - return -EINVAL; + /* Ignore others */ + ret = 0; } - return 0; + return ret; } static const struct pptable_funcs sienna_cichlid_ppt_funcs = { -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/display: fix deadlock issue in amdgpu reset
Typo in the title: s/dispaly/display - Joshie ✨ On 3/22/21 8:11 AM, Lang Yu wrote: In amdggpu reset, while dm.dc_lock is held by dm_suspend, handle_hpd_rx_irq tries to acquire it. Deadlock occurred! Deadlock log: [ 104.528304] amdgpu :03:00.0: amdgpu: GPU reset begin! [ 104.640084] == [ 104.640092] WARNING: possible circular locking dependency detected [ 104.640099] 5.11.0-custom #1 Tainted: GW E [ 104.640107] -- [ 104.640114] cat/1158 is trying to acquire lock: [ 104.640120] 88810a09ce00 ((work_completion)(>work)){+.+.}-{0:0}, at: __flush_work+0x2e3/0x450 [ 104.640144] but task is already holding lock: [ 104.640151] 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb2/0x1d0 [amdgpu] [ 104.640581] which lock already depends on the new lock. [ 104.640590] the existing dependency chain (in reverse order) is: [ 104.640598] -> #2 (>dm.dc_lock){+.+.}-{3:3}: [ 104.640611]lock_acquire+0xca/0x390 [ 104.640623]__mutex_lock+0x9b/0x930 [ 104.640633]mutex_lock_nested+0x1b/0x20 [ 104.640640]handle_hpd_rx_irq+0x9b/0x1c0 [amdgpu] [ 104.640959]dm_irq_work_func+0x4e/0x60 [amdgpu] [ 104.641264]process_one_work+0x2a7/0x5b0 [ 104.641275]worker_thread+0x4a/0x3d0 [ 104.641283]kthread+0x125/0x160 [ 104.641290]ret_from_fork+0x22/0x30 [ 104.641300] -> #1 (>hpd_lock){+.+.}-{3:3}: [ 104.641312]lock_acquire+0xca/0x390 [ 104.641321]__mutex_lock+0x9b/0x930 [ 104.641328]mutex_lock_nested+0x1b/0x20 [ 104.641336]handle_hpd_rx_irq+0x67/0x1c0 [amdgpu] [ 104.641635]dm_irq_work_func+0x4e/0x60 [amdgpu] [ 104.641931]process_one_work+0x2a7/0x5b0 [ 104.641940]worker_thread+0x4a/0x3d0 [ 104.641948]kthread+0x125/0x160 [ 104.641954]ret_from_fork+0x22/0x30 [ 104.641963] -> #0 ((work_completion)(>work)){+.+.}-{0:0}: [ 104.641975]check_prev_add+0x94/0xbf0 [ 104.641983]__lock_acquire+0x130d/0x1ce0 [ 104.641992]lock_acquire+0xca/0x390 [ 104.642000]__flush_work+0x303/0x450 [ 104.642008]flush_work+0x10/0x20 [ 104.642016]amdgpu_dm_irq_suspend+0x93/0x100 [amdgpu] [ 104.642312]dm_suspend+0x181/0x1d0 [amdgpu] [ 
104.642605]amdgpu_device_ip_suspend_phase1+0x8a/0x100 [amdgpu] [ 104.642835]amdgpu_device_ip_suspend+0x21/0x70 [amdgpu] [ 104.643066]amdgpu_device_pre_asic_reset+0x1bd/0x1d2 [amdgpu] [ 104.643403]amdgpu_device_gpu_recover.cold+0x5df/0xa9d [amdgpu] [ 104.643715]gpu_recover_get+0x2e/0x60 [amdgpu] [ 104.643951]simple_attr_read+0x6d/0x110 [ 104.643960]debugfs_attr_read+0x49/0x70 [ 104.643970]full_proxy_read+0x5f/0x90 [ 104.643979]vfs_read+0xa3/0x190 [ 104.643986]ksys_read+0x70/0xf0 [ 104.643992]__x64_sys_read+0x1a/0x20 [ 104.643999]do_syscall_64+0x38/0x90 [ 104.644007]entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 104.644017] other info that might help us debug this: [ 104.644026] Chain exists of: (work_completion)(>work) --> >hpd_lock --> >dm.dc_lock [ 104.644043] Possible unsafe locking scenario: [ 104.644049]CPU0CPU1 [ 104.644055] [ 104.644060] lock(>dm.dc_lock); [ 104.644066]lock(>hpd_lock); [ 104.644075]lock(>dm.dc_lock); [ 104.644083] lock((work_completion)(>work)); [ 104.644090] *** DEADLOCK *** [ 104.644096] 3 locks held by cat/1158: [ 104.644103] #0: 88810d0e4eb8 (>mutex){+.+.}-{3:3}, at: simple_attr_read+0x4e/0x110 [ 104.644119] #1: 88810a0a1600 (>reset_sem){}-{3:3}, at: amdgpu_device_lock_adev+0x42/0x94 [amdgpu] [ 104.644489] #2: 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb2/0x1d0 [amdgpu] Signed-off-by: Lang Yu --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e176ea84d75b..8727488df769 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2657,13 +2657,15 @@ static void handle_hpd_rx_irq(void *param) } } - mutex_lock(>dm.dc_lock); + if (!amdgpu_in_reset(adev)) + mutex_lock(>dm.dc_lock); #ifdef CONFIG_DRM_AMD_DC_HDCP result = dc_link_handle_hpd_rx_irq(dc_link, _irq_data, NULL); #else result = 
dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL); #endif - mutex_unlock(>dm.dc_lock); + if
RE: [PATCH] drm/amd/display: fix deadlock issue in amdgpu reset
[AMD Official Use Only - Internal Distribution Only] -Original Message- From: Grodzovsky, Andrey Sent: Monday, March 22, 2021 11:01 PM To: Yu, Lang ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Huang, Ray Subject: Re: [PATCH] drm/amd/dispaly: fix deadlock issue in amdgpu reset On 2021-03-22 4:11 a.m., Lang Yu wrote: > In amdggpu reset, while dm.dc_lock is held by dm_suspend, > handle_hpd_rx_irq tries to acquire it. Deadlock occurred! > > Deadlock log: > > [ 104.528304] amdgpu :03:00.0: amdgpu: GPU reset begin! > > [ 104.640084] == > [ 104.640092] WARNING: possible circular locking dependency detected > [ 104.640099] 5.11.0-custom #1 Tainted: GW E > [ 104.640107] -- > [ 104.640114] cat/1158 is trying to acquire lock: > [ 104.640120] 88810a09ce00 > ((work_completion)(>work)){+.+.}-{0:0}, at: __flush_work+0x2e3/0x450 [ > 104.640144] > but task is already holding lock: > [ 104.640151] 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: > dm_suspend+0xb2/0x1d0 [amdgpu] [ 104.640581] > which lock already depends on the new lock. 
> > [ 104.640590] > the existing dependency chain (in reverse order) is: > [ 104.640598] > -> #2 (>dm.dc_lock){+.+.}-{3:3}: > [ 104.640611]lock_acquire+0xca/0x390 > [ 104.640623]__mutex_lock+0x9b/0x930 > [ 104.640633]mutex_lock_nested+0x1b/0x20 > [ 104.640640]handle_hpd_rx_irq+0x9b/0x1c0 [amdgpu] > [ 104.640959]dm_irq_work_func+0x4e/0x60 [amdgpu] > [ 104.641264]process_one_work+0x2a7/0x5b0 > [ 104.641275]worker_thread+0x4a/0x3d0 > [ 104.641283]kthread+0x125/0x160 > [ 104.641290]ret_from_fork+0x22/0x30 > [ 104.641300] > -> #1 (>hpd_lock){+.+.}-{3:3}: > [ 104.641312]lock_acquire+0xca/0x390 > [ 104.641321]__mutex_lock+0x9b/0x930 > [ 104.641328]mutex_lock_nested+0x1b/0x20 > [ 104.641336]handle_hpd_rx_irq+0x67/0x1c0 [amdgpu] > [ 104.641635]dm_irq_work_func+0x4e/0x60 [amdgpu] > [ 104.641931]process_one_work+0x2a7/0x5b0 > [ 104.641940]worker_thread+0x4a/0x3d0 > [ 104.641948]kthread+0x125/0x160 > [ 104.641954]ret_from_fork+0x22/0x30 > [ 104.641963] > -> #0 ((work_completion)(>work)){+.+.}-{0:0}: > [ 104.641975]check_prev_add+0x94/0xbf0 > [ 104.641983]__lock_acquire+0x130d/0x1ce0 > [ 104.641992]lock_acquire+0xca/0x390 > [ 104.642000]__flush_work+0x303/0x450 > [ 104.642008]flush_work+0x10/0x20 > [ 104.642016]amdgpu_dm_irq_suspend+0x93/0x100 [amdgpu] > [ 104.642312]dm_suspend+0x181/0x1d0 [amdgpu] > [ 104.642605]amdgpu_device_ip_suspend_phase1+0x8a/0x100 [amdgpu] > [ 104.642835]amdgpu_device_ip_suspend+0x21/0x70 [amdgpu] > [ 104.643066]amdgpu_device_pre_asic_reset+0x1bd/0x1d2 [amdgpu] > [ 104.643403]amdgpu_device_gpu_recover.cold+0x5df/0xa9d [amdgpu] > [ 104.643715]gpu_recover_get+0x2e/0x60 [amdgpu] > [ 104.643951]simple_attr_read+0x6d/0x110 > [ 104.643960]debugfs_attr_read+0x49/0x70 > [ 104.643970]full_proxy_read+0x5f/0x90 > [ 104.643979]vfs_read+0xa3/0x190 > [ 104.643986]ksys_read+0x70/0xf0 > [ 104.643992]__x64_sys_read+0x1a/0x20 > [ 104.643999]do_syscall_64+0x38/0x90 > [ 104.644007]entry_SYSCALL_64_after_hwframe+0x44/0xa9 > [ 104.644017] > other info that might help us 
debug this: > > [ 104.644026] Chain exists of: > (work_completion)(>work) --> > >hpd_lock --> >dm.dc_lock > > [ 104.644043] Possible unsafe locking scenario: > > [ 104.644049]CPU0CPU1 > [ 104.644055] > [ 104.644060] lock(>dm.dc_lock); > [ 104.644066]lock(>hpd_lock); > [ 104.644075]lock(>dm.dc_lock); > [ 104.644083] lock((work_completion)(>work)); > [ 104.644090] > *** DEADLOCK *** > > [ 104.644096] 3 locks held by cat/1158: > [ 104.644103] #0: 88810d0e4eb8 (>mutex){+.+.}-{3:3}, at: > simple_attr_read+0x4e/0x110 [ 104.644119] #1: 88810a0a1600 > (>reset_sem){}-{3:3}, at: amdgpu_device_lock_adev+0x42/0x94 > [amdgpu] [ 104.644489] #2: 88810a09cc70 > (>dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb2/0x1d0 [amdgpu] > > Signed-off-by: Lang Yu > --- > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 -- > 1 file changed, 4 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > index e176ea84d75b..8727488df769 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > +++
[PATCH] drm/amd/display: Use DRM_DEBUG_DP
Convert IRQ-based prints from DRM_DEBUG_DRIVER to DRM_DEBUG_DP, as the latter is not used in drm/amd prior to this patch and since IRQ-based prints drown out the rest of the driver's DRM_DEBUG_DRIVER messages. Cc: Harry Wentland Cc: Alex Deucher Signed-off-by: Luben Tuikov --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 67 +-- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index f455fc3aa561..aabaa652f6dc 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -449,9 +449,9 @@ static void dm_pflip_high_irq(void *interrupt_params) amdgpu_crtc->pflip_status = AMDGPU_FLIP_NONE; spin_unlock_irqrestore(_to_drm(adev)->event_lock, flags); - DRM_DEBUG_DRIVER("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp %d\n", -amdgpu_crtc->crtc_id, amdgpu_crtc, -vrr_active, (int) !e); + DRM_DEBUG_DP("crtc:%d[%p], pflip_stat:AMDGPU_FLIP_NONE, vrr[%d]-fp %d\n", +amdgpu_crtc->crtc_id, amdgpu_crtc, +vrr_active, (int) !e); } static void dm_vupdate_high_irq(void *interrupt_params) @@ -993,8 +993,7 @@ static void event_mall_stutter(struct work_struct *work) dc_allow_idle_optimizations( dm->dc, dm->active_vblank_irq_count == 0); - DRM_DEBUG_DRIVER("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); - + DRM_DEBUG_DP("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); mutex_unlock(>dc_lock); } @@ -1810,8 +1809,8 @@ static void dm_gpureset_toggle_interrupts(struct amdgpu_device *adev, if (acrtc && state->stream_status[i].plane_count != 0) { irq_source = IRQ_TYPE_PFLIP + acrtc->otg_inst; rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; - DRM_DEBUG("crtc %d - vupdate irq %sabling: r=%d\n", - acrtc->crtc_id, enable ? "en" : "dis", rc); + DRM_DEBUG_DP("crtc %d - vupdate irq %sabling: r=%d\n", +acrtc->crtc_id, enable ? 
"en" : "dis", rc); if (rc) DRM_WARN("Failed to %s pflip interrupts\n", enable ? "enable" : "disable"); @@ -4966,8 +4965,8 @@ static void update_stream_scaling_settings(const struct drm_display_mode *mode, stream->src = src; stream->dst = dst; - DRM_DEBUG_DRIVER("Destination Rectangle x:%d y:%d width:%d height:%d\n", - dst.x, dst.y, dst.width, dst.height); + DRM_DEBUG_DP("Destination Rectangle x:%d y:%d width:%d height:%d\n", +dst.x, dst.y, dst.width, dst.height); } @@ -5710,8 +5709,8 @@ static inline int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable) rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; - DRM_DEBUG_DRIVER("crtc %d - vupdate irq %sabling: r=%d\n", -acrtc->crtc_id, enable ? "en" : "dis", rc); + DRM_DEBUG_DP("crtc %d - vupdate irq %sabling: r=%d\n", +acrtc->crtc_id, enable ? "en" : "dis", rc); return rc; } @@ -6664,7 +6663,7 @@ static int dm_plane_helper_prepare_fb(struct drm_plane *plane, int r; if (!new_state->fb) { - DRM_DEBUG_DRIVER("No FB bound\n"); + DRM_DEBUG_DP("No FB bound\n"); return 0; } @@ -7896,11 +7895,11 @@ static void handle_cursor_update(struct drm_plane *plane, if (!plane->state->fb && !old_plane_state->fb) return; - DRM_DEBUG_DRIVER("%s: crtc_id=%d with size %d to %d\n", -__func__, -amdgpu_crtc->crtc_id, -plane->state->crtc_w, -plane->state->crtc_h); + DRM_DEBUG_DP("%s: crtc_id=%d with size %d to %d\n", +__func__, +amdgpu_crtc->crtc_id, +plane->state->crtc_w, +plane->state->crtc_h); ret = get_cursor_position(plane, crtc, ); if (ret) @@ -7958,8 +7957,8 @@ static void prepare_flip_isr(struct amdgpu_crtc *acrtc) /* Mark this event as consumed */ acrtc->base.state->event = NULL; - DRM_DEBUG_DRIVER("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n", -acrtc->crtc_id); + DRM_DEBUG_DP("crtc:%d, pflip_stat:AMDGPU_FLIP_SUBMITTED\n", +acrtc->crtc_id); } static void update_freesync_state_on_stream( @@ -8265,9 +8264,9 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
Re: [PATCH] drm/amd/display: Allow idle optimization based on vblank.
[AMD Official Use Only - Internal Distribution Only] Hi, The updated patch has been merged and is available with commit ID "ef5c594461650de0a18aa0bfd240189991790d7e". Somehow missed to mail the updated version, attached is the updated patch, please review and let me know if any changes required. Thanks, Bindu From: Michel Dänzer Sent: Monday, March 22, 2021 5:32 AM To: R, Bindu ; Lakha, Bhawanpreet ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Zhou1, Tao ; Feng, Kenneth Subject: Re: [PATCH] drm/amd/display: Allow idle optimization based on vblank. On 2021-03-20 1:31 a.m., R, Bindu wrote: > > The Update patch has been submitted. Submitted where? Still can't see it. -- Earthling Michel Dänzer | https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fredhat.com%2Fdata=04%7C01%7CBindu.R%40amd.com%7C5ecbf65b60ec491fbc4408d8ed156b10%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637520023540798291%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C0sdata=VcoJRCxTATVs8JZZU%2FzcYxM1yfFFq1Z8perrtVU1PKE%3Dreserved=0 Libre software enthusiast | Mesa and X developer From ef5c594461650de0a18aa0bfd240189991790d7e Mon Sep 17 00:00:00 2001 From: Bindu Ramamurthy Date: Tue, 16 Mar 2021 17:08:47 -0400 Subject: [PATCH] drm/amd/display: Allow idle optimization based on vblank. [Why] idle optimization was being disabled after commit. [How] check vblank count for display off and enable idle optimization based on this count. Also,check added to ensure vblank count does not decrement, when count reaches 0. 
Signed-off-by: Bindu Ramamurthy --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 6e7a333abbe0..f455fc3aa561 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -987,7 +987,7 @@ static void event_mall_stutter(struct work_struct *work) if (vblank_work->enable) dm->active_vblank_irq_count++; - else + else if(dm->active_vblank_irq_count) dm->active_vblank_irq_count--; dc_allow_idle_optimizations( @@ -8705,9 +8705,14 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) dm_enable_per_frame_crtc_master_sync(dc_state); mutex_lock(>dc_lock); WARN_ON(!dc_commit_state(dm->dc, dc_state)); +#if defined(CONFIG_DRM_AMD_DC_DCN) + /* Allow idle optimization when vblank count is 0 for display off */ + if (dm->active_vblank_irq_count == 0) + dc_allow_idle_optimizations(dm->dc,true); +#endif mutex_unlock(>dc_lock); } - + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/ttm: stop warning on TT shrinker failure
On Mon 22-03-21 14:05:48, Matthew Wilcox wrote: > On Mon, Mar 22, 2021 at 02:49:27PM +0100, Daniel Vetter wrote: > > On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote: > > > Am 20.03.21 um 14:17 schrieb Daniel Vetter: > > > > On Sat, Mar 20, 2021 at 10:04 AM Christian König > > > > wrote: > > > > > Am 19.03.21 um 20:06 schrieb Daniel Vetter: > > > > > > On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote: > > > > > > > Am 19.03.21 um 18:52 schrieb Daniel Vetter: > > > > > > > > On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König wrote: > > > > > > > > > Don't print a warning when we fail to allocate a page for > > > > > > > > > swapping things out. > > > > > > > > > > > > > > > > > > Also rely on memalloc_nofs_save/memalloc_nofs_restore instead > > > > > > > > > of GFP_NOFS. > > > > > > > > Uh this part doesn't make sense. Especially since you only do > > > > > > > > it for the > > > > > > > > debugfs file, not in general. Which means you've just > > > > > > > > completely broken > > > > > > > > the shrinker. > > > > > > > Are you sure? My impression is that GFP_NOFS should now work much > > > > > > > more out > > > > > > > of the box with the memalloc_nofs_save()/memalloc_nofs_restore(). > > > > > > Yeah, if you'd put it in the right place :-) > > > > > > > > > > > > But also -mm folks are very clear that memalloc_no*() family is for > > > > > > dire > > > > > > situation where there's really no other way out. For anything where > > > > > > you > > > > > > know what you're doing, you really should use explicit gfp flags. > > > > > My impression is just the other way around. You should try to avoid > > > > > the > > > > > NOFS/NOIO flags and use the memalloc_no* approach instead. > > > > Where did you get that idea? > > > > > > Well from the kernel comment on GFP_NOFS: > > > > > > * %GFP_NOFS will use direct reclaim but will not use any filesystem > > > interfaces. 
> > > * Please try to avoid using this flag directly and instead use > > > * memalloc_nofs_{save,restore} to mark the whole scope which > > > cannot/shouldn't > > > * recurse into the FS layer with a short explanation why. All allocation > > > * requests will inherit GFP_NOFS implicitly. > > > > Huh that's interesting, since iirc Willy or Dave told me the opposite, and > > the memalloc_no* stuff is for e.g. nfs calling into network layer (needs > > GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think). > > > > Adding them, maybe I got confused. > > My impression is that the scoped API is preferred these days. > > https://www.kernel.org/doc/html/latest/core-api/gfp_mask-from-fs-io.html > > I'd probably need to spend a few months learning the DRM subsystem to > have a more detailed opinion on whether passing GFP flags around explicitly > or using the scope API is the better approach for your situation. yes, in an ideal world we would have a clearly defined scope of the reclaim recursion wrt FS/IO associated with it. I've got back to https://lore.kernel.org/amd-gfx/20210319140857.2262-1-christian.koe...@amd.com/ and there are two things standing out. Why does ttm_tt_debugfs_shrink_show really require NOFS semantic? And why does it play with fs_reclaim_acquire? -- Michal Hocko SUSE Labs ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/ttm: stop warning on TT shrinker failure
Am 22.03.21 um 18:02 schrieb Daniel Vetter: On Mon, Mar 22, 2021 at 5:06 PM Michal Hocko wrote: On Mon 22-03-21 14:05:48, Matthew Wilcox wrote: On Mon, Mar 22, 2021 at 02:49:27PM +0100, Daniel Vetter wrote: On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote: Am 20.03.21 um 14:17 schrieb Daniel Vetter: On Sat, Mar 20, 2021 at 10:04 AM Christian König wrote: Am 19.03.21 um 20:06 schrieb Daniel Vetter: On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote: Am 19.03.21 um 18:52 schrieb Daniel Vetter: On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König wrote: Don't print a warning when we fail to allocate a page for swapping things out. Also rely on memalloc_nofs_save/memalloc_nofs_restore instead of GFP_NOFS. Uh this part doesn't make sense. Especially since you only do it for the debugfs file, not in general. Which means you've just completely broken the shrinker. Are you sure? My impression is that GFP_NOFS should now work much more out of the box with the memalloc_nofs_save()/memalloc_nofs_restore(). Yeah, if you'd put it in the right place :-) But also -mm folks are very clear that memalloc_no*() family is for dire situation where there's really no other way out. For anything where you know what you're doing, you really should use explicit gfp flags. My impression is just the other way around. You should try to avoid the NOFS/NOIO flags and use the memalloc_no* approach instead. Where did you get that idea? Well from the kernel comment on GFP_NOFS: * %GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. * Please try to avoid using this flag directly and instead use * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't * recurse into the FS layer with a short explanation why. All allocation * requests will inherit GFP_NOFS implicitly. Huh that's interesting, since iirc Willy or Dave told me the opposite, and the memalloc_no* stuff is for e.g. 
nfs calling into network layer (needs GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think). Adding them, maybe I got confused. My impression is that the scoped API is preferred these days. https://www.kernel.org/doc/html/latest/core-api/gfp_mask-from-fs-io.html I'd probably need to spend a few months learning the DRM subsystem to have a more detailed opinion on whether passing GFP flags around explicitly or using the scope API is the better approach for your situation. yes, in an ideal world we would have a clearly defined scope of the reclaim recursion wrt FS/IO associated with it. I've got back to https://lore.kernel.org/amd-gfx/20210319140857.2262-1-christian.koe...@amd.com/ and there are two things standing out. Why does ttm_tt_debugfs_shrink_show really require NOFS semantic? And why does it play with fs_reclaim_acquire? It's our shrinker. shrink_show simply triggers that specific shrinker asking it to shrink everything it can, which helps a lot with testing without having to drive the entire system against the OOM wall. fs_reclaim_acquire is there to make sure lockdep understands that this is a shrinker and that it checks all the dependencies for us like if we'd be in real reclaim. There is some drop caches interfaces in proc iirc, but those drop everything, and they don't have the fs_reclaim annotations to teach lockdep about what we're doing. To summarize the debugfs code is basically to test if that stuff really works with GFP_NOFS. My only concern is that if I could rely on memalloc_no* being used we could optimize this quite a bit further. Regards, Christian. -Daniel ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: Amdgpu kernel oops and freezing on system suspend and hibernate
On Thu, Mar 18, 2021 at 8:19 AM Harvey wrote: > > Alex, > > I waited for kernel 5.11.7 to hit our repos yesterday evening and tested > again: > > 1. The suspend issue is gone - suspend and resume now work as expected. > > 2. System hibernation seems to be a different beast - still freezing You need this patch: https://gitlab.freedesktop.org/agd5f/linux/-/commit/711c13547aad08f2cfe996e0cddc3d56f1233081 Alex ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 00/44] Add HMM-based SVM memory manager to KFD v2
On Mon, Mar 22, 2021 at 5:07 PM Felix Kuehling wrote: > > Am 2021-03-22 um 10:15 a.m. schrieb Daniel Vetter: > > On Mon, Mar 22, 2021 at 06:58:16AM -0400, Felix Kuehling wrote: > >> Since the last patch series I sent on Jan 6 a lot has changed. Patches 1-33 > >> are the cleaned up, rebased on amd-staging-drm-next 5.11 version from about > >> a week ago. The remaining 11 patches are current work-in-progress with > >> further cleanup and fixes. > >> > >> MMU notifiers and CPU page faults now can split ranges and update our range > >> data structures without taking heavy locks by doing some of the critical > >> work in a deferred work handler. This includes updating MMU notifiers and > >> the SVM range interval tree. In the mean time, new ranges can live as > >> children of their parent ranges until the deferred work handler > >> consolidates > >> them in the main interval tree. > > I'm totally swammped with intel stuff unfortunately, so not really time to > > dig in. Can you give me the spoiler on how the (gfx10+ iirc) page fault > > inversion is planned to be handled now? Or that still tbd? > > Navi is still TBD. This patch series focuses on GFXv9 because that's the > IP our data center GPUs are on. The code here has two modes of > operations, one that relies on page faults and one that relies on > preemptions. The latter should work on Navi just fine. So that's our > minimal fallback option. > > > > > > Other thing I noticed is that amdkfd still uses the mmu_notifier directly, > > and not the mmu_interval_notifier. But you're talking a lot about managing > > intervals here, and so I'm wondering whether we shouldn't do this in core > > code? Everyone will have the same painful locking problems here (well atm > > everyone = you only I think), sharing this imo would make a ton of > > sense. > > We use mmu_interval_notifiers in all the range-based code, including > even our legacy userptr code. 
The only non-interval notifier that's > still in use in KFD is the one we use for cleanup on process termination. I guess my git grep got wrong, I thought I've only found it in the amdgpu userptr code, not on the amdkfd side of things. Sounds all good. -Daniel > > > > > > I think the other one is moving over more generic pasid code, but I think > > that's going to be less useful here and maybe more a long term project. > > Yes, it's unrelated to this work. > > Regards, > Felix > > > > > > Cheers, Daniel > > > >> We also added proper DMA mapping of system memory pages. > >> > >> Current work in progress is cleaning up all the locking, simplifying our > >> code and data structures and resolving a few known bugs. > >> > >> This series and the corresponding ROCm Thunk and KFDTest changes are also > >> available on gitub: > >> > >> https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/tree/fxkamd/hmm-wip > >> > >> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip > >> > >> An updated Thunk > >> > >> Alex Sierra (10): > >> drm/amdgpu: replace per_device_list by array > >> drm/amdkfd: helper to convert gpu id and idx > >> drm/amdkfd: add xnack enabled flag to kfd_process > >> drm/amdkfd: add ioctl to configure and query xnack retries > >> drm/amdgpu: enable 48-bit IH timestamp counter > >> drm/amdkfd: SVM API call to restore page tables > >> drm/amdkfd: add svm_bo reference for eviction fence > >> drm/amdgpu: add param bit flag to create SVM BOs > >> drm/amdgpu: svm bo enable_signal call condition > >> drm/amdgpu: add svm_bo eviction to enable_signal cb > >> > >> Felix Kuehling (22): > >> drm/amdkfd: map svm range to GPUs > >> drm/amdkfd: svm range eviction and restore > >> drm/amdkfd: validate vram svm range from TTM > >> drm/amdkfd: HMM migrate ram to vram > >> drm/amdkfd: HMM migrate vram to ram > >> drm/amdkfd: invalidate tables on page retry fault > >> drm/amdkfd: page table restore through svm API > >> drm/amdkfd: add svm_bo eviction 
mechanism support > >> drm/amdkfd: refine migration policy with xnack on > >> drm/amdkfd: add svm range validate timestamp > >> drm/amdkfd: multiple gpu migrate vram to vram > >> drm/amdkfd: Fix dma unmapping > >> drm/amdkfd: Call mutex_destroy > >> drm/amdkfd: Fix spurious restore failures > >> drm/amdkfd: Fix svm_bo_list locking in eviction worker > >> drm/amdkfd: Simplify split_by_granularity > >> drm/amdkfd: Point out several race conditions > >> drm/amdkfd: Return pdd from kfd_process_device_from_gduid > >> drm/amdkfd: Remove broken deferred mapping > >> drm/amdkfd: Allow invalid pages in migration.src > >> drm/amdkfd: Correct locking during migration and mapping > >> drm/amdkfd: Nested locking and invalidation of child ranges > >> > >> Philip Yang (12): > >> drm/amdkfd: add svm ioctl API > >> drm/amdkfd: register svm range > >> drm/amdkfd: add svm ioctl GET_ATTR op > >> drm/amdgpu: add common HMM get pages function > >> drm/amdkfd: validate svm range system memory > >>
Re: [PATCH] drm/ttm: stop warning on TT shrinker failure
On Mon, Mar 22, 2021 at 5:06 PM Michal Hocko wrote: > > On Mon 22-03-21 14:05:48, Matthew Wilcox wrote: > > On Mon, Mar 22, 2021 at 02:49:27PM +0100, Daniel Vetter wrote: > > > On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote: > > > > Am 20.03.21 um 14:17 schrieb Daniel Vetter: > > > > > On Sat, Mar 20, 2021 at 10:04 AM Christian König > > > > > wrote: > > > > > > Am 19.03.21 um 20:06 schrieb Daniel Vetter: > > > > > > > On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote: > > > > > > > > Am 19.03.21 um 18:52 schrieb Daniel Vetter: > > > > > > > > > On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König > > > > > > > > > wrote: > > > > > > > > > > Don't print a warning when we fail to allocate a page for > > > > > > > > > > swapping things out. > > > > > > > > > > > > > > > > > > > > Also rely on memalloc_nofs_save/memalloc_nofs_restore > > > > > > > > > > instead of GFP_NOFS. > > > > > > > > > Uh this part doesn't make sense. Especially since you only do > > > > > > > > > it for the > > > > > > > > > debugfs file, not in general. Which means you've just > > > > > > > > > completely broken > > > > > > > > > the shrinker. > > > > > > > > Are you sure? My impression is that GFP_NOFS should now work > > > > > > > > much more out > > > > > > > > of the box with the > > > > > > > > memalloc_nofs_save()/memalloc_nofs_restore(). > > > > > > > Yeah, if you'd put it in the right place :-) > > > > > > > > > > > > > > But also -mm folks are very clear that memalloc_no*() family is > > > > > > > for dire > > > > > > > situation where there's really no other way out. For anything > > > > > > > where you > > > > > > > know what you're doing, you really should use explicit gfp flags. > > > > > > My impression is just the other way around. You should try to avoid > > > > > > the > > > > > > NOFS/NOIO flags and use the memalloc_no* approach instead. > > > > > Where did you get that idea? 
> > > > > > > > Well from the kernel comment on GFP_NOFS: > > > > > > > > * %GFP_NOFS will use direct reclaim but will not use any filesystem > > > > interfaces. > > > > * Please try to avoid using this flag directly and instead use > > > > * memalloc_nofs_{save,restore} to mark the whole scope which > > > > cannot/shouldn't > > > > * recurse into the FS layer with a short explanation why. All > > > > allocation > > > > * requests will inherit GFP_NOFS implicitly. > > > > > > Huh that's interesting, since iirc Willy or Dave told me the opposite, and > > > the memalloc_no* stuff is for e.g. nfs calling into network layer (needs > > > GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think). > > > > > > Adding them, maybe I got confused. > > > > My impression is that the scoped API is preferred these days. > > > > https://www.kernel.org/doc/html/latest/core-api/gfp_mask-from-fs-io.html > > > > I'd probably need to spend a few months learning the DRM subsystem to > > have a more detailed opinion on whether passing GFP flags around explicitly > > or using the scope API is the better approach for your situation. > > yes, in an ideal world we would have a clearly defined scope of the > reclaim recursion wrt FS/IO associated with it. I've got back to > https://lore.kernel.org/amd-gfx/20210319140857.2262-1-christian.koe...@amd.com/ > and there are two things standing out. Why does ttm_tt_debugfs_shrink_show > really require NOFS semantic? And why does it play with > fs_reclaim_acquire? It's our shrinker. shrink_show simply triggers that specific shrinker asking it to shrink everything it can, which helps a lot with testing without having to drive the entire system against the OOM wall. fs_reclaim_acquire is there to make sure lockdep understands that this is a shrinker and that it checks all the dependencies for us like if we'd be in real reclaim. 
There are some drop caches interfaces in proc iirc, but those drop everything, and they don't have the fs_reclaim annotations to teach lockdep about what we're doing. -Daniel -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: Amdgpu kernel oops and freezing on system suspend and hibernate
Still freezing on 5.11.8 and 5.12-rc4. Log on 5.12-rc4 looks a little different: Mär 22 17:40:26 obelix systemd[1]: Reached target Sleep. Mär 22 17:40:26 obelix systemd[1]: Starting Hibernate... Mär 22 17:40:26 obelix kernel: PM: hibernation: hibernation entry Mär 22 17:40:26 obelix systemd-sleep[2380]: Suspending system... Mär 22 17:40:46 obelix kernel: Filesystems sync: 0.012 seconds Mär 22 17:40:46 obelix kernel: Freezing user space processes ... Mär 22 17:40:46 obelix kernel: Freezing of tasks failed after 20.003 seconds (1 tasks refusing to freeze, wq_busy=0): Mär 22 17:40:46 obelix kernel: task:Xorgstate:D stack:0 pid: 1635 ppid: 1633 flags:0x0004 Mär 22 17:40:46 obelix kernel: Call Trace: Mär 22 17:40:46 obelix kernel: __schedule+0x2fc/0x8b0 Mär 22 17:40:46 obelix kernel: schedule+0x5b/0xc0 Mär 22 17:40:46 obelix kernel: rpm_resume+0x18c/0x810 Mär 22 17:40:46 obelix kernel: ? wait_woken+0x80/0x80 Mär 22 17:40:46 obelix kernel: __pm_runtime_resume+0x4a/0x80 Mär 22 17:40:46 obelix kernel: amdgpu_drm_ioctl+0x33/0x80 [amdgpu] Mär 22 17:40:46 obelix kernel: __x64_sys_ioctl+0x83/0xb0 Mär 22 17:40:46 obelix kernel: do_syscall_64+0x33/0x40 Mär 22 17:40:46 obelix kernel: entry_SYSCALL_64_after_hwframe+0x44/0xae Mär 22 17:40:46 obelix kernel: RIP: 0033:0x7f7647d4de6b Mär 22 17:40:46 obelix kernel: RSP: 002b:7ffec3671e88 EFLAGS: 0246 ORIG_RAX: 0010 Mär 22 17:40:46 obelix kernel: RAX: ffda RBX: 7ffec3671ec0 RCX: 7f7647d4de6b Mär 22 17:40:46 obelix kernel: RDX: 7ffec3671ec0 RSI: c06864a2 RDI: 000d Mär 22 17:40:46 obelix kernel: RBP: c06864a2 R08: R09: Mär 22 17:40:46 obelix kernel: R10: R11: 0246 R12: 5609594eedf0 Mär 22 17:40:46 obelix kernel: R13: 000d R14: R15: Mär 22 17:40:46 obelix kernel: Mär 22 17:40:46 obelix kernel: OOM killer enabled. Mär 22 17:40:46 obelix kernel: Restarting tasks ... done. 
Mär 22 17:40:46 obelix kernel: thermal thermal_zone1: failed to read out thermal zone (-61) Mär 22 17:40:46 obelix rtkit-daemon[1381]: The canary thread is apparently starving. Taking action. Mär 22 17:40:46 obelix rtkit-daemon[1381]: Demoting known real-time threads. Mär 22 17:40:46 obelix rtkit-daemon[1381]: Successfully demoted thread 2346 of process 1780. Mär 22 17:40:46 obelix rtkit-daemon[1381]: Successfully demoted thread 1811 of process 1780. Mär 22 17:40:46 obelix rtkit-daemon[1381]: Successfully demoted thread 1810 of process 1780. Mär 22 17:40:46 obelix rtkit-daemon[1381]: Successfully demoted thread 1780 of process 1780. Mär 22 17:40:46 obelix rtkit-daemon[1381]: Demoted 4 threads. Mär 22 17:40:46 obelix systemd-sleep[2380]: Failed to suspend system. System resumed again: Device or resource busy Mär 22 17:40:46 obelix systemd[1]: systemd-hibernate.service: Main process exited, code=exited, status=1/FAILURE Mär 22 17:40:46 obelix systemd[1]: systemd-hibernate.service: Failed with result 'exit-code'. Mär 22 17:40:46 obelix systemd[1]: Failed to start Hibernate. Mär 22 17:40:46 obelix kernel: PM: hibernation: hibernation exit Mär 22 17:40:46 obelix systemd[1]: Dependency failed for Hibernate. Mär 22 17:40:46 obelix audit[1]: SERVICE_START pid=1 uid=0 auid=4294967295 ses=4294967295 msg='unit=systemd-hibernate comm="systemd" exe="/usr/lib/systemd/systemd" hostname=? addr=? terminal=? res=failed' Mär 22 17:40:46 obelix systemd[1]: hibernate.target: Job hibernate.target/start failed with result 'dependency'. Mär 22 17:40:46 obelix systemd-logind[1091]: Operation 'sleep' finished. Mär 22 17:40:46 obelix systemd[1]: Stopped target Sleep. Mär 22 17:40:46 obelix NetworkManager[1089]: [1616431246.8706] manager: sleep: wake requested (sleeping: yes enabled: yes) Mär 22 17:40:46 obelix kernel: audit: type=1130 audit(1616431246.867:108): pid=1 uid=0 auid=4294967295 ses=4294967295 msg='unit=systemd-hibernate comm="systemd" exe="/usr/lib/systemd/systemd" hostname=? 
addr=? terminal=? res=failed' Mär 22 17:40:46 obelix NetworkManager[1089]: [1616431246.8708] device (wlp4s0): state change: unmanaged -> unavailable (reason 'managed', sys-iface-state: 'external') Mär 22 17:40:47 obelix NetworkManager[1089]: [1616431247.1288] device (p2p-dev-wlp4s0): state change: unmanaged -> unavailable (reason 'managed', sys-iface-state: 'external') Mär 22 17:40:47 obelix NetworkManager[1089]: [1616431247.1296] manager: NetworkManager state is now DISCONNECTED Mär 22 17:40:47 obelix NetworkManager[1089]: [1616431247.2208] device (wlp4s0): supplicant interface state: internal-starting -> disconnected Mär 22 17:40:47 obelix NetworkManager[1089]: [1616431247.2209] device (p2p-dev-wlp4s0): state change: unavailable -> unmanaged (reason 'removed', sys-iface-state: 'removed') Mär 22 17:40:47 obelix NetworkManager[1089]: [1616431247.2216] Wi-Fi P2P device controlled by
Re: [PATCH 00/44] Add HMM-based SVM memory manager to KFD v2
Am 2021-03-22 um 10:15 a.m. schrieb Daniel Vetter: > On Mon, Mar 22, 2021 at 06:58:16AM -0400, Felix Kuehling wrote: >> Since the last patch series I sent on Jan 6 a lot has changed. Patches 1-33 >> are the cleaned up, rebased on amd-staging-drm-next 5.11 version from about >> a week ago. The remaining 11 patches are current work-in-progress with >> further cleanup and fixes. >> >> MMU notifiers and CPU page faults now can split ranges and update our range >> data structures without taking heavy locks by doing some of the critical >> work in a deferred work handler. This includes updating MMU notifiers and >> the SVM range interval tree. In the mean time, new ranges can live as >> children of their parent ranges until the deferred work handler consolidates >> them in the main interval tree. > I'm totally swammped with intel stuff unfortunately, so not really time to > dig in. Can you give me the spoiler on how the (gfx10+ iirc) page fault > inversion is planned to be handled now? Or that still tbd? Navi is still TBD. This patch series focuses on GFXv9 because that's the IP our data center GPUs are on. The code here has two modes of operations, one that relies on page faults and one that relies on preemptions. The latter should work on Navi just fine. So that's our minimal fallback option. > > Other thing I noticed is that amdkfd still uses the mmu_notifier directly, > and not the mmu_interval_notifier. But you're talking a lot about managing > intervals here, and so I'm wondering whether we shouldn't do this in core > code? Everyone will have the same painful locking problems here (well atm > everyone = you only I think), sharing this imo would make a ton of > sense. We use mmu_interval_notifiers in all the range-based code, including even our legacy userptr code. The only non-interval notifier that's still in use in KFD is the one we use for cleanup on process termination. 
> > I think the other one is moving over more generic pasid code, but I think > that's going to be less useful here and maybe more a long term project. Yes, it's unrelated to this work. Regards, Felix > > Cheers, Daniel > >> We also added proper DMA mapping of system memory pages. >> >> Current work in progress is cleaning up all the locking, simplifying our >> code and data structures and resolving a few known bugs. >> >> This series and the corresponding ROCm Thunk and KFDTest changes are also >> available on gitub: >> https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/tree/fxkamd/hmm-wip >> >> https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip >> >> An updated Thunk >> >> Alex Sierra (10): >> drm/amdgpu: replace per_device_list by array >> drm/amdkfd: helper to convert gpu id and idx >> drm/amdkfd: add xnack enabled flag to kfd_process >> drm/amdkfd: add ioctl to configure and query xnack retries >> drm/amdgpu: enable 48-bit IH timestamp counter >> drm/amdkfd: SVM API call to restore page tables >> drm/amdkfd: add svm_bo reference for eviction fence >> drm/amdgpu: add param bit flag to create SVM BOs >> drm/amdgpu: svm bo enable_signal call condition >> drm/amdgpu: add svm_bo eviction to enable_signal cb >> >> Felix Kuehling (22): >> drm/amdkfd: map svm range to GPUs >> drm/amdkfd: svm range eviction and restore >> drm/amdkfd: validate vram svm range from TTM >> drm/amdkfd: HMM migrate ram to vram >> drm/amdkfd: HMM migrate vram to ram >> drm/amdkfd: invalidate tables on page retry fault >> drm/amdkfd: page table restore through svm API >> drm/amdkfd: add svm_bo eviction mechanism support >> drm/amdkfd: refine migration policy with xnack on >> drm/amdkfd: add svm range validate timestamp >> drm/amdkfd: multiple gpu migrate vram to vram >> drm/amdkfd: Fix dma unmapping >> drm/amdkfd: Call mutex_destroy >> drm/amdkfd: Fix spurious restore failures >> drm/amdkfd: Fix svm_bo_list locking in eviction worker >> drm/amdkfd: Simplify 
split_by_granularity >> drm/amdkfd: Point out several race conditions >> drm/amdkfd: Return pdd from kfd_process_device_from_gduid >> drm/amdkfd: Remove broken deferred mapping >> drm/amdkfd: Allow invalid pages in migration.src >> drm/amdkfd: Correct locking during migration and mapping >> drm/amdkfd: Nested locking and invalidation of child ranges >> >> Philip Yang (12): >> drm/amdkfd: add svm ioctl API >> drm/amdkfd: register svm range >> drm/amdkfd: add svm ioctl GET_ATTR op >> drm/amdgpu: add common HMM get pages function >> drm/amdkfd: validate svm range system memory >> drm/amdkfd: deregister svm range >> drm/amdgpu: export vm update mapping interface >> drm/amdkfd: register HMM device private zone >> drm/amdkfd: support xgmi same hive mapping >> drm/amdkfd: copy memory through gart table >> drm/amdgpu: reserve fence slot to update page table >> drm/amdkfd: Add SVM API support capability bits >> >> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|4 + >>
Re: [PATCH] drm/amdgpu: Ensure that the modifier requested is supported by plane.
"friendly ping" On Wed, Mar 10, 2021 at 11:14 AM Mark Yacoub wrote: > From: Mark Yacoub > > On initializing the framebuffer, call drm_any_plane_has_format to do a > check if the modifier is supported. drm_any_plane_has_format calls > dm_plane_format_mod_supported which is extended to validate that the > modifier is on the list of the plane's supported modifiers. > > The bug was caught using igt-gpu-tools test: > kms_addfb_basic.addfb25-bad-modifier > > Tested on ChromeOS Zork by turning on the display, running an overlay > test, and running a YT video. > > Cc: Alex Deucher > Cc: Bas Nieuwenhuizen > Signed-off-by: Mark Yacoub > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_display.c | 13 + > drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 9 + > 2 files changed, 22 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > index afa5f8ad0f563..a947b5aa420d2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c > @@ -908,6 +908,19 @@ int amdgpu_display_gem_fb_verify_and_init( > _fb_funcs); > if (ret) > goto err; > + /* Verify that the modifier is supported. 
*/ > + if (!drm_any_plane_has_format(dev, mode_cmd->pixel_format, > + mode_cmd->modifier[0])) { > + struct drm_format_name_buf format_name; > + drm_dbg_kms(dev, > + "unsupported pixel format %s / modifier > 0x%llx\n", > + drm_get_format_name(mode_cmd->pixel_format, > + _name), > + mode_cmd->modifier[0]); > + > + ret = -EINVAL; > + goto err; > + } > > ret = amdgpu_display_framebuffer_init(dev, rfb, mode_cmd, obj); > if (ret) > diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > index 961abf1cf040c..21314024a83ce 100644 > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > @@ -3939,6 +3939,7 @@ static bool dm_plane_format_mod_supported(struct > drm_plane *plane, > { > struct amdgpu_device *adev = drm_to_adev(plane->dev); > const struct drm_format_info *info = drm_format_info(format); > + int i; > > enum dm_micro_swizzle microtile = > modifier_gfx9_swizzle_mode(modifier) & 3; > > @@ -3952,6 +3953,14 @@ static bool dm_plane_format_mod_supported(struct > drm_plane *plane, > if (modifier == DRM_FORMAT_MOD_LINEAR) > return true; > > + /* Check that the modifier is on the list of the plane's supported > modifiers. */ > + for (i = 0; i < plane->modifier_count; i++) { > + if (modifier == plane->modifiers[i]) > + break; > + } > + if (i == plane->modifier_count) > + return false; > + > /* > * The arbitrary tiling support for multiplane formats has not > been hooked > * up. > -- > 2.30.1.766.gb4fecdf3b7-goog > > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/ttm: stop warning on TT shrinker failure
On Mon, Mar 22, 2021 at 02:49:27PM +0100, Daniel Vetter wrote: > On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote: > > Am 20.03.21 um 14:17 schrieb Daniel Vetter: > > > On Sat, Mar 20, 2021 at 10:04 AM Christian König > > > wrote: > > > > Am 19.03.21 um 20:06 schrieb Daniel Vetter: > > > > > On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote: > > > > > > Am 19.03.21 um 18:52 schrieb Daniel Vetter: > > > > > > > On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König wrote: > > > > > > > > Don't print a warning when we fail to allocate a page for > > > > > > > > swapping things out. > > > > > > > > > > > > > > > > Also rely on memalloc_nofs_save/memalloc_nofs_restore instead > > > > > > > > of GFP_NOFS. > > > > > > > Uh this part doesn't make sense. Especially since you only do it > > > > > > > for the > > > > > > > debugfs file, not in general. Which means you've just completely > > > > > > > broken > > > > > > > the shrinker. > > > > > > Are you sure? My impression is that GFP_NOFS should now work much > > > > > > more out > > > > > > of the box with the memalloc_nofs_save()/memalloc_nofs_restore(). > > > > > Yeah, if you'd put it in the right place :-) > > > > > > > > > > But also -mm folks are very clear that memalloc_no*() family is for > > > > > dire > > > > > situation where there's really no other way out. For anything where > > > > > you > > > > > know what you're doing, you really should use explicit gfp flags. > > > > My impression is just the other way around. You should try to avoid the > > > > NOFS/NOIO flags and use the memalloc_no* approach instead. > > > Where did you get that idea? > > > > Well from the kernel comment on GFP_NOFS: > > > > * %GFP_NOFS will use direct reclaim but will not use any filesystem > > interfaces. 
> > * Please try to avoid using this flag directly and instead use > > * memalloc_nofs_{save,restore} to mark the whole scope which > > cannot/shouldn't > > * recurse into the FS layer with a short explanation why. All allocation > > * requests will inherit GFP_NOFS implicitly. > > Huh that's interesting, since iirc Willy or Dave told me the opposite, and > the memalloc_no* stuff is for e.g. nfs calling into network layer (needs > GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think). > > Adding them, maybe I got confused. My impression is that the scoped API is preferred these days. https://www.kernel.org/doc/html/latest/core-api/gfp_mask-from-fs-io.html I'd probably need to spend a few months learning the DRM subsystem to have a more detailed opinion on whether passing GFP flags around explicitly or using the scope API is the better approach for your situation. I usually defer to Michal on these kinds of questions. > > > The kernel is full of explicit gfp_t flag > > > passing to make this as explicit as possible. The memalloc_no* stuff > > > is just for when you go through entire subsystems and really can't > > > wire it through. I can't find the discussion anymore, but that was the > > > advice I got from mm/fs people. > > > > > > One reason is that generally a small GFP_KERNEL allocation never > > > fails. But it absolutely can fail if it's in a memalloc_no* section, > > > and these kind of non-obvious non-local effects are a real pain in > > > testing and review. Hence explicit gfp_flag passing as much as > > > possible. I agree with this; it's definitely a problem with the scope API. I wanted to extend it to include GFP_NOWAIT, but if you do that, your chances of memory allocation failure go way up, so you really want to set __GFP_NOWARN too, but now you need to audit all the places that you're calling to be sure they really handle errors correctly. So I think I'm giving up on that patch set. 
> > > > > > > If this is just to paper over the seq_printf doing the wrong > > > > > > > allocations, > > > > > > > then just move that out from under the fs_reclaim_acquire/release > > > > > > > part. > > > > > > No, that wasn't the problem. > > > > > > > > > > > > We have just seen to many failures to allocate pages for swapout > > > > > > and I think > > > > > > that would improve this because in a lot of cases we can then > > > > > > immediately > > > > > > swap things out instead of having to rely on upper layers. > > > > > Yeah, you broke it. Now the real shrinker is running with GFP_KERNEL, > > > > > because your memalloc_no is only around the debugfs function. And ofc > > > > > it's > > > > > much easier to allocate with GFP_KERNEL, right until you deadlock :-) > > > > The problem here is that for example kswapd calls the shrinker without > > > > holding a FS lock as far as I can see. > > > > > > > > And it is rather sad that we can't optimize this case directly. > > > I'm still not clear what you want to optimize? You can check for "is > > > this kswapd" in pf flags, but that sounds very hairy and fragile. > > > > Well we only need the NOFS flag when the shrinker callback really comes
Re: [PATCH] amdgpu: avoid incorrect %hu format string
On 3/22/21 4:54 AM, Arnd Bergmann wrote: > From: Arnd Bergmann > > clang points out that the %hu format string does not match the type > of the variables here: > > drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c:263:7: warning: format specifies type > 'unsigned short' but the argument has type 'unsigned int' [-Wformat] > version_major, version_minor); > ^ > include/drm/drm_print.h:498:19: note: expanded from macro 'DRM_ERROR' > __drm_err(fmt, ##__VA_ARGS__) > ~~~^~~ > > Change it to a regular %u, the same way a previous patch did for > another instance of the same warning. It would be good to explicitly call out the change. ex/ do you mean mine ? 0b437e64e0af ("drm/amdgpu: remove h from printk format specifier") This was for a different reason. imo, you do not need to include what another patch did. so you could also just remove this bit from the commit log. The change itself looks good. Reviewed-by: Tom Rix > > Fixes: 0b437e64e0af ("drm/amdgpu: remove h from printk format specifier") > Signed-off-by: Arnd Bergmann > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > index e2ed4689118a..c6dbc0801604 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c > @@ -259,7 +259,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) > if ((adev->asic_type == CHIP_POLARIS10 || >adev->asic_type == CHIP_POLARIS11) && > (adev->uvd.fw_version < FW_1_66_16)) > - DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is > too old.\n", > + DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is > too old.\n", > version_major, version_minor); > } else { > unsigned int enc_major, enc_minor, dec_minor; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH][next] drm/amd/display: Fix sizeof arguments in bw_calcs_init()
On 3/22/21 09:04, Chen, Guchun wrote: > [AMD Public Use] > > Thanks for your patch, Silva. The issue has been fixed by " a5c6007e20e1 > drm/amd/display: fix modprobe failure on vega series". Great. :) Good to know this is already fixed. Thanks! -- Gustavo ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: 16 bpc fixed point (RGBA16) framebuffer support for core and AMD.
On Fri, Mar 19, 2021 at 10:03:12PM +0100, Mario Kleiner wrote: > Hi, > > this patch series adds the fourcc's for 16 bit fixed point unorm > framebuffers to the core, and then an implementation for AMD gpu's > with DisplayCore. > > This is intended to allow for pageflipping to, and direct scanout of, > Vulkan swapchain images in the format VK_FORMAT_R16G16B16A16_UNORM. > I have patched AMD's GPUOpen amdvlk OSS driver to enable this format > for swapchains, mapping to DRM_FORMAT_XBGR16161616: > Link: > https://github.com/kleinerm/pal/commit/a25d4802074b13a8d5f7edc96ae45469ecbac3c4 We should also add support for these formats into igt. Should be semi-easy by just adding the suitable float<->uint16 conversion stuff. -- Ville Syrjälä Intel ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/pm: drop redundant and unneeded BACO APIs V2
[AMD Official Use Only - Internal Distribution Only] Reviewed-by: Alex Deucher From: amd-gfx on behalf of Evan Quan Sent: Monday, March 22, 2021 2:11 AM To: amd-gfx@lists.freedesktop.org Cc: Quan, Evan Subject: [PATCH] drm/amd/pm: drop redundant and unneeded BACO APIs V2 Use other APIs which are with the same functionality but much more clean. V2: drop mediate unneeded interface Change-Id: I5e9e0ab5d39b49b02434f18e12392b13931396be Signed-off-by: Evan Quan --- drivers/gpu/drm/amd/amdgpu/nv.c | 25 +- drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h | 9 --- drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 95 --- 3 files changed, 3 insertions(+), 126 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index e9cc3201054f..46d4bbabce75 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -515,21 +515,9 @@ static int nv_asic_mode2_reset(struct amdgpu_device *adev) return ret; } -static bool nv_asic_supports_baco(struct amdgpu_device *adev) -{ - struct smu_context *smu = >smu; - - if (smu_baco_is_support(smu)) - return true; - else - return false; -} - static enum amd_reset_method nv_asic_reset_method(struct amdgpu_device *adev) { - struct smu_context *smu = >smu; - if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || amdgpu_reset_method == AMD_RESET_METHOD_MODE2 || amdgpu_reset_method == AMD_RESET_METHOD_BACO || @@ -548,7 +536,7 @@ nv_asic_reset_method(struct amdgpu_device *adev) case CHIP_DIMGREY_CAVEFISH: return AMD_RESET_METHOD_MODE1; default: - if (smu_baco_is_support(smu)) + if (amdgpu_dpm_is_baco_supported(adev)) return AMD_RESET_METHOD_BACO; else return AMD_RESET_METHOD_MODE1; @@ -558,7 +546,6 @@ nv_asic_reset_method(struct amdgpu_device *adev) static int nv_asic_reset(struct amdgpu_device *adev) { int ret = 0; - struct smu_context *smu = >smu; switch (nv_asic_reset_method(adev)) { case AMD_RESET_METHOD_PCI: @@ -567,13 +554,7 @@ static int nv_asic_reset(struct amdgpu_device *adev) break; case 
AMD_RESET_METHOD_BACO: dev_info(adev->dev, "BACO reset\n"); - - ret = smu_baco_enter(smu); - if (ret) - return ret; - ret = smu_baco_exit(smu); - if (ret) - return ret; + ret = amdgpu_dpm_baco_reset(adev); break; case AMD_RESET_METHOD_MODE2: dev_info(adev->dev, "MODE2 reset\n"); @@ -981,7 +962,7 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .need_full_reset = _need_full_reset, .need_reset_on_init = _need_reset_on_init, .get_pcie_replay_count = _get_pcie_replay_count, - .supports_baco = _asic_supports_baco, + .supports_baco = _dpm_is_baco_supported, .pre_asic_init = _pre_asic_init, .update_umd_stable_pstate = _update_umd_stable_pstate, .query_video_codecs = _query_video_codecs, diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 517f333fbc4b..02675155028d 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -1285,15 +1285,6 @@ int smu_get_power_limit(struct smu_context *smu, uint32_t *limit, enum smu_ppt_limit_level limit_level); -int smu_set_azalia_d3_pme(struct smu_context *smu); - -bool smu_baco_is_support(struct smu_context *smu); - -int smu_baco_get_state(struct smu_context *smu, enum smu_baco_state *state); - -int smu_baco_enter(struct smu_context *smu); -int smu_baco_exit(struct smu_context *smu); - bool smu_mode1_reset_is_support(struct smu_context *smu); bool smu_mode2_reset_is_support(struct smu_context *smu); int smu_mode1_reset(struct smu_context *smu); diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 4120d28f782b..1bb0c0966e3d 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -2682,48 +2682,6 @@ static int smu_set_xgmi_pstate(void *handle, return ret; } -int smu_set_azalia_d3_pme(struct smu_context *smu) -{ - int ret = 0; - - if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled) - return -EOPNOTSUPP; - - mutex_lock(>mutex); - - if 
(smu->ppt_funcs->set_azalia_d3_pme) - ret = smu->ppt_funcs->set_azalia_d3_pme(smu); - - mutex_unlock(>mutex); - - return ret; -} - -/* - * On system suspending or resetting, the dpm_enabled - * flag will be cleared. So that those SMU services which - * are not supported
Re: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak
On 15/03/2021 05:23, Zhang, Jack (Jian) wrote: [AMD Public Use] Hi, Rob/Tomeu/Steven, Would you please help to review this patch for panfrost driver? Thanks, Jack Zhang -Original Message- From: Jack Zhang Sent: Monday, March 15, 2021 1:21 PM To: dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; Koenig, Christian ; Grodzovsky, Andrey ; Liu, Monk ; Deng, Emily Cc: Zhang, Jack (Jian) Subject: [PATCH v3] drm/scheduler re-insert Bailing job to avoid memleak re-insert Bailing jobs to avoid memory leak. V2: move re-insert step to drm/scheduler logic V3: add panfrost's return value for bailing jobs in case it hits the memleak issue. This commit message could do with some work - it's really hard to decipher what the actual problem you're solving is. Signed-off-by: Jack Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c| 8 ++-- drivers/gpu/drm/panfrost/panfrost_job.c| 4 ++-- drivers/gpu/drm/scheduler/sched_main.c | 8 +++- include/drm/gpu_scheduler.h| 1 + 5 files changed, 19 insertions(+), 6 deletions(-) [...] diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index 6003cfeb1322..e2cb4f32dae1 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -444,7 +444,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job * spurious. Bail out. */ if (dma_fence_is_signaled(job->done_fence)) - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_BAILING; dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p", js, @@ -456,7 +456,7 @@ static enum drm_gpu_sched_stat panfrost_job_timedout(struct drm_sched_job /* Scheduler is already stopped, nothing to do. */ if (!panfrost_scheduler_stop(>js->queue[js], sched_job)) - return DRM_GPU_SCHED_STAT_NOMINAL; + return DRM_GPU_SCHED_STAT_BAILING; /* Schedule a reset if there's no reset in progress. 
*/ if (!atomic_xchg(>reset.pending, 1)) This looks correct to me - in these two cases drm_sched_stop() is not called on the sched_job, so it looks like currently the job will be leaked. diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 92d8de24d0a1..a44f621fb5c4 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -314,6 +314,7 @@ static void drm_sched_job_timedout(struct work_struct *work) { struct drm_gpu_scheduler *sched; struct drm_sched_job *job; + int ret; sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); @@ -331,8 +332,13 @@ static void drm_sched_job_timedout(struct work_struct *work) list_del_init(>list); spin_unlock(>job_list_lock); - job->sched->ops->timedout_job(job); + ret = job->sched->ops->timedout_job(job); + if (ret == DRM_GPU_SCHED_STAT_BAILING) { + spin_lock(>job_list_lock); + list_add(>node, >ring_mirror_list); + spin_unlock(>job_list_lock); + } I think we could really do with a comment somewhere explaining what "bailing" means in this context. For the Panfrost case we have two cases: * The GPU job actually finished while the timeout code was running (done_fence is signalled). * The GPU is already in the process of being reset (Panfrost has multiple queues, so most likely a bad job in another queue). I'm also not convinced that (for Panfrost) it makes sense to be adding the jobs back to the list. For the first case above clearly the job could just be freed (it's complete). The second case is more interesting and Panfrost currently doesn't handle this well. In theory the driver could try to rescue the job ('soft stop' in Mali language) so that it could be resubmitted. Panfrost doesn't currently support that, so attempting to resubmit the job is almost certainly going to fail. It's on my TODO list to look at improving Panfrost in this regard, but sadly still quite far down. 
Steve /* * Guilty job did complete and hence needs to be manually removed * See drm_sched_stop doc. diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 4ea8606d91fe..8093ac2427ef 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -210,6 +210,7 @@ enum drm_gpu_sched_stat { DRM_GPU_SCHED_STAT_NONE, /* Reserve 0 */ DRM_GPU_SCHED_STAT_NOMINAL, DRM_GPU_SCHED_STAT_ENODEV, + DRM_GPU_SCHED_STAT_BAILING, }; /** ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org
Re: [PATCH] drm/amd/dispaly: fix deadlock issue in amdgpu reset
On 2021-03-22 4:11 a.m., Lang Yu wrote: In amdggpu reset, while dm.dc_lock is held by dm_suspend, handle_hpd_rx_irq tries to acquire it. Deadlock occurred! Deadlock log: [ 104.528304] amdgpu :03:00.0: amdgpu: GPU reset begin! [ 104.640084] == [ 104.640092] WARNING: possible circular locking dependency detected [ 104.640099] 5.11.0-custom #1 Tainted: GW E [ 104.640107] -- [ 104.640114] cat/1158 is trying to acquire lock: [ 104.640120] 88810a09ce00 ((work_completion)(>work)){+.+.}-{0:0}, at: __flush_work+0x2e3/0x450 [ 104.640144] but task is already holding lock: [ 104.640151] 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb2/0x1d0 [amdgpu] [ 104.640581] which lock already depends on the new lock. [ 104.640590] the existing dependency chain (in reverse order) is: [ 104.640598] -> #2 (>dm.dc_lock){+.+.}-{3:3}: [ 104.640611]lock_acquire+0xca/0x390 [ 104.640623]__mutex_lock+0x9b/0x930 [ 104.640633]mutex_lock_nested+0x1b/0x20 [ 104.640640]handle_hpd_rx_irq+0x9b/0x1c0 [amdgpu] [ 104.640959]dm_irq_work_func+0x4e/0x60 [amdgpu] [ 104.641264]process_one_work+0x2a7/0x5b0 [ 104.641275]worker_thread+0x4a/0x3d0 [ 104.641283]kthread+0x125/0x160 [ 104.641290]ret_from_fork+0x22/0x30 [ 104.641300] -> #1 (>hpd_lock){+.+.}-{3:3}: [ 104.641312]lock_acquire+0xca/0x390 [ 104.641321]__mutex_lock+0x9b/0x930 [ 104.641328]mutex_lock_nested+0x1b/0x20 [ 104.641336]handle_hpd_rx_irq+0x67/0x1c0 [amdgpu] [ 104.641635]dm_irq_work_func+0x4e/0x60 [amdgpu] [ 104.641931]process_one_work+0x2a7/0x5b0 [ 104.641940]worker_thread+0x4a/0x3d0 [ 104.641948]kthread+0x125/0x160 [ 104.641954]ret_from_fork+0x22/0x30 [ 104.641963] -> #0 ((work_completion)(>work)){+.+.}-{0:0}: [ 104.641975]check_prev_add+0x94/0xbf0 [ 104.641983]__lock_acquire+0x130d/0x1ce0 [ 104.641992]lock_acquire+0xca/0x390 [ 104.642000]__flush_work+0x303/0x450 [ 104.642008]flush_work+0x10/0x20 [ 104.642016]amdgpu_dm_irq_suspend+0x93/0x100 [amdgpu] [ 104.642312]dm_suspend+0x181/0x1d0 [amdgpu] [ 
104.642605]amdgpu_device_ip_suspend_phase1+0x8a/0x100 [amdgpu] [ 104.642835]amdgpu_device_ip_suspend+0x21/0x70 [amdgpu] [ 104.643066]amdgpu_device_pre_asic_reset+0x1bd/0x1d2 [amdgpu] [ 104.643403]amdgpu_device_gpu_recover.cold+0x5df/0xa9d [amdgpu] [ 104.643715]gpu_recover_get+0x2e/0x60 [amdgpu] [ 104.643951]simple_attr_read+0x6d/0x110 [ 104.643960]debugfs_attr_read+0x49/0x70 [ 104.643970]full_proxy_read+0x5f/0x90 [ 104.643979]vfs_read+0xa3/0x190 [ 104.643986]ksys_read+0x70/0xf0 [ 104.643992]__x64_sys_read+0x1a/0x20 [ 104.643999]do_syscall_64+0x38/0x90 [ 104.644007]entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 104.644017] other info that might help us debug this: [ 104.644026] Chain exists of: (work_completion)(>work) --> >hpd_lock --> >dm.dc_lock [ 104.644043] Possible unsafe locking scenario: [ 104.644049]CPU0CPU1 [ 104.644055] [ 104.644060] lock(>dm.dc_lock); [ 104.644066]lock(>hpd_lock); [ 104.644075]lock(>dm.dc_lock); [ 104.644083] lock((work_completion)(>work)); [ 104.644090] *** DEADLOCK *** [ 104.644096] 3 locks held by cat/1158: [ 104.644103] #0: 88810d0e4eb8 (>mutex){+.+.}-{3:3}, at: simple_attr_read+0x4e/0x110 [ 104.644119] #1: 88810a0a1600 (>reset_sem){}-{3:3}, at: amdgpu_device_lock_adev+0x42/0x94 [amdgpu] [ 104.644489] #2: 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb2/0x1d0 [amdgpu] Signed-off-by: Lang Yu --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e176ea84d75b..8727488df769 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2657,13 +2657,15 @@ static void handle_hpd_rx_irq(void *param) } } - mutex_lock(>dm.dc_lock); + if (!amdgpu_in_reset(adev)) + mutex_lock(>dm.dc_lock); #ifdef CONFIG_DRM_AMD_DC_HDCP result = dc_link_handle_hpd_rx_irq(dc_link, _irq_data, NULL); #else result = 
dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL); #endif - mutex_unlock(>dm.dc_lock); + if (!amdgpu_in_reset(adev)) +
Re: [PATCH] drm/amd/display: Set AMDGPU_DM_DEFAULT_MIN_BACKLIGHT to 0
On Sun, Mar 21, 2021 at 8:12 PM Evan Benn wrote: > > On Sat, Mar 20, 2021 at 8:36 AM Alex Deucher wrote: > > > > On Fri, Mar 19, 2021 at 5:31 PM Evan Benn wrote: > > > > > > On Sat, 20 Mar 2021 at 02:10, Harry Wentland > > > wrote: > > > > On 2021-03-19 10:22 a.m., Alex Deucher wrote: > > > > > On Fri, Mar 19, 2021 at 3:23 AM Evan Benn > > > > > wrote: > > > > >> > > > > >> AMDGPU_DM_DEFAULT_MIN_BACKLIGHT was set to the value of 12 > > > > >> to ensure no display backlight will flicker at low user brightness > > > > >> settings. However this value is quite bright, so for devices that do > > > > >> not > > > > >> implement the ACPI ATIF > > > > >> ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS > > > > >> functionality the user cannot set the brightness to a low level even > > > > >> if > > > > >> the display would support such a low PWM. > > > > >> > > > > >> This ATIF feature is not implemented on for example AMD grunt > > > > >> chromebooks. > > > > >> > > > > >> Signed-off-by: Evan Benn > > > > >> > > > > >> --- > > > > >> I could not find a justification for the reason for the value. It has > > > > >> caused some noticable regression for users: > > > > >> https://bugzilla.kernel.org/show_bug.cgi?id=203439>>> > > > > >> Maybe this can be either user controlled or userspace configured, but > > > > >> preventing users from turning their backlight dim seems wrong. > > > > > > > > > > My understanding is that some panels flicker if you set the min to a > > > > > value too low. This was a safe minimum if the platform didn't specify > > > > > it's own safe minimum. I think we'd just be trading one bug for > > > > > another (flickering vs not dim enough). Maybe a whitelist or > > > > > blacklist would be a better solution? > > > > > > > > > > > > > Yeah, this is a NACK from me as-is for the reasons Alex described. > > > > > > Thanks Harry + Alex, > > > > > > I agree this solution is not the best. > > > > > > > > > > > I agree a whitelist approach might be best. 
> > > > > > Do you have any idea what an allowlist could be keyed on? > > > Is the flickering you observed here a function of the panel or the gpu > > > or some other component? > > > Maybe we could move the minimum level into the logic for that hardware. > > > > > > > Maybe the panel string from the EDID? Either that or something from > > dmi data? Harry would probably have a better idea. > > One problem with keying from panel EDID is that for example the grunt > chromebook > platform has more than 100 different panels already shipped. Add to that that > repair centers or people repairing their own device will use 'compatible' > panels. I'm sure the AMD windows laptops have even more variety! > Do all of those "compatible" panels work with the min backlight level of 0? If so, maybe something platform specific like a DMI string would make more sense. Alex > > > > Alex > > > > > > > > > > Is this fix perhaps for OLED panels? If so we could use a different > > > > min-value for OLED panels that don't do PWM, but use 12 for everything > > > > else. > > > > > > All the chromebooks I have worked with LCD + LED backlight have been > > > fine with a backlight set to 0. > > > We do have OLED panels too, but I'm not aware of what they do. 
> > > > > > > Harry > > > > > > > > > Alex > > > > > > > > > > > > > > >> > > > > >> Also reviewed here: > > > > >> https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2748377>>> > > > > >> drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- > > > > >> 1 file changed, 1 insertion(+), 1 deletion(-) > > > > >> > > > > >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > > > >> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > > > >> index 573cf17262da..0129bd69b94e 100644 > > > > >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > > > >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > > > >> @@ -3151,7 +3151,7 @@ static int amdgpu_dm_mode_config_init(struct > > > > >> amdgpu_device *adev) > > > > >> return 0; > > > > >> } > > > > >> > > > > >> -#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12 > > > > >> +#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 0 > > > > >> #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255 > > > > >> #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50 > > > > >> > > > > >> -- > > > > >> 2.31.0.291.g576ba9dcdaf-goog > > > > >> > > > > >> ___ > > > > >> dri-devel mailing list > > > > >> dri-de...@lists.freedesktop.org > > > > >> https://lists.freedesktop.org/mailman/listinfo/dri-devel>> > > > > >> ___ > > > > > dri-devel mailing list > > > > > dri-de...@lists.freedesktop.org > > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel>> > > > > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/ttm: stop warning on TT shrinker failure
On Mon, Mar 22, 2021 at 02:05:48PM +, Matthew Wilcox wrote: > On Mon, Mar 22, 2021 at 02:49:27PM +0100, Daniel Vetter wrote: > > On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote: > > > Am 20.03.21 um 14:17 schrieb Daniel Vetter: > > > > On Sat, Mar 20, 2021 at 10:04 AM Christian König > > > > wrote: > > > > > Am 19.03.21 um 20:06 schrieb Daniel Vetter: > > > > > > On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote: > > > > > > > Am 19.03.21 um 18:52 schrieb Daniel Vetter: > > > > > > > > On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König wrote: > > > > > > > > > Don't print a warning when we fail to allocate a page for > > > > > > > > > swapping things out. > > > > > > > > > > > > > > > > > > Also rely on memalloc_nofs_save/memalloc_nofs_restore instead > > > > > > > > > of GFP_NOFS. > > > > > > > > Uh this part doesn't make sense. Especially since you only do > > > > > > > > it for the > > > > > > > > debugfs file, not in general. Which means you've just > > > > > > > > completely broken > > > > > > > > the shrinker. > > > > > > > Are you sure? My impression is that GFP_NOFS should now work much > > > > > > > more out > > > > > > > of the box with the memalloc_nofs_save()/memalloc_nofs_restore(). > > > > > > Yeah, if you'd put it in the right place :-) > > > > > > > > > > > > But also -mm folks are very clear that memalloc_no*() family is for > > > > > > dire > > > > > > situation where there's really no other way out. For anything where > > > > > > you > > > > > > know what you're doing, you really should use explicit gfp flags. > > > > > My impression is just the other way around. You should try to avoid > > > > > the > > > > > NOFS/NOIO flags and use the memalloc_no* approach instead. > > > > Where did you get that idea? > > > > > > Well from the kernel comment on GFP_NOFS: > > > > > > * %GFP_NOFS will use direct reclaim but will not use any filesystem > > > interfaces. 
> > > * Please try to avoid using this flag directly and instead use > > > * memalloc_nofs_{save,restore} to mark the whole scope which > > > cannot/shouldn't > > > * recurse into the FS layer with a short explanation why. All allocation > > > * requests will inherit GFP_NOFS implicitly. > > > > Huh that's interesting, since iirc Willy or Dave told me the opposite, and > > the memalloc_no* stuff is for e.g. nfs calling into network layer (needs > > GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think). > > > > Adding them, maybe I got confused. > > My impression is that the scoped API is preferred these days. > > https://www.kernel.org/doc/html/latest/core-api/gfp_mask-from-fs-io.html > > I'd probably need to spend a few months learning the DRM subsystem to > have a more detailed opinion on whether passing GFP flags around explicitly > or using the scope API is the better approach for your situation. Atm it's a single allocation in the ttm shrinker that's already explicitly using GFP_NOFS that we're talking about here. The scoped api might make sense for gpu scheduler, where we really operate under GFP_NOWAIT for somewhat awkward reasons. But also I thought at least for GFP_NOIO you generally need a mempool and think about how you guarantee forward progress anyway. Is that also a bit outdated thinking, and nowadays we could operate under the assumption that this Just Works? Given that GFP_NOFS seems to fall over already for us I'm not super sure about that ... > I usually defer to Michal on these kinds of questions. > > > > > The kernel is full of explicit gfp_t flag > > > > passing to make this as explicit as possible. The memalloc_no* stuff > > > > is just for when you go through entire subsystems and really can't > > > > wire it through. I can't find the discussion anymore, but that was the > > > > advice I got from mm/fs people. > > > > > > > > One reason is that generally a small GFP_KERNEL allocation never > > > > fails. 
But it absolutely can fail if it's in a memalloc_no* section, > > > > and these kind of non-obvious non-local effects are a real pain in > > > > testing and review. Hence explicit gfp_flag passing as much as > > > > possible. > > I agree with this; it's definitely a problem with the scope API. I wanted > to extend it to include GFP_NOWAIT, but if you do that, your chances of > memory allocation failure go way up, so you really want to set __GFP_NOWARN > too, but now you need to audit all the places that you're calling to be > sure they really handle errors correctly. > > So I think I'm giving up on that patch set. Yeah the auditing is what scares me, and why at least personally I prefer explicit gfp flags. It's much easier to debug a lockdep splat involving fs_reclaim than memory allocation failures leading to very strange bugs because we're not handling the allocation failure properly (or maybe not even at all). -Daniel > > > > > > > > > If this is just to paper over the seq_printf doing the wrong > > > > > > > >
Re: [PATCH 00/44] Add HMM-based SVM memory manager to KFD v2
On Mon, Mar 22, 2021 at 06:58:16AM -0400, Felix Kuehling wrote: > Since the last patch series I sent on Jan 6 a lot has changed. Patches 1-33 > are the cleaned up, rebased on amd-staging-drm-next 5.11 version from about > a week ago. The remaining 11 patches are current work-in-progress with > further cleanup and fixes. > > MMU notifiers and CPU page faults now can split ranges and update our range > data structures without taking heavy locks by doing some of the critical > work in a deferred work handler. This includes updating MMU notifiers and > the SVM range interval tree. In the mean time, new ranges can live as > children of their parent ranges until the deferred work handler consolidates > them in the main interval tree. I'm totally swamped with intel stuff unfortunately, so not really time to dig in. Can you give me the spoiler on how the (gfx10+ iirc) page fault inversion is planned to be handled now? Or that still tbd? Other thing I noticed is that amdkfd still uses the mmu_notifier directly, and not the mmu_interval_notifier. But you're talking a lot about managing intervals here, and so I'm wondering whether we shouldn't do this in core code? Everyone will have the same painful locking problems here (well atm everyone = you only I think), sharing this imo would make a ton of sense. I think the other one is moving over more generic pasid code, but I think that's going to be less useful here and maybe more a long term project. Cheers, Daniel > > We also added proper DMA mapping of system memory pages. > > Current work in progress is cleaning up all the locking, simplifying our > code and data structures and resolving a few known bugs. 
> > This series and the corresponding ROCm Thunk and KFDTest changes are also > available on gitub: > https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/tree/fxkamd/hmm-wip > > https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip > > An updated Thunk > > Alex Sierra (10): > drm/amdgpu: replace per_device_list by array > drm/amdkfd: helper to convert gpu id and idx > drm/amdkfd: add xnack enabled flag to kfd_process > drm/amdkfd: add ioctl to configure and query xnack retries > drm/amdgpu: enable 48-bit IH timestamp counter > drm/amdkfd: SVM API call to restore page tables > drm/amdkfd: add svm_bo reference for eviction fence > drm/amdgpu: add param bit flag to create SVM BOs > drm/amdgpu: svm bo enable_signal call condition > drm/amdgpu: add svm_bo eviction to enable_signal cb > > Felix Kuehling (22): > drm/amdkfd: map svm range to GPUs > drm/amdkfd: svm range eviction and restore > drm/amdkfd: validate vram svm range from TTM > drm/amdkfd: HMM migrate ram to vram > drm/amdkfd: HMM migrate vram to ram > drm/amdkfd: invalidate tables on page retry fault > drm/amdkfd: page table restore through svm API > drm/amdkfd: add svm_bo eviction mechanism support > drm/amdkfd: refine migration policy with xnack on > drm/amdkfd: add svm range validate timestamp > drm/amdkfd: multiple gpu migrate vram to vram > drm/amdkfd: Fix dma unmapping > drm/amdkfd: Call mutex_destroy > drm/amdkfd: Fix spurious restore failures > drm/amdkfd: Fix svm_bo_list locking in eviction worker > drm/amdkfd: Simplify split_by_granularity > drm/amdkfd: Point out several race conditions > drm/amdkfd: Return pdd from kfd_process_device_from_gduid > drm/amdkfd: Remove broken deferred mapping > drm/amdkfd: Allow invalid pages in migration.src > drm/amdkfd: Correct locking during migration and mapping > drm/amdkfd: Nested locking and invalidation of child ranges > > Philip Yang (12): > drm/amdkfd: add svm ioctl API > drm/amdkfd: register svm range > drm/amdkfd: add svm ioctl 
GET_ATTR op > drm/amdgpu: add common HMM get pages function > drm/amdkfd: validate svm range system memory > drm/amdkfd: deregister svm range > drm/amdgpu: export vm update mapping interface > drm/amdkfd: register HMM device private zone > drm/amdkfd: support xgmi same hive mapping > drm/amdkfd: copy memory through gart table > drm/amdgpu: reserve fence slot to update page table > drm/amdkfd: Add SVM API support capability bits > > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|4 + > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|4 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 16 +- > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 13 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 83 + > drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|7 + > drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|4 + > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 90 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 48 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 11 + > drivers/gpu/drm/amd/amdgpu/vega10_ih.c|1 + > drivers/gpu/drm/amd/amdkfd/Kconfig|1 + > drivers/gpu/drm/amd/amdkfd/Makefile |4 +- >
RE: [PATCH][next] drm/amd/display: Fix sizeof arguments in bw_calcs_init()
[AMD Public Use] Thanks for your patch, Silva. The issue has been fixed by " a5c6007e20e1 drm/amd/display: fix modprobe failure on vega series". Regards, Guchun -Original Message- From: amd-gfx On Behalf Of Gustavo A. R. Silva Sent: Monday, March 22, 2021 8:51 PM To: Lee Jones ; Wentland, Harry ; Li, Sun peng (Leo) ; Deucher, Alexander ; Koenig, Christian ; David Airlie ; Daniel Vetter Cc: Gustavo A. R. Silva ; dri-de...@lists.freedesktop.org; amd-gfx@lists.freedesktop.org; linux-ker...@vger.kernel.org Subject: [PATCH][next] drm/amd/display: Fix sizeof arguments in bw_calcs_init() The wrong sizeof values are currently being used as arguments to kzalloc(). Fix this by using the right arguments *dceip and *vbios, correspondingly. Addresses-Coverity-ID: 1502901 ("Wrong sizeof argument") Fixes: fca1e079055e ("drm/amd/display/dc/calcs/dce_calcs: Remove some large variables from the stack") Signed-off-by: Gustavo A. R. Silva --- drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index 556ecfabc8d2..1244fcb0f446 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -2051,11 +2051,11 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip, enum bw_calcs_version version = bw_calcs_version_from_asic_id(asic_id); - dceip = kzalloc(sizeof(dceip), GFP_KERNEL); + dceip = kzalloc(sizeof(*dceip), GFP_KERNEL); if (!dceip) return; - vbios = kzalloc(sizeof(vbios), GFP_KERNEL); + vbios = kzalloc(sizeof(*vbios), GFP_KERNEL); if (!vbios) { kfree(dceip); return; -- 2.27.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org 
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=04%7C01%7Cguchun.chen%40amd.com%7C4ec6ae20f70a488fd2dd08d8ed3987cd%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637520178643844637%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000sdata=YKVR3n%2FnX50dwuP91T1xPxW%2FvgisWDY0dvF8PxO4P4A%3Dreserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdgpu: Introduce new SETUP_TMR interface
[AMD Official Use Only - Internal Distribution Only] Hello all, Can someone help to review below patches? We verified with firmware team and want to check-in together with psp firmware Regards, Oak On 2021-03-12, 4:24 PM, "Zeng, Oak" wrote: This new interface passes both virtual and physical address to PSP. It is backword compatible with old interface. Signed-off-by: Oak Zeng --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 13 ++--- drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 11 ++- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index cd3eda9..99e1a3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -328,8 +328,13 @@ psp_cmd_submit_buf(struct psp_context *psp, static void psp_prep_tmr_cmd_buf(struct psp_context *psp, struct psp_gfx_cmd_resp *cmd, - uint64_t tmr_mc, uint32_t size) + uint64_t tmr_mc, struct amdgpu_bo *tmr_bo) { +struct amdgpu_device *adev = psp->adev; +uint32_t size = amdgpu_bo_size(tmr_bo); +uint64_t tmr_pa = amdgpu_bo_gpu_offset(tmr_bo) + +adev->vm_manager.vram_base_offset - adev->gmc.vram_start; + if (amdgpu_sriov_vf(psp->adev)) cmd->cmd_id = GFX_CMD_ID_SETUP_VMR; else @@ -337,6 +342,9 @@ static void psp_prep_tmr_cmd_buf(struct psp_context *psp, cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_size = size; +cmd->cmd.cmd_setup_tmr.bitfield.virt_phy_addr = 1; +cmd->cmd.cmd_setup_tmr.system_phy_addr_lo = lower_32_bits(tmr_pa); +cmd->cmd.cmd_setup_tmr.system_phy_addr_hi = upper_32_bits(tmr_pa); } static void psp_prep_load_toc_cmd_buf(struct psp_gfx_cmd_resp *cmd, @@ -456,8 +464,7 @@ static int psp_tmr_load(struct psp_context *psp) if (!cmd) return -ENOMEM; -psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, - amdgpu_bo_size(psp->tmr_bo)); +psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, psp->tmr_bo); 
DRM_INFO("reserve 0x%lx from 0x%llx for PSP TMR\n", amdgpu_bo_size(psp->tmr_bo), psp->tmr_mc_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index a41b054..604a1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -170,10 +170,19 @@ struct psp_gfx_cmd_setup_tmr uint32_tbuf_phy_addr_lo; /* bits [31:0] of GPU Virtual address of TMR buffer (must be 4 KB aligned) */ uint32_tbuf_phy_addr_hi; /* bits [63:32] of GPU Virtual address of TMR buffer */ uint32_tbuf_size; /* buffer size in bytes (must be multiple of 4 KB) */ +union { +struct { +uint32_tsriov_enabled:1; /* whether the device runs under SR-IOV*/ +uint32_tvirt_phy_addr:1; /* driver passes both virtual and physical address to PSP*/ +uint32_treserved:30; +} bitfield; +uint32_ttmr_flags; +}; +uint32_tsystem_phy_addr_lo;/* bits [31:0] of system physical address of TMR buffer (must be 4 KB aligned) */ +uint32_tsystem_phy_addr_hi;/* bits [63:32] of system physical address of TMR buffer */ }; - /* FW types for GFX_CMD_ID_LOAD_IP_FW command. Limit 31. */ enum psp_gfx_fw_type { GFX_FW_TYPE_NONE= 0,/* */ -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH V2] drm/amdgpu: Fix a typo
On Sat, Mar 20, 2021 at 3:52 AM Randy Dunlap wrote: > > > > On Fri, 19 Mar 2021, Bhaskar Chowdhury wrote: > > > s/traing/training/ > > > > ...Plus the entire sentence construction for better readability. > > > > Signed-off-by: Bhaskar Chowdhury > > --- > > Changes from V1: > > Alex and Randy's suggestions incorporated. > > > > drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 8 > > 1 file changed, 4 insertions(+), 4 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > > b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > > index c325d6f53a71..bf3857867f51 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > > +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c > > @@ -661,10 +661,10 @@ static int psp_v11_0_memory_training(struct > > psp_context *psp, uint32_t ops) > > > > if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { > > /* > > - * Long traing will encroach certain mount of bottom VRAM, > > - * saving the content of this bottom VRAM to system memory > > - * before training, and restoring it after training to avoid > > - * VRAM corruption. > > + * Long training will encroach a certain amount on the bottom > > of VRAM; > > + * save the content from the bottom VRAM to system memory > > + * before training, and restore it after training to avoid > > + * VRAM corruption. > > These 3 new lines are indented with spaces instead of tabs. Oops. :( > > (I may be too late with this comment -- sorry about that.) I fixed that up when I applied it. Thanks, Alex > > >*/ > > sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; > > > > -- > > 2.26.2 > > > > > ___ > dri-devel mailing list > dri-de...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH][next] drm/amd/display: Fix sizeof arguments in bw_calcs_init()
The wrong sizeof values are currently being used as arguments to kzalloc(). Fix this by using the right arguments *dceip and *vbios, correspondingly. Addresses-Coverity-ID: 1502901 ("Wrong sizeof argument") Fixes: fca1e079055e ("drm/amd/display/dc/calcs/dce_calcs: Remove some large variables from the stack") Signed-off-by: Gustavo A. R. Silva --- drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index 556ecfabc8d2..1244fcb0f446 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -2051,11 +2051,11 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip, enum bw_calcs_version version = bw_calcs_version_from_asic_id(asic_id); - dceip = kzalloc(sizeof(dceip), GFP_KERNEL); + dceip = kzalloc(sizeof(*dceip), GFP_KERNEL); if (!dceip) return; - vbios = kzalloc(sizeof(vbios), GFP_KERNEL); + vbios = kzalloc(sizeof(*vbios), GFP_KERNEL); if (!vbios) { kfree(dceip); return; -- 2.27.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/ttm: stop warning on TT shrinker failure
On Sun, Mar 21, 2021 at 03:18:28PM +0100, Christian König wrote: > Am 20.03.21 um 14:17 schrieb Daniel Vetter: > > On Sat, Mar 20, 2021 at 10:04 AM Christian König > > wrote: > > > Am 19.03.21 um 20:06 schrieb Daniel Vetter: > > > > On Fri, Mar 19, 2021 at 07:53:48PM +0100, Christian König wrote: > > > > > Am 19.03.21 um 18:52 schrieb Daniel Vetter: > > > > > > On Fri, Mar 19, 2021 at 03:08:57PM +0100, Christian König wrote: > > > > > > > Don't print a warning when we fail to allocate a page for > > > > > > > swapping things out. > > > > > > > > > > > > > > Also rely on memalloc_nofs_save/memalloc_nofs_restore instead of > > > > > > > GFP_NOFS. > > > > > > Uh this part doesn't make sense. Especially since you only do it > > > > > > for the > > > > > > debugfs file, not in general. Which means you've just completely > > > > > > broken > > > > > > the shrinker. > > > > > Are you sure? My impression is that GFP_NOFS should now work much > > > > > more out > > > > > of the box with the memalloc_nofs_save()/memalloc_nofs_restore(). > > > > Yeah, if you'd put it in the right place :-) > > > > > > > > But also -mm folks are very clear that memalloc_no*() family is for dire > > > > situation where there's really no other way out. For anything where you > > > > know what you're doing, you really should use explicit gfp flags. > > > My impression is just the other way around. You should try to avoid the > > > NOFS/NOIO flags and use the memalloc_no* approach instead. > > Where did you get that idea? > > Well from the kernel comment on GFP_NOFS: > > * %GFP_NOFS will use direct reclaim but will not use any filesystem > interfaces. > * Please try to avoid using this flag directly and instead use > * memalloc_nofs_{save,restore} to mark the whole scope which > cannot/shouldn't > * recurse into the FS layer with a short explanation why. All allocation > * requests will inherit GFP_NOFS implicitly. 
Huh that's interesting, since iirc Willy or Dave told me the opposite, and the memalloc_no* stuff is for e.g. nfs calling into network layer (needs GFP_NOFS) or swap on top of a filesystems (even needs GFP_NOIO I think). Adding them, maybe I got confused. > > The kernel is full of explicit gfp_t flag > > passing to make this as explicit as possible. The memalloc_no* stuff > > is just for when you go through entire subsystems and really can't > > wire it through. I can't find the discussion anymore, but that was the > > advice I got from mm/fs people. > > > > One reason is that generally a small GFP_KERNEL allocation never > > fails. But it absolutely can fail if it's in a memalloc_no* section, > > and these kind of non-obvious non-local effects are a real pain in > > testing and review. Hence explicit gfp_flag passing as much as > > possible. > > > > > > > > If this is just to paper over the seq_printf doing the wrong > > > > > > allocations, > > > > > > then just move that out from under the fs_reclaim_acquire/release > > > > > > part. > > > > > No, that wasn't the problem. > > > > > > > > > > We have just seen to many failures to allocate pages for swapout and > > > > > I think > > > > > that would improve this because in a lot of cases we can then > > > > > immediately > > > > > swap things out instead of having to rely on upper layers. > > > > Yeah, you broke it. Now the real shrinker is running with GFP_KERNEL, > > > > because your memalloc_no is only around the debugfs function. And ofc > > > > it's > > > > much easier to allocate with GFP_KERNEL, right until you deadlock :-) > > > The problem here is that for example kswapd calls the shrinker without > > > holding a FS lock as far as I can see. > > > > > > And it is rather sad that we can't optimize this case directly. > > I'm still not clear what you want to optimize? You can check for "is > > this kswapd" in pf flags, but that sounds very hairy and fragile. 
> > Well we only need the NOFS flag when the shrinker callback really comes from > a memory shortage in the FS subsystem, and that is rather unlikely. > > When we would allow all other cases to be able to directly IO the freed up > pages to swap it would certainly help. tbh I'm not sure. i915-gem code has played tricks with special casing the kswapd path, and they do kinda scare me at least. I'm not sure whether there's not some hidden dependencies there that would make this a bad idea. Like afaik direct reclaim can sometimes stall for kswapd to catch up a bit, or at least did in the past (I think, really not much clue about this) The other thing is that the fs_reclaim_acquire/release annotation really only works well if you use it outside of the direct reclaim path too. Otherwise it's not much better than just lots of testing. That pretty much means you have to annotate the kswapd path. -Daniel > > Christian. > > > -Daniel > > > > > Anyway you are right if some caller doesn't use the memalloc_no*() > > > approach we are busted. > > > > > > Going to change the patch to only not
Re: [PATCH] drm/ttm: stop warning on TT shrinker failure v2
On Mon, Mar 22, 2021 at 12:22 PM Christian König wrote: > > Don't print a warning when we fail to allocate a page for swapping things out. > > v2: only stop the warning > > Signed-off-by: Christian König Reviewed-by: Daniel Vetter It is kinda surprising that page allocator warns here even though we explicitly asked for non-GFP_KERNEL (which is the only one where you pretty much can assume you will get memory no matter what, since worst case the OOM killer makes space for you). But then with memalloc_no* and friends these failures could happen in unexpected places, and I think the code that warns isn't aware of the original gfp flags, so makes some sense from an implementation pov. -Daniel > --- > drivers/gpu/drm/ttm/ttm_tt.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c > index 2f0833c98d2c..48b9a650630b 100644 > --- a/drivers/gpu/drm/ttm/ttm_tt.c > +++ b/drivers/gpu/drm/ttm/ttm_tt.c > @@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker > *shrink, > }; > int ret; > > - ret = ttm_bo_swapout(, GFP_NOFS); > + ret = ttm_bo_swapout(, GFP_NOFS | __GFP_NOWARN); > return ret < 0 ? SHRINK_EMPTY : ret; > } > > -- > 2.25.1 > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 29/44] drm/amdgpu: reserve fence slot to update page table
Am 22.03.21 um 11:58 schrieb Felix Kuehling: From: Philip Yang Forgot to reserve a fence slot to use sdma to update page table, cause below kernel BUG backtrace to handle vm retry fault while application is exiting. [ 133.048143] kernel BUG at /home/yangp/git/compute_staging/kernel/drivers/dma-buf/dma-resv.c:281! [ 133.048487] Workqueue: events amdgpu_irq_handle_ih1 [amdgpu] [ 133.048506] RIP: 0010:dma_resv_add_shared_fence+0x204/0x280 [ 133.048672] amdgpu_vm_sdma_commit+0x134/0x220 [amdgpu] [ 133.048788] amdgpu_vm_bo_update_range+0x220/0x250 [amdgpu] [ 133.048905] amdgpu_vm_handle_fault+0x202/0x370 [amdgpu] [ 133.049031] gmc_v9_0_process_interrupt+0x1ab/0x310 [amdgpu] [ 133.049165] ? kgd2kfd_interrupt+0x9a/0x180 [amdgpu] [ 133.049289] ? amdgpu_irq_dispatch+0xb6/0x240 [amdgpu] [ 133.049408] amdgpu_irq_dispatch+0xb6/0x240 [amdgpu] [ 133.049534] amdgpu_ih_process+0x9b/0x1c0 [amdgpu] [ 133.049657] amdgpu_irq_handle_ih1+0x21/0x60 [amdgpu] [ 133.049669] process_one_work+0x29f/0x640 [ 133.049678] worker_thread+0x39/0x3f0 [ 133.049685] ? process_one_work+0x640/0x640 Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling Reviewed-by: Christian König I'm going to push this to amd-staging-drm-next since it is really an independent bug fix. Christian. 
--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a61df234f012..3e32f76cd7bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3302,7 +3302,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, struct amdgpu_bo *root; uint64_t value, flags; struct amdgpu_vm *vm; - long r; + int r; bool is_compute_context = false; spin_lock(>vm_manager.pasid_lock); @@ -3360,6 +3360,12 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, value = 0; } + r = dma_resv_reserve_shared(root->tbo.base.resv, 1); + if (r) { + pr_debug("failed %d to reserve fence slot\n", r); + goto error_unlock; + } + r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, addr, flags, value, NULL, NULL, NULL); @@ -3371,7 +3377,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, error_unlock: amdgpu_bo_unreserve(root); if (r < 0) - DRM_ERROR("Can't handle page fault (%ld)\n", r); + DRM_ERROR("Can't handle page fault (%d)\n", r); error_unref: amdgpu_bo_unref(); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 00/14] DC Patches March 22, 2021
[AMD Public Use] Hi all, This week this patchset was tested on a HP Envy 360, with Ryzen 5 4500U, on the following display types (via usb-c to dp/dvi/hdmi/vga): 4k 60z, 1440p 144hz, 1680*1050 60hz, internal eDP 1080p 60hz Tested on a Sapphire Pulse RX5700XT on the following display types (via DP): 4k60 60hz, 1440p 144hz, 1680x1050 60hz. Also tested on a Reference AMD RX6800 on the following display types (via DP): 4k60 60hz, 1440p 144hz. Also used a MST hub at 2x 4k 30hz on all systems. Did not see a visual impact from the patchset tested. Thank you, Dan Wheeler Technologist | AMD SW Display O +(1) 905-882-2600 ext. 74665 -- 1 Commerce Valley Dr E, Thornhill, ON L3T 7X6 Facebook | Twitter | amd.com -Original Message- From: amd-gfx On Behalf Of Solomon Chiu Sent: March 19, 2021 9:47 PM To: amd-gfx@lists.freedesktop.org Cc: Brol, Eryk ; Chiu, Solomon ; Li, Sun peng (Leo) ; Wentland, Harry ; Zhuo, Qingqing ; Siqueira, Rodrigo ; Jacob, Anson ; Pillai, Aurabindo ; Lakha, Bhawanpreet ; R, Bindu Subject: [PATCH 00/14] DC Patches March 22, 2021 This DC patchset brings improvements in multiple areas. 
In summary, we highlight: * Populate socclk entries for dcn2.1 * hide VGH asic specific structs * Add kernel doc to crc_rd_wrk field * revert max lb lines change * Log DMCUB trace buffer events * Fix debugfs link_settings entry * revert max lb use by default for n10 * Deallocate IRQ handlers on amdgpu_dm_irq_fini * Fixed Clock Recovery Sequence * Fix UBSAN: shift-out-of-bounds warning * [FW Promotion] Release 0.0.57 * Change input parameter for set_drr * Use pwrseq instance to determine eDP instance Alvin Lee (1): drm/amd/display: Change input parameter for set_drr Anson Jacob (1): drm/amd/display: Fix UBSAN: shift-out-of-bounds warning Anthony Koo (1): drm/amd/display: [FW Promotion] Release 0.0.57 Aric Cyr (1): drm/amd/display: 3.2.128 David Galiffi (1): drm/amd/display: Fixed Clock Recovery Sequence Dmytro Laktyushkin (3): drm/amd/display: hide VGH asic specific structs drm/amd/display: revert max lb lines change drm/amd/display: revert max lb use by default for n10 Fangzhi Zuo (1): drm/amd/display: Fix debugfs link_settings entry Jake Wang (1): drm/amd/display: Use pwrseq instance to determine eDP instance Leo (Hanghong) Ma (1): drm/amd/display: Log DMCUB trace buffer events Roman Li (1): drm/amd/display: Populate socclk entries for dcn2.1 Victor Lu (1): drm/amd/display: Deallocate IRQ handlers on amdgpu_dm_irq_fini Wayne Lin (1): drm/amd/display: Add kernel doc to crc_rd_wrk field .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 48 .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 14 +++ .../amd/display/amdgpu_dm/amdgpu_dm_debugfs.c | 15 +-- .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 12 +- .../drm/amd/display/amdgpu_dm/amdgpu_dm_irq.c | 71 +++ .../amd/display/amdgpu_dm/amdgpu_dm_trace.h | 21 .../gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 116 +- .../amd/display/dc/clk_mgr/dcn21/rn_clk_mgr.c | 13 ++ .../display/dc/clk_mgr/dcn301/vg_clk_mgr.c| 101 +++ .../display/dc/clk_mgr/dcn301/vg_clk_mgr.h| 28 ++--- drivers/gpu/drm/amd/display/dc/core/dc.c | 10 +- 
drivers/gpu/drm/amd/display/dc/core/dc_link.c | 8 +- .../gpu/drm/amd/display/dc/core/dc_link_dp.c | 10 +- drivers/gpu/drm/amd/display/dc/dc.h | 2 +- .../display/dc/dce110/dce110_hw_sequencer.c | 9 +- .../amd/display/dc/dcn10/dcn10_hw_sequencer.c | 14 +-- .../amd/display/dc/dcn10/dcn10_hw_sequencer.h | 3 +- .../drm/amd/display/dc/dcn10/dcn10_resource.c | 4 +- .../drm/amd/display/dc/dcn20/dcn20_resource.c | 6 +- .../drm/amd/display/dc/dcn21/dcn21_resource.c | 3 +- .../drm/amd/display/dc/dcn30/dcn30_resource.c | 2 +- .../amd/display/dc/dcn301/dcn301_resource.c | 2 +- .../amd/display/dc/dcn302/dcn302_resource.c | 2 +- .../dc/dml/dcn20/display_rq_dlg_calc_20.c | 28 - .../dc/dml/dcn20/display_rq_dlg_calc_20v2.c | 28 - .../dc/dml/dcn21/display_rq_dlg_calc_21.c | 28 - .../dc/dml/dcn30/display_rq_dlg_calc_30.c | 28 - .../display/dc/dml/dml1_display_rq_dlg_calc.c | 28 - .../gpu/drm/amd/display/dc/inc/hw/clk_mgr.h | 9 -- .../gpu/drm/amd/display/dc/inc/hw_sequencer.h | 3 +- .../display/dc/irq/dcn21/irq_service_dcn21.c | 32 - .../display/dc/irq/dcn30/irq_service_dcn30.c | 32 - .../gpu/drm/amd/display/dmub/inc/dmub_cmd.h | 4 +- .../amd/display/modules/freesync/freesync.c | 37 -- .../amd/display/modules/inc/mod_freesync.h| 7 +- 35 files changed, 581 insertions(+), 197 deletions(-) -- 2.29.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org
Re: [RESEND 00/53] Rid GPU from W=1 warnings
On Fri, 19 Mar 2021, Daniel Vetter wrote: > On Fri, Mar 19, 2021 at 08:24:07AM +, Lee Jones wrote: > > On Thu, 18 Mar 2021, Daniel Vetter wrote: > > > > > On Wed, Mar 17, 2021 at 9:32 PM Daniel Vetter wrote: > > > > > > > > On Wed, Mar 17, 2021 at 9:17 AM Lee Jones wrote: > > > > > > > > > > On Thu, 11 Mar 2021, Lee Jones wrote: > > > > > > > > > > > On Thu, 11 Mar 2021, Daniel Vetter wrote: > > > > > > > > > > > > > On Mon, Mar 08, 2021 at 09:19:32AM +, Lee Jones wrote: > > > > > > > > On Fri, 05 Mar 2021, Roland Scheidegger wrote: > > > > > > > > > > > > > > > > > The vmwgfx ones look all good to me, so for > > > > > > > > > 23-53: Reviewed-by: Roland Scheidegger > > > > > > > > > That said, they were already signed off by Zack, so not sure > > > > > > > > > what > > > > > > > > > happened here. > > > > > > > > > > > > > > > > Yes, they were accepted at one point, then dropped without a > > > > > > > > reason. > > > > > > > > > > > > > > > > Since I rebased onto the latest -next, I had to pluck them back > > > > > > > > out of > > > > > > > > a previous one. > > > > > > > > > > > > > > They should show up in linux-next again. We merge patches for > > > > > > > next merge > > > > > > > window even during the current merge window, but need to make > > > > > > > sure they > > > > > > > don't pollute linux-next. Occasionally the cut off is wrong so > > > > > > > patches > > > > > > > show up, and then get pulled again. > > > > > > > > > > > > > > Unfortunately especially the 5.12 merge cycle was very wobbly due > > > > > > > to some > > > > > > > confusion here. But your patches should all be in linux-next > > > > > > > again (they > > > > > > > are queued up for 5.13 in drm-misc-next, I checked that). > > > > > > > > > > > > > > Sorry for the confusion here. > > > > > > > > > > > > Oh, I see. Well so long as they don't get dropped, I'll be happy. 
> > > > > > > > > > > > Thanks for the explanation Daniel > > > > > > > > > > After rebasing today, all of my GPU patches have remained. Would > > > > > someone be kind enough to check that everything is still in order > > > > > please? > > > > > > > > It's still broken somehow. I've kiced Maxime and Maarten again, > > > > they're also on this thread. > > > > > > You're patches have made it into drm-next meanwhile, so they should > > > show up in linux-next through that tree at least. Except if that one > > > also has some trouble. > > > > Thanks for letting me know. > > > > I see some patches made it back in, others didn't. > > > > I'll resend the stragglers - bear with. > > The vmwgfx ones should all be back, the others I guess just werent ever > applied. I'll vacuum them all up if you resend. Apologies for the wobbly > ride. NP, it happens. -- Lee Jones [李琼斯] Senior Technical Lead - Developer Services Linaro.org │ Open source software for Arm SoCs Follow Linaro: Facebook | Twitter | Blog ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/display: fix modprobe failure on vega series
On Mon, 22 Mar 2021, Guchun Chen wrote: > Fixes: d88b34caee83 ("Remove some large variables from the stack") > > [ 41.232097] Call Trace: > [ 41.232105] kvasprintf+0x66/0xd0 > [ 41.232122] kasprintf+0x49/0x70 > [ 41.232136] __drm_crtc_init_with_planes+0x2e1/0x340 [drm] > [ 41.232219] ? create_object+0x263/0x3b0 > [ 41.232231] drm_crtc_init_with_planes+0x46/0x60 [drm] > [ 41.232303] amdgpu_dm_init+0x69c/0x1750 [amdgpu] > [ 41.232998] ? phm_wait_for_register_unequal.part.1+0x58/0x90 [amdgpu] > [ 41.233662] ? smu9_wait_for_response+0x7d/0xa0 [amdgpu] > [ 41.234294] ? smu9_send_msg_to_smc_with_parameter+0x77/0xd0 [amdgpu] > [ 41.234912] ? smum_send_msg_to_smc_with_parameter+0x96/0x100 [amdgpu] > [ 41.235520] ? psm_set_states+0x5c/0x60 [amdgpu] > [ 41.236165] dm_hw_init+0x12/0x20 [amdgpu] > [ 41.236834] amdgpu_device_init+0x1402/0x1df0 [amdgpu] > [ 41.237314] amdgpu_driver_load_kms+0x65/0x320 [amdgpu] > [ 41.237780] amdgpu_pci_probe+0x150/0x250 [amdgpu] > [ 41.238240] local_pci_probe+0x47/0xa0 > [ 41.238253] pci_device_probe+0x10b/0x1c0 > [ 41.238265] really_probe+0xf5/0x4c0 > [ 41.238275] driver_probe_device+0xe8/0x150 > [ 41.238284] device_driver_attach+0x58/0x60 > [ 41.238293] __driver_attach+0xa3/0x140 > [ 41.238301] ? device_driver_attach+0x60/0x60 > [ 41.238309] ? device_driver_attach+0x60/0x60 > [ 41.238317] bus_for_each_dev+0x74/0xb0 > [ 41.238330] ? kmem_cache_alloc_trace+0x31a/0x470 > [ 41.238341] driver_attach+0x1e/0x20 > [ 41.238348] bus_add_driver+0x14a/0x220 > [ 41.238357] ? 0xc0f09000 > [ 41.238364] driver_register+0x60/0x100 > [ 41.238373] ? 0xc0f09000 > [ 41.238379] __pci_register_driver+0x54/0x60 > [ 41.238389] amdgpu_init+0x68/0x1000 [amdgpu] > [ 41.238836] do_one_initcall+0x48/0x1e0 > [ 41.238850] ? kmem_cache_alloc_trace+0x31a/0x470 > [ 41.238862] do_init_module+0x5f/0x224 > [ 41.238876] load_module+0x266b/0x2ec0 > [ 41.238887] ? security_kernel_post_read_file+0x5c/0x70 > [ 41.238905] __do_sys_finit_module+0xc1/0x120 > [ 41.238913] ? 
__do_sys_finit_module+0xc1/0x120 > [ 41.238924] __x64_sys_finit_module+0x1a/0x20 > [ 41.238932] do_syscall_64+0x37/0x80 > [ 41.238942] entry_SYSCALL_64_after_hwframe+0x44/0xa9 > > Signed-off-by: Guchun Chen > --- > drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c > b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c > index 556ecfabc8d2..1244fcb0f446 100644 > --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c > +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c > @@ -2051,11 +2051,11 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip, > > enum bw_calcs_version version = bw_calcs_version_from_asic_id(asic_id); > > - dceip = kzalloc(sizeof(dceip), GFP_KERNEL); > + dceip = kzalloc(sizeof(*dceip), GFP_KERNEL); > if (!dceip) > return; > > - vbios = kzalloc(sizeof(vbios), GFP_KERNEL); > + vbios = kzalloc(sizeof(*vbios), GFP_KERNEL); > if (!vbios) { > kfree(dceip); > return; Oh yes, of course. Looks like it was missed in review too. Sorry about the school boy error! Reviewed-by: Lee Jones -- Lee Jones [李琼斯] Senior Technical Lead - Developer Services Linaro.org │ Open source software for Arm SoCs Follow Linaro: Facebook | Twitter | Blog ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drivers: gpu: Remove duplicate include of amdgpu_hdp.h
amdgpu_hdp.h has been included at line 91, so remove the duplicate include. Signed-off-by: Wan Jiabing --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 49267eb64302..68836c22ef25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -107,7 +107,6 @@ #include "amdgpu_gfxhub.h" #include "amdgpu_df.h" #include "amdgpu_smuio.h" -#include "amdgpu_hdp.h" #define MAX_GPU_INSTANCE 16 -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/pm/powerplay/smumgr/smu7_smumgr: Fix some typo error
From: wengjianfeng change 'addres' to 'address' Signed-off-by: wengjianfeng --- drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c index aae2524..0de9e0e 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/smumgr/smu7_smumgr.c @@ -97,7 +97,7 @@ int smu7_copy_bytes_to_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address, addr = smc_start_address; while (byte_count >= 4) { - /* Bytes are written into the SMC addres space with the MSB first. */ + /* Bytes are written into the SMC address space with the MSB first. */ data = src[0] * 0x100 + src[1] * 0x1 + src[2] * 0x100 + src[3]; result = smu7_set_smc_sram_address(hwmgr, addr, limit); @@ -127,7 +127,7 @@ int smu7_copy_bytes_to_smc(struct pp_hwmgr *hwmgr, uint32_t smc_start_address, extra_shift = 8 * (4 - byte_count); while (byte_count > 0) { - /* Bytes are written into the SMC addres space with the MSB first. */ + /* Bytes are written into the SMC address space with the MSB first. */ data = (0x100 * data) + *src++; byte_count--; } -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
[AMD Public Use] Hi Christian, I will conduct one stress test for this tomorrow. Would you mind waiting for my ack before submitting? Regards, Guchun -Original Message- From: Christian König Sent: Monday, March 22, 2021 8:41 PM To: amd-gfx@lists.freedesktop.org Cc: Chen, Guchun ; Das, Nirmoy Subject: [PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code"" Now that we found the underlying problem we can re-apply this patch. This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9268db1172bd..bc3951b71079 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -37,6 +37,7 @@ #include "amdgpu_gmc.h" #include "amdgpu_xgmi.h" #include "amdgpu_dma_buf.h" +#include "amdgpu_res_cursor.h" /** * DOC: GPUVM @@ -1583,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * @last: last mapped entry * @flags: flags for the entries * @offset: offset into nodes and pages_addr - * @nodes: array of drm_mm_nodes with the MC addresses + * @res: ttm_resource to map * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence * @@ -1598,13 +1599,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, bool unlocked, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t offset, - struct drm_mm_node *nodes, + struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { struct amdgpu_vm_update_params params; + struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; - uint64_t pfn; int r; memset(, 0, sizeof(params)); @@ -1622,14 +1623,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, else sync_mode = AMDGPU_SYNC_EXPLICIT; - pfn = offset >> PAGE_SHIFT; - if (nodes) 
{ - while (pfn >= nodes->size) { - pfn -= nodes->size; - ++nodes; - } - } - amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; @@ -1648,23 +1641,17 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_unlock; - do { + amdgpu_res_first(res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, +); + while (cursor.remaining) { uint64_t tmp, num_entries, addr; - - num_entries = last - start + 1; - if (nodes) { - addr = nodes->start << PAGE_SHIFT; - num_entries = min((nodes->size - pfn) * - AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries); - } else { - addr = 0; - } - + num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT; if (pages_addr) { bool contiguous = true; if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) { + uint64_t pfn = cursor.start >> PAGE_SHIFT; uint64_t count; contiguous = pages_addr[pfn + 1] == @@ -1684,16 +1671,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } if (!contiguous) { - addr = pfn << PAGE_SHIFT; + addr = cursor.start; params.pages_addr = pages_addr; } else { - addr = pages_addr[pfn]; + addr = pages_addr[cursor.start >> PAGE_SHIFT]; params.pages_addr = NULL; } } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { - addr += bo_adev->vm_manager.vram_base_offset; - addr += pfn << PAGE_SHIFT; + addr = bo_adev->vm_manager.vram_base_offset + + cursor.start; + } else { + addr = 0; } tmp = start + num_entries; @@ -1701,14 +1690,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_unlock; -
[PATCH] drm/amdgpu: re-apply "use the new cursor in the VM code""
Now that we found the underlying problem we can re-apply this patch. This reverts commit 867fee7f8821ff42e7308088cf0c3450ac49c17c. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 +- 1 file changed, 18 insertions(+), 37 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9268db1172bd..bc3951b71079 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -37,6 +37,7 @@ #include "amdgpu_gmc.h" #include "amdgpu_xgmi.h" #include "amdgpu_dma_buf.h" +#include "amdgpu_res_cursor.h" /** * DOC: GPUVM @@ -1583,7 +1584,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * @last: last mapped entry * @flags: flags for the entries * @offset: offset into nodes and pages_addr - * @nodes: array of drm_mm_nodes with the MC addresses + * @res: ttm_resource to map * @pages_addr: DMA addresses to use for mapping * @fence: optional resulting fence * @@ -1598,13 +1599,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, bool unlocked, struct dma_resv *resv, uint64_t start, uint64_t last, uint64_t flags, uint64_t offset, - struct drm_mm_node *nodes, + struct ttm_resource *res, dma_addr_t *pages_addr, struct dma_fence **fence) { struct amdgpu_vm_update_params params; + struct amdgpu_res_cursor cursor; enum amdgpu_sync_mode sync_mode; - uint64_t pfn; int r; memset(, 0, sizeof(params)); @@ -1622,14 +1623,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, else sync_mode = AMDGPU_SYNC_EXPLICIT; - pfn = offset >> PAGE_SHIFT; - if (nodes) { - while (pfn >= nodes->size) { - pfn -= nodes->size; - ++nodes; - } - } - amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; @@ -1648,23 +1641,17 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_unlock; - do { + amdgpu_res_first(res, offset, (last - start + 1) * AMDGPU_GPU_PAGE_SIZE, +); + while (cursor.remaining) 
{ uint64_t tmp, num_entries, addr; - - num_entries = last - start + 1; - if (nodes) { - addr = nodes->start << PAGE_SHIFT; - num_entries = min((nodes->size - pfn) * - AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries); - } else { - addr = 0; - } - + num_entries = cursor.size >> AMDGPU_GPU_PAGE_SHIFT; if (pages_addr) { bool contiguous = true; if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) { + uint64_t pfn = cursor.start >> PAGE_SHIFT; uint64_t count; contiguous = pages_addr[pfn + 1] == @@ -1684,16 +1671,18 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } if (!contiguous) { - addr = pfn << PAGE_SHIFT; + addr = cursor.start; params.pages_addr = pages_addr; } else { - addr = pages_addr[pfn]; + addr = pages_addr[cursor.start >> PAGE_SHIFT]; params.pages_addr = NULL; } } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) { - addr += bo_adev->vm_manager.vram_base_offset; - addr += pfn << PAGE_SHIFT; + addr = bo_adev->vm_manager.vram_base_offset + + cursor.start; + } else { + addr = 0; } tmp = start + num_entries; @@ -1701,14 +1690,9 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_unlock; - pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; - if (nodes && nodes->size == pfn) { - pfn = 0; - ++nodes; - } + amdgpu_res_next(, num_entries * AMDGPU_GPU_PAGE_SIZE); start = tmp; - - } while (unlikely(start != last + 1)); + }; r =
Re: [PATCH] drivers: gpu: Remove duplicate include of amdgpu_hdp.h
Am 22.03.21 um 13:02 schrieb Wan Jiabing: amdgpu_hdp.h has been included at line 91, so remove the duplicate include. Signed-off-by: Wan Jiabing Acked-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 49267eb64302..68836c22ef25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -107,7 +107,6 @@ #include "amdgpu_gfxhub.h" #include "amdgpu_df.h" #include "amdgpu_smuio.h" -#include "amdgpu_hdp.h" #define MAX_GPU_INSTANCE 16 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] amdgpu: avoid incorrect %hu format string
Am 22.03.21 um 12:54 schrieb Arnd Bergmann: From: Arnd Bergmann clang points out that the %hu format string does not match the type of the variables here: drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c:263:7: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] version_major, version_minor); ^ include/drm/drm_print.h:498:19: note: expanded from macro 'DRM_ERROR' __drm_err(fmt, ##__VA_ARGS__) ~~~^~~ Change it to a regular %u, the same way a previous patch did for another instance of the same warning. Fixes: 0b437e64e0af ("drm/amdgpu: remove h from printk format specifier") Signed-off-by: Arnd Bergmann Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e2ed4689118a..c6dbc0801604 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -259,7 +259,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) if ((adev->asic_type == CHIP_POLARIS10 || adev->asic_type == CHIP_POLARIS11) && (adev->uvd.fw_version < FW_1_66_16)) - DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", + DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is too old.\n", version_major, version_minor); } else { unsigned int enc_major, enc_minor, dec_minor; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] amdgpu: avoid incorrect %hu format string
From: Arnd Bergmann clang points out that the %hu format string does not match the type of the variables here: drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c:263:7: warning: format specifies type 'unsigned short' but the argument has type 'unsigned int' [-Wformat] version_major, version_minor); ^ include/drm/drm_print.h:498:19: note: expanded from macro 'DRM_ERROR' __drm_err(fmt, ##__VA_ARGS__) ~~~^~~ Change it to a regular %u, the same way a previous patch did for another instance of the same warning. Fixes: 0b437e64e0af ("drm/amdgpu: remove h from printk format specifier") Signed-off-by: Arnd Bergmann --- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e2ed4689118a..c6dbc0801604 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -259,7 +259,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) if ((adev->asic_type == CHIP_POLARIS10 || adev->asic_type == CHIP_POLARIS11) && (adev->uvd.fw_version < FW_1_66_16)) - DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n", + DRM_ERROR("POLARIS10/11 UVD firmware version %u.%u is too old.\n", version_major, version_minor); } else { unsigned int enc_major, enc_minor, dec_minor; -- 2.29.2 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/ttm: stop warning on TT shrinker failure v2
Don't print a warning when we fail to allocate a page for swapping things out. v2: only stop the warning Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_tt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c index 2f0833c98d2c..48b9a650630b 100644 --- a/drivers/gpu/drm/ttm/ttm_tt.c +++ b/drivers/gpu/drm/ttm/ttm_tt.c @@ -369,7 +369,7 @@ static unsigned long ttm_tt_shrinker_scan(struct shrinker *shrink, }; int ret; - ret = ttm_bo_swapout(, GFP_NOFS); + ret = ttm_bo_swapout(, GFP_NOFS | __GFP_NOWARN); return ret < 0 ? SHRINK_EMPTY : ret; } -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 38/44] drm/amdkfd: Simplify split_by_granularity
svm_range_split_by_granularity always added the parent range and only the parent range to the update list for the caller to add it to the deferred work list. So just do that in the caller unconditionally and eliminate the update_list parameter. Split the range so that the original prange is always the one that will be migrated. That way we can eliminate the pmigrate parameter and simplify the code further. Update the outdated documentation. Change-Id: Ifdc8d29b2abda67478e0d41daf5b46b861802ae7 Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 29 -- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 73 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 +- 3 files changed, 30 insertions(+), 76 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index da2ff655812e..5c8b32873086 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -780,12 +780,10 @@ svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) { unsigned long addr = vmf->address; - struct list_head update_list; - struct svm_range *pmigrate; struct vm_area_struct *vma; + enum svm_work_list_ops op; struct svm_range *parent; struct svm_range *prange; - struct svm_range *next; struct kfd_process *p; struct mm_struct *mm; int r = 0; @@ -816,31 +814,24 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) goto out_unlock_prange; svm_range_lock(parent); - r = svm_range_split_by_granularity(p, mm, addr, parent, prange, - , _list); + r = svm_range_split_by_granularity(p, mm, addr, parent, prange); svm_range_unlock(parent); if (r) { pr_debug("failed %d to split range by granularity\n", r); goto out_unlock_prange; } - r = svm_migrate_vram_to_ram(pmigrate, mm); + r = svm_migrate_vram_to_ram(prange, mm); if (r) pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, -pmigrate, pmigrate->start, pmigrate->last); - - 
list_for_each_entry_safe(prange, next, _list, update_list) { - enum svm_work_list_ops op; - - /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ - if (p->xnack_enabled && prange == pmigrate) - op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP; - else - op = SVM_OP_UPDATE_RANGE_NOTIFIER; +prange, prange->start, prange->last); - svm_range_add_list_work(>svms, prange, mm, op); - list_del_init(>update_list); - } + /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ + if (p->xnack_enabled && parent == prange) + op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP; + else + op = SVM_OP_UPDATE_RANGE_NOTIFIER; + svm_range_add_list_work(>svms, parent, mm, op); schedule_deferred_list_work(>svms); out_unlock_prange: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3a7030d9f331..fbcb1491e987 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1005,16 +1005,14 @@ void svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, * * @p: the process with svms list * @mm: mm structure + * @addr: the vm fault address in pages, to split the prange * @parent: parent range if prange is from child list * @prange: prange to split - * @addr: the vm fault address in pages, to split the prange - * @pmigrate: output, the range to be migrated to ram - * @update_list: output, the ranges to update notifier * - * Collects small ranges that make up one migration granule and splits the first - * and the last range at the granularity boundary + * Trims @prange to be a single aligned block of prange->granularity if + * possible. The head and tail are added to the child_list in @parent. 
* - * Context: caller hold svms lock + * Context: caller must hold mmap_read_lock and prange->lock * * Return: * 0 - OK, otherwise error code @@ -1022,75 +1020,42 @@ void svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, unsigned long addr, struct svm_range *parent, - struct svm_range *prange, - struct svm_range **pmigrate, - struct list_head *update_list) + struct svm_range *prange) { - struct svm_range *tail; - struct svm_range *new; - unsigned long start; - unsigned long
[PATCH 40/44] drm/amdkfd: Return pdd from kfd_process_device_from_gduid
This saves callers from looking up the pdd with a linear search later. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 8 +++- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 - drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 51 +++- 3 files changed, 29 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 2ccfdb218198..ca44547c46a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -865,11 +865,15 @@ struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, uint32_t gpu_idx, uint32_t *gpuid); int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); -int kfd_process_device_from_gpuidx(struct kfd_process *p, - uint32_t gpu_idx, struct kfd_dev **gpu); int kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev, uint32_t *gpuid, uint32_t *gpuidx); + +static inline struct kfd_process_device *kfd_process_device_from_gpuidx( + struct kfd_process *p, uint32_t gpuidx) { + return gpuidx < p->n_pdds ? 
p->pdds[gpuidx] : NULL; +} + void kfd_unref_process(struct kfd_process *p); int kfd_process_evict_queues(struct kfd_process *p); int kfd_process_restore_queues(struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 48ea6f393353..cc988bf6057d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1674,16 +1674,6 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) return -EINVAL; } -int kfd_process_device_from_gpuidx(struct kfd_process *p, - uint32_t gpu_idx, struct kfd_dev **gpu) -{ - if (gpu_idx < p->n_pdds) { - *gpu = p->pdds[gpu_idx]->dev; - return 0; - } - return -EINVAL; -} - int kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev, uint32_t *gpuid, uint32_t *gpuidx) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c48fe2f276b9..081d6bb75b09 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -175,12 +175,11 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, void svm_range_free_dma_mappings(struct svm_range *prange) { - struct kfd_dev *kfd_dev; + struct kfd_process_device *pdd; dma_addr_t *dma_addr; struct device *dev; struct kfd_process *p; uint32_t gpuidx; - int r; p = container_of(prange->svms, struct kfd_process, svms); @@ -189,12 +188,12 @@ void svm_range_free_dma_mappings(struct svm_range *prange) if (!dma_addr) continue; - r = kfd_process_device_from_gpuidx(p, gpuidx, _dev); - if (r) { + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { pr_debug("failed to find device idx %d\n", gpuidx); - return; + continue; } - dev = _dev->pdev->dev; + dev = >dev->pdev->dev; svm_range_dma_unmap(dev, dma_addr, 0, prange->npages); kvfree(dma_addr); prange->dma_addr[gpuidx] = NULL; @@ -549,10 +548,9 @@ void svm_range_vram_node_free(struct svm_range *prange) struct amdgpu_device * 
svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) { + struct kfd_process_device *pdd; struct kfd_process *p; - struct kfd_dev *dev; int32_t gpu_idx; - int r; p = container_of(prange->svms, struct kfd_process, svms); @@ -561,13 +559,13 @@ svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) pr_debug("failed to get device by id 0x%x\n", gpu_id); return NULL; } - r = kfd_process_device_from_gpuidx(p, gpu_idx, ); - if (r < 0) { + pdd = kfd_process_device_from_gpuidx(p, gpu_idx); + if (!pdd) { pr_debug("failed to get device by idx 0x%x\n", gpu_idx); return NULL; } - return (struct amdgpu_device *)dev->kgd; + return (struct amdgpu_device *)pdd->dev->kgd; } static int svm_range_validate_vram(struct svm_range *prange) @@ -1120,7 +1118,6 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, struct dma_fence *fence = NULL; struct amdgpu_device *adev; struct kfd_process *p; - struct kfd_dev *dev; uint32_t
[PATCH 42/44] drm/amdkfd: Allow invalid pages in migration.src
This can happen when syste memory page were never allocated. Skip them during the migration. 0-initialize the BO. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 50 ++-- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 6748c5db64f5..87561b907543 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -310,7 +310,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, src = scratch; dst = (uint64_t *)(scratch + npages); - r = svm_range_vram_node_new(adev, prange, false); + r = svm_range_vram_node_new(adev, prange, true); if (r) { pr_debug("failed %d get 0x%llx pages from vram\n", r, npages); goto out; @@ -328,17 +328,6 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, for (i = j = 0; i < npages; i++) { struct page *spage; - spage = migrate_pfn_to_page(migrate->src[i]); - src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_TO_DEVICE); - r = dma_mapping_error(dev, src[i]); - if (r) { - pr_debug("failed %d dma_map_page\n", r); - goto out_free_vram_pages; - } - - pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n", -src[i] >> PAGE_SHIFT, page_to_pfn(spage)); - dst[i] = vram_addr + (j << PAGE_SHIFT); migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); svm_migrate_get_vram_page(prange, migrate->dst[i]); @@ -346,6 +335,43 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate->dst[i] = migrate_pfn(migrate->dst[i]); migrate->dst[i] |= MIGRATE_PFN_LOCKED; + if (migrate->src[i] & MIGRATE_PFN_VALID) { + spage = migrate_pfn_to_page(migrate->src[i]); + src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, + DMA_TO_DEVICE); + r = dma_mapping_error(dev, src[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + goto out_free_vram_pages; + } + } else { + if (j) { + j--; + r = 
svm_migrate_copy_memory_gart( + adev, src + i - j, + dst + i - j, j + 1, + FROM_RAM_TO_VRAM, + mfence); + if (r) + goto out_free_vram_pages; + offset = j; + vram_addr = (node->start + offset) << PAGE_SHIFT; + j = 0; + } + offset++; + vram_addr += PAGE_SIZE; + if (offset >= node->size) { + node++; + pr_debug("next node size 0x%llx\n", node->size); + vram_addr = node->start << PAGE_SHIFT; + offset = 0; + } + continue; + } + + pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n", +src[i] >> PAGE_SHIFT, page_to_pfn(spage)); + if (j + offset >= node->size - 1 && i < npages - 1) { r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j + 1, -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 41/44] drm/amdkfd: Remove broken deferred mapping
Mapping without validation is broken. Also removed saving the pages from the last migration. They may be invalidated without an MMU notifier to catch it, so let the next proper validation take care of it. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 14 -- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 13 ++--- 2 files changed, 2 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 101d1f71db84..6748c5db64f5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -539,18 +539,6 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, src = (uint64_t *)(scratch + npages); dst = scratch; - /* FIXME: Is it legal to hold on to this page array? We don't have -* proper references to the pages and we may not have an MMU notifier -* set up for the range at this point that could invalidate it (if -* it's a child range). -*/ - prange->pages_addr = kvmalloc_array(npages, sizeof(*prange->pages_addr), - GFP_KERNEL | __GFP_ZERO); - if (!prange->pages_addr) { - r = -ENOMEM; - goto out_oom; - } - for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) { struct page *spage; @@ -590,8 +578,6 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n", dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); - prange->pages_addr[i] = page_to_pfn(dpage); - migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); migrate->dst[i] |= MIGRATE_PFN_LOCKED; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 081d6bb75b09..aedb2c84131e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1695,7 +1695,6 @@ static void svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) { struct mm_struct *mm = prange->work_item.mm; - int r; switch 
(prange->work_item.op) { case SVM_OP_NULL: @@ -1718,11 +1717,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last); svm_range_update_notifier_and_interval_tree(mm, prange); - /* FIXME: need to validate somewhere */ - r = svm_range_map_to_gpus(prange, true); - if (r) - pr_debug("failed %d map 0x%p [0x%lx 0x%lx]\n", -r, svms, prange->start, prange->last); + /* TODO: implement deferred validation and mapping */ break; case SVM_OP_ADD_RANGE: pr_debug("add 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, @@ -1735,11 +1730,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) prange, prange->start, prange->last); svm_range_add_to_svms(prange); svm_range_add_notifier_locked(mm, prange); - /* FIXME: need to validate somewhere */ - r = svm_range_map_to_gpus(prange, true); - if (r) - pr_debug("failed %d map 0x%p [0x%lx 0x%lx]\n", -r, svms, prange->start, prange->last); + /* TODO: implement deferred validation and mapping */ break; default: WARN_ONCE(1, "Unknown prange 0x%p work op %d\n", prange, -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 43/44] drm/amdkfd: Correct locking during migration and mapping
This fixes potential race conditions between any code that validates and maps SVM ranges and MMU notifiers. The whole sequence is encapsulated in svm_range_validate_and_map. The page_addr and hmm_range structures are not useful outside that function, so they were removed from struct svm_range. Validation of system memory pages before migration to VRAM is maintained as an explicit workaround. It should not be needed, but without it there are still some page locking deadlocks to be investigated. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 573 --- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 9 +- 3 files changed, 310 insertions(+), 278 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 87561b907543..4d79d69d8aac 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -466,9 +466,6 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, migrate_vma_finalize(); } - kvfree(prange->pages_addr); - prange->pages_addr = NULL; - svm_range_dma_unmap(adev->dev, scratch, 0, npages); svm_range_free_dma_mappings(prange); @@ -513,6 +510,9 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, prange->start, prange->last, best_loc); + /* FIXME: workaround for page locking bug with invalid pages */ + svm_range_prefault(prange, mm); + start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index aedb2c84131e..0a6e28f0dcaf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -39,6 +39,16 @@ */ #define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000 +struct svm_validate_context { + struct kfd_process *process; + struct svm_range *prange; + bool intr; + 
unsigned long bitmap[MAX_GPU_INSTANCE]; + struct ttm_validate_buffer tv[MAX_GPU_INSTANCE+1]; + struct list_head validate_list; + struct ww_acquire_ctx ticket; +}; + static void svm_range_evict_svm_bo_worker(struct work_struct *work); static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, @@ -118,17 +128,14 @@ static void svm_range_remove_notifier(struct svm_range *prange) } static int -svm_range_dma_map(struct device *dev, dma_addr_t **dma_addr, - unsigned long *pages_addr, uint64_t npages) +svm_range_dma_map_dev(struct device *dev, dma_addr_t **dma_addr, + unsigned long *hmm_pfns, uint64_t npages) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; dma_addr_t *addr = *dma_addr; struct page *page; int i, r; - if (!pages_addr) - return 0; - if (!addr) { addr = kvmalloc_array(npages, sizeof(*addr), GFP_KERNEL | __GFP_ZERO); @@ -142,7 +149,7 @@ svm_range_dma_map(struct device *dev, dma_addr_t **dma_addr, "leaking dma mapping\n")) dma_unmap_page(dev, addr[i], PAGE_SIZE, dir); - page = hmm_pfn_to_page(pages_addr[i]); + page = hmm_pfn_to_page(hmm_pfns[i]); addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); r = dma_mapping_error(dev, addr[i]); if (r) { @@ -155,6 +162,37 @@ svm_range_dma_map(struct device *dev, dma_addr_t **dma_addr, return 0; } +static int +svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, + unsigned long *hmm_pfns) +{ + struct kfd_process *p; + uint32_t gpuidx; + int r; + + p = container_of(prange->svms, struct kfd_process, svms); + + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + struct kfd_process_device *pdd; + struct amdgpu_device *adev; + + pr_debug("mapping to gpu idx 0x%x\n", gpuidx); + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to find device idx %d\n", gpuidx); + return -EINVAL; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + r = svm_range_dma_map_dev(adev->dev, >dma_addr[gpuidx], + hmm_pfns, prange->npages); + if (r) + break; + } + + return r; 
+} + void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, unsigned long offset, unsigned long npages) { @@ -207,7 +245,6 @@ static void svm_range_free(struct
[PATCH 34/44] drm/amdkfd: Fix dma unmapping
Don't dma_unmap in unmap_from_gpu. The dma_addr arrays are protected by the migrate_mutex, which we cannot hold when unmapping in MMU notifiers. Instead dma_unmap and free dma_addr arrays whenever the pages_array is invalidated: when migrating to VRAM and when re-validating RAM. Freeing dma_addr arrays in svm_migrate_vma_to_vram fixes a bug where the stale system memory pages were mapped instead of VRAM after a migration. When freeing dma_addr arrays, ignore the access bitmasks. Those masks may have changed since the dma_addr arrays were allocated and mapped. Change-Id: I01a6121c4c9908c1da4f303e87dcafd509fabc86 Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 16 +++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 + 3 files changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index cf71b98303c2..da2ff655812e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -444,6 +444,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, prange->pages_addr = NULL; svm_range_dma_unmap(adev->dev, scratch, 0, npages); + svm_range_free_dma_mappings(prange); out_free: kvfree(buf); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 6c46d43bf613..3eea8f87724d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -173,9 +173,8 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, } } -static void svm_range_free_dma_mappings(struct svm_range *prange) +void svm_range_free_dma_mappings(struct svm_range *prange) { - DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); struct kfd_dev *kfd_dev; dma_addr_t *dma_addr; struct device *dev; @@ -184,13 +183,8 @@ static void svm_range_free_dma_mappings(struct svm_range *prange) int r; p = container_of(prange->svms, struct 
kfd_process, svms); - if (p->xnack_enabled) - bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE); - else - bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, - MAX_GPU_INSTANCE); - for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) { dma_addr = prange->dma_addr[gpuidx]; if (!dma_addr) continue; @@ -292,6 +286,7 @@ svm_range_validate_ram(struct mm_struct *mm, struct svm_range *prange) } kvfree(prange->pages_addr); + svm_range_free_dma_mappings(prange); prange->pages_addr = prange->hmm_range->hmm_pfns; prange->hmm_range->hmm_pfns = NULL; @@ -1192,11 +1187,6 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, if (r) break; } - - svm_range_dma_unmap(adev->dev, prange->dma_addr[gpuidx], - start - prange->start, - last - start + 1); - amdgpu_amdkfd_flush_gpu_tlb_pasid((struct kgd_dev *)adev, p->pasid); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index fea9c63b5f95..b2ab920ab884 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -175,5 +175,6 @@ void svm_range_add_list_work(struct svm_range_list *svms, void schedule_deferred_list_work(struct svm_range_list *svms); void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, unsigned long offset, unsigned long npages); +void svm_range_free_dma_mappings(struct svm_range *prange); #endif /* KFD_SVM_H_ */ -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 44/44] drm/amdkfd: Nested locking and invalidation of child ranges
This allows validation of child ranges, so the GPU page fault handler can be more light-weight. Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 8 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 40 +--- 2 files changed, 37 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 4d79d69d8aac..cc8bf6438383 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -826,12 +826,18 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) } mutex_lock(>migrate_mutex); + if (prange != parent) + mutex_lock_nested(>migrate_mutex, 1); if (!prange->actual_loc) goto out_unlock_prange; svm_range_lock(parent); + if (prange != parent) + mutex_lock_nested(>lock, 1); r = svm_range_split_by_granularity(p, mm, addr, parent, prange); + if (prange != parent) + mutex_unlock(>lock); svm_range_unlock(parent); if (r) { pr_debug("failed %d to split range by granularity\n", r); @@ -852,6 +858,8 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) schedule_deferred_list_work(>svms); out_unlock_prange: + if (prange != parent) + mutex_unlock(>migrate_mutex); mutex_unlock(>migrate_mutex); out: mutex_unlock(>svms.lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 0a6e28f0dcaf..bc1a9e9ba722 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1281,10 +1281,6 @@ void svm_range_unreserve_bos(struct svm_validate_context *ctx) * serialize concurrent migrations or validations of the same range, the * prange->migrate_mutex must be held. * - * The range must be in the inverval tree and have an MMU notifier to catch - * concurrent invalidations of the virtual address range. This means it cannot - * be a child range. - * * For VRAM ranges, the SVM BO must be allocated and valid (protected by its * eviction fence. 
* @@ -1568,10 +1564,24 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm, schedule_delayed_work(>restore_work, msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); } else { + struct svm_range *pchild; + unsigned long s, l; + pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", prange->svms, start, last); svm_range_lock(prange); - svm_range_unmap_from_gpus(prange, start, last); + list_for_each_entry(pchild, >child_list, child_list) { + mutex_lock_nested(>lock, 1); + s = max(start, pchild->start); + l = min(last, pchild->last); + if (l >= s) + svm_range_unmap_from_gpus(pchild, s, l); + mutex_unlock(>lock); + } + s = max(start, prange->start); + l = min(last, prange->last); + if (l >= s) + svm_range_unmap_from_gpus(prange, s, l); svm_range_unlock(prange); } @@ -1927,6 +1937,7 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, struct svm_range_list *svms; struct svm_range *pchild; struct kfd_process *p; + unsigned long s, l; bool unmap_parent; p = kfd_lookup_process_by_mm(mm); @@ -1937,14 +1948,23 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last, start, last); - svm_range_unmap_from_gpus(prange, start, last); - svm_range_lock(prange); unmap_parent = start <= prange->start && last >= prange->last; - list_for_each_entry(pchild, >child_list, child_list) + list_for_each_entry(pchild, >child_list, child_list) { + mutex_lock_nested(>lock, 1); + s = max(start, pchild->start); + l = min(last, pchild->last); + if (l >= s) + svm_range_unmap_from_gpus(pchild, s, l); svm_range_unmap_split(mm, prange, pchild, start, last); + mutex_unlock(>lock); + } + s = max(start, prange->start); + l = min(last, prange->last); + if (l >= s) + svm_range_unmap_from_gpus(prange, s, l); svm_range_unmap_split(mm, prange, prange, start, last); svm_range_unlock(prange); @@ -2142,12 +2162,10 @@ 
svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } -
[PATCH 32/44] drm/amdkfd: multiple gpu migrate vram to vram
If prefetch range to gpu with acutal location is another gpu, or GPU retry fault restore pages to migrate the range with acutal location is gpu, then migrate from one gpu to another gpu. Use system memory as bridge because sdma engine may not able to access another gpu vram, use sdma of source gpu to migrate to system memory, then use sdma of destination gpu to migrate from system memory to gpu. Print out gpuid or gpuidx in debug messages. Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 47 +++- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 4 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 69 +--- 3 files changed, 96 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index cd89b38e3d9b..cf71b98303c2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -462,8 +462,9 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, * Return: * 0 - OK, otherwise error code */ -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) +static int +svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) { unsigned long addr, start, end; struct vm_area_struct *vma; @@ -723,6 +724,48 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) return r; } +/** + * svm_migrate_vram_to_vram - migrate svm range from device to device + * @prange: range structure + * @best_loc: the device to migrate to + * @mm: process mm, use current->mm if NULL + * + * Context: Process context, caller hold mmap read lock, svms lock, prange lock + * + * Return: + * 0 - OK, otherwise error code + */ +static int +svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, +struct mm_struct *mm) +{ + int r; + + /* +* TODO: for both devices with PCIe large bar or on same xgmi hive, skip +* system memory as 
migration bridge +*/ + + pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc); + + r = svm_migrate_vram_to_ram(prange, mm); + if (r) + return r; + + return svm_migrate_ram_to_vram(prange, best_loc, mm); +} + +int +svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) +{ + if (!prange->actual_loc) + return svm_migrate_ram_to_vram(prange, best_loc, mm); + else + return svm_migrate_vram_to_vram(prange, best_loc, mm); + +} + /** * svm_migrate_to_ram - CPU page fault handler * @vmf: CPU vm fault vma, address diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 9949b55d3b6a..bc680619d135 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -37,8 +37,8 @@ enum MIGRATION_COPY_DIR { FROM_VRAM_TO_RAM }; -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm); +int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm); int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index f4b4fea06ac9..6c46d43bf613 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -363,8 +363,11 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo) kref_put(_bo->kref, svm_range_bo_release); } -static bool svm_range_validate_svm_bo(struct svm_range *prange) +static bool +svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange) { + struct amdgpu_device *bo_adev; + mutex_lock(>lock); if (!prange->svm_bo) { mutex_unlock(>lock); @@ -376,6 +379,22 @@ static bool svm_range_validate_svm_bo(struct svm_range *prange) return true; } if (svm_bo_ref_unless_zero(prange->svm_bo)) { + /* +* Migrate from GPU to GPU, remove 
range from source bo_adev +* svm_bo range list, and return false to allocate svm_bo from +* destination adev. +*/ + bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + if (bo_adev != adev) { + mutex_unlock(>lock); + + spin_lock(>svm_bo->list_lock); + list_del_init(>svm_bo_list); + spin_unlock(>svm_bo->list_lock); + +
[PATCH 37/44] drm/amdkfd: Fix svm_bo_list locking in eviction worker
Take the svm_bo_list spin lock when iterating of the range list during eviction. Change-Id: I979d959e06c32e114cea8d151933b8ee7455627e Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 19 +-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 49aca4664411..3a7030d9f331 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2494,7 +2494,6 @@ int svm_range_schedule_evict_svm_bo(struct amdgpu_amdkfd_fence *fence) static void svm_range_evict_svm_bo_worker(struct work_struct *work) { struct svm_range_bo *svm_bo; - struct svm_range *prange; struct kfd_process *p; struct mm_struct *mm; @@ -2511,13 +2510,29 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) return; mmap_read_lock(mm); - list_for_each_entry(prange, _bo->range_list, svm_bo_list) { + spin_lock(_bo->list_lock); + while (!list_empty(_bo->range_list)) { + struct svm_range *prange = + list_first_entry(_bo->range_list, + struct svm_range, svm_bo_list); + list_del_init(>svm_bo_list); + spin_unlock(_bo->list_lock); + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, prange->last); + mutex_lock(>migrate_mutex); svm_migrate_vram_to_ram(prange, svm_bo->eviction_fence->mm); + + mutex_lock(>lock); + prange->svm_bo = NULL; + mutex_unlock(>lock); + mutex_unlock(>migrate_mutex); + + spin_lock(_bo->list_lock); } + spin_unlock(_bo->list_lock); mmap_read_unlock(mm); dma_fence_signal(_bo->eviction_fence->base); -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 33/44] drm/amdkfd: Add SVM API support capability bits
From: Philip Yang SVMAPISupported property added to HSA_CAPABILITY, the value match HSA_CAPABILITY defined in Thunk spec: SVMAPISupported: it will not be supported on older kernels that don't have HMM or on systems with GFXv8 or older GPUs without support for 48-bit virtual addresses. CoherentHostAccess property added to HSA_MEMORYPROPERTY, the value match HSA_MEMORYPROPERTY defined in Thunk spec: CoherentHostAccess: whether or not device memory can be coherently accessed by the host CPU. Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 6 ++ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 10 ++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index cdef608db4f4..ab9fe854b4d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -1419,6 +1419,12 @@ int kfd_topology_add_device(struct kfd_dev *gpu) dev->node_props.capability |= (adev->ras_features != 0) ? HSA_CAP_RASEVENTNOTIFY : 0; + /* SVM API and HMM page migration work together, device memory type +* is initalized to not 0 when page migration register device memory. 
+*/ + if (adev->kfd.dev->pgmap.type != 0) + dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED; + kfd_debug_print_topology(); if (!res) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index b8b68087bd7a..6bd6380b0ee0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -53,8 +53,9 @@ #define HSA_CAP_ASIC_REVISION_MASK 0x03c0 #define HSA_CAP_ASIC_REVISION_SHIFT22 #define HSA_CAP_SRAM_EDCSUPPORTED 0x0400 +#define HSA_CAP_SVMAPI_SUPPORTED 0x0800 -#define HSA_CAP_RESERVED 0xf80f8000 +#define HSA_CAP_RESERVED 0xf00f8000 struct kfd_node_properties { uint64_t hive_id; @@ -98,9 +99,10 @@ struct kfd_node_properties { #define HSA_MEM_HEAP_TYPE_GPU_LDS 4 #define HSA_MEM_HEAP_TYPE_GPU_SCRATCH 5 -#define HSA_MEM_FLAGS_HOT_PLUGGABLE0x0001 -#define HSA_MEM_FLAGS_NON_VOLATILE 0x0002 -#define HSA_MEM_FLAGS_RESERVED 0xfffc +#define HSA_MEM_FLAGS_HOT_PLUGGABLE0x0001 +#define HSA_MEM_FLAGS_NON_VOLATILE 0x0002 +#define HSA_MEM_FLAGS_COHERENTHOSTACCESS 0x0004 +#define HSA_MEM_FLAGS_RESERVED 0xfff8 struct kfd_mem_properties { struct list_headlist; -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 35/44] drm/amdkfd: Call mutex_destroy
Destroy SVM-related mutexes correctly. Change-Id: I85da30b1b0dce72433e6d3b507cb0b55b83b433c Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3eea8f87724d..0fbc037b06e3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -209,6 +209,8 @@ static void svm_range_free(struct svm_range *prange) svm_range_vram_node_free(prange); svm_range_free_dma_mappings(prange); kvfree(prange->pages_addr); + mutex_destroy(>lock); + mutex_destroy(>migrate_mutex); kfree(prange); } @@ -2220,6 +,8 @@ void svm_range_list_fini(struct kfd_process *p) list_for_each_entry_safe(prange, next, >svms.list, list) svm_range_free(prange); + mutex_destroy(>svms.lock); + pr_debug("pasid 0x%x svms 0x%p done\n", p->pasid, >svms); } -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 36/44] drm/amdkfd: Fix spurious restore failures
Restore can appear to fail if the svms->evicted counter changes before the function can acquire the necessary locks. Re-read the counter after acquiring the lock to minimize the chances of having to reschedule the worker. Change-Id: I236b912bddf106583be264abde2f6bd1a5d5a083 Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 0fbc037b06e3..49aca4664411 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1429,6 +1429,8 @@ static void svm_range_restore_work(struct work_struct *work) svm_range_list_lock_and_flush_work(svms, mm); mutex_lock(>lock); + evicted_ranges = atomic_read(>evicted_ranges); + list_for_each_entry(prange, >list, list) { invalid = atomic_read(>invalid); if (!invalid) -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 31/44] drm/amdkfd: add svm range validate timestamp
With xnack on, add validate timestamp in order to handle GPU vm fault from multiple GPUs. If GPU retry fault need migrate the range to the best restore location, use range validate timestamp to record system timestamp after range is restored to update GPU page table. Because multiple pages of same range have multiple retry fault, define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING to the long time period that pending retry fault may still comes after page table update, to skip duplicate retry fault of same range. If difference between system timestamp and range last validate timestamp is bigger than AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING, that means the retry fault is from another GPU, then continue to handle retry fault recover. Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 ++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 98c049dc3a63..f4b4fea06ac9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -34,6 +34,11 @@ #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 +/* Long enough to ensure no retry fault comes after svm range is restored and + * page table is updated. 
+ */ +#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING 2000 + static void svm_range_evict_svm_bo_worker(struct work_struct *work); static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, @@ -246,6 +251,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(>deferred_list); INIT_LIST_HEAD(>child_list); atomic_set(>invalid, 0); + prange->validate_timestamp = ktime_to_us(ktime_get()); mutex_init(>migrate_mutex); mutex_init(>lock); svm_range_set_default_attributes(>preferred_loc, @@ -578,19 +584,25 @@ static int svm_range_validate_vram(struct svm_range *prange) static int svm_range_validate(struct mm_struct *mm, struct svm_range *prange) { + struct kfd_process *p; int r; pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] actual loc 0x%x\n", prange->svms, prange, prange->start, prange->last, prange->actual_loc); + p = container_of(prange->svms, struct kfd_process, svms); + if (!prange->actual_loc) r = svm_range_validate_ram(mm, prange); else r = svm_range_validate_vram(prange); - pr_debug("svms 0x%p [0x%lx 0x%lx] ret %d invalid %d\n", prange->svms, -prange->start, prange->last, r, atomic_read(>invalid)); + if (!r) + prange->validate_timestamp = ktime_to_us(ktime_get()); + + pr_debug("svms 0x%p [0x%lx 0x%lx] ret %d\n", prange->svms, +prange->start, prange->last, r); return r; } @@ -2086,6 +2098,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, struct svm_range_list *svms; struct svm_range *prange; struct kfd_process *p; + uint64_t timestamp; int32_t best_loc; int r = 0; @@ -2119,6 +2132,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, } mutex_lock(>migrate_mutex); + timestamp = ktime_to_us(ktime_get()) - prange->validate_timestamp; + /* skip duplicate vm fault on different pages of same range */ + if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) { + pr_debug("svms 0x%p [0x%lx %lx] already restored\n", +svms, prange->start, prange->last); + goto 
out_unlock_range; + } best_loc = svm_range_best_restore_location(prange, adev); if (best_loc == -1) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index fed28e487878..fea9c63b5f95 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -87,6 +87,7 @@ struct svm_work_list_item { * @actual_loc: the actual location, 0 for CPU, or GPU id * @granularity:migration granularity, log2 num pages * @invalid:not 0 means cpu page table is invalidated + * @validate_timestamp: system timestamp when range is validated * @notifier: register mmu interval notifier * @work_item: deferred work item information * @deferred_list: list header used to add range to deferred list @@ -125,6 +126,7 @@ struct svm_range { uint32_tactual_loc; uint8_t granularity; atomic_tinvalid; + uint64_tvalidate_timestamp; struct mmu_interval_notifiernotifier; struct svm_work_list_item work_item; struct list_headdeferred_list; -- 2.31.0 ___ amd-gfx
[PATCH 39/44] drm/amdkfd: Point out several race conditions
There are several race conditions with XNACK enabled. For now just some FIXME comments with ideas how to fix it. Change-Id: If0abab6dcb8f4e95c9d8820f6c569263eda29a89 Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 5 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 21 - 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 5c8b32873086..101d1f71db84 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -539,6 +539,11 @@ svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, src = (uint64_t *)(scratch + npages); dst = scratch; + /* FIXME: Is it legal to hold on to this page array? We don't have +* proper references to the pages and we may not have an MMU notifier +* set up for the range at this point that could invalidate it (if +* it's a child range). +*/ prange->pages_addr = kvmalloc_array(npages, sizeof(*prange->pages_addr), GFP_KERNEL | __GFP_ZERO); if (!prange->pages_addr) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index fbcb1491e987..c48fe2f276b9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1727,7 +1727,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) pr_debug("update and map 0x%p prange 0x%p [0x%lx 0x%lx]\n", svms, prange, prange->start, prange->last); svm_range_update_notifier_and_interval_tree(mm, prange); - + /* FIXME: need to validate somewhere */ r = svm_range_map_to_gpus(prange, true); if (r) pr_debug("failed %d map 0x%p [0x%lx 0x%lx]\n", @@ -1744,6 +1744,7 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) prange, prange->start, prange->last); svm_range_add_to_svms(prange); svm_range_add_notifier_locked(mm, prange); + /* FIXME: need to validate somewhere */ r = svm_range_map_to_gpus(prange, true); if 
(r) pr_debug("failed %d map 0x%p [0x%lx 0x%lx]\n", @@ -2068,6 +2069,14 @@ svm_range_best_restore_location(struct svm_range *prange, return -1; } +/* FIXME: This function can race with MMU notifiers. MMU notifiers can + * invalidate the page addresses concurrently, so we may end up mapping + * invalid addresses here. We cannot hold the prange->lock (held in MMU + * notifier) while updating page tables because of lock dependencies, + * as SDMA page table updates need reservation locks. Only unmapping + * works without reservations. May need to hold the mmap_write_lock to + * prevent concurrent MMU notifiers. + */ int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint64_t addr) @@ -2592,6 +2601,16 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, continue; } + /* FIXME: With xnack on, this can race with MMU notifiers. +* They may invalidate page addresses before we map them. +* Then we end up mapping invalid addresses in the GPU page +* table. May need to find a way to still hold the mmap write +* for map_to_gpus but drop it for validate to allow +* concurrent evictions. This will lead to some retry logic +* and the need to protect the update list differently. +* Maybe factor migration and validation into a common helper +* function shared with the GPU page fault handler. +*/ r = svm_range_validate(mm, prange); if (r) { pr_debug("failed %d to validate svm range\n", r); -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 30/44] drm/amdkfd: refine migration policy with xnack on
With xnack on, GPU vm fault handler decide the best restore location, then migrate range to the best restore location and update GPU mapping to recover the GPU vm fault. Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 7 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 3 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 16 +++ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 150 --- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 1 + 6 files changed, 157 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 8ce3ff56a0ce..cd89b38e3d9b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -455,18 +455,19 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @mm: the process mm structure * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * * Return: * 0 - OK, otherwise error code */ -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) +int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) { unsigned long addr, start, end; struct vm_area_struct *vma; struct amdgpu_device *adev; - struct mm_struct *mm; int r = 0; if (prange->actual_loc == best_loc) { @@ -487,8 +488,6 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; - mm = current->mm; - for (addr = start; addr < end;) { unsigned long next; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 95fd7b21791f..9949b55d3b6a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ 
b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -37,7 +37,8 @@ enum MIGRATION_COPY_DIR { FROM_VRAM_TO_RAM }; -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc); +int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm); int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 755c0517867d..2ccfdb218198 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -867,6 +867,9 @@ int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); int kfd_process_device_from_gpuidx(struct kfd_process *p, uint32_t gpu_idx, struct kfd_dev **gpu); +int kfd_process_gpuid_from_kgd(struct kfd_process *p, + struct amdgpu_device *adev, uint32_t *gpuid, + uint32_t *gpuidx); void kfd_unref_process(struct kfd_process *p); int kfd_process_evict_queues(struct kfd_process *p); int kfd_process_restore_queues(struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index c8479f6bd68c..48ea6f393353 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1684,6 +1684,22 @@ int kfd_process_device_from_gpuidx(struct kfd_process *p, return -EINVAL; } +int +kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev, + uint32_t *gpuid, uint32_t *gpuidx) +{ + struct kgd_dev *kgd = (struct kgd_dev *)adev; + int i; + + for (i = 0; i < p->n_pdds; i++) + if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) { + *gpuid = p->pdds[i]->dev->id; + *gpuidx = i; + return 0; + } + return -EINVAL; +} + static void evict_process_worker(struct work_struct *work) { int ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 3a7b842b362c..98c049dc3a63 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -179,8 +179,11 @@ static void svm_range_free_dma_mappings(struct svm_range *prange) int r; p = container_of(prange->svms, struct kfd_process, svms); - bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, - MAX_GPU_INSTANCE); + if (p->xnack_enabled) +
[PATCH 12/44] drm/amdkfd: add xnack enabled flag to kfd_process
From: Alex Sierra This flag is useful at cpu invalidation page table decision. Between select queue eviction or page fault. Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 4 +++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 36 2 files changed, 40 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 7d70af26b5c7..d66430740e52 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -824,6 +824,8 @@ struct kfd_process { /* shared virtual memory registered by this process */ struct svm_range_list svms; bool svm_disabled; + + bool xnack_enabled; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ @@ -877,6 +879,8 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, struct kfd_process *p); +bool kfd_process_xnack_supported(struct kfd_process *p); + int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, struct vm_area_struct *vma); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 321895d7555a..c8479f6bd68c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1193,6 +1193,39 @@ void kfd_process_set_trap_handler(struct qcm_process_device *qpd, } } +bool kfd_process_xnack_supported(struct kfd_process *p) +{ + int i; + + /* On most GFXv9 GPUs, the retry mode in the SQ must match the +* boot time retry setting. Mixing processes with different +* XNACK/retry settings can hang the GPU. +* +* Different GPUs can have different noretry settings depending +* on HW bugs or limitations. We need to find at least one +* XNACK mode for this process that's compatible with all GPUs. +* Fortunately GPUs with retry enabled (noretry=0) can run code +* built for XNACK-off. On GFXv9 it may perform slower. 
+* +* Therefore applications built for XNACK-off can always be +* supported and will be our fallback if any GPU does not +* support retry. +*/ + for (i = 0; i < p->n_pdds; i++) { + struct kfd_dev *dev = p->pdds[i]->dev; + + /* Only consider GFXv9 and higher GPUs. Older GPUs don't +* support the SVM APIs and don't need to be considered +* for the XNACK mode selection. +*/ + if (dev->device_info->asic_family >= CHIP_VEGA10 && + dev->noretry) + return false; + } + + return true; +} + /* * On return the kfd_process is fully operational and will be freed when the * mm is released @@ -1232,6 +1265,9 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (err != 0) goto err_init_apertures; + /* Check XNACK support after PDDs are created in kfd_init_apertures */ + process->xnack_enabled = kfd_process_xnack_supported(process); + err = svm_range_list_init(process); if (err) goto err_init_svm_range_list; -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 24/44] drm/amdkfd: add svm_bo reference for eviction fence
From: Alex Sierra [why] As part of the SVM functionality, the eviction mechanism used for SVM_BOs is different. This mechanism uses one eviction fence per prange, instead of one fence per kfd_process. [how] A svm_bo reference to amdgpu_amdkfd_fence to allow differentiate between SVM_BO or regular BO evictions. This also include modifications to set the reference at the fence creation call. Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 4 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 -- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 14f68c028126..beb2ef070a0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence { struct mm_struct *mm; spinlock_t lock; char timeline_name[TASK_COMM_LEN]; + struct svm_range_bo *svm_bo; }; struct amdgpu_kfd_dev { @@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, int queue_bit); struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm); + struct mm_struct *mm, + struct svm_range_bo *svm_bo); #if IS_ENABLED(CONFIG_HSA_AMD) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 5af464933976..53559643c712 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -60,7 +60,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0); */ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm) + struct mm_struct *mm, + struct svm_range_bo *svm_bo) { 
struct amdgpu_amdkfd_fence *fence; @@ -73,7 +74,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, fence->mm = mm; get_task_comm(fence->timeline_name, current); spin_lock_init(>lock); - + fence->svm_bo = svm_bo; dma_fence_init(>base, _fence_ops, >lock, context, atomic_inc_return(_seq)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index e93850f2f3b1..9af644f256e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -970,7 +970,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, info->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), - current->mm); + current->mm, + NULL); if (!info->eviction_fence) { pr_err("Failed to create eviction fence\n"); ret = -ENOMEM; @@ -2188,7 +2189,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) */ new_fence = amdgpu_amdkfd_fence_create( process_info->eviction_fence->base.context, - process_info->eviction_fence->mm); + process_info->eviction_fence->mm, + NULL); if (!new_fence) { pr_err("Failed to create eviction fence\n"); ret = -ENOMEM; -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 13/44] drm/amdkfd: add ioctl to configure and query xnack retries
From: Alex Sierra Xnack retries are used for page fault recovery. Some AMD chip families support continuously retry while page table entries are invalid. The driver must handle the page fault interrupt and fill in a valid entry for the GPU to continue. This ioctl allows to enable/disable XNACK retries per KFD process. Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 28 +++ include/uapi/linux/kfd_ioctl.h | 43 +++- 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 9511826ac8ae..63eee7ef3355 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1743,6 +1743,31 @@ static int kfd_ioctl_smi_events(struct file *filep, return kfd_smi_event_open(dev, >anon_fd); } +static int kfd_ioctl_set_xnack_mode(struct file *filep, + struct kfd_process *p, void *data) +{ + struct kfd_ioctl_set_xnack_mode_args *args = data; + int r = 0; + + mutex_lock(>mutex); + if (args->xnack_enabled >= 0) { + if (!list_empty(>pqm.queues)) { + pr_debug("Process has user queues running\n"); + mutex_unlock(>mutex); + return -EBUSY; + } + if (args->xnack_enabled && !kfd_process_xnack_supported(p)) + r = -EPERM; + else + p->xnack_enabled = args->xnack_enabled; + } else { + args->xnack_enabled = p->xnack_enabled; + } + mutex_unlock(>mutex); + + return r; +} + static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) { struct kfd_ioctl_svm_args *args = data; @@ -1869,6 +1894,9 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { kfd_ioctl_smi_events, 0), AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_XNACK_MODE, + kfd_ioctl_set_xnack_mode, 0), }; #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 247b57baa94f..3cb5b5dd9f77 100644 --- 
a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -597,6 +597,44 @@ struct kfd_ioctl_svm_args { struct kfd_ioctl_svm_attribute attrs[0]; }; +/** + * kfd_ioctl_set_xnack_mode_args - Arguments for set_xnack_mode + * + * @xnack_enabled: [in/out] Whether to enable XNACK mode for this process + * + * @xnack_enabled indicates whether recoverable page faults should be + * enabled for the current process. 0 means disabled, positive means + * enabled, negative means leave unchanged. If enabled, virtual address + * translations on GFXv9 and later AMD GPUs can return XNACK and retry + * the access until a valid PTE is available. This is used to implement + * device page faults. + * + * On output, @xnack_enabled returns the (new) current mode (0 or + * positive). Therefore, a negative input value can be used to query + * the current mode without changing it. + * + * The XNACK mode fundamentally changes the way SVM managed memory works + * in the driver, with subtle effects on application performance and + * functionality. + * + * Enabling XNACK mode requires shader programs to be compiled + * differently. Furthermore, not all GPUs support changing the mode + * per-process. Therefore changing the mode is only allowed while no + * user mode queues exist in the process. This ensure that no shader + * code is running that may be compiled for the wrong mode. And GPUs + * that cannot change to the requested mode will prevent the XNACK + * mode from occurring. All GPUs used by the process must be in the + * same XNACK mode. + * + * GFXv8 or older GPUs do not support 48 bit virtual addresses or SVM. + * Therefore those GPUs are not considered for the XNACK mode switch. 
+ * + * Return: 0 on success, -errno on failure + */ +struct kfd_ioctl_set_xnack_mode_args { + __s32 xnack_enabled; +}; + #define AMDKFD_IOCTL_BASE 'K' #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr) #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type) @@ -699,7 +737,10 @@ struct kfd_ioctl_svm_args { #define AMDKFD_IOC_SVM AMDKFD_IOWR(0x20, struct kfd_ioctl_svm_args) +#define AMDKFD_IOC_SET_XNACK_MODE \ + AMDKFD_IOWR(0x21, struct kfd_ioctl_set_xnack_mode_args) + #define AMDKFD_COMMAND_START 0x01 -#define AMDKFD_COMMAND_END 0x21 +#define AMDKFD_COMMAND_END 0x22 #endif -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 15/44] drm/amdkfd: validate vram svm range from TTM
If svm range perfetch location is not zero, use TTM to alloc amdgpu_bo vram nodes to validate svm range, then map vram nodes to GPUs. Use offset to sub allocate from the same amdgpu_bo to handle overlap vram range while adding new range or unmapping range. svm_bo has ref count to trace the shared ranges. If all ranges of shared amdgpu_bo are migrated to ram, ref count becomes 0, then amdgpu_bo is released, all ranges svm_bo is set to NULL. To migrate range from ram back to vram, allocate the same amdgpu_bo with previous offset if the range has svm_bo. If prange migrate to VRAM, no CPU mapping exist, then process exit will not have unmap callback for this prange to free prange and svm bo. Free outstanding pranges from svms list before process is freed in svm_range_list_fini. Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 338 +-- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 36 +++ 2 files changed, 355 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 55828d6fbea9..9b1c5aa86f4a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -44,7 +44,8 @@ static const struct mmu_interval_notifier_ops svm_range_mn_ops = { * svm_range_unlink - unlink svm_range from lists and interval tree * @prange: svm range structure to be removed * - * Remove the svm range from svms interval tree and link list + * Remove the svm_range from the svms and svm_bo lists and the svms + * interval tree. 
* * Context: The caller must hold svms->lock */ @@ -53,6 +54,12 @@ static void svm_range_unlink(struct svm_range *prange) pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); + if (prange->svm_bo) { + spin_lock(>svm_bo->list_lock); + list_del(>svm_bo_list); + spin_unlock(>svm_bo->list_lock); + } + list_del(>list); if (prange->it_node.start != 0 && prange->it_node.last != 0) interval_tree_remove(>it_node, >svms->objects); @@ -193,6 +200,7 @@ static void svm_range_free(struct svm_range *prange) pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); + svm_range_vram_node_free(prange); svm_range_free_dma_mappings(prange); kvfree(prange->pages_addr); kfree(prange); @@ -227,9 +235,11 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(>update_list); INIT_LIST_HEAD(>remove_list); INIT_LIST_HEAD(>insert_list); + INIT_LIST_HEAD(>svm_bo_list); INIT_LIST_HEAD(>deferred_list); INIT_LIST_HEAD(>child_list); atomic_set(>invalid, 0); + mutex_init(>lock); svm_range_set_default_attributes(>preferred_loc, >prefetch_loc, >granularity, >flags); @@ -277,14 +287,244 @@ svm_range_validate_ram(struct mm_struct *mm, struct svm_range *prange) return 0; } +static bool svm_bo_ref_unless_zero(struct svm_range_bo *svm_bo) +{ + if (!svm_bo || !kref_get_unless_zero(_bo->kref)) + return false; + + return true; +} + +static struct svm_range_bo *svm_range_bo_ref(struct svm_range_bo *svm_bo) +{ + if (svm_bo) + kref_get(_bo->kref); + + return svm_bo; +} + +static void svm_range_bo_release(struct kref *kref) +{ + struct svm_range_bo *svm_bo; + + svm_bo = container_of(kref, struct svm_range_bo, kref); + spin_lock(_bo->list_lock); + while (!list_empty(_bo->range_list)) { + struct svm_range *prange = + list_first_entry(_bo->range_list, + struct svm_range, svm_bo_list); + /* list_del_init tells a concurrent svm_range_vram_node_new when +* it's safe to reuse the svm_bo pointer 
and svm_bo_list head. +*/ + list_del_init(>svm_bo_list); + spin_unlock(_bo->list_lock); + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, +prange->start, prange->last); + mutex_lock(>lock); + prange->svm_bo = NULL; + mutex_unlock(>lock); + + spin_lock(_bo->list_lock); + } + spin_unlock(_bo->list_lock); + + amdgpu_bo_unref(_bo->bo); + kfree(svm_bo); +} + +static void svm_range_bo_unref(struct svm_range_bo *svm_bo) +{ + if (!svm_bo) + return; + + kref_put(_bo->kref, svm_range_bo_release); +} + +static struct svm_range_bo *svm_range_bo_new(void) +{ + struct svm_range_bo *svm_bo; + + svm_bo = kzalloc(sizeof(*svm_bo),
[PATCH 25/44] drm/amdgpu: add param bit flag to create SVM BOs
From: Alex Sierra Add CREATE_SVM_BO define bit for SVM BOs. Another define flag was moved to concentrate these KFD type flags in one include file. Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 4 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 9af644f256e9..bc38de8c5c38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -33,9 +33,6 @@ #include #include "amdgpu_xgmi.h" -/* BO flag to indicate a KFD userptr BO */ -#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) - /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate */ @@ -217,7 +214,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) u32 domain = bo->preferred_domains; bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); - if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { + if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) { domain = AMDGPU_GEM_DOMAIN_CPU; sg = false; } @@ -1278,7 +1275,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bo->kfd_bo = *mem; (*mem)->bo = bo; if (user_addr) - bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; + bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO; (*mem)->va = va; (*mem)->domain = domain; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 25411b2c4dd9..b07903d317e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -37,6 +37,10 @@ #define AMDGPU_BO_INVALID_OFFSET LONG_MAX #define AMDGPU_BO_MAX_PLACEMENTS 3 +/* BO flag to indicate a KFD userptr BO */ +#define AMDGPU_AMDKFD_CREATE_USERPTR_BO(1ULL << 63) +#define AMDGPU_AMDKFD_CREATE_SVM_BO(1ULL << 62) + #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo) 
struct amdgpu_bo_param { -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 27/44] drm/amdgpu: svm bo enable_signal call condition
From: Alex Sierra [why] To support svm bo eviction mechanism. [how] If the BO created has AMDGPU_AMDKFD_CREATE_SVM_BO flag set, enable_signal callback will be called inside amdgpu_evict_flags. This also causes gutting of the BO by removing all placements, so that TTM won't actually do an eviction. Instead it will discard the memory held by the BO. This is needed for HMM migration to user mode system memory pages. Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 14 ++ 1 file changed, 14 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index aca5a29f6d2a..2d80eb3fa571 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -111,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } abo = ttm_to_amdgpu_bo(bo); + if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) { + struct dma_fence *fence; + struct dma_resv *resv = >base._resv; + + rcu_read_lock(); + fence = rcu_dereference(resv->fence_excl); + if (fence && !fence->ops->signaled) + dma_fence_enable_sw_signaling(fence); + + placement->num_placement = 0; + placement->num_busy_placement = 0; + rcu_read_unlock(); + return; + } switch (bo->mem.mem_type) { case AMDGPU_PL_GDS: case AMDGPU_PL_GWS: -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 28/44] drm/amdgpu: add svm_bo eviction to enable_signal cb
From: Alex Sierra Add support for svm_bo fence eviction to the amdgpu_amdkfd_fence.enable_signal callback. Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 11 --- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 53559643c712..1fe233cddb20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -28,6 +28,7 @@ #include #include #include "amdgpu_amdkfd.h" +#include "kfd_svm.h" static const struct dma_fence_ops amdkfd_fence_ops; static atomic_t fence_seq = ATOMIC_INIT(0); @@ -123,9 +124,13 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) if (dma_fence_is_signaled(f)) return true; - if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) - return true; - + if (!fence->svm_bo) { + if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) + return true; + } else { + if (!svm_range_schedule_evict_svm_bo(fence)) + return true; + } return false; } -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 20/44] drm/amdkfd: invalidate tables on page retry fault
GPU page tables are invalidated by unmapping prange directly at the mmu notifier, when page fault retry is enabled through amdgpu_noretry global parameter. The restore page table is performed at the page fault handler. If xnack is on, we update GPU mappings after migration to avoid unnecessary GPUVM faults. Signed-off-by: Alex Sierra Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 71 +++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 +- 3 files changed, 64 insertions(+), 17 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 1243cf02f872..8ce3ff56a0ce 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -789,7 +789,11 @@ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) list_for_each_entry_safe(prange, next, _list, update_list) { enum svm_work_list_ops op; - op = SVM_OP_UPDATE_RANGE_NOTIFIER; + /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ + if (p->xnack_enabled && prange == pmigrate) + op = SVM_OP_UPDATE_RANGE_NOTIFIER_AND_MAP; + else + op = SVM_OP_UPDATE_RANGE_NOTIFIER; svm_range_add_list_work(>svms, prange, mm, op); list_del_init(>update_list); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 69241ed4a377..fb8ca844d9bd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1006,6 +1006,13 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, *pmigrate = new; + /* xnack on, update mapping on GPUs with ACCESS_IN_PLACE */ + if (p->xnack_enabled && (*pmigrate)->work_item.op == SVM_OP_ADD_RANGE) { + (*pmigrate)->work_item.op = SVM_OP_ADD_RANGE_AND_MAP; + pr_debug("change prange 0x%p [0x%lx 0x%lx] op %d\n", +*pmigrate, (*pmigrate)->start, (*pmigrate)->last, +SVM_OP_ADD_RANGE_AND_MAP); + } return 0; } @@ -1407,25 +1414,38 @@ 
svm_range_evict(struct svm_range *prange, struct mm_struct *mm, unsigned long start, unsigned long last) { struct svm_range_list *svms = prange->svms; - int invalid, evicted_ranges; + struct kfd_process *p; int r = 0; - invalid = atomic_inc_return(>invalid); - evicted_ranges = atomic_inc_return(>evicted_ranges); - if (evicted_ranges != 1) - return r; + p = container_of(svms, struct kfd_process, svms); - pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n", -prange->svms, prange->start, prange->last); + pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n", +svms, prange->start, prange->last, start, last); - /* First eviction, stop the queues */ - r = kgd2kfd_quiesce_mm(mm); - if (r) - pr_debug("failed to quiesce KFD\n"); + if (!p->xnack_enabled) { + int invalid, evicted_ranges; + + invalid = atomic_inc_return(>invalid); + evicted_ranges = atomic_inc_return(>evicted_ranges); + if (evicted_ranges != 1) + return r; - pr_debug("schedule to restore svm %p ranges\n", svms); - schedule_delayed_work(>restore_work, - msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); + pr_debug("evicting svms 0x%p range [0x%lx 0x%lx]\n", +prange->svms, prange->start, prange->last); + + /* First eviction, stop the queues */ + r = kgd2kfd_quiesce_mm(mm); + if (r) + pr_debug("failed to quiesce KFD\n"); + + pr_debug("schedule to restore svm %p ranges\n", svms); + schedule_delayed_work(>restore_work, + msecs_to_jiffies(AMDGPU_SVM_RANGE_RESTORE_DELAY_MS)); + } else { + pr_debug("invalidate unmap svms 0x%p [0x%lx 0x%lx] from GPUs\n", +prange->svms, start, last); + svm_range_unmap_from_gpus(prange, start, last); + } return r; } @@ -1621,6 +1641,7 @@ static void svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) { struct mm_struct *mm = prange->work_item.mm; + int r; switch (prange->work_item.op) { case SVM_OP_NULL: @@ -1639,12 +1660,32 @@ svm_range_handle_list_op(struct svm_range_list *svms, struct svm_range *prange) svms, prange, prange->start, 
prange->last); svm_range_update_notifier_and_interval_tree(mm, prange); break; + case
[PATCH 18/44] drm/amdkfd: HMM migrate ram to vram
Register svm range with same address and size but preferred_location is changed from CPU to GPU or from GPU to CPU, trigger migration of the svm range from ram to vram or from vram to ram. If svm range prefetch location is GPU with flags KFD_IOCTL_SVM_FLAG_HOST_ACCESS, validate the svm range on ram first, then migrate it from ram to vram. After migrating to vram is done, CPU access will have cpu page fault, page fault handler migrate it back to ram and resume cpu access. Migration steps: 1. migrate_vma_pages get svm range ram pages, notify the interval is invalidated and unmap from CPU page table, HMM interval notifier callback evict process queues 2. Allocate new pages in vram using TTM 3. Use svm copy memory to sdma copy data from ram to vram 4. migrate_vma_pages copy ram pages structure to vram pages structure 5. migrate_vma_finalize put ram pages to free ram pages and memory 6. Restore work wait for migration is finished, then update GPUs page table mapping to new vram pages, resume process queues If migrate_vma_setup failed to collect all ram pages of range, retry 3 times until success to start migration. 
Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 278 +++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 2 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 187 ++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 + 4 files changed, 460 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 2a6824ddae88..668c360be0bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -204,6 +204,284 @@ svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence) return r; } +static uint64_t +svm_migrate_node_physical_addr(struct amdgpu_device *adev, + struct drm_mm_node **mm_node, uint64_t *offset) +{ + struct drm_mm_node *node = *mm_node; + uint64_t pos = *offset; + + if (node->start == AMDGPU_BO_INVALID_OFFSET) { + pr_debug("drm node is not validated\n"); + return 0; + } + + pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start, +node->size); + + if (pos >= node->size) { + do { + pos -= node->size; + node++; + } while (pos >= node->size); + + *mm_node = node; + *offset = pos; + } + + return (node->start + pos) << PAGE_SHIFT; +} + +unsigned long +svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr) +{ + return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT; +} + +static void +svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn) +{ + struct page *page; + + page = pfn_to_page(pfn); + page->zone_device_data = prange; + get_page(page); + lock_page(page); +} + +static void +svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr) +{ + struct page *page; + + page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr)); + unlock_page(page); + put_page(page); +} + + +static int +svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, +struct migrate_vma *migrate, struct dma_fence **mfence, +dma_addr_t *scratch) +{ + 
uint64_t npages = migrate->cpages; + struct device *dev = adev->dev; + struct drm_mm_node *node; + dma_addr_t *src; + uint64_t *dst; + uint64_t vram_addr; + uint64_t offset; + uint64_t i, j; + int r = -ENOMEM; + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, +prange->last); + + src = scratch; + dst = (uint64_t *)(scratch + npages); + + r = svm_range_vram_node_new(adev, prange, false); + if (r) { + pr_debug("failed %d get 0x%llx pages from vram\n", r, npages); + goto out; + } + + node = prange->ttm_res->mm_node; + offset = prange->offset; + vram_addr = svm_migrate_node_physical_addr(adev, , ); + if (!vram_addr) { + WARN_ONCE(1, "vram node address is 0\n"); + r = -ENOMEM; + goto out; + } + + for (i = j = 0; i < npages; i++) { + struct page *spage; + + spage = migrate_pfn_to_page(migrate->src[i]); + src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, DMA_TO_DEVICE); + r = dma_mapping_error(dev, src[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + goto out_free_vram_pages; + } + + pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n", +src[i] >>
[PATCH 03/44] drm/amdkfd: add svm ioctl API
From: Philip Yang Add svm (shared virtual memory) ioctl data structure and API definition. The svm ioctl API is designed to be extensible in the future. All operations are provided by a single IOCTL to preserve ioctl number space. The arguments structure ends with a variable size array of attributes that can be used to set or get one or multiple attributes. Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 ++ drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 4 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 2 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 + include/uapi/linux/kfd_ioctl.h | 130 ++- 5 files changed, 147 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 43de260b2230..dbc824cc6b32 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1742,6 +1742,16 @@ static int kfd_ioctl_smi_events(struct file *filep, return kfd_smi_event_open(dev, >anon_fd); } +static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) +{ + int r = 0; + + if (p->svm_disabled) + return -EPERM; + + return r; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -1840,6 +1850,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_SMI_EVENTS, kfd_ioctl_smi_events, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_SVM, kfd_ioctl_svm, 0), }; #define AMDKFD_CORE_IOCTL_COUNTARRAY_SIZE(amdkfd_ioctls) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c index a2c9063076cc..52da1a3b2c7a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c @@ -405,6 +405,10 @@ int kfd_init_apertures(struct kfd_process *process) case CHIP_POLARIS12: case 
CHIP_VEGAM: kfd_init_apertures_vi(pdd, id); + /* VI GPUs cannot support SVM with only +* 40 bits of virtual address space. +*/ + process->svm_disabled |= true; break; case CHIP_VEGA10: case CHIP_VEGA12: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 99b4624ef4c7..18fc2ccd1a77 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -809,6 +809,8 @@ struct kfd_process { struct kobject *kobj; struct kobject *kobj_queues; struct attribute attr_pasid; + + bool svm_disabled; }; #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 028ebb0deddd..89e7c125d334 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1208,6 +1208,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) process->mm = thread->mm; process->lead_thread = thread->group_leader; process->n_pdds = 0; + process->svm_disabled = false; INIT_DELAYED_WORK(>eviction_work, evict_process_worker); INIT_DELAYED_WORK(>restore_work, restore_process_worker); process->last_restore_timestamp = get_jiffies_64(); diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index bf5e7d7846dd..247b57baa94f 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -30,9 +30,10 @@ * - 1.1 - initial version * - 1.3 - Add SMI events support * - 1.4 - Indicate new SRAM EDC bit in device properties + * - 1.5 - Add SVM API */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 4 +#define KFD_IOCTL_MINOR_VERSION 5 struct kfd_ioctl_get_version_args { __u32 major_version;/* from KFD */ @@ -473,6 +474,129 @@ enum kfd_mmio_remap { KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4, }; +/* Guarantee host access to memory */ +#define KFD_IOCTL_SVM_FLAG_HOST_ACCESS 0x0001 +/* Fine grained coherency between all devices with 
access */ +#define KFD_IOCTL_SVM_FLAG_COHERENT0x0002 +/* Use any GPU in same hive as preferred device */ +#define KFD_IOCTL_SVM_FLAG_HIVE_LOCAL 0x0004 +/* GPUs only read, allows replication */ +#define KFD_IOCTL_SVM_FLAG_GPU_RO 0x0008 +/* Allow execution on GPU */ +#define KFD_IOCTL_SVM_FLAG_GPU_EXEC0x0010 +/* GPUs mostly read, may allow similar optimizations
[PATCH 19/44] drm/amdkfd: HMM migrate vram to ram
If CPU page fault happens, HMM pgmap_ops callback migrate_to_ram start migrate memory from vram to ram in steps: 1. migrate_vma_pages get vram pages, and notify HMM to invalidate the pages, HMM interval notifier callback evict process queues 2. Allocate system memory pages 3. Use svm copy memory to migrate data from vram to ram 4. migrate_vma_pages copy pages structure from vram pages to ram pages 5. Return VM_FAULT_SIGBUS if migration failed, to notify application 6. migrate_vma_finalize put vram pages, page_free callback free vram pages and vram nodes 7. Restore work wait for migration is finished, then update GPU page table mapping to system memory, and resume process queues Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 310 ++- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 158 +++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 12 + 4 files changed, 473 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 668c360be0bb..1243cf02f872 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -259,6 +259,35 @@ svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr) put_page(page); } +static unsigned long +svm_migrate_addr(struct amdgpu_device *adev, struct page *page) +{ + unsigned long addr; + + addr = page_to_pfn(page) << PAGE_SHIFT; + return (addr - adev->kfd.dev->pgmap.range.start); +} + +static struct page * +svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr) +{ + struct page *page; + + page = alloc_page_vma(GFP_HIGHUSER, vma, addr); + if (page) + lock_page(page); + + return page; +} + +void svm_migrate_put_sys_page(unsigned long addr) +{ + struct page *page; + + page = pfn_to_page(addr >> PAGE_SHIFT); + unlock_page(page); + put_page(page); +} static int svm_migrate_copy_to_vram(struct amdgpu_device *adev, 
struct svm_range *prange, @@ -484,13 +513,222 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) static void svm_migrate_page_free(struct page *page) { + /* Keep this function to avoid warning */ +} + +static int +svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, + struct migrate_vma *migrate, struct dma_fence **mfence, + dma_addr_t *scratch) +{ + uint64_t npages = migrate->cpages; + struct device *dev = adev->dev; + uint64_t *src; + dma_addr_t *dst; + struct page *dpage; + uint64_t i = 0, j; + uint64_t addr; + int r = 0; + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, +prange->last); + + addr = prange->start << PAGE_SHIFT; + + src = (uint64_t *)(scratch + npages); + dst = scratch; + + prange->pages_addr = kvmalloc_array(npages, sizeof(*prange->pages_addr), + GFP_KERNEL | __GFP_ZERO); + if (!prange->pages_addr) { + r = -ENOMEM; + goto out_oom; + } + + for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) { + struct page *spage; + + spage = migrate_pfn_to_page(migrate->src[i]); + if (!spage) { + pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n", +prange->svms, prange->start, prange->last); + r = -ENOMEM; + goto out_oom; + } + src[i] = svm_migrate_addr(adev, spage); + if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) { + r = svm_migrate_copy_memory_gart(adev, dst + i - j, +src + i - j, j, +FROM_VRAM_TO_RAM, +mfence); + if (r) + goto out_oom; + j = 0; + } + + dpage = svm_migrate_get_sys_page(migrate->vma, addr); + if (!dpage) { + pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n", +prange->svms, prange->start, prange->last); + r = -ENOMEM; + goto out_oom; + } + + dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); + r = dma_mapping_error(dev, dst[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + goto out_oom; + } + + pr_debug("dma mapping dst to 0x%llx, page_to_pfn
[PATCH 22/44] drm/amdkfd: page table restore through svm API
Page table restore implementation in SVM API. This is called from the fault handler at amdgpu_vm. To update page tables through the page fault retry IH. Signed-off-by: Alex Sierra Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 69 drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 + 2 files changed, 71 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index fb8ca844d9bd..c791d91cb45d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1946,6 +1946,75 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, return NULL; } +int +svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, + uint64_t addr) +{ + int r = 0; + struct mm_struct *mm = NULL; + struct svm_range *prange; + struct svm_range_list *svms; + struct kfd_process *p; + + p = kfd_lookup_process_by_pasid(pasid); + if (!p) { + pr_debug("kfd process not founded pasid 0x%x\n", pasid); + return -ESRCH; + } + svms = >svms; + + pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, addr); + + mm = get_task_mm(p->lead_thread); + if (!mm) { + pr_debug("svms 0x%p failed to get mm\n", svms); + r = -ESRCH; + goto out; + } + + svm_range_list_lock_and_flush_work(svms, mm); + mutex_lock(>lock); + prange = svm_range_from_addr(svms, addr, NULL); + + mmap_write_downgrade(mm); + + if (!prange) { + pr_debug("failed to find prange svms 0x%p address [0x%llx]\n", +svms, addr); + r = -EFAULT; + goto out_unlock_svms; + } + + mutex_lock(>migrate_mutex); + + r = svm_range_validate(mm, prange); + if (r) { + pr_debug("failed %d to validate svms 0x%p [0x%lx 0x%lx]\n", r, +svms, prange->start, prange->last); + + goto out_unlock_range; + } + + pr_debug("restoring svms 0x%p [0x%lx %lx] mapping\n", +svms, prange->start, prange->last); + + r = svm_range_map_to_gpus(prange, true); + if (r) + pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpu\n", r, +svms, 
prange->start, prange->last); + +out_unlock_range: + mutex_unlock(>migrate_mutex); +out_unlock_svms: + mutex_unlock(>lock); + mmap_read_unlock(mm); + mmput(mm); +out: + kfd_unref_process(p); + + return r; +} + void svm_range_list_fini(struct kfd_process *p) { struct svm_range *prange; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 3f945a601546..3aa6f6b97481 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -159,6 +159,8 @@ int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, struct svm_range *prange, struct svm_range **pmigrate, struct list_head *deferred_update_list); +int svm_range_restore_pages(struct amdgpu_device *adev, + unsigned int pasid, uint64_t addr); void svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, struct mm_struct *mm, enum svm_work_list_ops op); -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 21/44] drm/amdgpu: enable 48-bit IH timestamp counter
From: Alex Sierra By default this timestamp is 32 bit counter. It gets overflowed in around 10 minutes. Change-Id: I7c46604b0272dcfd1ce24351437c16fe53dca0ab Signed-off-by: Alex Sierra Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index ca8efa5c6978..2f17c8a57015 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, tmp = RREG32(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1); /* enable_intr field is only valid in ring0 */ if (ih == >irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 23/44] drm/amdkfd: SVM API call to restore page tables
From: Alex Sierra Use SVM API to restore page tables when retry fault and compute context are enabled. Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 0e9ae5f91c7c..a61df234f012 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -37,6 +37,7 @@ #include "amdgpu_gmc.h" #include "amdgpu_xgmi.h" #include "amdgpu_dma_buf.h" +#include "kfd_svm.h" /** * DOC: GPUVM @@ -3302,18 +3303,29 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, uint64_t value, flags; struct amdgpu_vm *vm; long r; + bool is_compute_context = false; spin_lock(>vm_manager.pasid_lock); vm = idr_find(>vm_manager.pasid_idr, pasid); - if (vm) + if (vm) { root = amdgpu_bo_ref(vm->root.base.bo); - else + is_compute_context = vm->is_compute_context; + } else { root = NULL; + } spin_unlock(>vm_manager.pasid_lock); if (!root) return false; + addr /= AMDGPU_GPU_PAGE_SIZE; + + if (!amdgpu_noretry && is_compute_context && + !svm_range_restore_pages(adev, pasid, addr)) { + amdgpu_bo_unref(); + return true; + } + r = amdgpu_bo_reserve(root, true); if (r) goto error_unref; @@ -3327,18 +3339,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, if (!vm) goto error_unlock; - addr /= AMDGPU_GPU_PAGE_SIZE; flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | AMDGPU_PTE_SYSTEM; - if (vm->is_compute_context) { + if (is_compute_context) { /* Intentionally setting invalid PTE flag * combination to force a no-retry-fault */ flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | AMDGPU_PTE_TF; value = 0; - } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ value = adev->dummy_page_addr; -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org 
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 06/44] drm/amdgpu: add common HMM get pages function
From: Philip Yang Move the HMM get pages function from amdgpu_ttm and to amdgpu_mn. This common function will be used by new svm APIs. Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 83 + drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 7 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 76 +++--- 3 files changed, 100 insertions(+), 66 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 828b5167ff12..997da4237a10 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -155,3 +155,86 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mmu_interval_notifier_remove(>notifier); bo->notifier.mm = NULL; } + +int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, + struct mm_struct *mm, struct page **pages, + uint64_t start, uint64_t npages, + struct hmm_range **phmm_range, bool readonly, + bool mmap_locked) +{ + struct hmm_range *hmm_range; + unsigned long timeout; + unsigned long i; + unsigned long *pfns; + int r = 0; + + hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL); + if (unlikely(!hmm_range)) + return -ENOMEM; + + pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) { + r = -ENOMEM; + goto out_free_range; + } + + hmm_range->notifier = notifier; + hmm_range->default_flags = HMM_PFN_REQ_FAULT; + if (!readonly) + hmm_range->default_flags |= HMM_PFN_REQ_WRITE; + hmm_range->hmm_pfns = pfns; + hmm_range->start = start; + hmm_range->end = start + npages * PAGE_SIZE; + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); + +retry: + hmm_range->notifier_seq = mmu_interval_read_begin(notifier); + + if (likely(!mmap_locked)) + mmap_read_lock(mm); + + r = hmm_range_fault(hmm_range); + + if (likely(!mmap_locked)) + mmap_read_unlock(mm); + if (unlikely(r)) { + /* +* FIXME: This timeout should encompass the retry from +* mmu_interval_read_retry() as well. 
+*/ + if (r == -EBUSY && !time_after(jiffies, timeout)) + goto retry; + goto out_free_pfns; + } + + /* +* Due to default_flags, all pages are HMM_PFN_VALID or +* hmm_range_fault() fails. FIXME: The pages cannot be touched outside +* the notifier_lock, and mmu_interval_read_retry() must be done first. +*/ + for (i = 0; pages && i < npages; i++) + pages[i] = hmm_pfn_to_page(pfns[i]); + + *phmm_range = hmm_range; + + return 0; + +out_free_pfns: + kvfree(pfns); +out_free_range: + kfree(hmm_range); + + return r; +} + +int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) +{ + int r; + + r = mmu_interval_read_retry(hmm_range->notifier, + hmm_range->notifier_seq); + kvfree(hmm_range->hmm_pfns); + kfree(hmm_range); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index a292238f75eb..7f7d37a457c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -30,6 +30,13 @@ #include #include +int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, + struct mm_struct *mm, struct page **pages, + uint64_t start, uint64_t npages, + struct hmm_range **phmm_range, bool readonly, + bool mmap_locked); +int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); + #if defined(CONFIG_HMM_MIRROR) int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 2bafbd78ba4b..aca5a29f6d2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -32,7 +32,6 @@ #include #include -#include #include #include #include @@ -692,10 +691,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned long start = gtt->userptr; struct vm_area_struct *vma; - struct hmm_range *range; - unsigned long timeout; 
struct mm_struct *mm; - unsigned long i; + bool readonly; int r = 0; mm = bo->notifier.mm; @@ -711,76 +708,26 @@ int
[PATCH 29/44] drm/amdgpu: reserve fence slot to update page table
From: Philip Yang Forgot to reserve a fence slot to use sdma to update page table, cause below kernel BUG backtrace to handle vm retry fault while application is exiting. [ 133.048143] kernel BUG at /home/yangp/git/compute_staging/kernel/drivers/dma-buf/dma-resv.c:281! [ 133.048487] Workqueue: events amdgpu_irq_handle_ih1 [amdgpu] [ 133.048506] RIP: 0010:dma_resv_add_shared_fence+0x204/0x280 [ 133.048672] amdgpu_vm_sdma_commit+0x134/0x220 [amdgpu] [ 133.048788] amdgpu_vm_bo_update_range+0x220/0x250 [amdgpu] [ 133.048905] amdgpu_vm_handle_fault+0x202/0x370 [amdgpu] [ 133.049031] gmc_v9_0_process_interrupt+0x1ab/0x310 [amdgpu] [ 133.049165] ? kgd2kfd_interrupt+0x9a/0x180 [amdgpu] [ 133.049289] ? amdgpu_irq_dispatch+0xb6/0x240 [amdgpu] [ 133.049408] amdgpu_irq_dispatch+0xb6/0x240 [amdgpu] [ 133.049534] amdgpu_ih_process+0x9b/0x1c0 [amdgpu] [ 133.049657] amdgpu_irq_handle_ih1+0x21/0x60 [amdgpu] [ 133.049669] process_one_work+0x29f/0x640 [ 133.049678] worker_thread+0x39/0x3f0 [ 133.049685] ? 
process_one_work+0x640/0x640 Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a61df234f012..3e32f76cd7bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -3302,7 +3302,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, struct amdgpu_bo *root; uint64_t value, flags; struct amdgpu_vm *vm; - long r; + int r; bool is_compute_context = false; spin_lock(>vm_manager.pasid_lock); @@ -3360,6 +3360,12 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, value = 0; } + r = dma_resv_reserve_shared(root->tbo.base.resv, 1); + if (r) { + pr_debug("failed %d to reserve fence slot\n", r); + goto error_unlock; + } + r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr, addr, flags, value, NULL, NULL, NULL); @@ -3371,7 +3377,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, error_unlock: amdgpu_bo_unreserve(root); if (r < 0) - DRM_ERROR("Can't handle page fault (%ld)\n", r); + DRM_ERROR("Can't handle page fault (%d)\n", r); error_unref: amdgpu_bo_unref(); -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 26/44] drm/amdkfd: add svm_bo eviction mechanism support
svm_bo eviction mechanism is different from regular BOs. Every SVM_BO created contains one eviction fence and one worker item for eviction process. SVM_BOs can be attached to one or more pranges. For SVM_BO eviction mechanism, TTM will start to call enable_signal callback for every SVM_BO until VRAM space is available. Here, all the ttm_evict calls are synchronous, this guarantees that each eviction has completed and the fence has signaled before it returns. Signed-off-by: Alex Sierra Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 186 +-- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 13 +- 2 files changed, 153 insertions(+), 46 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c791d91cb45d..3a7b842b362c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -34,6 +34,7 @@ #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 +static void svm_range_evict_svm_bo_worker(struct work_struct *work); static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, @@ -332,7 +333,15 @@ static void svm_range_bo_release(struct kref *kref) spin_lock(_bo->list_lock); } spin_unlock(_bo->list_lock); - + if (!dma_fence_is_signaled(_bo->eviction_fence->base)) { + /* We're not in the eviction worker. +* Signal the fence and synchronize with any +* pending eviction work. 
+*/ + dma_fence_signal(_bo->eviction_fence->base); + cancel_work_sync(_bo->eviction_work); + } + dma_fence_put(_bo->eviction_fence->base); amdgpu_bo_unref(_bo->bo); kfree(svm_bo); } @@ -345,6 +354,61 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo) kref_put(_bo->kref, svm_range_bo_release); } +static bool svm_range_validate_svm_bo(struct svm_range *prange) +{ + mutex_lock(>lock); + if (!prange->svm_bo) { + mutex_unlock(>lock); + return false; + } + if (prange->ttm_res) { + /* We still have a reference, all is well */ + mutex_unlock(>lock); + return true; + } + if (svm_bo_ref_unless_zero(prange->svm_bo)) { + if (READ_ONCE(prange->svm_bo->evicting)) { + struct dma_fence *f; + struct svm_range_bo *svm_bo; + /* The BO is getting evicted, +* we need to get a new one +*/ + mutex_unlock(>lock); + svm_bo = prange->svm_bo; + f = dma_fence_get(_bo->eviction_fence->base); + svm_range_bo_unref(prange->svm_bo); + /* wait for the fence to avoid long spin-loop +* at list_empty_careful +*/ + dma_fence_wait(f, false); + dma_fence_put(f); + } else { + /* The BO was still around and we got +* a new reference to it +*/ + mutex_unlock(>lock); + pr_debug("reuse old bo svms 0x%p [0x%lx 0x%lx]\n", +prange->svms, prange->start, prange->last); + + prange->ttm_res = >svm_bo->bo->tbo.mem; + return true; + } + + } else { + mutex_unlock(>lock); + } + + /* We need a new svm_bo. Spin-loop to wait for concurrent +* svm_range_bo_release to finish removing this range from +* its range list. After this, it is safe to reuse the +* svm_bo pointer and svm_bo_list head. 
+*/ + while (!list_empty_careful(>svm_bo_list)) + ; + + return false; +} + static struct svm_range_bo *svm_range_bo_new(void) { struct svm_range_bo *svm_bo; @@ -364,72 +428,56 @@ int svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, bool clear) { - struct amdkfd_process_info *process_info; struct amdgpu_bo_param bp; struct svm_range_bo *svm_bo; struct amdgpu_bo_user *ubo; struct amdgpu_bo *bo; struct kfd_process *p; + struct mm_struct *mm; int r; - pr_debug("[0x%lx 0x%lx]\n", prange->start, prange->last); - mutex_lock(>lock); - if (prange->svm_bo) { - if (prange->ttm_res) { - /* We still have a reference, all is well */ - mutex_unlock(>lock); - return 0; - } - if (svm_bo_ref_unless_zero(prange->svm_bo)) { -
[PATCH 16/44] drm/amdkfd: support xgmi same hive mapping
From: Philip Yang amdgpu_gmc_get_vm_pte use bo_va->is_xgmi same hive information to set pte flags to update GPU mapping. Add local structure variable bo_va, and update bo_va.is_xgmi, pass it to mapping->bo_va while mapping to GPU. Assuming xgmi pstate is hi after boot. Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 28 +--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 9b1c5aa86f4a..de5777330d23 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -26,6 +26,8 @@ #include "amdgpu_object.h" #include "amdgpu_vm.h" #include "amdgpu_mn.h" +#include "amdgpu.h" +#include "amdgpu_xgmi.h" #include "kfd_priv.h" #include "kfd_svm.h" @@ -1026,10 +1028,12 @@ static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo) static int svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct svm_range *prange, dma_addr_t *pages_addr, -bool reserve_vm, struct dma_fence **fence) +bool reserve_vm, struct amdgpu_device *bo_adev, +struct dma_fence **fence) { struct ttm_validate_buffer tv[2]; struct ww_acquire_ctx ticket; + struct amdgpu_bo_va bo_va; struct list_head list; uint64_t pte_flags; int r = 0; @@ -1062,13 +1066,18 @@ svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, } } + if (prange->svm_bo && prange->ttm_res) { + bo_va.is_xgmi = amdgpu_xgmi_same_hive(adev, bo_adev); + prange->mapping.bo_va = _va; + } + prange->mapping.start = prange->start; prange->mapping.last = prange->last; prange->mapping.offset = prange->offset; pte_flags = svm_range_get_pte_flags(adev, prange); prange->mapping.flags = pte_flags; - r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false, NULL, + r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false, NULL, prange->mapping.start, prange->mapping.last, pte_flags, prange->mapping.offset, @@ -1092,6 +1101,7 @@ 
svm_range_map_to_gpu(struct amdgpu_device *adev, struct amdgpu_vm *vm, *fence = dma_fence_get(vm->last_update); unreserve_out: + prange->mapping.bo_va = NULL; if (reserve_vm) ttm_eu_backoff_reservation(, ); out: @@ -1102,6 +1112,7 @@ static int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm) { DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); struct kfd_process_device *pdd; + struct amdgpu_device *bo_adev; struct amdgpu_device *adev; struct kfd_process *p; struct kfd_dev *dev; @@ -1109,6 +1120,11 @@ static int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm) uint32_t gpuidx; int r = 0; + if (prange->svm_bo && prange->ttm_res) + bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + else + bo_adev = NULL; + bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, MAX_GPU_INSTANCE); p = container_of(prange->svms, struct kfd_process, svms); @@ -1125,6 +1141,12 @@ static int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm) return -EINVAL; adev = (struct amdgpu_device *)dev->kgd; + if (bo_adev && adev != bo_adev && + !amdgpu_xgmi_same_hive(adev, bo_adev)) { + pr_debug("cannot map to device idx %d\n", gpuidx); + continue; + } + r = svm_range_dma_map(adev->dev, >dma_addr[gpuidx], prange->pages_addr, prange->npages); if (r) @@ -1132,7 +1154,7 @@ static int svm_range_map_to_gpus(struct svm_range *prange, bool reserve_vm) r = svm_range_map_to_gpu(adev, pdd->vm, prange, prange->dma_addr[gpuidx], reserve_vm, -); +bo_adev, ); if (r) break; -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 00/44] Add HMM-based SVM memory manager to KFD v2
Since the last patch series I sent on Jan 6 a lot has changed. Patches 1-33 are the cleaned-up version, rebased on amd-staging-drm-next 5.11 from about a week ago. The remaining 11 patches are current work-in-progress with further cleanup and fixes. MMU notifiers and CPU page faults now can split ranges and update our range data structures without taking heavy locks by doing some of the critical work in a deferred work handler. This includes updating MMU notifiers and the SVM range interval tree. In the meantime, new ranges can live as children of their parent ranges until the deferred work handler consolidates them in the main interval tree. We also added proper DMA mapping of system memory pages. Current work in progress is cleaning up all the locking, simplifying our code and data structures and resolving a few known bugs. This series and the corresponding ROCm Thunk and KFDTest changes are also available on github: https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/tree/fxkamd/hmm-wip https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip An updated Thunk Alex Sierra (10): drm/amdgpu: replace per_device_list by array drm/amdkfd: helper to convert gpu id and idx drm/amdkfd: add xnack enabled flag to kfd_process drm/amdkfd: add ioctl to configure and query xnack retries drm/amdgpu: enable 48-bit IH timestamp counter drm/amdkfd: SVM API call to restore page tables drm/amdkfd: add svm_bo reference for eviction fence drm/amdgpu: add param bit flag to create SVM BOs drm/amdgpu: svm bo enable_signal call condition drm/amdgpu: add svm_bo eviction to enable_signal cb Felix Kuehling (22): drm/amdkfd: map svm range to GPUs drm/amdkfd: svm range eviction and restore drm/amdkfd: validate vram svm range from TTM drm/amdkfd: HMM migrate ram to vram drm/amdkfd: HMM migrate vram to ram drm/amdkfd: invalidate tables on page retry fault drm/amdkfd: page table restore through svm API drm/amdkfd: add svm_bo eviction mechanism support drm/amdkfd: refine
migration policy with xnack on drm/amdkfd: add svm range validate timestamp drm/amdkfd: multiple gpu migrate vram to vram drm/amdkfd: Fix dma unmapping drm/amdkfd: Call mutex_destroy drm/amdkfd: Fix spurious restore failures drm/amdkfd: Fix svm_bo_list locking in eviction worker drm/amdkfd: Simplify split_by_granularity drm/amdkfd: Point out several race conditions drm/amdkfd: Return pdd from kfd_process_device_from_gduid drm/amdkfd: Remove broken deferred mapping drm/amdkfd: Allow invalid pages in migration.src drm/amdkfd: Correct locking during migration and mapping drm/amdkfd: Nested locking and invalidation of child ranges Philip Yang (12): drm/amdkfd: add svm ioctl API drm/amdkfd: register svm range drm/amdkfd: add svm ioctl GET_ATTR op drm/amdgpu: add common HMM get pages function drm/amdkfd: validate svm range system memory drm/amdkfd: deregister svm range drm/amdgpu: export vm update mapping interface drm/amdkfd: register HMM device private zone drm/amdkfd: support xgmi same hive mapping drm/amdkfd: copy memory through gart table drm/amdgpu: reserve fence slot to update page table drm/amdkfd: Add SVM API support capability bits drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|4 + drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|4 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 16 +- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 13 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c| 83 + drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h|7 + drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|4 + drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 90 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 48 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 11 + drivers/gpu/drm/amd/amdgpu/vega10_ih.c|1 + drivers/gpu/drm/amd/amdkfd/Kconfig|1 + drivers/gpu/drm/amd/amdkfd/Makefile |4 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 173 +- drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c |4 + drivers/gpu/drm/amd/amdkfd/kfd_iommu.c|8 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 922 ++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 59 + 
drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 54 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 191 +- .../amd/amdkfd/kfd_process_queue_manager.c|6 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2865 + drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 175 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c |6 + drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 10 +- include/uapi/linux/kfd_ioctl.h| 171 +- 26 files changed, 4681 insertions(+), 249 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.c create mode 100644
[PATCH 05/44] drm/amdkfd: add svm ioctl GET_ATTR op
From: Philip Yang Get the intersection of attributes over all memory in the given range Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 164 +++ 1 file changed, 164 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 22f942bb2b0c..e57103a9025e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -769,6 +769,167 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, return r; } +static int +svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size, + uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) +{ + DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); + DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); + bool get_preferred_loc = false; + bool get_prefetch_loc = false; + bool get_granularity = false; + bool get_accessible = false; + bool get_flags = false; + uint64_t last = start + size - 1UL; + struct mm_struct *mm = current->mm; + uint8_t granularity = 0xff; + struct interval_tree_node *node; + struct svm_range_list *svms; + struct svm_range *prange; + uint32_t prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + uint32_t location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + uint32_t flags = 0x; + int gpuidx; + uint32_t i; + + pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", >svms, start, +start + size - 1, nattr); + + mmap_read_lock(mm); + if (!svm_range_is_valid(mm, start, size)) { + pr_debug("invalid range\n"); + mmap_read_unlock(mm); + return -EINVAL; + } + mmap_read_unlock(mm); + + for (i = 0; i < nattr; i++) { + switch (attrs[i].type) { + case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC: + get_preferred_loc = true; + break; + case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC: + get_prefetch_loc = true; + break; + case KFD_IOCTL_SVM_ATTR_ACCESS: + get_accessible = true; + break; + case KFD_IOCTL_SVM_ATTR_SET_FLAGS: + get_flags = true; + break; + case 
KFD_IOCTL_SVM_ATTR_GRANULARITY: + get_granularity = true; + break; + case KFD_IOCTL_SVM_ATTR_CLR_FLAGS: + case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: + case KFD_IOCTL_SVM_ATTR_NO_ACCESS: + fallthrough; + default: + pr_debug("get invalid attr type 0x%x\n", attrs[i].type); + return -EINVAL; + } + } + + svms = >svms; + + mutex_lock(>lock); + + node = interval_tree_iter_first(>objects, start, last); + if (!node) { + pr_debug("range attrs not found return default values\n"); + svm_range_set_default_attributes(, _loc, +, ); + /* TODO: Automatically create SVM ranges and map them on +* GPU page faults + if (p->xnack_enabled) + bitmap_fill(bitmap_access, MAX_GPU_INSTANCE); +*/ + + goto fill_values; + } + bitmap_fill(bitmap_access, MAX_GPU_INSTANCE); + bitmap_fill(bitmap_aip, MAX_GPU_INSTANCE); + + while (node) { + struct interval_tree_node *next; + + prange = container_of(node, struct svm_range, it_node); + next = interval_tree_iter_next(node, start, last); + + if (get_preferred_loc) { + if (prange->preferred_loc == + KFD_IOCTL_SVM_LOCATION_UNDEFINED || + (location != KFD_IOCTL_SVM_LOCATION_UNDEFINED && +location != prange->preferred_loc)) { + location = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + get_preferred_loc = false; + } else { + location = prange->preferred_loc; + } + } + if (get_prefetch_loc) { + if (prange->prefetch_loc == + KFD_IOCTL_SVM_LOCATION_UNDEFINED || + (prefetch_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED && +prefetch_loc != prange->prefetch_loc)) { + prefetch_loc = KFD_IOCTL_SVM_LOCATION_UNDEFINED; + get_prefetch_loc = false; + } else {
[PATCH 08/44] drm/amdkfd: deregister svm range
From: Philip Yang When application explicitly call unmap or unmap from mmput when application exit, driver will receive MMU_NOTIFY_UNMAP event to remove svm range from process svms object tree and list first, unmap from GPUs (in the following patch). Split the svm ranges to handle partial unmapping of svm ranges. To avoid deadlocks, updating MMU notifiers, range lists and interval trees is done in a deferred worker. New child ranges are attached to their parent range's child_list until the worker can update the svm_range_list. svm_range_set_attr flushes deferred work and takes the mmap_write_lock to guarantee that it has an up-to-date svm_range_list. Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 285 +- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 18 ++ 3 files changed, 305 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index f547e1282d69..4101f5341ec9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -735,6 +735,9 @@ struct svm_range_list { struct mutexlock; struct rb_root_cached objects; struct list_headlist; + struct work_struct deferred_list_work; + struct list_headdeferred_range_list; + spinlock_t deferred_list_lock; }; /* Process data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 6024caf7373f..e23171ac866a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -136,6 +136,8 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(>update_list); INIT_LIST_HEAD(>remove_list); INIT_LIST_HEAD(>insert_list); + INIT_LIST_HEAD(>deferred_list); + INIT_LIST_HEAD(>child_list); svm_range_set_default_attributes(>preferred_loc, >prefetch_loc, >granularity, >flags); @@ -512,6 +514,41 @@ svm_range_split_head(struct 
svm_range *prange, struct svm_range *new, return r; } +void svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, +struct svm_range *pchild, enum svm_work_list_ops op) +{ + pr_debug("add child 0x%p [0x%lx 0x%lx] to prange 0x%p child list %d\n", +pchild, pchild->start, pchild->last, prange, op); + + pchild->work_item.mm = mm; + pchild->work_item.op = op; + list_add_tail(>child_list, >child_list); +} + +/** + * svm_range_list_lock_and_flush_work - flush pending deferred work + * + * @svms: the svm range list + * @mm: the mm structure + * + * Context: Returns with mmap write lock held, pending deferred work flushed + * + */ +static void +svm_range_list_lock_and_flush_work(struct svm_range_list *svms, + struct mm_struct *mm) +{ +retry_flush_work: + flush_work(>deferred_list_work); + mmap_write_lock(mm); + + if (list_empty(>deferred_range_list)) + return; + mmap_write_unlock(mm); + pr_debug("retry flush\n"); + goto retry_flush_work; +} + struct svm_range *svm_range_clone(struct svm_range *old) { struct svm_range *new; @@ -664,21 +701,264 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, return r; } +static void +svm_range_update_notifier_and_interval_tree(struct mm_struct *mm, + struct svm_range *prange) +{ + unsigned long start; + unsigned long last; + + start = prange->notifier.interval_tree.start >> PAGE_SHIFT; + last = prange->notifier.interval_tree.last >> PAGE_SHIFT; + + if (prange->start == start && prange->last == last) + return; + + pr_debug("up notifier 0x%p prange 0x%p [0x%lx 0x%lx] [0x%lx 0x%lx]\n", + prange->svms, prange, start, last, prange->start, + prange->last); + + if (start != 0 && last != 0) { + interval_tree_remove(>it_node, >svms->objects); + svm_range_remove_notifier(prange); + } + prange->it_node.start = prange->start; + prange->it_node.last = prange->last; + + interval_tree_insert(>it_node, >svms->objects); + svm_range_add_notifier_locked(mm, prange); +} + +static void +svm_range_handle_list_op(struct 
svm_range_list *svms, struct svm_range *prange) +{ + struct mm_struct *mm = prange->work_item.mm; + + switch (prange->work_item.op) { + case SVM_OP_NULL: + pr_debug("NULL OP 0x%p prange 0x%p [0x%lx 0x%lx]\n", +svms, prange, prange->start, prange->last); +
[PATCH 09/44] drm/amdgpu: export vm update mapping interface
From: Philip Yang It will be used by kfd to map svm range to GPU, because svm range does not have amdgpu_bo and bo_va, cannot use amdgpu_bo_update interface, use amdgpu vm update interface directly. Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 11 +++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9268db1172bd..0e9ae5f91c7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1592,15 +1592,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * Returns: * 0 for success, -EINVAL for failure. */ -static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct drm_mm_node *nodes, - dma_addr_t *pages_addr, - struct dma_fence **fence) +int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev, + struct amdgpu_vm *vm, bool immediate, + bool unlocked, struct dma_resv *resv, + uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, + struct drm_mm_node *nodes, + dma_addr_t *pages_addr, + struct dma_fence **fence) { struct amdgpu_vm_update_params params; enum amdgpu_sync_mode sync_mode; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 976a12e5a8b9..848e175e99ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -366,6 +366,8 @@ struct amdgpu_vm_manager { spinlock_t pasid_lock; }; +struct amdgpu_bo_va_mapping; + #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, 
pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) @@ -397,6 +399,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev, + struct amdgpu_vm *vm, bool immediate, + bool unlocked, struct dma_resv *resv, + uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, + struct drm_mm_node *nodes, + dma_addr_t *pages_addr, + struct dma_fence **fence); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 11/44] drm/amdkfd: svm range eviction and restore
HMM interval notifier callback notify CPU page table will be updated, stop process queues if the updated address belongs to svm range registered in process svms objects tree. Scheduled restore work to update GPU page table using new pages address in the updated svm range. The restore worker flushes any deferred work to make sure it restores an up-to-date svm_range_list. Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 2 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 145 ++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 2 + 4 files changed, 149 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 4101f5341ec9..7d70af26b5c7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -738,6 +738,8 @@ struct svm_range_list { struct work_struct deferred_list_work; struct list_headdeferred_range_list; spinlock_t deferred_list_lock; + atomic_tevicted_ranges; + struct delayed_work restore_work; }; /* Process data */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 4d7a67141190..321895d7555a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1064,6 +1064,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, cancel_delayed_work_sync(>eviction_work); cancel_delayed_work_sync(>restore_work); + cancel_delayed_work_sync(>svms.restore_work); mutex_lock(>mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 1244ba380292..55828d6fbea9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -21,6 +21,7 @@ */ #include +#include #include "amdgpu_sync.h" #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -28,6 +29,8 @@ #include "kfd_priv.h" #include "kfd_svm.h" +#define 
AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 + static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, const struct mmu_notifier_range *range, @@ -226,6 +229,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(>insert_list); INIT_LIST_HEAD(>deferred_list); INIT_LIST_HEAD(>child_list); + atomic_set(>invalid, 0); svm_range_set_default_attributes(>preferred_loc, >prefetch_loc, >granularity, >flags); @@ -282,6 +286,9 @@ svm_range_validate(struct mm_struct *mm, struct svm_range *prange) r = svm_range_validate_ram(mm, prange); + pr_debug("svms 0x%p [0x%lx 0x%lx] ret %d invalid %d\n", prange->svms, +prange->start, prange->last, r, atomic_read(>invalid)); + return r; } @@ -886,6 +893,134 @@ svm_range_list_lock_and_flush_work(struct svm_range_list *svms, goto retry_flush_work; } +static void svm_range_restore_work(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct amdkfd_process_info *process_info; + struct svm_range_list *svms; + struct svm_range *prange; + struct kfd_process *p; + struct mm_struct *mm; + int evicted_ranges; + int invalid; + int r; + + svms = container_of(dwork, struct svm_range_list, restore_work); + evicted_ranges = atomic_read(>evicted_ranges); + if (!evicted_ranges) + return; + + pr_debug("restore svm ranges\n"); + + /* kfd_process_notifier_release destroys this worker thread. So during +* the lifetime of this thread, kfd_process and mm will be valid. 
+*/ + p = container_of(svms, struct kfd_process, svms); + process_info = p->kgd_process_info; + mm = p->mm; + if (!mm) + return; + + mutex_lock(_info->lock); + svm_range_list_lock_and_flush_work(svms, mm); + mutex_lock(>lock); + + list_for_each_entry(prange, >list, list) { + invalid = atomic_read(>invalid); + if (!invalid) + continue; + + pr_debug("restoring svms 0x%p prange 0x%p [0x%lx %lx] inv %d\n", +prange->svms, prange, prange->start, prange->last, +invalid); + + r = svm_range_validate(mm, prange); + if (r) { + pr_debug("failed %d to validate [0x%lx 0x%lx]\n", r, +prange->start, prange->last); + + goto unlock_out; + } + + r =
[PATCH 07/44] drm/amdkfd: validate svm range system memory
From: Philip Yang Use HMM to get system memory pages address, which will be used to map to GPUs or migrate to vram. Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 103 ++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 ++ 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e57103a9025e..6024caf7373f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -28,6 +28,15 @@ #include "kfd_priv.h" #include "kfd_svm.h" +static bool +svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long cur_seq); + +static const struct mmu_interval_notifier_ops svm_range_mn_ops = { + .invalidate = svm_range_cpu_invalidate_pagetables, +}; + /** * svm_range_unlink - unlink svm_range from lists and interval tree * @prange: svm range structure to be removed @@ -46,6 +55,18 @@ static void svm_range_unlink(struct svm_range *prange) interval_tree_remove(>it_node, >svms->objects); } +static void +svm_range_add_notifier_locked(struct mm_struct *mm, struct svm_range *prange) +{ + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, +prange, prange->start, prange->last); + + mmu_interval_notifier_insert_locked(>notifier, mm, +prange->start << PAGE_SHIFT, +prange->npages << PAGE_SHIFT, +_range_mn_ops); +} + /** * svm_range_add_to_svms - add svm range to svms * @prange: svm range structure to be added @@ -65,6 +86,18 @@ static void svm_range_add_to_svms(struct svm_range *prange) interval_tree_insert(>it_node, >svms->objects); } +static void svm_range_remove_notifier(struct svm_range *prange) +{ + pr_debug("remove notifier svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", +prange->svms, prange, +prange->notifier.interval_tree.start >> PAGE_SHIFT, +prange->notifier.interval_tree.last >> PAGE_SHIFT); + + if (prange->notifier.interval_tree.start != 0 && + 
prange->notifier.interval_tree.last != 0) + mmu_interval_notifier_remove(>notifier); +} + static void svm_range_free(struct svm_range *prange) { pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, @@ -112,6 +145,56 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, return prange; } +/** + * svm_range_validate_ram - get system memory pages of svm range + * + * @mm: the mm_struct of process + * @prange: the range struct + * + * After mapping system memory to GPU, system memory maybe invalidated anytime + * during application running, we use HMM callback to sync GPU with CPU page + * table update, so we don't need use lock to prevent CPU invalidation and check + * hmm_range_get_pages_done return value. + * + * Return: + * 0 - OK, otherwise error code + */ +static int +svm_range_validate_ram(struct mm_struct *mm, struct svm_range *prange) +{ + int r; + + r = amdgpu_hmm_range_get_pages(>notifier, mm, NULL, + prange->start << PAGE_SHIFT, + prange->npages, >hmm_range, + false, true); + if (r) { + pr_debug("failed %d to get svm range pages\n", r); + return r; + } + + kvfree(prange->pages_addr); + prange->pages_addr = prange->hmm_range->hmm_pfns; + prange->hmm_range->hmm_pfns = NULL; + + amdgpu_hmm_range_get_pages_done(prange->hmm_range); + prange->hmm_range = NULL; + + return 0; +} + +static int +svm_range_validate(struct mm_struct *mm, struct svm_range *prange) +{ + int r = 0; + + pr_debug("actual loc 0x%x\n", prange->actual_loc); + + r = svm_range_validate_ram(mm, prange); + + return r; +} + static int svm_range_check_attr(struct kfd_process *p, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) @@ -581,6 +664,18 @@ svm_range_handle_overlap(struct svm_range_list *svms, struct svm_range *new, return r; } +/** + * svm_range_cpu_invalidate_pagetables - interval notifier callback + * + */ +static bool +svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, + const struct mmu_notifier_range *range, + unsigned long 
cur_seq) +{ + return true; +} + void svm_range_list_fini(struct kfd_process *p) { pr_debug("pasid 0x%x svms 0x%p\n", p->pasid, >svms); @@ -732,6 +827,7 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, /* Apply changes as a
[PATCH 10/44] drm/amdkfd: map svm range to GPUs
Use amdgpu_vm_bo_update_mapping to update GPU page table to map or unmap svm range system memory pages address to GPUs. Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 395 +-- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 4 + 2 files changed, 374 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e23171ac866a..1244ba380292 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -98,11 +98,99 @@ static void svm_range_remove_notifier(struct svm_range *prange) mmu_interval_notifier_remove(>notifier); } +static int +svm_range_dma_map(struct device *dev, dma_addr_t **dma_addr, + unsigned long *pages_addr, uint64_t npages) +{ + enum dma_data_direction dir = DMA_BIDIRECTIONAL; + dma_addr_t *addr = *dma_addr; + struct page *page; + int i, r; + + if (!pages_addr) + return 0; + + if (!addr) { + addr = kvmalloc_array(npages, sizeof(*addr), + GFP_KERNEL | __GFP_ZERO); + if (!addr) + return -ENOMEM; + *dma_addr = addr; + } + + for (i = 0; i < npages; i++) { + if (WARN_ONCE(addr[i] && !dma_mapping_error(dev, addr[i]), + "leaking dma mapping\n")) + dma_unmap_page(dev, addr[i], PAGE_SIZE, dir); + + page = hmm_pfn_to_page(pages_addr[i]); + addr[i] = dma_map_page(dev, page, 0, PAGE_SIZE, dir); + r = dma_mapping_error(dev, addr[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + return r; + } + pr_debug("dma mapping 0x%llx for page addr 0x%lx\n", +addr[i] >> PAGE_SHIFT, page_to_pfn(page)); + } + return 0; +} + +void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, +unsigned long offset, unsigned long npages) +{ + enum dma_data_direction dir = DMA_BIDIRECTIONAL; + int i; + + if (!dma_addr) + return; + + for (i = offset; i < offset + npages; i++) { + if (!dma_addr[i] || dma_mapping_error(dev, dma_addr[i])) + continue; + pr_debug("dma unmapping 0x%llx\n", dma_addr[i] >> 
PAGE_SHIFT); + dma_unmap_page(dev, dma_addr[i], PAGE_SIZE, dir); + dma_addr[i] = 0; + } +} + +static void svm_range_free_dma_mappings(struct svm_range *prange) +{ + DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); + struct kfd_dev *kfd_dev; + dma_addr_t *dma_addr; + struct device *dev; + struct kfd_process *p; + uint32_t gpuidx; + int r; + + p = container_of(prange->svms, struct kfd_process, svms); + bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, + MAX_GPU_INSTANCE); + + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + dma_addr = prange->dma_addr[gpuidx]; + if (!dma_addr) + continue; + + r = kfd_process_device_from_gpuidx(p, gpuidx, _dev); + if (r) { + pr_debug("failed to find device idx %d\n", gpuidx); + return; + } + dev = _dev->pdev->dev; + svm_range_dma_unmap(dev, dma_addr, 0, prange->npages); + kvfree(dma_addr); + prange->dma_addr[gpuidx] = NULL; + } +} + static void svm_range_free(struct svm_range *prange) { pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); + svm_range_free_dma_mappings(prange); kvfree(prange->pages_addr); kfree(prange); } @@ -342,41 +430,62 @@ svm_range_is_same_attrs(struct svm_range *old, struct svm_range *new) } static int -svm_range_split_pages(struct svm_range *new, struct svm_range *old, - uint64_t start, uint64_t last) +svm_range_split_array(void *ppnew, void *ppold, size_t size, + uint64_t old_start, uint64_t old_n, + uint64_t new_start, uint64_t new_n) { - unsigned long old_start; - unsigned long *pages_addr; + unsigned char *new, *old, *pold; uint64_t d; - old_start = old->start; - new->pages_addr = kvmalloc_array(new->npages, -sizeof(*new->pages_addr), -GFP_KERNEL | __GFP_ZERO); - if (!new->pages_addr) - return -ENOMEM; + if (!ppold) + return 0; + pold = *(unsigned char **)ppold; + if (!pold) +
[PATCH 14/44] drm/amdkfd: register HMM device private zone
From: Philip Yang Register vram memory as MEMORY_DEVICE_PRIVATE type resource, to allocate vram backing pages for page migration. Signed-off-by: Philip Yang Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 + drivers/gpu/drm/amd/amdkfd/Kconfig | 1 + drivers/gpu/drm/amd/amdkfd/Makefile| 3 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 103 + drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 48 ++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 + 6 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 5f6696a3c778..eb40b3879a0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -30,6 +30,7 @@ #include #include "amdgpu_xgmi.h" #include +#include "kfd_migrate.h" /* Total memory size in system memory and all GPU VRAM. 
Used to * estimate worst case amount of memory to reserve for page tables @@ -167,12 +168,15 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev), _resources); + if (adev->kfd.init_complete) + svm_migrate_init(adev); } } void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) { if (adev->kfd.dev) { + svm_migrate_fini(adev); kgd2kfd_device_exit(adev->kfd.dev); adev->kfd.dev = NULL; } diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index f02c938f75da..7880fc101a3b 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -8,6 +8,7 @@ config HSA_AMD depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64) imply AMD_IOMMU_V2 if X86_64 select HMM_MIRROR + select DEVICE_PRIVATE select MMU_NOTIFIER select DRM_AMDGPU_USERPTR help diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 387ce0217d35..a93301dbc464 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -55,7 +55,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ $(AMDKFD_PATH)/kfd_smi_events.o \ $(AMDKFD_PATH)/kfd_crat.o \ - $(AMDKFD_PATH)/kfd_svm.o + $(AMDKFD_PATH)/kfd_svm.o \ + $(AMDKFD_PATH)/kfd_migrate.o ifneq ($(CONFIG_AMD_IOMMU_V2),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c new file mode 100644 index ..4bb39c562665 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -0,0 +1,103 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include "amdgpu_sync.h" +#include "amdgpu_object.h" +#include "amdgpu_vm.h" +#include "amdgpu_mn.h" +#include "kfd_priv.h" +#include "kfd_svm.h" +#include "kfd_migrate.h" + +static void svm_migrate_page_free(struct page *page) +{ +} + +/** + * svm_migrate_to_ram - CPU page fault handler + * @vmf: CPU vm fault vma, address + * + * Context: vm fault handler, mm->mmap_sem is taken + * + * Return: + * 0 - OK + * VM_FAULT_SIGBUS - notice application to have SIGBUS page fault + */ +static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + +static const struct dev_pagemap_ops svm_migrate_pgmap_ops = { + .page_free = svm_migrate_page_free, +
[PATCH 04/44] drm/amdkfd: register svm range
From: Philip Yang svm range structure stores the range start address, size, attributes, flags, prefetch location and gpu bitmap which indicates which GPU this range maps to. Same virtual address is shared by CPU and GPUs. Process has svm range list which uses both interval tree and list to store all svm ranges registered by the process. Interval tree is used by GPU vm fault handler and CPU page fault handler to get svm range structure from the specific address. List is used to scan all ranges in eviction restore work. No overlap range interval [start, last] exist in svms object interval tree. If process registers new range which has overlap with old range, the old range split into 2 ranges depending on the overlap happens at head or tail part of old range. Apply attributes preferred location, prefetch location, mapping flags, migration granularity to svm range, store mapping gpu index into bitmap. Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/Makefile | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 17 + drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 8 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 791 +++ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 87 +++ 6 files changed, 914 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.c create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_svm.h diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index e1e4115dcf78..387ce0217d35 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -54,7 +54,8 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ $(AMDKFD_PATH)/kfd_smi_events.o \ - $(AMDKFD_PATH)/kfd_crat.o + $(AMDKFD_PATH)/kfd_crat.o \ + $(AMDKFD_PATH)/kfd_svm.o ifneq ($(CONFIG_AMD_IOMMU_V2),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o diff --git 
a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index dbc824cc6b32..9511826ac8ae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -38,6 +38,7 @@ #include "kfd_priv.h" #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" +#include "kfd_svm.h" #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" @@ -1744,11 +1745,27 @@ static int kfd_ioctl_smi_events(struct file *filep, static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) { + struct kfd_ioctl_svm_args *args = data; int r = 0; if (p->svm_disabled) return -EPERM; + pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n", +args->start_addr, args->size, args->op, args->nattr); + + if ((args->start_addr & ~PAGE_MASK) || (args->size & ~PAGE_MASK)) + return -EINVAL; + if (!args->start_addr || !args->size) + return -EINVAL; + + mutex_lock(>mutex); + + r = svm_ioctl(p, args->op, args->start_addr, args->size, args->nattr, + args->attrs); + + mutex_unlock(>mutex); + return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 18fc2ccd1a77..f547e1282d69 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -731,6 +731,12 @@ struct kfd_process_device { #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) +struct svm_range_list { + struct mutexlock; + struct rb_root_cached objects; + struct list_headlist; +}; + /* Process data */ struct kfd_process { /* @@ -810,6 +816,8 @@ struct kfd_process { struct kobject *kobj_queues; struct attribute attr_pasid; + /* shared virtual memory registered by this process */ + struct svm_range_list svms; bool svm_disabled; }; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 89e7c125d334..4d7a67141190 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -35,6 
+35,7 @@ #include #include "amdgpu_amdkfd.h" #include "amdgpu.h" +#include "kfd_svm.h" struct mm_struct; @@ -42,6 +43,7 @@ struct mm_struct; #include "kfd_device_queue_manager.h" #include "kfd_dbgmgr.h" #include "kfd_iommu.h" +#include "kfd_svm.h" /* * List of struct kfd_process (field kfd_process). @@ -1003,6 +1005,7 @@ static void kfd_process_wq_release(struct work_struct *work) kfd_iommu_unbind_process(p); kfd_process_free_outstanding_kfd_bos(p); + svm_range_list_fini(p); kfd_process_destroy_pdds(p);
[PATCH 02/44] drm/amdkfd: helper to convert gpu id and idx
From: Alex Sierra svm range uses gpu bitmap to store which GPU svm range maps to. Application pass driver gpu id to specify GPU, the helper is needed to convert gpu id to gpu bitmap idx. Access through kfd_process_device pointers array from kfd_process. Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 5 drivers/gpu/drm/amd/amdkfd/kfd_process.c | 30 2 files changed, 35 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 33e56db14327..99b4624ef4c7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -842,6 +842,11 @@ struct kfd_process *kfd_create_process(struct file *filep); struct kfd_process *kfd_get_process(const struct task_struct *); struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); +int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, + uint32_t gpu_idx, uint32_t *gpuid); +int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); +int kfd_process_device_from_gpuidx(struct kfd_process *p, + uint32_t gpu_idx, struct kfd_dev **gpu); void kfd_unref_process(struct kfd_process *p); int kfd_process_evict_queues(struct kfd_process *p); int kfd_process_restore_queues(struct kfd_process *p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d4241d29ea94..028ebb0deddd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1607,6 +1607,36 @@ int kfd_process_restore_queues(struct kfd_process *p) return ret; } +int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, + uint32_t gpu_idx, uint32_t *gpuid) +{ + if (gpu_idx < p->n_pdds) { + *gpuid = p->pdds[gpu_idx]->dev->id; + return 0; + } + return -EINVAL; +} + +int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) +{ + int i; + + for (i = 0; i < 
p->n_pdds; i++) + if (p->pdds[i] && gpu_id == p->pdds[i]->dev->id) + return i; + return -EINVAL; +} + +int kfd_process_device_from_gpuidx(struct kfd_process *p, + uint32_t gpu_idx, struct kfd_dev **gpu) +{ + if (gpu_idx < p->n_pdds) { + *gpu = p->pdds[gpu_idx]->dev; + return 0; + } + return -EINVAL; +} + static void evict_process_worker(struct work_struct *work) { int ret; -- 2.31.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 01/44] drm/amdgpu: replace per_device_list by array
From: Alex Sierra Remove per_device_list from kfd_process and replace it with a kfd_process_device pointers array of MAX_GPU_INSTANCES size. This helps to manage the kfd_process_devices binded to a specific kfd_process. Also, functions used by kfd_chardev to iterate over the list were removed, since they are not valid anymore. Instead, it was replaced by a local loop iterating the array. Signed-off-by: Alex Sierra Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 116 -- drivers/gpu/drm/amd/amdkfd/kfd_iommu.c| 8 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 20 +-- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 108 .../amd/amdkfd/kfd_process_queue_manager.c| 6 +- 5 files changed, 111 insertions(+), 147 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 6802c616e10e..43de260b2230 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -870,52 +870,47 @@ static int kfd_ioctl_get_process_apertures(struct file *filp, { struct kfd_ioctl_get_process_apertures_args *args = data; struct kfd_process_device_apertures *pAperture; - struct kfd_process_device *pdd; + int i; dev_dbg(kfd_device, "get apertures for PASID 0x%x", p->pasid); args->num_of_nodes = 0; mutex_lock(>mutex); + /* Run over all pdd of the process */ + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + pAperture = + >process_apertures[args->num_of_nodes]; + pAperture->gpu_id = pdd->dev->id; + pAperture->lds_base = pdd->lds_base; + pAperture->lds_limit = pdd->lds_limit; + pAperture->gpuvm_base = pdd->gpuvm_base; + pAperture->gpuvm_limit = pdd->gpuvm_limit; + pAperture->scratch_base = pdd->scratch_base; + pAperture->scratch_limit = pdd->scratch_limit; - /*if the process-device list isn't empty*/ - if (kfd_has_process_device_data(p)) { - /* Run over all pdd of the process */ - pdd = kfd_get_first_process_device_data(p); - do { - pAperture = - 
>process_apertures[args->num_of_nodes]; - pAperture->gpu_id = pdd->dev->id; - pAperture->lds_base = pdd->lds_base; - pAperture->lds_limit = pdd->lds_limit; - pAperture->gpuvm_base = pdd->gpuvm_base; - pAperture->gpuvm_limit = pdd->gpuvm_limit; - pAperture->scratch_base = pdd->scratch_base; - pAperture->scratch_limit = pdd->scratch_limit; - - dev_dbg(kfd_device, - "node id %u\n", args->num_of_nodes); - dev_dbg(kfd_device, - "gpu id %u\n", pdd->dev->id); - dev_dbg(kfd_device, - "lds_base %llX\n", pdd->lds_base); - dev_dbg(kfd_device, - "lds_limit %llX\n", pdd->lds_limit); - dev_dbg(kfd_device, - "gpuvm_base %llX\n", pdd->gpuvm_base); - dev_dbg(kfd_device, - "gpuvm_limit %llX\n", pdd->gpuvm_limit); - dev_dbg(kfd_device, - "scratch_base %llX\n", pdd->scratch_base); - dev_dbg(kfd_device, - "scratch_limit %llX\n", pdd->scratch_limit); - - args->num_of_nodes++; - - pdd = kfd_get_next_process_device_data(p, pdd); - } while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS)); - } + dev_dbg(kfd_device, + "node id %u\n", args->num_of_nodes); + dev_dbg(kfd_device, + "gpu id %u\n", pdd->dev->id); + dev_dbg(kfd_device, + "lds_base %llX\n", pdd->lds_base); + dev_dbg(kfd_device, + "lds_limit %llX\n", pdd->lds_limit); + dev_dbg(kfd_device, + "gpuvm_base %llX\n", pdd->gpuvm_base); + dev_dbg(kfd_device, + "gpuvm_limit %llX\n", pdd->gpuvm_limit); + dev_dbg(kfd_device, + "scratch_base %llX\n", pdd->scratch_base); + dev_dbg(kfd_device, + "scratch_limit %llX\n", pdd->scratch_limit); + if (++args->num_of_nodes >= NUM_OF_SUPPORTED_GPUS) + break; + } mutex_unlock(>mutex); return 0; @@ -926,9 +921,8 @@ static
Re: [PATCH] drm/radeon/ttm: Fix memory leak userptr pages
On Mon, 22 Mar 2021 at 11:34, Christian König wrote: > > Hi Daniel, > > Am 22.03.21 um 10:38 schrieb Daniel Gomez: > > On Fri, 19 Mar 2021 at 21:29, Felix Kuehling wrote: > >> This caused a regression in kfdtest in a large-buffer stress test after > >> memory allocation for user pages fails: > > I'm sorry to hear that. BTW, I guess you meant amdgpu leak patch and > > not this one. > > Just some background for the mem leak patch if helps to understand this: > > The leak was introduce here: > > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0b988ca1c7c4c73983b4ea96ef7c2af2263c87eb > > where the bound status was introduced for all drm drivers including > > radeon and amdgpu. So this patch just reverts the logic to the > > original code but keeping the bound status. In my case, the binding > > code allocates the user pages memory and returns without bounding (at > > amdgpu_gtt_mgr_has_gart_addr). So, > > when the unbinding happens, the memory needs to be cleared to prevent the > > leak. > > Ah, now I understand what's happening here. Daniel your patch is not > really correct. > > The problem is rather that we don't set the tt object to bound if it > doesn't have a GTT address. Okay, I understand. > > Going to provide a patch for this. Looking forward to your patch. Thanks Christian! > > Regards, > Christian. 
> > > > >> [17359.536303] amdgpu: init_user_pages: Failed to get user pages: -16 > >> [17359.543746] BUG: kernel NULL pointer dereference, address: > >> > >> [17359.551494] #PF: supervisor read access in kernel mode > >> [17359.557375] #PF: error_code(0x) - not-present page > >> [17359.563247] PGD 0 P4D 0 > >> [17359.566514] Oops: [#1] SMP PTI > >> [17359.570728] CPU: 8 PID: 5944 Comm: kfdtest Not tainted > >> 5.11.0-kfd-fkuehlin #193 > >> [17359.578760] Hardware name: ASUS All Series/X99-E WS/USB 3.1, BIOS 3201 > >> 06/17/2016 > >> [17359.586971] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu] > >> [17359.594075] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 > >> 00 00 85 c0 0f 85 ab 00 00 00 c6 43 54 00 5b 5d c3 48 8b 46 10 8b 4e 50 > >> <48> 8b 30 48 85 f6 74 ba 8b 50 0c 48 8b bf 80 a1 ff ff 83 e1 01 45 > >> [17359.614340] RSP: 0018:a4764971fc98 EFLAGS: 00010206 > >> [17359.620315] RAX: RBX: 950e8d4edf00 RCX: > >> > >> [17359.628204] RDX: RSI: 950e8d4edf00 RDI: > >> 950eadec5e80 > >> [17359.636084] RBP: 950eadec5e80 R08: R09: > >> > >> [17359.643958] R10: 0246 R11: 0001 R12: > >> 950c03377800 > >> [17359.651833] R13: 950eadec5e80 R14: 950c03377858 R15: > >> > >> [17359.659701] FS: 7febb20cb740() GS:950ebfc0() > >> knlGS: > >> [17359.668528] CS: 0010 DS: ES: CR0: 80050033 > >> [17359.675012] CR2: CR3: 0006d700e005 CR4: > >> 001706e0 > >> [17359.682883] Call Trace: > >> [17359.686063] amdgpu_ttm_backend_destroy+0x12/0x70 [amdgpu] > >> [17359.692349] ttm_bo_cleanup_memtype_use+0x37/0x60 [ttm] > >> [17359.698307] ttm_bo_release+0x278/0x5e0 [ttm] > >> [17359.703385] amdgpu_bo_unref+0x1a/0x30 [amdgpu] > >> [17359.708701] amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x7e5/0x910 > >> [amdgpu] > >> [17359.716307] kfd_ioctl_alloc_memory_of_gpu+0x11a/0x220 [amdgpu] > >> [17359.723036] kfd_ioctl+0x223/0x400 [amdgpu] > >> [17359.728017] ? 
kfd_dev_is_large_bar+0x90/0x90 [amdgpu] > >> [17359.734152] __x64_sys_ioctl+0x8b/0xd0 > >> [17359.738796] do_syscall_64+0x2d/0x40 > >> [17359.743259] entry_SYSCALL_64_after_hwframe+0x44/0xa9 > >> [17359.749205] RIP: 0033:0x7febb083b6d7 > >> [17359.753681] Code: b3 66 90 48 8b 05 b1 47 2d 00 64 c7 00 26 00 00 00 48 > >> c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 > >> <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 47 2d 00 f7 d8 64 89 01 48 > >> [17359.774340] RSP: 002b:7ffdb5522cd8 EFLAGS: 0202 ORIG_RAX: > >> 0010 > >> [17359.782668] RAX: ffda RBX: 0001 RCX: > >> 7febb083b6d7 > >> [17359.790566] RDX: 7ffdb5522d60 RSI: c0284b16 RDI: > >> 0003 > >> [17359.798459] RBP: 7ffdb5522d10 R08: 7ffdb5522dd0 R09: > >> c404 > >> [17359.806352] R10: R11: 0202 R12: > >> 559416e4e2aa > >> [17359.814251] R13: R14: 0021 R15: > >> > >> [17359.822140] Modules linked in: ip6table_filter ip6_tables > >> iptable_filter amdgpu x86_pkg_temp_thermal drm_ttm_helper ttm iommu_v2 > >> gpu_sched ip_tables x_tables > >> [17359.837776] CR2: > >> [17359.841888] ---[ end trace a6f27d64475b28c8 ]--- > >> [17359.847318] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu] > >> [17359.854479] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 > >> 00
Re: [PATCH] drm/radeon/ttm: Fix memory leak userptr pages
Hi Daniel, Am 22.03.21 um 10:38 schrieb Daniel Gomez: On Fri, 19 Mar 2021 at 21:29, Felix Kuehling wrote: This caused a regression in kfdtest in a large-buffer stress test after memory allocation for user pages fails: I'm sorry to hear that. BTW, I guess you meant amdgpu leak patch and not this one. Just some background for the mem leak patch if helps to understand this: The leak was introduce here: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0b988ca1c7c4c73983b4ea96ef7c2af2263c87eb where the bound status was introduced for all drm drivers including radeon and amdgpu. So this patch just reverts the logic to the original code but keeping the bound status. In my case, the binding code allocates the user pages memory and returns without bounding (at amdgpu_gtt_mgr_has_gart_addr). So, when the unbinding happens, the memory needs to be cleared to prevent the leak. Ah, now I understand what's happening here. Daniel your patch is not really correct. The problem is rather that we don't set the tt object to bound if it doesn't have a GTT address. Going to provide a patch for this. Regards, Christian. 
[17359.536303] amdgpu: init_user_pages: Failed to get user pages: -16 [17359.543746] BUG: kernel NULL pointer dereference, address: [17359.551494] #PF: supervisor read access in kernel mode [17359.557375] #PF: error_code(0x) - not-present page [17359.563247] PGD 0 P4D 0 [17359.566514] Oops: [#1] SMP PTI [17359.570728] CPU: 8 PID: 5944 Comm: kfdtest Not tainted 5.11.0-kfd-fkuehlin #193 [17359.578760] Hardware name: ASUS All Series/X99-E WS/USB 3.1, BIOS 3201 06/17/2016 [17359.586971] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu] [17359.594075] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 00 00 85 c0 0f 85 ab 00 00 00 c6 43 54 00 5b 5d c3 48 8b 46 10 8b 4e 50 <48> 8b 30 48 85 f6 74 ba 8b 50 0c 48 8b bf 80 a1 ff ff 83 e1 01 45 [17359.614340] RSP: 0018:a4764971fc98 EFLAGS: 00010206 [17359.620315] RAX: RBX: 950e8d4edf00 RCX: [17359.628204] RDX: RSI: 950e8d4edf00 RDI: 950eadec5e80 [17359.636084] RBP: 950eadec5e80 R08: R09: [17359.643958] R10: 0246 R11: 0001 R12: 950c03377800 [17359.651833] R13: 950eadec5e80 R14: 950c03377858 R15: [17359.659701] FS: 7febb20cb740() GS:950ebfc0() knlGS: [17359.668528] CS: 0010 DS: ES: CR0: 80050033 [17359.675012] CR2: CR3: 0006d700e005 CR4: 001706e0 [17359.682883] Call Trace: [17359.686063] amdgpu_ttm_backend_destroy+0x12/0x70 [amdgpu] [17359.692349] ttm_bo_cleanup_memtype_use+0x37/0x60 [ttm] [17359.698307] ttm_bo_release+0x278/0x5e0 [ttm] [17359.703385] amdgpu_bo_unref+0x1a/0x30 [amdgpu] [17359.708701] amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x7e5/0x910 [amdgpu] [17359.716307] kfd_ioctl_alloc_memory_of_gpu+0x11a/0x220 [amdgpu] [17359.723036] kfd_ioctl+0x223/0x400 [amdgpu] [17359.728017] ? 
kfd_dev_is_large_bar+0x90/0x90 [amdgpu] [17359.734152] __x64_sys_ioctl+0x8b/0xd0 [17359.738796] do_syscall_64+0x2d/0x40 [17359.743259] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [17359.749205] RIP: 0033:0x7febb083b6d7 [17359.753681] Code: b3 66 90 48 8b 05 b1 47 2d 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 81 47 2d 00 f7 d8 64 89 01 48 [17359.774340] RSP: 002b:7ffdb5522cd8 EFLAGS: 0202 ORIG_RAX: 0010 [17359.782668] RAX: ffda RBX: 0001 RCX: 7febb083b6d7 [17359.790566] RDX: 7ffdb5522d60 RSI: c0284b16 RDI: 0003 [17359.798459] RBP: 7ffdb5522d10 R08: 7ffdb5522dd0 R09: c404 [17359.806352] R10: R11: 0202 R12: 559416e4e2aa [17359.814251] R13: R14: 0021 R15: [17359.822140] Modules linked in: ip6table_filter ip6_tables iptable_filter amdgpu x86_pkg_temp_thermal drm_ttm_helper ttm iommu_v2 gpu_sched ip_tables x_tables [17359.837776] CR2: [17359.841888] ---[ end trace a6f27d64475b28c8 ]--- [17359.847318] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu] [17359.854479] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 00 00 85 c0 0f 85 ab 00 00 00 c6 43 54 00 5b 5d c3 48 8b 46 10 8b 4e 50 <48> 8b 30 48 85 f6 74 ba 8b 50 0c 48 8b bf 80 a1 ff ff 83 e1 01 45 [17359.874929] RSP: 0018:a4764971fc98 EFLAGS: 00010206 [17359.881014] RAX: RBX: 950e8d4edf00 RCX: [17359.889007] RDX: RSI: 950e8d4edf00 RDI: 950eadec5e80 [17359.897008] RBP: 950eadec5e80 R08: R09: [17359.905020] R10: 0246 R11: 0001 R12: 950c03377800 [17359.913034] R13: 950eadec5e80
Re: [PATCH] drm/radeon/ttm: Fix memory leak userptr pages
On Fri, 19 Mar 2021 at 21:29, Felix Kuehling wrote: > > This caused a regression in kfdtest in a large-buffer stress test after > memory allocation for user pages fails: I'm sorry to hear that. BTW, I guess you meant amdgpu leak patch and not this one. Just some background for the mem leak patch if helps to understand this: The leak was introduce here: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=0b988ca1c7c4c73983b4ea96ef7c2af2263c87eb where the bound status was introduced for all drm drivers including radeon and amdgpu. So this patch just reverts the logic to the original code but keeping the bound status. In my case, the binding code allocates the user pages memory and returns without bounding (at amdgpu_gtt_mgr_has_gart_addr). So, when the unbinding happens, the memory needs to be cleared to prevent the leak. > > [17359.536303] amdgpu: init_user_pages: Failed to get user pages: -16 > [17359.543746] BUG: kernel NULL pointer dereference, address: > [17359.551494] #PF: supervisor read access in kernel mode > [17359.557375] #PF: error_code(0x) - not-present page > [17359.563247] PGD 0 P4D 0 > [17359.566514] Oops: [#1] SMP PTI > [17359.570728] CPU: 8 PID: 5944 Comm: kfdtest Not tainted 5.11.0-kfd-fkuehlin > #193 > [17359.578760] Hardware name: ASUS All Series/X99-E WS/USB 3.1, BIOS 3201 > 06/17/2016 > [17359.586971] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu] > [17359.594075] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 00 > 00 85 c0 0f 85 ab 00 00 00 c6 43 54 00 5b 5d c3 48 8b 46 10 8b 4e 50 <48> 8b > 30 48 85 f6 74 ba 8b 50 0c 48 8b bf 80 a1 ff ff 83 e1 01 45 > [17359.614340] RSP: 0018:a4764971fc98 EFLAGS: 00010206 > [17359.620315] RAX: RBX: 950e8d4edf00 RCX: > > [17359.628204] RDX: RSI: 950e8d4edf00 RDI: > 950eadec5e80 > [17359.636084] RBP: 950eadec5e80 R08: R09: > > [17359.643958] R10: 0246 R11: 0001 R12: > 950c03377800 > [17359.651833] R13: 950eadec5e80 R14: 950c03377858 R15: > > [17359.659701] FS: 
7febb20cb740() GS:950ebfc0() > knlGS: > [17359.668528] CS: 0010 DS: ES: CR0: 80050033 > [17359.675012] CR2: CR3: 0006d700e005 CR4: > 001706e0 > [17359.682883] Call Trace: > [17359.686063] amdgpu_ttm_backend_destroy+0x12/0x70 [amdgpu] > [17359.692349] ttm_bo_cleanup_memtype_use+0x37/0x60 [ttm] > [17359.698307] ttm_bo_release+0x278/0x5e0 [ttm] > [17359.703385] amdgpu_bo_unref+0x1a/0x30 [amdgpu] > [17359.708701] amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu+0x7e5/0x910 [amdgpu] > [17359.716307] kfd_ioctl_alloc_memory_of_gpu+0x11a/0x220 [amdgpu] > [17359.723036] kfd_ioctl+0x223/0x400 [amdgpu] > [17359.728017] ? kfd_dev_is_large_bar+0x90/0x90 [amdgpu] > [17359.734152] __x64_sys_ioctl+0x8b/0xd0 > [17359.738796] do_syscall_64+0x2d/0x40 > [17359.743259] entry_SYSCALL_64_after_hwframe+0x44/0xa9 > [17359.749205] RIP: 0033:0x7febb083b6d7 > [17359.753681] Code: b3 66 90 48 8b 05 b1 47 2d 00 64 c7 00 26 00 00 00 48 c7 > c0 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 b8 10 00 00 00 0f 05 <48> 3d > 01 f0 ff ff 73 01 c3 48 8b 0d 81 47 2d 00 f7 d8 64 89 01 48 > [17359.774340] RSP: 002b:7ffdb5522cd8 EFLAGS: 0202 ORIG_RAX: > 0010 > [17359.782668] RAX: ffda RBX: 0001 RCX: > 7febb083b6d7 > [17359.790566] RDX: 7ffdb5522d60 RSI: c0284b16 RDI: > 0003 > [17359.798459] RBP: 7ffdb5522d10 R08: 7ffdb5522dd0 R09: > c404 > [17359.806352] R10: R11: 0202 R12: > 559416e4e2aa > [17359.814251] R13: R14: 0021 R15: > > [17359.822140] Modules linked in: ip6table_filter ip6_tables iptable_filter > amdgpu x86_pkg_temp_thermal drm_ttm_helper ttm iommu_v2 gpu_sched ip_tables > x_tables > [17359.837776] CR2: > [17359.841888] ---[ end trace a6f27d64475b28c8 ]--- > [17359.847318] RIP: 0010:amdgpu_ttm_backend_unbind+0x52/0x110 [amdgpu] > [17359.854479] Code: 48 39 c6 74 1b 8b 53 0c 48 8d bd 80 a1 ff ff e8 24 62 00 > 00 85 c0 0f 85 ab 00 00 00 c6 43 54 00 5b 5d c3 48 8b 46 10 8b 4e 50 <48> 8b > 30 48 85 f6 74 ba 8b 50 0c 48 8b bf 80 a1 ff ff 83 e1 01 45 > [17359.874929] RSP: 0018:a4764971fc98 EFLAGS: 00010206 > 
[17359.881014] RAX: RBX: 950e8d4edf00 RCX: > > [17359.889007] RDX: RSI: 950e8d4edf00 RDI: > 950eadec5e80 > [17359.897008] RBP: 950eadec5e80 R08: R09: > > [17359.905020] R10: 0246 R11: 0001 R12: > 950c03377800 > [17359.913034] R13: 950eadec5e80 R14: 950c03377858 R15: > > [17359.921050] FS: 7febb20cb740() GS:950ebfc0() >
Re: [PATCH] drm/amd/display: Allow idle optimization based on vblank.
On 2021-03-20 1:31 a.m., R, Bindu wrote: > > The Update patch has been submitted. Submitted where? Still can't see it. -- Earthling Michel Dänzer | https://redhat.com Libre software enthusiast | Mesa and X developer ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/display: fix modprobe failure on vega series
Fixes: d88b34caee83 ("Remove some large variables from the stack") [ 41.232097] Call Trace: [ 41.232105] kvasprintf+0x66/0xd0 [ 41.232122] kasprintf+0x49/0x70 [ 41.232136] __drm_crtc_init_with_planes+0x2e1/0x340 [drm] [ 41.232219] ? create_object+0x263/0x3b0 [ 41.232231] drm_crtc_init_with_planes+0x46/0x60 [drm] [ 41.232303] amdgpu_dm_init+0x69c/0x1750 [amdgpu] [ 41.232998] ? phm_wait_for_register_unequal.part.1+0x58/0x90 [amdgpu] [ 41.233662] ? smu9_wait_for_response+0x7d/0xa0 [amdgpu] [ 41.234294] ? smu9_send_msg_to_smc_with_parameter+0x77/0xd0 [amdgpu] [ 41.234912] ? smum_send_msg_to_smc_with_parameter+0x96/0x100 [amdgpu] [ 41.235520] ? psm_set_states+0x5c/0x60 [amdgpu] [ 41.236165] dm_hw_init+0x12/0x20 [amdgpu] [ 41.236834] amdgpu_device_init+0x1402/0x1df0 [amdgpu] [ 41.237314] amdgpu_driver_load_kms+0x65/0x320 [amdgpu] [ 41.237780] amdgpu_pci_probe+0x150/0x250 [amdgpu] [ 41.238240] local_pci_probe+0x47/0xa0 [ 41.238253] pci_device_probe+0x10b/0x1c0 [ 41.238265] really_probe+0xf5/0x4c0 [ 41.238275] driver_probe_device+0xe8/0x150 [ 41.238284] device_driver_attach+0x58/0x60 [ 41.238293] __driver_attach+0xa3/0x140 [ 41.238301] ? device_driver_attach+0x60/0x60 [ 41.238309] ? device_driver_attach+0x60/0x60 [ 41.238317] bus_for_each_dev+0x74/0xb0 [ 41.238330] ? kmem_cache_alloc_trace+0x31a/0x470 [ 41.238341] driver_attach+0x1e/0x20 [ 41.238348] bus_add_driver+0x14a/0x220 [ 41.238357] ? 0xc0f09000 [ 41.238364] driver_register+0x60/0x100 [ 41.238373] ? 0xc0f09000 [ 41.238379] __pci_register_driver+0x54/0x60 [ 41.238389] amdgpu_init+0x68/0x1000 [amdgpu] [ 41.238836] do_one_initcall+0x48/0x1e0 [ 41.238850] ? kmem_cache_alloc_trace+0x31a/0x470 [ 41.238862] do_init_module+0x5f/0x224 [ 41.238876] load_module+0x266b/0x2ec0 [ 41.238887] ? security_kernel_post_read_file+0x5c/0x70 [ 41.238905] __do_sys_finit_module+0xc1/0x120 [ 41.238913] ? 
__do_sys_finit_module+0xc1/0x120 [ 41.238924] __x64_sys_finit_module+0x1a/0x20 [ 41.238932] do_syscall_64+0x37/0x80 [ 41.238942] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Signed-off-by: Guchun Chen --- drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c index 556ecfabc8d2..1244fcb0f446 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c +++ b/drivers/gpu/drm/amd/display/dc/calcs/dce_calcs.c @@ -2051,11 +2051,11 @@ void bw_calcs_init(struct bw_calcs_dceip *bw_dceip, enum bw_calcs_version version = bw_calcs_version_from_asic_id(asic_id); - dceip = kzalloc(sizeof(dceip), GFP_KERNEL); + dceip = kzalloc(sizeof(*dceip), GFP_KERNEL); if (!dceip) return; - vbios = kzalloc(sizeof(vbios), GFP_KERNEL); + vbios = kzalloc(sizeof(*vbios), GFP_KERNEL); if (!vbios) { kfree(dceip); return; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu/swsmu: fix typo (memlk -> memclk)
- no functional changes Signed-off-by: Tobias Jakobi --- drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 4 ++-- drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 6e641f1513d8..66d69c13f915 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1433,7 +1433,7 @@ static int navi10_get_power_profile_mode(struct smu_context *smu, char *buf) size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, - "MEMLK", + "MEMCLK", activity_monitor.Mem_FPS, activity_monitor.Mem_MinFreqStep, activity_monitor.Mem_MinActiveFreqType, @@ -1493,7 +1493,7 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u activity_monitor.Soc_PD_Data_error_coeff = input[8]; activity_monitor.Soc_PD_Data_error_rate_coeff = input[9]; break; - case 2: /* Memlk */ + case 2: /* Memclk */ activity_monitor.Mem_FPS = input[1]; activity_monitor.Mem_MinFreqStep = input[2]; activity_monitor.Mem_MinActiveFreqType = input[3]; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index af73e1430af5..f21679acd9af 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -1366,7 +1366,7 @@ static int sienna_cichlid_get_power_profile_mode(struct smu_context *smu, char * size += sprintf(buf + size, "%19s %d(%13s) %7d %7d %7d %7d %7d %7d %7d %7d %7d\n", " ", 2, - "MEMLK", + "MEMCLK", activity_monitor->Mem_FPS, activity_monitor->Mem_MinFreqStep, activity_monitor->Mem_MinActiveFreqType, @@ -1429,7 +1429,7 @@ static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long * activity_monitor->Fclk_PD_Data_error_coeff = input[8]; 
activity_monitor->Fclk_PD_Data_error_rate_coeff = input[9]; break; - case 2: /* Memlk */ + case 2: /* Memclk */ activity_monitor->Mem_FPS = input[1]; activity_monitor->Mem_MinFreqStep = input[2]; activity_monitor->Mem_MinActiveFreqType = input[3]; -- 2.26.2 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/radeon: don't evict if not initialized
Thanks, Fixed as suggested and sent as v2. - Tong On Sun, Mar 21, 2021 at 9:26 AM Christian König wrote: > > > > Am 20.03.21 um 21:10 schrieb Tong Zhang: > > TTM_PL_VRAM may not initialized at all when calling > > radeon_bo_evict_vram(). We need to check before doing eviction. > > > > [2.160837] BUG: kernel NULL pointer dereference, address: > > 0020 > > [2.161212] #PF: supervisor read access in kernel mode > > [2.161490] #PF: error_code(0x) - not-present page > > [2.161767] PGD 0 P4D 0 > > [2.163088] RIP: 0010:ttm_resource_manager_evict_all+0x70/0x1c0 [ttm] > > [2.168506] Call Trace: > > [2.168641] radeon_bo_evict_vram+0x1c/0x20 [radeon] > > [2.168936] radeon_device_fini+0x28/0xf9 [radeon] > > [2.169224] radeon_driver_unload_kms+0x44/0xa0 [radeon] > > [2.169534] radeon_driver_load_kms+0x174/0x210 [radeon] > > [2.169843] drm_dev_register+0xd9/0x1c0 [drm] > > [2.170104] radeon_pci_probe+0x117/0x1a0 [radeon] > > > > Signed-off-by: Tong Zhang > > --- > > drivers/gpu/drm/radeon/radeon_object.c | 4 +++- > > 1 file changed, 3 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/radeon/radeon_object.c > > b/drivers/gpu/drm/radeon/radeon_object.c > > index 9b81786782de..04e9a8118b0e 100644 > > --- a/drivers/gpu/drm/radeon/radeon_object.c > > +++ b/drivers/gpu/drm/radeon/radeon_object.c > > @@ -384,7 +384,9 @@ int radeon_bo_evict_vram(struct radeon_device *rdev) > > } > > #endif > > man = ttm_manager_type(bdev, TTM_PL_VRAM); > > - return ttm_resource_manager_evict_all(bdev, man); > > + if (man) > > + return ttm_resource_manager_evict_all(bdev, man); > > + return 0; > > You should probably code this the other way around, e.g. > > If (!man) > return 0; > ... > > Apart from that looks good to me. > > Christian. > > > } > > > > void radeon_bo_force_delete(struct radeon_device *rdev) > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Fix system crash caused by cat'ing the debugfs hang_hws file
Here is the system crash log: [ 1272.884438] BUG: unable to handle kernel NULL pointer dereference at (null) [ 1272.88] IP: [< (null)>] (null) [ 1272.884447] PGD 825b09067 PUD 8267c8067 PMD 0 [ 1272.884452] Oops: 0010 [#1] SMP [ 1272.884509] CPU: 13 PID: 3485 Comm: cat Kdump: loaded Tainted: G [ 1272.884515] task: 9a38dbd4d140 ti: 9a37cd3b8000 task.ti: 9a37cd3b8000 [ 1272.884517] RIP: 0010:[<>] [< (null)>] (null) [ 1272.884520] RSP: 0018:9a37cd3bbe68 EFLAGS: 00010203 [ 1272.884522] RAX: RBX: RCX: 00014d5f [ 1272.884524] RDX: fff4 RSI: 0001 RDI: 9a38aca4d200 [ 1272.884526] RBP: 9a37cd3bbed0 R08: 9a38dcd5f1a0 R09: 9a31ffc07300 [ 1272.884527] R10: 9a31ffc07300 R11: addd5e9d R12: 9a38b4e0fb00 [ 1272.884529] R13: 0001 R14: 9a37cd3bbf18 R15: 9a38aca4d200 [ 1272.884532] FS: 7feccaa67740() GS:9a38dcd4() knlGS: [ 1272.884534] CS: 0010 DS: ES: CR0: 80050033 [ 1272.884536] CR2: CR3: 0008267c CR4: 003407e0 [ 1272.884537] Call Trace: [ 1272.884544] [] ? seq_read+0x130/0x440 [ 1272.884548] [] vfs_read+0x9f/0x170 [ 1272.884552] [] SyS_read+0x7f/0xf0 [ 1272.884557] [] system_call_fastpath+0x22/0x27 [ 1272.884558] Code: Bad RIP value. 
[ 1272.884562] RIP [< (null)>] (null) [ 1272.884564] RSP [ 1272.884566] CR2: Signed-off-by: Qu Huang --- drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c index 511712c..673d5e3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debugfs.c @@ -33,6 +33,11 @@ static int kfd_debugfs_open(struct inode *inode, struct file *file) return single_open(file, show, NULL); } +static int kfd_debugfs_hang_hws_read(struct seq_file *m, void *data) +{ + seq_printf(m, "echo gpu_id > hang_hws\n"); + return 0; +} static ssize_t kfd_debugfs_hang_hws_write(struct file *file, const char __user *user_buf, size_t size, loff_t *ppos) @@ -94,7 +99,7 @@ void kfd_debugfs_init(void) debugfs_create_file("rls", S_IFREG | 0444, debugfs_root, kfd_debugfs_rls_by_device, _debugfs_fops); debugfs_create_file("hang_hws", S_IFREG | 0200, debugfs_root, - NULL, _debugfs_hang_hws_fops); + kfd_debugfs_hang_hws_read, _debugfs_hang_hws_fops); } void kfd_debugfs_fini(void) -- 1.8.3.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH v2] drm/radeon: don't evict if not initialized
TTM_PL_VRAM may not initialized at all when calling radeon_bo_evict_vram(). We need to check before doing eviction. [2.160837] BUG: kernel NULL pointer dereference, address: 0020 [2.161212] #PF: supervisor read access in kernel mode [2.161490] #PF: error_code(0x) - not-present page [2.161767] PGD 0 P4D 0 [2.163088] RIP: 0010:ttm_resource_manager_evict_all+0x70/0x1c0 [ttm] [2.168506] Call Trace: [2.168641] radeon_bo_evict_vram+0x1c/0x20 [radeon] [2.168936] radeon_device_fini+0x28/0xf9 [radeon] [2.169224] radeon_driver_unload_kms+0x44/0xa0 [radeon] [2.169534] radeon_driver_load_kms+0x174/0x210 [radeon] [2.169843] drm_dev_register+0xd9/0x1c0 [drm] [2.170104] radeon_pci_probe+0x117/0x1a0 [radeon] Suggested-by: Christian König Signed-off-by: Tong Zhang --- v2: coding style fix drivers/gpu/drm/radeon/radeon_object.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 9b81786782de..499ce55e34cc 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -384,6 +384,8 @@ int radeon_bo_evict_vram(struct radeon_device *rdev) } #endif man = ttm_manager_type(bdev, TTM_PL_VRAM); + if (!man) + return 0; return ttm_resource_manager_evict_all(bdev, man); } -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH V2] drm/amdgpu: Fix a typo
On Fri, 19 Mar 2021, Bhaskar Chowdhury wrote: s/traing/training/ ...Plus the entire sentence construction for better readability. Signed-off-by: Bhaskar Chowdhury --- Changes from V1: Alex and Randy's suggestions incorporated. drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index c325d6f53a71..bf3857867f51 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -661,10 +661,10 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) if (ops & PSP_MEM_TRAIN_SEND_LONG_MSG) { /* -* Long traing will encroach certain mount of bottom VRAM, -* saving the content of this bottom VRAM to system memory -* before training, and restoring it after training to avoid -* VRAM corruption. +* Long training will encroach a certain amount on the bottom of VRAM; + * save the content from the bottom VRAM to system memory + * before training, and restore it after training to avoid + * VRAM corruption. These 3 new lines are indented with spaces instead of tabs. Oops. :( (I may be too late with this comment -- sorry about that.) */ sz = GDDR6_MEM_TRAINING_ENCROACHED_SIZE; -- 2.26.2 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/radeon: don't evict if not initialized
TTM_PL_VRAM may not initialized at all when calling radeon_bo_evict_vram(). We need to check before doing eviction. [2.160837] BUG: kernel NULL pointer dereference, address: 0020 [2.161212] #PF: supervisor read access in kernel mode [2.161490] #PF: error_code(0x) - not-present page [2.161767] PGD 0 P4D 0 [2.163088] RIP: 0010:ttm_resource_manager_evict_all+0x70/0x1c0 [ttm] [2.168506] Call Trace: [2.168641] radeon_bo_evict_vram+0x1c/0x20 [radeon] [2.168936] radeon_device_fini+0x28/0xf9 [radeon] [2.169224] radeon_driver_unload_kms+0x44/0xa0 [radeon] [2.169534] radeon_driver_load_kms+0x174/0x210 [radeon] [2.169843] drm_dev_register+0xd9/0x1c0 [drm] [2.170104] radeon_pci_probe+0x117/0x1a0 [radeon] Signed-off-by: Tong Zhang --- drivers/gpu/drm/radeon/radeon_object.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 9b81786782de..04e9a8118b0e 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -384,7 +384,9 @@ int radeon_bo_evict_vram(struct radeon_device *rdev) } #endif man = ttm_manager_type(bdev, TTM_PL_VRAM); - return ttm_resource_manager_evict_all(bdev, man); + if (man) + return ttm_resource_manager_evict_all(bdev, man); + return 0; } void radeon_bo_force_delete(struct radeon_device *rdev) -- 2.25.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/display: Remove unnecessary conversion to bool
Fix the following coccicheck warnings: ./drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c:875:62-67: WARNING: conversion to bool not needed here. Reported-by: Abaci Robot Signed-off-by: Jiapeng Chong --- drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c index 3e6f760..5692a8b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mpc.c @@ -872,7 +872,7 @@ bool mpc3_program_shaper( else next_mode = LUT_RAM_A; - mpc3_configure_shaper_lut(mpc, next_mode == LUT_RAM_A ? true:false, rmu_idx); + mpc3_configure_shaper_lut(mpc, next_mode == LUT_RAM_A, rmu_idx); if (next_mode == LUT_RAM_A) mpc3_program_shaper_luta_settings(mpc, params, rmu_idx); -- 1.8.3.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amd/display: Set AMDGPU_DM_DEFAULT_MIN_BACKLIGHT to 0
On Sat, Mar 20, 2021 at 8:36 AM Alex Deucher wrote: > > On Fri, Mar 19, 2021 at 5:31 PM Evan Benn wrote: > > > > On Sat, 20 Mar 2021 at 02:10, Harry Wentland wrote: > > > On 2021-03-19 10:22 a.m., Alex Deucher wrote: > > > > On Fri, Mar 19, 2021 at 3:23 AM Evan Benn wrote: > > > >> > > > >> AMDGPU_DM_DEFAULT_MIN_BACKLIGHT was set to the value of 12 > > > >> to ensure no display backlight will flicker at low user brightness > > > >> settings. However this value is quite bright, so for devices that do > > > >> not > > > >> implement the ACPI ATIF > > > >> ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS > > > >> functionality the user cannot set the brightness to a low level even if > > > >> the display would support such a low PWM. > > > >> > > > >> This ATIF feature is not implemented on for example AMD grunt > > > >> chromebooks. > > > >> > > > >> Signed-off-by: Evan Benn > > > >> > > > >> --- > > > >> I could not find a justification for the reason for the value. It has > > > >> caused some noticable regression for users: > > > >> https://bugzilla.kernel.org/show_bug.cgi?id=203439>>> > > > >> Maybe this can be either user controlled or userspace configured, but > > > >> preventing users from turning their backlight dim seems wrong. > > > > > > > > My understanding is that some panels flicker if you set the min to a > > > > value too low. This was a safe minimum if the platform didn't specify > > > > it's own safe minimum. I think we'd just be trading one bug for > > > > another (flickering vs not dim enough). Maybe a whitelist or > > > > blacklist would be a better solution? > > > > > > > > > > Yeah, this is a NACK from me as-is for the reasons Alex described. > > > > Thanks Harry + Alex, > > > > I agree this solution is not the best. > > > > > > > > I agree a whitelist approach might be best. > > > > Do you have any idea what an allowlist could be keyed on? 
> > Is the flickering you observed here a function of the panel or the gpu > > or some other component? > > Maybe we could move the minimum level into the logic for that hardware. > > > > Maybe the panel string from the EDID? Either that or something from > dmi data? Harry would probably have a better idea. One problem with keying from panel EDID is that for example the grunt chromebook platform has more than 100 different panels already shipped. Add to that that repair centers or people repairing their own device will use 'compatible' panels. I'm sure the AMD windows laptops have even more variety! > > Alex > > > > > > > Is this fix perhaps for OLED panels? If so we could use a different > > > min-value for OLED panels that don't do PWM, but use 12 for everything > > > else. > > > > All the chromebooks I have worked with LCD + LED backlight have been > > fine with a backlight set to 0. > > We do have OLED panels too, but I'm not aware of what they do. > > > > > Harry > > > > > > > Alex > > > > > > > > > > > >> > > > >> Also reviewed here: > > > >> https://chromium-review.googlesource.com/c/chromiumos/third_party/kernel/+/2748377>>> > > > >> drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- > > > >> 1 file changed, 1 insertion(+), 1 deletion(-) > > > >> > > > >> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > > >> b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > > >> index 573cf17262da..0129bd69b94e 100644 > > > >> --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > > >> +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > > >> @@ -3151,7 +3151,7 @@ static int amdgpu_dm_mode_config_init(struct > > > >> amdgpu_device *adev) > > > >> return 0; > > > >> } > > > >> > > > >> -#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 12 > > > >> +#define AMDGPU_DM_DEFAULT_MIN_BACKLIGHT 0 > > > >> #define AMDGPU_DM_DEFAULT_MAX_BACKLIGHT 255 > > > >> #define AUX_BL_DEFAULT_TRANSITION_TIME_MS 50 > > > >> > > > >> -- > > > >> 
2.31.0.291.g576ba9dcdaf-goog > > > >> > > > >> ___ > > > >> dri-devel mailing list > > > >> dri-de...@lists.freedesktop.org > > > >> https://lists.freedesktop.org/mailman/listinfo/dri-devel>> > > > >> ___ > > > > dri-devel mailing list > > > > dri-de...@lists.freedesktop.org > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel>> > > > ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/display: fix deadlock issue in amdgpu reset
In amdggpu reset, while dm.dc_lock is held by dm_suspend, handle_hpd_rx_irq tries to acquire it. Deadlock occurred! Deadlock log: [ 104.528304] amdgpu :03:00.0: amdgpu: GPU reset begin! [ 104.640084] == [ 104.640092] WARNING: possible circular locking dependency detected [ 104.640099] 5.11.0-custom #1 Tainted: GW E [ 104.640107] -- [ 104.640114] cat/1158 is trying to acquire lock: [ 104.640120] 88810a09ce00 ((work_completion)(>work)){+.+.}-{0:0}, at: __flush_work+0x2e3/0x450 [ 104.640144] but task is already holding lock: [ 104.640151] 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb2/0x1d0 [amdgpu] [ 104.640581] which lock already depends on the new lock. [ 104.640590] the existing dependency chain (in reverse order) is: [ 104.640598] -> #2 (>dm.dc_lock){+.+.}-{3:3}: [ 104.640611]lock_acquire+0xca/0x390 [ 104.640623]__mutex_lock+0x9b/0x930 [ 104.640633]mutex_lock_nested+0x1b/0x20 [ 104.640640]handle_hpd_rx_irq+0x9b/0x1c0 [amdgpu] [ 104.640959]dm_irq_work_func+0x4e/0x60 [amdgpu] [ 104.641264]process_one_work+0x2a7/0x5b0 [ 104.641275]worker_thread+0x4a/0x3d0 [ 104.641283]kthread+0x125/0x160 [ 104.641290]ret_from_fork+0x22/0x30 [ 104.641300] -> #1 (>hpd_lock){+.+.}-{3:3}: [ 104.641312]lock_acquire+0xca/0x390 [ 104.641321]__mutex_lock+0x9b/0x930 [ 104.641328]mutex_lock_nested+0x1b/0x20 [ 104.641336]handle_hpd_rx_irq+0x67/0x1c0 [amdgpu] [ 104.641635]dm_irq_work_func+0x4e/0x60 [amdgpu] [ 104.641931]process_one_work+0x2a7/0x5b0 [ 104.641940]worker_thread+0x4a/0x3d0 [ 104.641948]kthread+0x125/0x160 [ 104.641954]ret_from_fork+0x22/0x30 [ 104.641963] -> #0 ((work_completion)(>work)){+.+.}-{0:0}: [ 104.641975]check_prev_add+0x94/0xbf0 [ 104.641983]__lock_acquire+0x130d/0x1ce0 [ 104.641992]lock_acquire+0xca/0x390 [ 104.642000]__flush_work+0x303/0x450 [ 104.642008]flush_work+0x10/0x20 [ 104.642016]amdgpu_dm_irq_suspend+0x93/0x100 [amdgpu] [ 104.642312]dm_suspend+0x181/0x1d0 [amdgpu] [ 104.642605]amdgpu_device_ip_suspend_phase1+0x8a/0x100 [amdgpu] [ 
104.642835]amdgpu_device_ip_suspend+0x21/0x70 [amdgpu] [ 104.643066]amdgpu_device_pre_asic_reset+0x1bd/0x1d2 [amdgpu] [ 104.643403]amdgpu_device_gpu_recover.cold+0x5df/0xa9d [amdgpu] [ 104.643715]gpu_recover_get+0x2e/0x60 [amdgpu] [ 104.643951]simple_attr_read+0x6d/0x110 [ 104.643960]debugfs_attr_read+0x49/0x70 [ 104.643970]full_proxy_read+0x5f/0x90 [ 104.643979]vfs_read+0xa3/0x190 [ 104.643986]ksys_read+0x70/0xf0 [ 104.643992]__x64_sys_read+0x1a/0x20 [ 104.643999]do_syscall_64+0x38/0x90 [ 104.644007]entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 104.644017] other info that might help us debug this: [ 104.644026] Chain exists of: (work_completion)(>work) --> >hpd_lock --> >dm.dc_lock [ 104.644043] Possible unsafe locking scenario: [ 104.644049]CPU0CPU1 [ 104.644055] [ 104.644060] lock(>dm.dc_lock); [ 104.644066]lock(>hpd_lock); [ 104.644075]lock(>dm.dc_lock); [ 104.644083] lock((work_completion)(>work)); [ 104.644090] *** DEADLOCK *** [ 104.644096] 3 locks held by cat/1158: [ 104.644103] #0: 88810d0e4eb8 (>mutex){+.+.}-{3:3}, at: simple_attr_read+0x4e/0x110 [ 104.644119] #1: 88810a0a1600 (>reset_sem){}-{3:3}, at: amdgpu_device_lock_adev+0x42/0x94 [amdgpu] [ 104.644489] #2: 88810a09cc70 (>dm.dc_lock){+.+.}-{3:3}, at: dm_suspend+0xb2/0x1d0 [amdgpu] Signed-off-by: Lang Yu --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index e176ea84d75b..8727488df769 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2657,13 +2657,15 @@ static void handle_hpd_rx_irq(void *param) } } - mutex_lock(>dm.dc_lock); + if (!amdgpu_in_reset(adev)) + mutex_lock(>dm.dc_lock); #ifdef CONFIG_DRM_AMD_DC_HDCP result = dc_link_handle_hpd_rx_irq(dc_link, _irq_data, NULL); #else result = dc_link_handle_hpd_rx_irq(dc_link, NULL, NULL); #endif - 
mutex_unlock(>dm.dc_lock); + if (!amdgpu_in_reset(adev)) + mutex_unlock(>dm.dc_lock); out: if (result &&
Re: [PATCH] drm/amdgpu: Use correct size when accessing vram
Am 22.03.21 um 01:53 schrieb xinhui pan: To make size is 4 byte aligned. Use &~0x3ULL instead of &3ULL. Signed-off-by: xinhui pan Good catch. Patch is Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 6d5cf0525325..41a4c456961c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1503,7 +1503,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, memcpy(buf, , bytes); } } else { - bytes = cursor.size & 0x3ull; + bytes = cursor.size & ~0x3ULL; amdgpu_device_vram_access(adev, cursor.start, (uint32_t *)buf, bytes, write); ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx