Re: [PATCH] MAINTAINERS: Remove me from amdgpu maintainers
Am 07.05.20 um 04:02 schrieb Chunming Zhou: Glad to spend time on kernel driver in past years. I've moved to new focus in umd and couldn't commit enough time to discussions. Signed-off-by: Chunming Zhou So Long, and Thanks for All the Fish :) Reviewed-by: Christian König --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 938316092634..4ca508bd4c9e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14066,7 +14066,6 @@ F: drivers/net/wireless/quantenna RADEON and AMDGPU DRM DRIVERS M:Alex Deucher M:Christian König -M: David (ChunMing) Zhou L:amd-gfx@lists.freedesktop.org S:Supported T:git git://people.freedesktop.org/~agd5f/linux ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: allocate large structures dynamically
Am 06.05.20 um 21:01 schrieb Joe Perches: On Tue, 2020-05-05 at 16:01 +0200, Arnd Bergmann wrote: After the structure was padded to 1024 bytes, it is no longer suitable for being a local variable, as the function surpasses the warning limit for 32-bit architectures: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:587:5: error: stack frame size of 1072 bytes in function 'amdgpu_ras_feature_enable' [-Werror,-Wframe-larger-than=] int amdgpu_ras_feature_enable(struct amdgpu_device *adev, ^ Use kzalloc() instead to get it from the heap. [] diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c [] @@ -588,19 +588,23 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, struct ras_common_if *head, bool enable) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - union ta_ras_cmd_input info; + union ta_ras_cmd_input *info; int ret; if (!con) return -EINVAL; +info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); Spaces were used for indentation here not a tab. It might be useful to run your proposed patches through checkpatch Is this an actual bug fix as the previous use didn't zero unused info members? + if (!info) + return -ENOMEM; + if (!enable) { - info.disable_features = (struct ta_ras_disable_features_input) { + info->disable_features = (struct ta_ras_disable_features_input) { .block_id = amdgpu_ras_block_to_ta(head->block), .error_type = amdgpu_ras_error_to_ta(head->type), }; } else { - info.enable_features = (struct ta_ras_enable_features_input) { + info->enable_features = (struct ta_ras_enable_features_input) { .block_id = amdgpu_ras_block_to_ta(head->block), .error_type = amdgpu_ras_error_to_ta(head->type), }; @@ -609,26 +613,33 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, /* Do not enable if it is not allowed. */ WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); /* Are we alerady in that state we are going to set? */ - if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) - return 0; + if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) { And trivia: The !! uses with bool seem unnecessary and it's probably better to make amdgpu_ras_is_feature_enabled to return bool. Maybe something like: --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 538895cfd862..05c4eaf0ddfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -526,16 +526,16 @@ void amdgpu_ras_parse_status_code(struct amdgpu_device* adev, } /* feature ctl begin */ -static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, - struct ras_common_if *head) +static bool amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, + struct ras_common_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); return con->hw_supported & BIT(head->block); } -static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev, - struct ras_common_if *head) +static bool amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev, + struct ras_common_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -560,7 +560,7 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev, */ if (!amdgpu_ras_is_feature_allowed(adev, head)) return 0; - if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) + if (!(enable ^ amdgpu_ras_is_feature_enabled(adev, head))) And while we are at improving coding style I think that writing this as "if (enabled == amdgpu_ras_is_feature_enabled(adev, head))" would be much more readable. Christian. return 0; if (enable) { @@ -609,7 +609,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, /* Do not enable if it is not allowed. */ WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); /* Are we alerady in that state we are going to set? */ - if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) + if (!(enable ^ amdgpu_ras_is_feature_enabled(adev, head))) return 0; if (!amdgpu_ras_intr_triggered()) { ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd
[PATCH] MAINTAINERS: Remove me from amdgpu maintainers
Glad to spend time on kernel driver in past years. I've moved to new focus in umd and couldn't commit enough time to discussions. Signed-off-by: Chunming Zhou --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 938316092634..4ca508bd4c9e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14066,7 +14066,6 @@ F: drivers/net/wireless/quantenna RADEON and AMDGPU DRM DRIVERS M: Alex Deucher M: Christian König -M: David (ChunMing) Zhou L: amd-gfx@lists.freedesktop.org S: Supported T: git git://people.freedesktop.org/~agd5f/linux -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[pull] amdgpu drm-fixes-5.7
Hi Dave, Daniel, Fixes for 5.7. The following changes since commit e3dcd86b3b4c045a4db17c02340138a4c514fe20: Merge tag 'amd-drm-fixes-5.7-2020-04-29' of git://people.freedesktop.org/~agd5f/linux into drm-fixes (2020-05-01 11:19:55 +1000) are available in the Git repository at: git://people.freedesktop.org/~agd5f/linux tags/amd-drm-fixes-5.7-2020-05-06 for you to fetch changes up to e6142dd511425cb827b5db869f489eb81f5f994d: drm/amd/display: Prevent dpcd reads with passive dongles (2020-05-05 16:13:57 -0400) amd-drm-fixes-5.7-2020-05-06: amdgpu: - Runtime PM fixes - DC fix for PPC - Misc DC fixes Aurabindo Pillai (1): drm/amd/display: Prevent dpcd reads with passive dongles Daniel Kolesa (1): drm/amd/display: work around fp code being emitted outside of DC_FP_START/END Evan Quan (2): drm/amdgpu: move kfd suspend after ip_suspend_phase1 drm/amdgpu: drop redundant cg/pg ungate on runpm enter Michel Dänzer (1): drm/amdgpu/dc: Use WARN_ON_ONCE for ASSERT Roman Li (1): drm/amd/display: fix counter in wait_for_no_pipes_pending Sung Lee (1): drm/amd/display: Update DCN2.1 DV Code Revision drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 ++--- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 17 +++- drivers/gpu/drm/amd/display/dc/core/dc.c | 5 ++-- .../gpu/drm/amd/display/dc/dcn20/dcn20_resource.c | 31 -- .../display/dc/dml/dcn21/display_rq_dlg_calc_21.c | 8 +++--- drivers/gpu/drm/amd/display/dc/os_types.h | 2 +- 6 files changed, 43 insertions(+), 27 deletions(-) ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH -next] drm/amdgpu/navi10: fix unsigned comparison with 0
On Wed, May 6, 2020 at 3:03 AM ChenTao wrote: > > Fixes warning because size is uint32_t and can never be negtative > > drivers/gpu/drm/amd/amdgpu/../powerplay/navi10_ppt.c:1296:12: warning: > comparison of unsigned expression < 0 is always false [-Wtype-limits] >if (size < 0) > > Reported-by: Hulk Robot > Signed-off-by: ChenTao Applied. Thanks! Alex > --- > drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 2 -- > 1 file changed, 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c > b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c > index 2184d247a9f7..0c9be864d072 100644 > --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c > +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c > @@ -1293,8 +1293,6 @@ static int navi10_set_power_profile_mode(struct > smu_context *smu, long *input, u > } > > if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { > - if (size < 0) > - return -EINVAL; > > ret = smu_update_table(smu, >SMU_TABLE_ACTIVITY_MONITOR_COEFF, > WORKLOAD_PPLIB_CUSTOM_BIT, > -- > 2.22.0 > > ___ > dri-devel mailing list > dri-de...@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/dri-devel ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amd/display: Fix vblank and pageflip event handling for FreeSync
[Why] We're the drm vblank event a frame too early in the case where the pageflip happens close to VUPDATE and ends up blocking the signal. The implementation in DM was previously correct *before* we started sending vblank events from VSTARTUP unconditionally to handle cases where HUBP was off, OTG was ON and userspace was still requesting some DRM planes enabled. As part of that patch series we dropped VUPDATE since it was deemed close enough to VSTARTUP, but there's a key difference betweeen VSTARTUP and VUPDATE - the VUPDATE signal can be blocked if we're holding the pipe lock. There was a fix recently to revert the unconditional behavior for the DCN VSTARTUP vblank event since it was sending the pageflip event on the wrong frame - once again, due to blocking VUPDATE and having the address start scanning out two frames later. The problem with this fix is it didn't update the logic that calls drm_crtc_handle_vblank(), so the timestamps are totally bogus now. [How] Essentially reverts most of the original VSTARTUP series but retains the behavior to send back events when active planes == 0. Some refactoring/cleanup was done to not have duplicated code in both the handlers. Fixes: 16f17eda8bad ("drm/amd/display: Send vblank and user events at vsartup for DCN") Fixes: 3a2ce8d66a4b ("drm/amd/display: Disable VUpdate interrupt for DCN hardware") Fixes: 2b5aed9ac3f7 ("drm/amd/display: Fix pageflip event race condition for DCN.") Cc: Leo Li Cc: Mario Kleiner Signed-off-by: Nicholas Kazlauskas --- .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 137 +++--- 1 file changed, 55 insertions(+), 82 deletions(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 59f1d4a94f12..30ce28f7c444 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -441,7 +441,7 @@ static void dm_vupdate_high_irq(void *interrupt_params) /** * dm_crtc_high_irq() - Handles CRTC interrupt - * @interrupt_params: ignored + * @interrupt_params: used for determining the CRTC instance * * Handles the CRTC/VSYNC interrupt by notfying DRM's VBLANK * event handler. @@ -455,70 +455,6 @@ static void dm_crtc_high_irq(void *interrupt_params) unsigned long flags; acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK); - - if (acrtc) { - acrtc_state = to_dm_crtc_state(acrtc->base.state); - - DRM_DEBUG_VBL("crtc:%d, vupdate-vrr:%d\n", - acrtc->crtc_id, - amdgpu_dm_vrr_active(acrtc_state)); - - /* Core vblank handling at start of front-porch is only possible -* in non-vrr mode, as only there vblank timestamping will give -* valid results while done in front-porch. Otherwise defer it -* to dm_vupdate_high_irq after end of front-porch. -*/ - if (!amdgpu_dm_vrr_active(acrtc_state)) - drm_crtc_handle_vblank(&acrtc->base); - - /* Following stuff must happen at start of vblank, for crc -* computation and below-the-range btr support in vrr mode. -*/ - amdgpu_dm_crtc_handle_crc_irq(&acrtc->base); - - if (acrtc_state->stream && adev->family >= AMDGPU_FAMILY_AI && - acrtc_state->vrr_params.supported && - acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) { - spin_lock_irqsave(&adev->ddev->event_lock, flags); - mod_freesync_handle_v_update( - adev->dm.freesync_module, - acrtc_state->stream, - &acrtc_state->vrr_params); - - dc_stream_adjust_vmin_vmax( - adev->dm.dc, - acrtc_state->stream, - &acrtc_state->vrr_params.adjust); - spin_unlock_irqrestore(&adev->ddev->event_lock, flags); - } - } -} - -#if defined(CONFIG_DRM_AMD_DC_DCN) -/** - * dm_dcn_crtc_high_irq() - Handles VStartup interrupt for DCN generation ASICs - * @interrupt params - interrupt parameters - * - * Notify DRM's vblank event handler at VSTARTUP - * - * Unlike DCE hardware, we trigger the handler at VSTARTUP. at which: - * * We are close enough to VUPDATE - the point of no return for hw - * * We are in the fixed portion of variable front porch when vrr is enabled - * * We are before VUPDATE, where double-buffered vrr registers are swapped - * - * It is therefore the correct place to signal vblank, send user flip events, - * and update VRR. - */ -static void dm_dcn_crtc_high_irq(void *interrupt_params) -{ - struct common_irq_params *irq_params = interrupt_params; - struct amdgpu_dev
Re: [PATCH hmm v2 0/5] Adjust hmm_range_fault() API
On Fri, May 01, 2020 at 03:20:43PM -0300, Jason Gunthorpe wrote: > From: Jason Gunthorpe > > The API is a bit complicated for the uses we actually have, and > disucssions for simplifying have come up a number of times. > > This small series removes the customizable pfn format and simplifies the > return code of hmm_range_fault() > > All the drivers are adjusted to process in the simplified format. > I would appreciated tested-by's for the two drivers, thanks! > > v2: > - Move call chain to commit message > - Fix typo of HMM_PFN_REQ_FAULT > - Move nouveau_hmm_convert_pfn() to nouveau_svm.c > - Add acks and tested-bys > v1: > https://lore.kernel.org/r/0-v1-4eb72686de3c+5062-hmm_no_flags_...@mellanox.com > > Cc: Christoph Hellwig > Cc: John Hubbard > Cc: Jérôme Glisse > Cc: Ben Skeggs > To: Ralph Campbell > Cc: nouv...@lists.freedesktop.org > Cc: Niranjana Vishwanathapura > Cc: intel-...@lists.freedesktop.org > Cc: "Kuehling, Felix" > Cc: Alex Deucher > Cc: "Christian König" > Cc: "David (ChunMing) Zhou" > Cc: amd-gfx@lists.freedesktop.org > Cc: dri-de...@lists.freedesktop.org > Cc: linux-ker...@vger.kernel.org > Cc: "Yang, Philip" > To: linux...@kvack.org > > Jason Gunthorpe (5): > mm/hmm: make CONFIG_DEVICE_PRIVATE into a select > mm/hmm: make hmm_range_fault return 0 or -1 > drm/amdgpu: remove dead code after hmm_range_fault() > mm/hmm: remove HMM_PFN_SPECIAL > mm/hmm: remove the customizable pfn format from hmm_range_fault Applied to hmm.git, thanks Jason ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: allocate large structures dynamically
On Tue, 2020-05-05 at 16:01 +0200, Arnd Bergmann wrote: > After the structure was padded to 1024 bytes, it is no longer > suitable for being a local variable, as the function surpasses > the warning limit for 32-bit architectures: > > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:587:5: error: stack frame size of > 1072 bytes in function 'amdgpu_ras_feature_enable' > [-Werror,-Wframe-larger-than=] > int amdgpu_ras_feature_enable(struct amdgpu_device *adev, > ^ > > Use kzalloc() instead to get it from the heap. [] > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c [] > @@ -588,19 +588,23 @@ int amdgpu_ras_feature_enable(struct amdgpu_device > *adev, > struct ras_common_if *head, bool enable) > { > struct amdgpu_ras *con = amdgpu_ras_get_context(adev); > - union ta_ras_cmd_input info; > + union ta_ras_cmd_input *info; > int ret; > > if (!con) > return -EINVAL; > > +info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL); Spaces were used for indentation here not a tab. It might be useful to run your proposed patches through checkpatch Is this an actual bug fix as the previous use didn't zero unused info members? > + if (!info) > + return -ENOMEM; > + > if (!enable) { > - info.disable_features = (struct ta_ras_disable_features_input) { > + info->disable_features = (struct ta_ras_disable_features_input) > { > .block_id = amdgpu_ras_block_to_ta(head->block), > .error_type = amdgpu_ras_error_to_ta(head->type), > }; > } else { > - info.enable_features = (struct ta_ras_enable_features_input) { > + info->enable_features = (struct ta_ras_enable_features_input) { > .block_id = amdgpu_ras_block_to_ta(head->block), > .error_type = amdgpu_ras_error_to_ta(head->type), > }; > @@ -609,26 +613,33 @@ int amdgpu_ras_feature_enable(struct amdgpu_device > *adev, > /* Do not enable if it is not allowed. */ > WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); > /* Are we alerady in that state we are going to set? */ > - if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) > - return 0; > + if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) { And trivia: The !! uses with bool seem unnecessary and it's probably better to make amdgpu_ras_is_feature_enabled to return bool. Maybe something like: --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 538895cfd862..05c4eaf0ddfa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -526,16 +526,16 @@ void amdgpu_ras_parse_status_code(struct amdgpu_device* adev, } /* feature ctl begin */ -static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, - struct ras_common_if *head) +static bool amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, + struct ras_common_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); return con->hw_supported & BIT(head->block); } -static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev, - struct ras_common_if *head) +static bool amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev, + struct ras_common_if *head) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -560,7 +560,7 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev, */ if (!amdgpu_ras_is_feature_allowed(adev, head)) return 0; - if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) + if (!(enable ^ amdgpu_ras_is_feature_enabled(adev, head))) return 0; if (enable) { @@ -609,7 +609,7 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, /* Do not enable if it is not allowed. */ WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head)); /* Are we alerady in that state we are going to set? */ - if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) + if (!(enable ^ amdgpu_ras_is_feature_enabled(adev, head))) return 0; if (!amdgpu_ras_intr_triggered()) { ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/1] drm/amdgpu: Use GEM obj reference for KFD BOs
[AMD Official Use Only - Internal Distribution Only] Reviewed-by: Alex Sierra -Original Message- From: amd-gfx On Behalf Of Christian König Sent: Wednesday, May 6, 2020 3:12 AM To: Kuehling, Felix ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH 1/1] drm/amdgpu: Use GEM obj reference for KFD BOs Am 06.05.20 um 02:59 schrieb Felix Kuehling: > Releasing the AMDGPU BO ref directly leads to problems when BOs were > exported as DMA bufs. Releasing the GEM reference makes sure that the > AMDGPU/TTM BO is not freed too early. > > Also take a GEM reference when importing BOs from DMABufs to keep > references to imported BOs balances properly. > > Signed-off-by: Felix Kuehling > Tested-by: Alex Sierra Acked-by: Christian König > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +++-- > 1 file changed, 3 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 1247938b1ec1..da8b31a53291 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -1354,7 +1354,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( > } > > /* Free the BO*/ > - amdgpu_bo_unref(&mem->bo); > + drm_gem_object_put_unlocked(&mem->bo->tbo.base); > mutex_destroy(&mem->lock); > kfree(mem); > > @@ -1699,7 +1699,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev > *kgd, > | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE > | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; > > - (*mem)->bo = amdgpu_bo_ref(bo); > + drm_gem_object_get(&bo->tbo.base); > + (*mem)->bo = bo; > (*mem)->va = va; > (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? > AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Calex.sierra%40amd.com%7C14fc955a4c63411b3fe808d7f1952092%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637243495134045678&sdata=3QrUtuE4LU8V1xGRPjaQJ9QeAv9hKhFTZ7GLCuskVeM%3D&reserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 00/14] drm/radeon: remove comparison to bool
Am 06.05.2020 18:00 schrieb Alex Deucher : On Wed, May 6, 2020 at 10:27 AM Zheng Bin wrote: > > Zheng Bin (14): > drm/radeon: remove comparison to bool in btc_dpm.c > drm/radeon: remove comparison to bool in ci_dpm.c > drm/radeon: remove comparison to bool in ni_dpm.c > drm/radeon: remove comparison to bool in radeon_atpx_handler.c > drm/radeon: remove comparison to bool in radeon_object.c > drm/radeon: remove comparison to bool in radeon_ttm.c > drm/radeon: remove comparison to bool in r100.c > drm/radeon: remove comparison to bool in r300.c > drm/radeon: remove comparison to bool in r600.c > drm/radeon: remove comparison to bool in rs600.c > drm/radeon: remove comparison to bool in rs690.c > drm/radeon: remove comparison to bool in rv6xx_dpm.c > drm/radeon: remove comparison to bool in rv515.c > drm/radeon: remove comparison to bool in si_dpm.c Does the checker need to be fixed? All of these are comparing boolean variables to true/false. Seems like needless code churn to me. We should probably make sure that no new code like this leaks in, but I also don't see that this is necessary for the old driver stack. Christian. Alex > > drivers/gpu/drm/radeon/btc_dpm.c | 2 +- > drivers/gpu/drm/radeon/ci_dpm.c | 4 ++-- > drivers/gpu/drm/radeon/ni_dpm.c | 6 +++--- > drivers/gpu/drm/radeon/r100.c| 2 +- > drivers/gpu/drm/radeon/r300.c| 2 +- > drivers/gpu/drm/radeon/r600.c| 3 ++- > drivers/gpu/drm/radeon/radeon_atpx_handler.c | 4 ++-- > drivers/gpu/drm/radeon/radeon_object.c | 2 +- > drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- > drivers/gpu/drm/radeon/rs600.c | 2 +- > drivers/gpu/drm/radeon/rs690.c | 3 ++- > drivers/gpu/drm/radeon/rv515.c | 2 +- > drivers/gpu/drm/radeon/rv6xx_dpm.c | 2 +- > drivers/gpu/drm/radeon/si_dpm.c | 6 +++--- > 14 files changed, 22 insertions(+), 20 deletions(-) > > -- > 2.26.0.106.g9fadedd > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=02%7C01%7Cchristian.koenig%40amd.com%7C10c2a90728574bb20ef208d7f1d69e2b%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637243776401264275&sdata=Z6alCS8hPA7rWNKHimpkc6zBldtBagK0dGpX8mTOEZA%3D&reserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 0/1] Fiji GPU audio register timeout when in BACO state
On Sat, May 02, 2020 at 12:09:13PM +0200, Takashi Iwai wrote: > On Sat, 02 May 2020 09:27:31 +0200, > Takashi Iwai wrote: > > > > On Sat, 02 May 2020 09:17:28 +0200, > > Lukas Wunner wrote: > > > > > > On Sat, May 02, 2020 at 09:11:58AM +0200, Takashi Iwai wrote: > > > > --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > > > +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c > > > > @@ -673,6 +673,12 @@ static int amdgpu_dm_audio_component_bind(struct > > > > device *kdev, > > > > struct amdgpu_device *adev = dev->dev_private; > > > > struct drm_audio_component *acomp = data; > > > > > > > > + if (!device_link_add(hda_kdev, kdev, DL_FLAG_STATELESS | > > > > +DL_FLAG_PM_RUNTIME)) { > > > > + DRM_ERROR("DM: cannot add device link to audio > > > > device\n"); > > > > + return -ENOMEM; > > > > + } > > > > + > > > > > > Doesn't this duplicate drivers/pci/quirks.c:quirk_gpu_hda() ? > > > > Gah, you're right, that was the place I overlooked. > > It was a typical "false Eureka right-after-wakeup" phenomenon :) > > Need a vaccine aka coffee... > > > > So the runtime PM dependency must be already placed there, and the > > problem is not the lack of the dependency tree but the really other > > timing issue. Back to square. > > One interesting test is to open the stream while the mode isn't set > yet and see whether the same problem appears. > Namely, after the monitor is connected but no mode is set, run > directly like >aplay -Dhdmi:1,0 foo.wav > You might need to wrap the command with pasuspender if PA is active. I could not figure out how to get the interface for aplay set other than not specifying it and having it find the default device (which can change). I even used aplay -L and aplay -l to show devices. I could not get it working. Is there anything else I can try? I did not apply the last patch when it was pointed out that it is already a quirk. Regards, Nicholas > > > Takashi ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH hmm v2 5/5] mm/hmm: remove the customizable pfn format from hmm_range_fault
On Mon, May 04, 2020 at 06:30:00PM -0700, John Hubbard wrote: > On 2020-05-01 11:20, Jason Gunthorpe wrote: > > From: Jason Gunthorpe > > > > Presumably the intent here was that hmm_range_fault() could put the data > > into some HW specific format and thus avoid some work. However, nothing > > actually does that, and it isn't clear how anything actually could do that > > as hmm_range_fault() provides CPU addresses which must be DMA mapped. > > > > Perhaps there is some special HW that does not need DMA mapping, but we > > don't have any examples of this, and the theoretical performance win of > > avoiding an extra scan over the pfns array doesn't seem worth the > > complexity. Plus pfns needs to be scanned anyhow to sort out any > > DEVICE_PRIVATE pages. > > > > This version replaces the uint64_t with an usigned long containing a pfn > > and fixed flags. On input flags is filled with the HMM_PFN_REQ_* values, > > on successful output it is filled with HMM_PFN_* values, describing the > > state of the pages. > > > > Just some minor stuff below. I wasn't able to spot any errors in the code, > though, so these are just documentation nits. > > > ... > > > > > diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst > > index 9924f2caa0184c..c9f2329113a47f 100644 > > +++ b/Documentation/vm/hmm.rst > > @@ -185,9 +185,6 @@ The usage pattern is:: > > range.start = ...; > > range.end = ...; > > range.pfns = ...; > > That should be: > > range.hmm_pfns = ...; Yep > > > - range.flags = ...; > > - range.values = ...; > > - range.pfn_shift = ...; > > if (!mmget_not_zero(interval_sub->notifier.mm)) > > return -EFAULT; > > @@ -229,15 +226,10 @@ The hmm_range struct has 2 fields, default_flags and > > pfn_flags_mask, that specif > > fault or snapshot policy for the whole range instead of having to set them > > for each entry in the pfns array. > > -For instance, if the device flags for range.flags are:: > > +For instance if the device driver wants pages for a range with at least > > read > > +permission, it sets:: > > -range.flags[HMM_PFN_VALID] = (1 << 63); > > -range.flags[HMM_PFN_WRITE] = (1 << 62); > > - > > -and the device driver wants pages for a range with at least read > > permission, > > -it sets:: > > - > > -range->default_flags = (1 << 63); > > +range->default_flags = HMM_PFN_REQ_FAULT; > > range->pfn_flags_mask = 0; > > and calls hmm_range_fault() as described above. This will fill fault all > > pages > > @@ -246,18 +238,18 @@ in the range with at least read permission. > > Now let's say the driver wants to do the same except for one page in the > > range for > > which it wants to have write permission. Now driver set:: > > -range->default_flags = (1 << 63); > > -range->pfn_flags_mask = (1 << 62); > > -range->pfns[index_of_write] = (1 << 62); > > +range->default_flags = HMM_PFN_REQ_FAULT; > > +range->pfn_flags_mask = HMM_PFN_REQ_WRITE; > > +range->pfns[index_of_write] = HMM_PFN_REQ_WRITE; > > > All these choices for _WRITE behavior make it slightly confusing. I mean, it's > better than it was, but there are default flags, a mask, and an index as well, > and it looks like maybe we have a little more power and flexibility than > desirable? Nouveau for example is now just setting the mask only: The example is showing how to fault all pages but request write for only certain pages, ie it shows how to use default_flags and pfn_flags together in probably the only way that could make any sense > > @@ -542,12 +564,15 @@ static int nouveau_range_fault(struct nouveau_svmm > > *svmm, > > return -EBUSY; > > range.notifier_seq = mmu_interval_read_begin(range.notifier); > > - range.default_flags = 0; > > - range.pfn_flags_mask = -1UL; > > down_read(&mm->mmap_sem); > > ret = hmm_range_fault(&range); > > up_read(&mm->mmap_sem); > > if (ret) { > > + /* > > +* FIXME: the input PFN_REQ flags are destroyed on > > +* -EBUSY, we need to regenerate them, also for the > > +* other continue below > > +*/ > > How serious is this FIXME? It seems like we could get stuck in a loop here, > if we're not issuing a new REQ, right? Serious enough someone should fix it and not copy it into other drivers.. > > if (ret == -EBUSY) > > continue; > > return ret; > > @@ -562,7 +587,7 @@ static int nouveau_range_fault(struct nouveau_svmm > > *svmm, > > break; > > } > > - nouveau_dmem_convert_pfn(drm, &range); > > + nouveau_hmm_convert_pfn(drm, &range, ioctl_addr); > > svmm->vmm->vmm.object.client->super = true; > > ret = nvif_object_ioctl(&svmm->vmm->vmm.object, data, size, NULL); > > @@ -589,6 +614,7 @@
Re: [PATCH hmm v2 5/5] mm/hmm: remove the customizable pfn format from hmm_range_fault
On Fri, May 01, 2020 at 05:53:26PM -0700, Ralph Campbell wrote: > > Acked-by: Felix Kuehling > > Tested-by: Ralph Campbell > > Signed-off-by: Jason Gunthorpe > > Signed-off-by: Christoph Hellwig > > Documentation/vm/hmm.rst| 26 ++-- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 35 ++ > > drivers/gpu/drm/nouveau/nouveau_dmem.c | 27 +--- > > drivers/gpu/drm/nouveau/nouveau_dmem.h | 3 +- > > drivers/gpu/drm/nouveau/nouveau_svm.c | 87 - > > include/linux/hmm.h | 99 ++- > > mm/hmm.c| 160 +++- > > 7 files changed, 192 insertions(+), 245 deletions(-) > > > > ...snip... > > > +static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm, > > + struct hmm_range *range, u64 *ioctl_addr) > > +{ > > + unsigned long i, npages; > > + > > + /* > > +* The ioctl_addr prepared here is passed through nvif_object_ioctl() > > +* to an eventual DMA map in something like gp100_vmm_pgt_pfn() > > +* > > +* This is all just encoding the internal hmm reprensetation into a > > s/reprensetation/representation/ > > Looks good and still tests OK with nouveau. Got it, thanks Jason ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 00/14] drm/radeon: remove comparison to bool
On Wed, May 6, 2020 at 10:27 AM Zheng Bin wrote: > > Zheng Bin (14): > drm/radeon: remove comparison to bool in btc_dpm.c > drm/radeon: remove comparison to bool in ci_dpm.c > drm/radeon: remove comparison to bool in ni_dpm.c > drm/radeon: remove comparison to bool in radeon_atpx_handler.c > drm/radeon: remove comparison to bool in radeon_object.c > drm/radeon: remove comparison to bool in radeon_ttm.c > drm/radeon: remove comparison to bool in r100.c > drm/radeon: remove comparison to bool in r300.c > drm/radeon: remove comparison to bool in r600.c > drm/radeon: remove comparison to bool in rs600.c > drm/radeon: remove comparison to bool in rs690.c > drm/radeon: remove comparison to bool in rv6xx_dpm.c > drm/radeon: remove comparison to bool in rv515.c > drm/radeon: remove comparison to bool in si_dpm.c Does the checker need to be fixed? All of these are comparing boolean variables to true/false. Seems like needless code churn to me. Alex > > drivers/gpu/drm/radeon/btc_dpm.c | 2 +- > drivers/gpu/drm/radeon/ci_dpm.c | 4 ++-- > drivers/gpu/drm/radeon/ni_dpm.c | 6 +++--- > drivers/gpu/drm/radeon/r100.c| 2 +- > drivers/gpu/drm/radeon/r300.c| 2 +- > drivers/gpu/drm/radeon/r600.c| 3 ++- > drivers/gpu/drm/radeon/radeon_atpx_handler.c | 4 ++-- > drivers/gpu/drm/radeon/radeon_object.c | 2 +- > drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- > drivers/gpu/drm/radeon/rs600.c | 2 +- > drivers/gpu/drm/radeon/rs690.c | 3 ++- > drivers/gpu/drm/radeon/rv515.c | 2 +- > drivers/gpu/drm/radeon/rv6xx_dpm.c | 2 +- > drivers/gpu/drm/radeon/si_dpm.c | 6 +++--- > 14 files changed, 22 insertions(+), 20 deletions(-) > > -- > 2.26.0.106.g9fadedd > > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/3] drm/amdgpu: optimize amdgpu device attribute code
[AMD Official Use Only - Internal Distribution Only] Perhaps it's too much churn for this patch set, but I'd like to unify the pp func callbacks between powerplay and swsmu so we can drop all of the is_swsmu_supported() and function pointer checks sprinkled all through the code. Alex From: Wang, Kevin(Yang) Sent: Wednesday, May 6, 2020 7:04 AM To: Zhang, Hawking ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Liu, Monk ; Feng, Kenneth Subject: Re: [PATCH 2/3] drm/amdgpu: optimize amdgpu device attribute code [AMD Official Use Only - Internal Distribution Only] From: Zhang, Hawking Sent: Wednesday, May 6, 2020 5:26 PM To: Wang, Kevin(Yang) ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Liu, Monk ; Feng, Kenneth Subject: RE: [PATCH 2/3] drm/amdgpu: optimize amdgpu device attribute code [AMD Official Use Only - Internal Distribution Only] Hi Kelvin, Thanks for the series that remove the duplicated one_vf mode check in all the amdgpu_dpm functions. Can we split the patch into two? One for amdgpu device sysfs attr code refine, with the new dev_attr structures, the other for retiring all the unnecessary one_vf mode support. thanks your comment. [kevin]: Q1, agree, i will split it into two patch. +enum amdgpu_device_attr_states { + ATTR_STATE_UNSUPPORT = 0, + ATTR_STATE_SUPPORT, + ATTR_STATE_DEAD, + ATTR_STATE_ALIVE, +}; + The attr_states seems unnecessary to me. You need a flag to mark whether a particular attribute is supported by specific ASIC or not, right? Then just a bool variable should be good enough for this purpose, Like attr->supported. I' d like to understand the use case for DEAD and ALIVE. Accordingly, you can simplify the logic that only remove the supported ones. [kevin]: Q2, the origin idea, it is used to store sysfs file state, but for this case, we can try to drop DEAD & ALIVE state, because the origin code logic will exit directly when create file fail. If we have to introduce more complicated flags to indicate different status, I'd prefer to go directly to initialize one_vf mode attr sets and bare-metal attr sets directly. [kevin]: Q3, i'd like to keep this patch code, in fact, not all sysfs devices need to be created on bare-metal mode. the driver must check it at runtime. eg: is_sw_smu_support(), if (asic_chip == XXX), etc.. In addition, the function naming like default_attr_perform also confusing me. Would it be the function that used to update attr status? +static int default_attr_perform(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, + uint32_t mask) [kevin]: Q4, yes, the function is used to update attr status according to asic information at runtime. maybe rename to "attr_update" is better. Best Regards, Kevin Regards, Hawking -Original Message- From: Wang, Kevin(Yang) Sent: Wednesday, May 6, 2020 14:23 To: amd-gfx@lists.freedesktop.org Cc: Zhang, Hawking ; Deucher, Alexander ; Liu, Monk ; Feng, Kenneth ; Wang, Kevin(Yang) Subject: [PATCH 2/3] drm/amdgpu: optimize amdgpu device attribute code unified amdgpu device attribute node functions: 1. add some helper functions to create amdgpu device attribute node. 2. create device node according to device attr flags on different VF mode. 3. rename some functions name to adapt a new interface. 4. remove unneccessary virt mode check in inernal functions (xxx_show, xxx_store). Signed-off-by: Kevin Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 577 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h | 48 ++ 2 files changed, 271 insertions(+), 354 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index c762deb5abc7..367ac79418b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -154,18 +154,15 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso * */ -static ssize_t amdgpu_get_dpm_state(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t amdgpu_get_power_dpm_state(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; int ret; - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) - return 0; - ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -189,19 +186,16 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance"); } -static ssize_t amdgpu_set_dpm_state(st
Re: [PATCH] drm/amdgpu: force fbdev into vram
Am 06.05.20 um 16:14 schrieb Alex Deucher: We set the fb smem pointer to the offset into the BAR, so keep the fbdev bo in vram. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=207581 Fixes: 6c8d74caa2fa33 ("drm/amdgpu: Enable scatter gather display support") Signed-off-by: Alex Deucher Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 9ae7b61f696a..25ddb482466a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -133,8 +133,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, u32 cpp; u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED| - AMDGPU_GEM_CREATE_CPU_GTT_USWC; + AMDGPU_GEM_CREATE_VRAM_CLEARED; info = drm_get_format_info(adev->ddev, mode_cmd); cpp = info->cpp[0]; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 03/14] drm/radeon: remove comparison to bool in ni_dpm.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/ni_dpm.c:807:5-26: WARNING: Comparison to bool drivers/gpu/drm/radeon/ni_dpm.c:2466:5-36: WARNING: Comparison to boo drivers/gpu/drm/radeon/ni_dpm.c:3146:5-22: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/ni_dpm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c index b57c37ddd164..66c48ce107a5 100644 --- a/drivers/gpu/drm/radeon/ni_dpm.c +++ b/drivers/gpu/drm/radeon/ni_dpm.c @@ -804,7 +804,7 @@ static void ni_apply_state_adjust_rules(struct radeon_device *rdev, else max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; - if (rdev->pm.dpm.ac_power == false) { + if (!rdev->pm.dpm.ac_power) { for (i = 0; i < ps->performance_level_count; i++) { if (ps->performance_levels[i].mclk > max_limits->mclk) ps->performance_levels[i].mclk = max_limits->mclk; @@ -2463,7 +2463,7 @@ static int ni_populate_power_containment_values(struct radeon_device *rdev, u32 power_boost_limit; u8 max_ps_percent; - if (ni_pi->enable_power_containment == false) + if (!ni_pi->enable_power_containment) return 0; if (state->performance_level_count == 0) @@ -3143,7 +3143,7 @@ static int ni_initialize_smc_cac_tables(struct radeon_device *rdev) int i, ret; u32 reg; - if (ni_pi->enable_cac == false) + if (!ni_pi->enable_cac) return 0; cac_tables = kzalloc(sizeof(PP_NIslands_CACTABLES), GFP_KERNEL); -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 10/14] drm/radeon: remove comparison to bool in rs600.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/rs600.c:1132:5-37: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/rs600.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c index c88b4906f7bc..a7ff0609a3eb 100644 --- a/drivers/gpu/drm/radeon/rs600.c +++ b/drivers/gpu/drm/radeon/rs600.c @@ -1129,7 +1129,7 @@ int rs600_init(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* check if cards are posted or not */ - if (radeon_boot_test_post_card(rdev) == false) + if (!radeon_boot_test_post_card(rdev)) return -EINVAL; /* Initialize clocks */ -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 02/14] drm/radeon: remove comparison to bool in ci_dpm.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/ci_dpm.c:814:5-26: WARNING: Comparison to bool drivers/gpu/drm/radeon/ci_dpm.c:2916:6-21: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/ci_dpm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/ci_dpm.c b/drivers/gpu/drm/radeon/ci_dpm.c index 134aa2b01f90..c77ca911a8b6 100644 --- a/drivers/gpu/drm/radeon/ci_dpm.c +++ b/drivers/gpu/drm/radeon/ci_dpm.c @@ -811,7 +811,7 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev, else max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; - if (rdev->pm.dpm.ac_power == false) { + if (!rdev->pm.dpm.ac_power) { for (i = 0; i < ps->performance_level_count; i++) { if (ps->performance_levels[i].mclk > max_limits->mclk) ps->performance_levels[i].mclk = max_limits->mclk; @@ -2913,7 +2913,7 @@ static int ci_populate_single_memory_level(struct radeon_device *rdev, if (pi->mclk_stutter_mode_threshold && (memory_clock <= pi->mclk_stutter_mode_threshold) && - (pi->uvd_enabled == false) && + !pi->uvd_enabled && (RREG32(DPG_PIPE_STUTTER_CONTROL) & STUTTER_ENABLE) && (rdev->pm.dpm.new_active_crtc_count <= 2)) memory_level->StutterEnable = true; -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 01/14] drm/radeon: remove comparison to bool in btc_dpm.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/btc_dpm.c:2115:5-26: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/btc_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c index d1d8aaf8323c..60b32eba6f46 100644 --- a/drivers/gpu/drm/radeon/btc_dpm.c +++ b/drivers/gpu/drm/radeon/btc_dpm.c @@ -2112,7 +2112,7 @@ static void btc_apply_state_adjust_rules(struct radeon_device *rdev, else max_limits = &rdev->pm.dpm.dyn_state.max_clock_voltage_on_dc; - if (rdev->pm.dpm.ac_power == false) { + if (!rdev->pm.dpm.ac_power) { if (ps->high.mclk > max_limits->mclk) ps->high.mclk = max_limits->mclk; if (ps->high.sclk > max_limits->sclk) -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 11/14] drm/radeon: remove comparison to bool in rs690.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/rs690.c:190:6-35: WARNING: Comparison to bool drivers/gpu/drm/radeon/rs690.c:844:5-37: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/rs690.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/rs690.c b/drivers/gpu/drm/radeon/rs690.c index c296f94f9700..ddc3bfbb557c 100644 --- a/drivers/gpu/drm/radeon/rs690.c +++ b/drivers/gpu/drm/radeon/rs690.c @@ -187,7 +187,8 @@ static void rs690_mc_init(struct radeon_device *rdev) /* FastFB shall be used with UMA memory. Here it is simply disabled when sideport * memory is present. */ - if (rdev->mc.igp_sideport_enabled == false && radeon_fastfb == 1) { + if (!rdev->mc.igp_sideport_enabled && + radeon_fastfb == 1) { DRM_INFO("Direct mapping: aper base at 0x%llx, replaced by direct mapping base 0x%llx.\n", (unsigned long long)rdev->mc.aper_base, k8_addr); rdev->mc.aper_base = (resource_size_t)k8_addr; -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 06/14] drm/radeon: remove comparison to bool in radeon_ttm.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/radeon_ttm.c:141:6-62: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c index 5d50c9edbe80..d1fcb5f995b0 100644 --- a/drivers/gpu/drm/radeon/radeon_ttm.c +++ b/drivers/gpu/drm/radeon/radeon_ttm.c @@ -138,7 +138,7 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo, rbo = container_of(bo, struct radeon_bo, tbo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: - if (rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready == false) + if (!rbo->rdev->ring[radeon_copy_ring_index(rbo->rdev)].ready) radeon_ttm_placement_from_domain(rbo, RADEON_GEM_DOMAIN_CPU); else if (rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size && bo->mem.start < (rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT)) { -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 13/14] drm/radeon: remove comparison to bool in rv515.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/rv515.c:666:5-37: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/rv515.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c index 147e5cf8348d..77e6b9dcdb69 100644 --- a/drivers/gpu/drm/radeon/rv515.c +++ b/drivers/gpu/drm/radeon/rv515.c @@ -663,7 +663,7 @@ int rv515_init(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* check if cards are posted or not */ - if (radeon_boot_test_post_card(rdev) == false) + if (!radeon_boot_test_post_card(rdev)) return -EINVAL; /* Initialize clocks */ radeon_get_clock_info(rdev->ddev); -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 04/14] drm/radeon: remove comparison to bool in radeon_atpx_handler.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/radeon_atpx_handler.c:561:15-49: WARNING: Comparison to bool drivers/gpu/drm/radeon/radeon_atpx_handler.c:571:15-49: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_atpx_handler.c b/drivers/gpu/drm/radeon/radeon_atpx_handler.c index 6f93f54bf651..6131917322b4 100644 --- a/drivers/gpu/drm/radeon/radeon_atpx_handler.c +++ b/drivers/gpu/drm/radeon/radeon_atpx_handler.c @@ -558,7 +558,7 @@ static bool radeon_atpx_detect(void) while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) { vga_count++; - has_atpx |= (radeon_atpx_pci_probe_handle(pdev) == true); + has_atpx |= radeon_atpx_pci_probe_handle(pdev); parent_pdev = pci_upstream_bridge(pdev); d3_supported |= parent_pdev && parent_pdev->bridge_d3; @@ -568,7 +568,7 @@ static bool radeon_atpx_detect(void) while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_OTHER << 8, pdev)) != NULL) { vga_count++; - has_atpx |= (radeon_atpx_pci_probe_handle(pdev) == true); + has_atpx |= radeon_atpx_pci_probe_handle(pdev); parent_pdev = pci_upstream_bridge(pdev); d3_supported |= parent_pdev && parent_pdev->bridge_d3; -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 05/14] drm/radeon: remove comparison to bool in radeon_object.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/radeon_object.c:427:6-35: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/radeon_object.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index 140d94cc080d..f06c5e9dc72c 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -424,7 +424,7 @@ int radeon_bo_evict_vram(struct radeon_device *rdev) /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */ #ifndef CONFIG_HIBERNATION if (rdev->flags & RADEON_IS_IGP) { - if (rdev->mc.igp_sideport_enabled == false) + if (!rdev->mc.igp_sideport_enabled) /* Useless to evict on IGP chips */ return 0; } -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 14/14] drm/radeon: remove comparison to bool in si_dpm.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/si_dpm.c:1885:7-44: WARNING: Comparison to bool drivers/gpu/drm/radeon/si_dpm.c:2463:5-22: WARNING: Comparison to bool drivers/gpu/drm/radeon/si_dpm.c:3015:5-26: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/si_dpm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c index a167e1c36d24..98e288e5d8c9 100644 --- a/drivers/gpu/drm/radeon/si_dpm.c +++ b/drivers/gpu/drm/radeon/si_dpm.c @@ -1882,7 +1882,7 @@ static void si_initialize_powertune_defaults(struct radeon_device *rdev) update_dte_from_pl2 = true; break; default: - if (si_pi->dte_data.enable_dte_by_default == true) + if (si_pi->dte_data.enable_dte_by_default) DRM_ERROR("DTE is not enabled!\n"); break; } @@ -2460,7 +2460,7 @@ static int si_initialize_smc_dte_tables(struct radeon_device *rdev) if (dte_data == NULL) si_pi->enable_dte = false; - if (si_pi->enable_dte == false) + if (!si_pi->enable_dte) return 0; if (dte_data->k <= 0) @@ -3012,7 +3012,7 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev, if (ps->performance_levels[i].vddc > ps->performance_levels[i+1].vddc) ps->performance_levels[i].vddc = ps->performance_levels[i+1].vddc; } - if (rdev->pm.dpm.ac_power == false) { + if (!rdev->pm.dpm.ac_power) { for (i = 0; i < ps->performance_level_count; i++) { if (ps->performance_levels[i].mclk > max_limits->mclk) ps->performance_levels[i].mclk = max_limits->mclk; -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 00/14] drm/radeon: remove comparison to bool
Zheng Bin (14): drm/radeon: remove comparison to bool in btc_dpm.c drm/radeon: remove comparison to bool in ci_dpm.c drm/radeon: remove comparison to bool in ni_dpm.c drm/radeon: remove comparison to bool in radeon_atpx_handler.c drm/radeon: remove comparison to bool in radeon_object.c drm/radeon: remove comparison to bool in radeon_ttm.c drm/radeon: remove comparison to bool in r100.c drm/radeon: remove comparison to bool in r300.c drm/radeon: remove comparison to bool in r600.c drm/radeon: remove comparison to bool in rs600.c drm/radeon: remove comparison to bool in rs690.c drm/radeon: remove comparison to bool in rv6xx_dpm.c drm/radeon: remove comparison to bool in rv515.c drm/radeon: remove comparison to bool in si_dpm.c drivers/gpu/drm/radeon/btc_dpm.c | 2 +- drivers/gpu/drm/radeon/ci_dpm.c | 4 ++-- drivers/gpu/drm/radeon/ni_dpm.c | 6 +++--- drivers/gpu/drm/radeon/r100.c| 2 +- drivers/gpu/drm/radeon/r300.c| 2 +- drivers/gpu/drm/radeon/r600.c| 3 ++- drivers/gpu/drm/radeon/radeon_atpx_handler.c | 4 ++-- drivers/gpu/drm/radeon/radeon_object.c | 2 +- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- drivers/gpu/drm/radeon/rs600.c | 2 +- drivers/gpu/drm/radeon/rs690.c | 3 ++- drivers/gpu/drm/radeon/rv515.c | 2 +- drivers/gpu/drm/radeon/rv6xx_dpm.c | 2 +- drivers/gpu/drm/radeon/si_dpm.c | 6 +++--- 14 files changed, 22 insertions(+), 20 deletions(-) -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 07/14] drm/radeon: remove comparison to bool in r100.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/r100.c:4065:5-37: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/r100.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 24c8db673931..298a9c22074a 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -4062,7 +4062,7 @@ int r100_init(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* check if cards are posted or not */ - if (radeon_boot_test_post_card(rdev) == false) + if (!radeon_boot_test_post_card(rdev)) return -EINVAL; /* Set asic errata */ r100_errata(rdev); -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 12/14] drm/radeon: remove comparison to bool in rv6xx_dpm.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/rv6xx_dpm.c:1571:5-20: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/rv6xx_dpm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/rv6xx_dpm.c b/drivers/gpu/drm/radeon/rv6xx_dpm.c index 69d380fff22a..ebdb937730c2 100644 --- a/drivers/gpu/drm/radeon/rv6xx_dpm.c +++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c @@ -1568,7 +1568,7 @@ int rv6xx_dpm_enable(struct radeon_device *rdev) rv6xx_program_engine_speed_parameters(rdev); rv6xx_enable_display_gap(rdev, true); - if (pi->display_gap == false) + if (!pi->display_gap) rv6xx_enable_display_gap(rdev, false); rv6xx_program_power_level_enter_state(rdev); -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 08/14] drm/radeon: remove comparison to bool in r300.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/r300.c:1544:5-37: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/r300.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 3b7ead5be5bf..26448b6e97e6 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -1541,7 +1541,7 @@ int r300_init(struct radeon_device *rdev) RREG32(R_0007C0_CP_STAT)); } /* check if cards are posted or not */ - if (radeon_boot_test_post_card(rdev) == false) + if (!radeon_boot_test_post_card(rdev)) return -EINVAL; /* Set asic errata */ r300_errata(rdev); -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 09/14] drm/radeon: remove comparison to bool in r600.c
Fixes coccicheck warning: drivers/gpu/drm/radeon/r600.c:1494:8-37: WARNING: Comparison to bool Reported-by: Hulk Robot Signed-off-by: Zheng Bin --- drivers/gpu/drm/radeon/r600.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index d9a33ca768f3..a37f50907107 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -1491,7 +1491,8 @@ static int r600_mc_init(struct radeon_device *rdev) /* FastFB shall be used with UMA memory. Here it is simply disabled when sideport * memory is present. */ - if (rdev->mc.igp_sideport_enabled == false && radeon_fastfb == 1) { + if (!rdev->mc.igp_sideport_enabled && + radeon_fastfb == 1) { DRM_INFO("Direct mapping: aper base at 0x%llx, replaced by direct mapping base 0x%llx.\n", (unsigned long long)rdev->mc.aper_base, k8_addr); rdev->mc.aper_base = (resource_size_t)k8_addr; -- 2.26.0.106.g9fadedd ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: force fbdev into vram
We set the fb smem pointer to the offset into the BAR, so keep the fbdev bo in vram. Bug: https://bugzilla.kernel.org/show_bug.cgi?id=207581 Fixes: 6c8d74caa2fa33 ("drm/amdgpu: Enable scatter gather display support") Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 9ae7b61f696a..25ddb482466a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -133,8 +133,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev, u32 cpp; u64 flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | - AMDGPU_GEM_CREATE_VRAM_CLEARED| - AMDGPU_GEM_CREATE_CPU_GTT_USWC; + AMDGPU_GEM_CREATE_VRAM_CLEARED; info = drm_get_format_info(adev->ddev, mode_cmd); cpp = info->cpp[0]; -- 2.25.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [bug report] drm/amdgpu: add amdgpu_ras.c to support ras (v2)
On Wed, May 06, 2020 at 10:10:56AM +, Pan, Xinhui wrote: > [AMD Official Use Only - Internal Distribution Only] > > no. below function checks if block is valid or not. > I think you need check your code_checker. or you were checking on a very old > codebase? > > /* check if ras is supported on block, say, sdma, gfx */ > static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, > unsigned int block) Ah! That's right. Thanks. What happens here is that Smatch thinks amdgpu_ras_is_supported() always returns false because there is a bug in how it tracks ras->supported. I will fix this. regards, dan carpenter ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/3] drm/amdgpu: optimize amdgpu device attribute code
[AMD Official Use Only - Internal Distribution Only] From: Zhang, Hawking Sent: Wednesday, May 6, 2020 5:26 PM To: Wang, Kevin(Yang) ; amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Liu, Monk ; Feng, Kenneth Subject: RE: [PATCH 2/3] drm/amdgpu: optimize amdgpu device attribute code [AMD Official Use Only - Internal Distribution Only] Hi Kelvin, Thanks for the series that remove the duplicated one_vf mode check in all the amdgpu_dpm functions. Can we split the patch into two? One for amdgpu device sysfs attr code refine, with the new dev_attr structures, the other for retiring all the unnecessary one_vf mode support. thanks your comment. [kevin]: Q1, agree, i will split it into two patch. +enum amdgpu_device_attr_states { + ATTR_STATE_UNSUPPORT = 0, + ATTR_STATE_SUPPORT, + ATTR_STATE_DEAD, + ATTR_STATE_ALIVE, +}; + The attr_states seems unnecessary to me. You need a flag to mark whether a particular attribute is supported by specific ASIC or not, right? Then just a bool variable should be good enough for this purpose, Like attr->supported. I' d like to understand the use case for DEAD and ALIVE. Accordingly, you can simplify the logic that only remove the supported ones. [kevin]: Q2, the origin idea, it is used to store sysfs file state, but for this case, we can try to drop DEAD & ALIVE state, because the origin code logic will exit directly when create file fail. If we have to introduce more complicated flags to indicate different status, I'd prefer to go directly to initialize one_vf mode attr sets and bare-metal attr sets directly. [kevin]: Q3, i'd like to keep this patch code, in fact, not all sysfs devices need to be created on bare-metal mode. the driver must check it at runtime. eg: is_sw_smu_support(), if (asic_chip == XXX), etc.. In addition, the function naming like default_attr_perform also confusing me. Would it be the function that used to update attr status? +static int default_attr_perform(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, + uint32_t mask) [kevin]: Q4, yes, the function is used to update attr status according to asic information at runtime. maybe rename to "attr_update" is better. Best Regards, Kevin Regards, Hawking -Original Message- From: Wang, Kevin(Yang) Sent: Wednesday, May 6, 2020 14:23 To: amd-gfx@lists.freedesktop.org Cc: Zhang, Hawking ; Deucher, Alexander ; Liu, Monk ; Feng, Kenneth ; Wang, Kevin(Yang) Subject: [PATCH 2/3] drm/amdgpu: optimize amdgpu device attribute code unified amdgpu device attribute node functions: 1. add some helper functions to create amdgpu device attribute node. 2. create device node according to device attr flags on different VF mode. 3. rename some functions name to adapt a new interface. 4. remove unneccessary virt mode check in inernal functions (xxx_show, xxx_store). Signed-off-by: Kevin Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 577 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h | 48 ++ 2 files changed, 271 insertions(+), 354 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index c762deb5abc7..367ac79418b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -154,18 +154,15 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso * */ -static ssize_t amdgpu_get_dpm_state(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t amdgpu_get_power_dpm_state(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; int ret; - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) - return 0; - ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -189,19 +186,16 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance"); } -static ssize_t amdgpu_set_dpm_state(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) +static ssize_t amdgpu_set_power_dpm_state(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; en
Re: [PATCH] drm/amdgpu: avoid clearing freed bo with sdma in gpu reset
Yes, exactly that one. Regards, Christian. Am 06.05.20 um 12:35 schrieb Zhou, Tiecheng: [AMD Official Use Only - Internal Distribution Only] Thanks, Christian, Is this the fix that you are mentioning: commit 1675c3a24d075d484377003789245f48c2114a0b Author: Christian König Date: Fri Feb 21 15:10:31 2020 +0100 drm/amdgpu: stop disable the scheduler during HW fini When we stop the HW for example for GPU reset we should not stop the front-end scheduler. Otherwise we run into intermediate failures during command submission. The scheduler should only be stopped in very few cases: 1. We can't get the hardware working in ring or IB test after a GPU reset. 2. The KIQ scheduler is not used in the front-end and should be disabled during GPU reset. 3. In amdgpu_ring_fini() when the driver unloads. Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Nirmoy Das Test-by: Dennis Li Signed-off-by: Alex Deucher Thanks Tiecheng -Original Message- From: Christian König Sent: Wednesday, May 6, 2020 5:44 PM To: Zhou, Tiecheng ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: avoid clearing freed bo with sdma in gpu reset NAK, the fundamental problem was that we disabled the SDMA paging queue during reset: [ 885.694682] [drm] schedpage0 is not ready, skipping [ 885.694682] [drm] schedpage1 is not ready, skipping This is fixed by now, so the problem should not happen any more. Regards, Christian. Am 06.05.20 um 11:36 schrieb Tiecheng Zhou: WHY: For V320 passthrough and "modprobe amdgpu lockup_timeout=500", there will be kernel NULL pointer when using quark ~ BACO reset, for instance: hang_vm_compute0_bad_cs_dispatch.lua hang_vm_dma0_corrupted_header.lua etc. - [ 884.792885] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring comp_1.0.0 timeout, signaled seq=3, emitted seq=4 [ 884.793772] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* Process information: process quark pid 16939 thread quark pid 16940 [ 884.859979] amdgpu: [powerplay] set virtualization GFX DPM policy success [ 884.861003] amdgpu: [powerplay] activate virtualization GFX DPM policy success [ 884.861065] amdgpu: [powerplay] set virtualization VCE DPM policy success [ 885.693554] [drm:amdgpu_cs_ioctl [amdgpu]] *ERROR* Failed to initialize parser -125! [ 885.694682] [drm] schedpage0 is not ready, skipping [ 885.694682] [drm] schedpage1 is not ready, skipping [ 885.694720] [drm:amdgpu_gem_va_ioctl [amdgpu]] *ERROR* Couldn't update BO_VA (-2) [ 885.695328] BUG: unable to handle kernel NULL pointer dereference at 0008 [ 885.695909] PGD 0 P4D 0 [ 885.696104] Oops: [#1] SMP PTI [ 885.696368] CPU: 2 PID: 16940 Comm: quark Tainted: G OE 4.19.52+ #6 [ 885.696945] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 885.697593] RIP: 0010:amdgpu_vm_sdma_commit+0x59/0x130 [amdgpu] ... [ 885.705042] Call Trace: [ 885.705251] ? amdgpu_vm_bo_update_mapping+0xdf/0xf0 [amdgpu] [ 885.705696] ? amdgpu_vm_clear_freed+0xcc/0x1b0 [amdgpu] [ 885.706112] ? amdgpu_gem_va_ioctl+0x4a1/0x510 [amdgpu] [ 885.706493] ? __radix_tree_delete+0x7e/0xa0 [ 885.706822] ? amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.707220] ? drm_ioctl_kernel+0xaa/0xf0 [drm] [ 885.707568] ? amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.707962] ? drm_ioctl_kernel+0xaa/0xf0 [drm] [ 885.708294] ? drm_ioctl+0x3a7/0x3f0 [drm] [ 885.708632] ? amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.709032] ? unmap_region+0xd9/0x120 [ 885.709328] ? amdgpu_drm_ioctl+0x49/0x80 [amdgpu] [ 885.709684] ? do_vfs_ioctl+0xa1/0x620 [ 885.709971] ? do_munmap+0x32e/0x430 [ 885.710232] ? ksys_ioctl+0x66/0x70 [ 885.710513] ? __x64_sys_ioctl+0x16/0x20 [ 885.710806] ? do_syscall_64+0x55/0x100 [ 885.711092] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 ... [ 885.719408] ---[ end trace 7ee3180f42e9f572 ]--- [ 885.719766] RIP: 0010:amdgpu_vm_sdma_commit+0x59/0x130 [amdgpu] ... - the NULL pointer (entity->rq == NULL in amdgpu_vm_sdma_commit()) as follows: 1. quark sends bad job that triggers job timeout; 2. guest KMD detects the job timeout and goes to gpu recovery, and it goes to ip_suspend for SDMA, and it sets sdma[].sched.ready to false; 3. quark sends UNMAP operation through amdgpu_gem_va_ioctl, and guest KMD goes through amdgpu_gem_va_update_vm and finally goes to amdgpu_vm_sdma_commit, it goes to amdgpu_job_submit to drm_sched_job_init 4. drm_sched_job_init fails at drm_sched_pick_best() since sdma[].sched.ready is set to false; in the meanwhile entity->rq becomes NULL; 5. quark sends other UNMAP operations through amdgpu_gem_va_ioctl, while this time there will be NULL pointer because entity->rq is NULL; the above sequence occurs only when "modprobe amdgpu lockup_timeout=500". it does not occur when lockup_time
RE: [PATCH] drm/amdgpu: avoid clearing freed bo with sdma in gpu reset
[AMD Official Use Only - Internal Distribution Only] Thanks, Christian, Is this the fix that you are mentioning: commit 1675c3a24d075d484377003789245f48c2114a0b Author: Christian König Date: Fri Feb 21 15:10:31 2020 +0100 drm/amdgpu: stop disable the scheduler during HW fini When we stop the HW for example for GPU reset we should not stop the front-end scheduler. Otherwise we run into intermediate failures during command submission. The scheduler should only be stopped in very few cases: 1. We can't get the hardware working in ring or IB test after a GPU reset. 2. The KIQ scheduler is not used in the front-end and should be disabled during GPU reset. 3. In amdgpu_ring_fini() when the driver unloads. Signed-off-by: Christian König Reviewed-by: Alex Deucher Acked-by: Nirmoy Das Test-by: Dennis Li Signed-off-by: Alex Deucher Thanks Tiecheng -Original Message- From: Christian König Sent: Wednesday, May 6, 2020 5:44 PM To: Zhou, Tiecheng ; amd-gfx@lists.freedesktop.org Subject: Re: [PATCH] drm/amdgpu: avoid clearing freed bo with sdma in gpu reset NAK, the fundamental problem was that we disabled the SDMA paging queue during reset: > [ 885.694682] [drm] schedpage0 is not ready, skipping [ 885.694682] > [drm] schedpage1 is not ready, skipping This is fixed by now, so the problem should not happen any more. Regards, Christian. Am 06.05.20 um 11:36 schrieb Tiecheng Zhou: > WHY: > For V320 passthrough and "modprobe amdgpu lockup_timeout=500", there > will be kernel NULL pointer when using quark ~ BACO reset, for instance: >hang_vm_compute0_bad_cs_dispatch.lua >hang_vm_dma0_corrupted_header.lua >etc. > - > [ 884.792885] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring > comp_1.0.0 timeout, signaled seq=3, emitted seq=4 [ 884.793772] > [drm:amdgpu_job_timedout [amdgpu]] *ERROR* Process information: > process quark pid 16939 thread quark pid 16940 [ 884.859979] amdgpu: > [powerplay] set virtualization GFX DPM policy success [ 884.861003] > amdgpu: [powerplay] activate virtualization GFX DPM policy success [ > 884.861065] amdgpu: [powerplay] set virtualization VCE DPM policy success [ > 885.693554] [drm:amdgpu_cs_ioctl [amdgpu]] *ERROR* Failed to initialize > parser -125! > [ 885.694682] [drm] schedpage0 is not ready, skipping [ 885.694682] > [drm] schedpage1 is not ready, skipping [ 885.694720] > [drm:amdgpu_gem_va_ioctl [amdgpu]] *ERROR* Couldn't update BO_VA (-2) > [ 885.695328] BUG: unable to handle kernel NULL pointer dereference > at 0008 [ 885.695909] PGD 0 P4D 0 [ 885.696104] Oops: > [#1] SMP PTI > [ 885.696368] CPU: 2 PID: 16940 Comm: quark Tainted: G OE > 4.19.52+ #6 > [ 885.696945] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), > BIOS 1.10.2-1 04/01/2014 [ 885.697593] RIP: > 0010:amdgpu_vm_sdma_commit+0x59/0x130 [amdgpu] ... > [ 885.705042] Call Trace: > [ 885.705251] ? amdgpu_vm_bo_update_mapping+0xdf/0xf0 [amdgpu] [ > 885.705696] ? amdgpu_vm_clear_freed+0xcc/0x1b0 [amdgpu] [ > 885.706112] ? amdgpu_gem_va_ioctl+0x4a1/0x510 [amdgpu] [ 885.706493] > ? __radix_tree_delete+0x7e/0xa0 [ 885.706822] ? > amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.707220] ? > drm_ioctl_kernel+0xaa/0xf0 [drm] [ 885.707568] ? > amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.707962] ? > drm_ioctl_kernel+0xaa/0xf0 [drm] [ 885.708294] ? > drm_ioctl+0x3a7/0x3f0 [drm] [ 885.708632] ? > amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.709032] ? > unmap_region+0xd9/0x120 [ 885.709328] ? amdgpu_drm_ioctl+0x49/0x80 > [amdgpu] [ 885.709684] ? do_vfs_ioctl+0xa1/0x620 [ 885.709971] ? > do_munmap+0x32e/0x430 [ 885.710232] ? ksys_ioctl+0x66/0x70 [ > 885.710513] ? __x64_sys_ioctl+0x16/0x20 [ 885.710806] ? > do_syscall_64+0x55/0x100 [ 885.711092] ? > entry_SYSCALL_64_after_hwframe+0x44/0xa9 > ... > [ 885.719408] ---[ end trace 7ee3180f42e9f572 ]--- [ 885.719766] > RIP: 0010:amdgpu_vm_sdma_commit+0x59/0x130 [amdgpu] ... > - > > the NULL pointer (entity->rq == NULL in amdgpu_vm_sdma_commit()) as follows: > 1. quark sends bad job that triggers job timeout; 2. guest KMD detects > the job timeout and goes to gpu recovery, and it goes to > ip_suspend for SDMA, and it sets sdma[].sched.ready to false; 3. > quark sends UNMAP operation through amdgpu_gem_va_ioctl, and guest KMD goes > through amdgpu_gem_va_update_vm and finally goes to amdgpu_vm_sdma_commit, > it goes to amdgpu_job_submit to drm_sched_job_init 4. > drm_sched_job_init fails at drm_sched_pick_best() since > sdma[].sched.ready is set to false; in the meanwhile entity->rq > becomes NULL; 5. quark sends other UNMAP operations through > amdgpu_gem_va_ioctl, while this time > there will be NULL pointer because entity->rq is NULL; > > the above sequence occurs only when "modprobe amdgpu loc
Re: [bug report] drm/amdgpu: add amdgpu_ras.c to support ras (v2)
[AMD Official Use Only - Internal Distribution Only] no. below function checks if block is valid or not. I think you need check your code_checker. or you were checking on a very old codebase? /* check if ras is supported on block, say, sdma, gfx */ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block) From: Dan Carpenter Sent: Wednesday, May 6, 2020 5:17:34 PM To: Zhou1, Tao Cc: Pan, Xinhui ; amd-gfx@lists.freedesktop.org Subject: Re: [bug report] drm/amdgpu: add amdgpu_ras.c to support ras (v2) On Wed, May 06, 2020 at 07:26:16AM +, Zhou1, Tao wrote: > [AMD Public Use] > > Hi Dan: > > Please check the following piece of code in > amdgpu_ras_debugfs_ctrl_parse_data: > >if (op != -1) { >if (amdgpu_ras_find_block_id_by_name(block_name, &block_id)) >return -EINVAL; > >data->head.block = block_id; > > amdgpu_ras_find_block_id_by_name will return error directly if someone try to > provide an invalid block_name intentionally via debugfs. > No. It's the line after that which are the problem. drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 147 static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, 148 const char __user *buf, size_t size, 149 loff_t *pos, struct ras_debug_if *data) 150 { 151 ssize_t s = min_t(u64, 64, size); 152 char str[65]; 153 char block_name[33]; 154 char err[9] = "ue"; 155 int op = -1; 156 int block_id; 157 uint32_t sub_block; 158 u64 address, value; 159 160 if (*pos) 161 return -EINVAL; 162 *pos = size; 163 164 memset(str, 0, sizeof(str)); 165 memset(data, 0, sizeof(*data)); 166 167 if (copy_from_user(str, buf, s)) 168 return -EINVAL; 169 170 if (sscanf(str, "disable %32s", block_name) == 1) 171 op = 0; 172 else if (sscanf(str, "enable %32s %8s", block_name, err) == 2) 173 op = 1; 174 else if (sscanf(str, "inject %32s %8s", block_name, err) == 2) 175 op = 2; 176 else if (str[0] && str[1] && str[2] && str[3]) 177 /* ascii string, but commands are not matched. */ Say we don't write an ascii string. 178 return -EINVAL; 179 180 if (op != -1) { 181 if (amdgpu_ras_find_block_id_by_name(block_name, &block_id)) 182 return -EINVAL; 183 184 data->head.block = block_id; 185 /* only ue and ce errors are supported */ 186 if (!memcmp("ue", err, 2)) 187 data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 188 else if (!memcmp("ce", err, 2)) 189 data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; 190 else 191 return -EINVAL; 192 193 data->op = op; 194 195 if (op == 2) { 196 if (sscanf(str, "%*s %*s %*s %u %llu %llu", 197 &sub_block, &address, &value) != 3) 198 if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", 199 &sub_block, &address, &value) != 3) 200 return -EINVAL; 201 data->head.sub_block_index = sub_block; 202 data->inject.address = address; 203 data->inject.value = value; 204 } 205 } else { 206 if (size < sizeof(*data)) 207 return -EINVAL; 208 209 if (copy_from_user(data, buf, sizeof(*data))) ^^^ This lets us set the data->head.block to whatever we want. Premusably there is a trusted app which knows how to write the correct values. But if it has a bug that will cause a crash and we'll have to find a way to disable it in the kernel for kernel lock down mode etc so either way we'll need to do a bit of work. 210 return -EINVAL; 211 } 212 213 return 0; 214 } regards, dan carpenter ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: avoid clearing freed bo with sdma in gpu reset
NAK, the fundamental problem was that we disabled the SDMA paging queue during reset: [ 885.694682] [drm] schedpage0 is not ready, skipping [ 885.694682] [drm] schedpage1 is not ready, skipping This is fixed by now, so the problem should not happen any more. Regards, Christian. Am 06.05.20 um 11:36 schrieb Tiecheng Zhou: WHY: For V320 passthrough and "modprobe amdgpu lockup_timeout=500", there will be kernel NULL pointer when using quark ~ BACO reset, for instance: hang_vm_compute0_bad_cs_dispatch.lua hang_vm_dma0_corrupted_header.lua etc. - [ 884.792885] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring comp_1.0.0 timeout, signaled seq=3, emitted seq=4 [ 884.793772] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* Process information: process quark pid 16939 thread quark pid 16940 [ 884.859979] amdgpu: [powerplay] set virtualization GFX DPM policy success [ 884.861003] amdgpu: [powerplay] activate virtualization GFX DPM policy success [ 884.861065] amdgpu: [powerplay] set virtualization VCE DPM policy success [ 885.693554] [drm:amdgpu_cs_ioctl [amdgpu]] *ERROR* Failed to initialize parser -125! [ 885.694682] [drm] schedpage0 is not ready, skipping [ 885.694682] [drm] schedpage1 is not ready, skipping [ 885.694720] [drm:amdgpu_gem_va_ioctl [amdgpu]] *ERROR* Couldn't update BO_VA (-2) [ 885.695328] BUG: unable to handle kernel NULL pointer dereference at 0008 [ 885.695909] PGD 0 P4D 0 [ 885.696104] Oops: [#1] SMP PTI [ 885.696368] CPU: 2 PID: 16940 Comm: quark Tainted: G OE 4.19.52+ #6 [ 885.696945] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 885.697593] RIP: 0010:amdgpu_vm_sdma_commit+0x59/0x130 [amdgpu] ... [ 885.705042] Call Trace: [ 885.705251] ? amdgpu_vm_bo_update_mapping+0xdf/0xf0 [amdgpu] [ 885.705696] ? amdgpu_vm_clear_freed+0xcc/0x1b0 [amdgpu] [ 885.706112] ? amdgpu_gem_va_ioctl+0x4a1/0x510 [amdgpu] [ 885.706493] ? __radix_tree_delete+0x7e/0xa0 [ 885.706822] ? amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.707220] ? drm_ioctl_kernel+0xaa/0xf0 [drm] [ 885.707568] ? amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.707962] ? drm_ioctl_kernel+0xaa/0xf0 [drm] [ 885.708294] ? drm_ioctl+0x3a7/0x3f0 [drm] [ 885.708632] ? amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.709032] ? unmap_region+0xd9/0x120 [ 885.709328] ? amdgpu_drm_ioctl+0x49/0x80 [amdgpu] [ 885.709684] ? do_vfs_ioctl+0xa1/0x620 [ 885.709971] ? do_munmap+0x32e/0x430 [ 885.710232] ? ksys_ioctl+0x66/0x70 [ 885.710513] ? __x64_sys_ioctl+0x16/0x20 [ 885.710806] ? do_syscall_64+0x55/0x100 [ 885.711092] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 ... [ 885.719408] ---[ end trace 7ee3180f42e9f572 ]--- [ 885.719766] RIP: 0010:amdgpu_vm_sdma_commit+0x59/0x130 [amdgpu] ... - the NULL pointer (entity->rq == NULL in amdgpu_vm_sdma_commit()) as follows: 1. quark sends bad job that triggers job timeout; 2. guest KMD detects the job timeout and goes to gpu recovery, and it goes to ip_suspend for SDMA, and it sets sdma[].sched.ready to false; 3. quark sends UNMAP operation through amdgpu_gem_va_ioctl, and guest KMD goes through amdgpu_gem_va_update_vm and finally goes to amdgpu_vm_sdma_commit, it goes to amdgpu_job_submit to drm_sched_job_init 4. drm_sched_job_init fails at drm_sched_pick_best() since sdma[].sched.ready is set to false; in the meanwhile entity->rq becomes NULL; 5. quark sends other UNMAP operations through amdgpu_gem_va_ioctl, while this time there will be NULL pointer because entity->rq is NULL; the above sequence occurs only when "modprobe amdgpu lockup_timeout=500". it does not occur when lockup_timeout=1 (default) because step 2. KMD detects job timeout will be sometime after quark sends UNMAP operations; i.e. quark UNMAP opeartions are finished before sdma ip suspend. HOW: here is to add mutex_lock to wait to avoid using sdma during gpu reset. Signed-off-by: Tiecheng Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e205ecc75a21..018b88f3b6da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2047,6 +2047,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence *f = NULL; int r; + mutex_lock(&adev->lock_reset); + while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); @@ -2062,6 +2064,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); + mutex_unlock(&adev->lock_reset); return r; }
[PATCH] drm/amdgpu: avoid clearing freed bo with sdma in gpu reset
WHY: For V320 passthrough and "modprobe amdgpu lockup_timeout=500", there will be kernel NULL pointer when using quark ~ BACO reset, for instance: hang_vm_compute0_bad_cs_dispatch.lua hang_vm_dma0_corrupted_header.lua etc. - [ 884.792885] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring comp_1.0.0 timeout, signaled seq=3, emitted seq=4 [ 884.793772] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* Process information: process quark pid 16939 thread quark pid 16940 [ 884.859979] amdgpu: [powerplay] set virtualization GFX DPM policy success [ 884.861003] amdgpu: [powerplay] activate virtualization GFX DPM policy success [ 884.861065] amdgpu: [powerplay] set virtualization VCE DPM policy success [ 885.693554] [drm:amdgpu_cs_ioctl [amdgpu]] *ERROR* Failed to initialize parser -125! [ 885.694682] [drm] schedpage0 is not ready, skipping [ 885.694682] [drm] schedpage1 is not ready, skipping [ 885.694720] [drm:amdgpu_gem_va_ioctl [amdgpu]] *ERROR* Couldn't update BO_VA (-2) [ 885.695328] BUG: unable to handle kernel NULL pointer dereference at 0008 [ 885.695909] PGD 0 P4D 0 [ 885.696104] Oops: [#1] SMP PTI [ 885.696368] CPU: 2 PID: 16940 Comm: quark Tainted: G OE 4.19.52+ #6 [ 885.696945] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1 04/01/2014 [ 885.697593] RIP: 0010:amdgpu_vm_sdma_commit+0x59/0x130 [amdgpu] ... [ 885.705042] Call Trace: [ 885.705251] ? amdgpu_vm_bo_update_mapping+0xdf/0xf0 [amdgpu] [ 885.705696] ? amdgpu_vm_clear_freed+0xcc/0x1b0 [amdgpu] [ 885.706112] ? amdgpu_gem_va_ioctl+0x4a1/0x510 [amdgpu] [ 885.706493] ? __radix_tree_delete+0x7e/0xa0 [ 885.706822] ? amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.707220] ? drm_ioctl_kernel+0xaa/0xf0 [drm] [ 885.707568] ? amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.707962] ? drm_ioctl_kernel+0xaa/0xf0 [drm] [ 885.708294] ? drm_ioctl+0x3a7/0x3f0 [drm] [ 885.708632] ? amdgpu_gem_va_map_flags+0x70/0x70 [amdgpu] [ 885.709032] ? unmap_region+0xd9/0x120 [ 885.709328] ? amdgpu_drm_ioctl+0x49/0x80 [amdgpu] [ 885.709684] ? do_vfs_ioctl+0xa1/0x620 [ 885.709971] ? do_munmap+0x32e/0x430 [ 885.710232] ? ksys_ioctl+0x66/0x70 [ 885.710513] ? __x64_sys_ioctl+0x16/0x20 [ 885.710806] ? do_syscall_64+0x55/0x100 [ 885.711092] ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 ... [ 885.719408] ---[ end trace 7ee3180f42e9f572 ]--- [ 885.719766] RIP: 0010:amdgpu_vm_sdma_commit+0x59/0x130 [amdgpu] ... - the NULL pointer (entity->rq == NULL in amdgpu_vm_sdma_commit()) as follows: 1. quark sends bad job that triggers job timeout; 2. guest KMD detects the job timeout and goes to gpu recovery, and it goes to ip_suspend for SDMA, and it sets sdma[].sched.ready to false; 3. quark sends UNMAP operation through amdgpu_gem_va_ioctl, and guest KMD goes through amdgpu_gem_va_update_vm and finally goes to amdgpu_vm_sdma_commit, it goes to amdgpu_job_submit to drm_sched_job_init 4. drm_sched_job_init fails at drm_sched_pick_best() since sdma[].sched.ready is set to false; in the meanwhile entity->rq becomes NULL; 5. quark sends other UNMAP operations through amdgpu_gem_va_ioctl, while this time there will be NULL pointer because entity->rq is NULL; the above sequence occurs only when "modprobe amdgpu lockup_timeout=500". it does not occur when lockup_timeout=1 (default) because step 2. KMD detects job timeout will be sometime after quark sends UNMAP operations; i.e. quark UNMAP opeartions are finished before sdma ip suspend. HOW: here is to add mutex_lock to wait to avoid using sdma during gpu reset. Signed-off-by: Tiecheng Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index e205ecc75a21..018b88f3b6da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2047,6 +2047,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence *f = NULL; int r; + mutex_lock(&adev->lock_reset); + while (!list_empty(&vm->freed)) { mapping = list_first_entry(&vm->freed, struct amdgpu_bo_va_mapping, list); @@ -2062,6 +2064,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, amdgpu_vm_free_mapping(adev, vm, mapping, f); if (r) { dma_fence_put(f); + mutex_unlock(&adev->lock_reset); return r; } } @@ -2073,6 +2076,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, dma_fence_put(f); } + mutex_unlock(&adev->lock_reset); return 0; } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman
RE: [PATCH 2/3] drm/amdgpu: optimize amdgpu device attribute code
[AMD Official Use Only - Internal Distribution Only] Hi Kelvin, Thanks for the series that remove the duplicated one_vf mode check in all the amdgpu_dpm functions. Can we split the patch into two? One for amdgpu device sysfs attr code refine, with the new dev_attr structures, the other for retiring all the unnecessary one_vf mode support. +enum amdgpu_device_attr_states { + ATTR_STATE_UNSUPPORT = 0, + ATTR_STATE_SUPPORT, + ATTR_STATE_DEAD, + ATTR_STATE_ALIVE, +}; + The attr_states seems unnecessary to me. You need a flag to mark whether a particular attribute is supported by specific ASIC or not, right? Then just a bool variable should be good enough for this purpose, Like attr->supported. I' d like to understand the use case for DEAD and ALIVE. Accordingly, you can simplify the logic that only remove the supported ones. If we have to introduce more complicated flags to indicate different status, I'd prefer to go directly to initialize one_vf mode attr sets and bare-metal attr sets directly. In addition, the function naming like default_attr_perform also confusing me. Would it be the function that used to update attr status? +static int default_attr_perform(struct amdgpu_device *adev, struct amdgpu_device_attr *attr, + uint32_t mask) Regards, Hawking -Original Message- From: Wang, Kevin(Yang) Sent: Wednesday, May 6, 2020 14:23 To: amd-gfx@lists.freedesktop.org Cc: Zhang, Hawking ; Deucher, Alexander ; Liu, Monk ; Feng, Kenneth ; Wang, Kevin(Yang) Subject: [PATCH 2/3] drm/amdgpu: optimize amdgpu device attribute code unified amdgpu device attribute node functions: 1. add some helper functions to create amdgpu device attribute node. 2. create device node according to device attr flags on different VF mode. 3. rename some functions name to adapt a new interface. 4. remove unneccessary virt mode check in inernal functions (xxx_show, xxx_store). Signed-off-by: Kevin Wang --- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 577 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h | 48 ++ 2 files changed, 271 insertions(+), 354 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index c762deb5abc7..367ac79418b9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -154,18 +154,15 @@ int amdgpu_dpm_read_sensor(struct amdgpu_device *adev, enum amd_pp_sensors senso * */ -static ssize_t amdgpu_get_dpm_state(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t amdgpu_get_power_dpm_state(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type pm; int ret; - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) - return 0; - ret = pm_runtime_get_sync(ddev->dev); if (ret < 0) return ret; @@ -189,19 +186,16 @@ static ssize_t amdgpu_get_dpm_state(struct device *dev, (pm == POWER_STATE_TYPE_BALANCED) ? "balanced" : "performance"); } -static ssize_t amdgpu_set_dpm_state(struct device *dev, - struct device_attribute *attr, - const char *buf, - size_t count) +static ssize_t amdgpu_set_power_dpm_state(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = ddev->dev_private; enum amd_pm_state_type state; int ret; - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) - return -EINVAL; - if (strncmp("battery", buf, strlen("battery")) == 0) state = POWER_STATE_TYPE_BATTERY; else if (strncmp("balanced", buf, strlen("balanced")) == 0) @@ -294,18 +288,15 @@ static ssize_t amdgpu_set_dpm_state(struct device *dev, * */ -static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev, - struct device_attribute *attr, - char *buf) +static ssize_t amdgpu_get_power_dpm_force_performance_level(struct device *dev, + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev
Re: [bug report] drm/amdgpu: add amdgpu_ras.c to support ras (v2)
On Wed, May 06, 2020 at 07:26:16AM +, Zhou1, Tao wrote: > [AMD Public Use] > > Hi Dan: > > Please check the following piece of code in > amdgpu_ras_debugfs_ctrl_parse_data: > > if (op != -1) { > if (amdgpu_ras_find_block_id_by_name(block_name, &block_id)) > return -EINVAL; > > data->head.block = block_id; > > amdgpu_ras_find_block_id_by_name will return error directly if someone try to > provide an invalid block_name intentionally via debugfs. > No. It's the line after that which are the problem. drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c 147 static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, 148 const char __user *buf, size_t size, 149 loff_t *pos, struct ras_debug_if *data) 150 { 151 ssize_t s = min_t(u64, 64, size); 152 char str[65]; 153 char block_name[33]; 154 char err[9] = "ue"; 155 int op = -1; 156 int block_id; 157 uint32_t sub_block; 158 u64 address, value; 159 160 if (*pos) 161 return -EINVAL; 162 *pos = size; 163 164 memset(str, 0, sizeof(str)); 165 memset(data, 0, sizeof(*data)); 166 167 if (copy_from_user(str, buf, s)) 168 return -EINVAL; 169 170 if (sscanf(str, "disable %32s", block_name) == 1) 171 op = 0; 172 else if (sscanf(str, "enable %32s %8s", block_name, err) == 2) 173 op = 1; 174 else if (sscanf(str, "inject %32s %8s", block_name, err) == 2) 175 op = 2; 176 else if (str[0] && str[1] && str[2] && str[3]) 177 /* ascii string, but commands are not matched. */ Say we don't write an ascii string. 178 return -EINVAL; 179 180 if (op != -1) { 181 if (amdgpu_ras_find_block_id_by_name(block_name, &block_id)) 182 return -EINVAL; 183 184 data->head.block = block_id; 185 /* only ue and ce errors are supported */ 186 if (!memcmp("ue", err, 2)) 187 data->head.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; 188 else if (!memcmp("ce", err, 2)) 189 data->head.type = AMDGPU_RAS_ERROR__SINGLE_CORRECTABLE; 190 else 191 return -EINVAL; 192 193 data->op = op; 194 195 if (op == 2) { 196 if (sscanf(str, "%*s %*s %*s %u %llu %llu", 197 &sub_block, &address, &value) != 3) 198 if (sscanf(str, "%*s %*s %*s 0x%x 0x%llx 0x%llx", 199 &sub_block, &address, &value) != 3) 200 return -EINVAL; 201 data->head.sub_block_index = sub_block; 202 data->inject.address = address; 203 data->inject.value = value; 204 } 205 } else { 206 if (size < sizeof(*data)) 207 return -EINVAL; 208 209 if (copy_from_user(data, buf, sizeof(*data))) ^^^ This lets us set the data->head.block to whatever we want. Premusably there is a trusted app which knows how to write the correct values. But if it has a bug that will cause a crash and we'll have to find a way to disable it in the kernel for kernel lock down mode etc so either way we'll need to do a bit of work. 210 return -EINVAL; 211 } 212 213 return 0; 214 } regards, dan carpenter ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 1/4] drm/amdgpu: switch to common xgmi ta helpers
[AMD Public Use] Reviewed-by: John Clements -Original Message- From: Zhou1, Tao Sent: Wednesday, May 6, 2020 4:39 PM To: Zhang, Hawking ; amd-gfx@lists.freedesktop.org; Deucher, Alexander ; Clements, John ; Gao, Likun ; Chen, Guchun ; Li, Dennis Cc: Zhang, Hawking Subject: RE: [PATCH 1/4] drm/amdgpu: switch to common xgmi ta helpers [AMD Public Use] The series is: Reviewed-by: Tao Zhou > -Original Message- > From: Hawking Zhang > Sent: 2020年5月6日 14:39 > To: amd-gfx@lists.freedesktop.org; Deucher, Alexander > ; Clements, John ; > Gao, Likun ; Chen, Guchun ; > Li, Dennis ; Zhou1, Tao > Cc: Zhang, Hawking > Subject: [PATCH 1/4] drm/amdgpu: switch to common xgmi ta helpers > > get_hive_id/get_node_id/get_topology_info/set_topology_info > are common xgmi command supported by TA for all the ASICs that support > xgmi link. They should be implemented as common helper functions to > avoid duplicated code per IP generation > > Signed-off-by: Hawking Zhang > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 115 > ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 24 +++ > drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 121 > > 3 files changed, 123 insertions(+), 137 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > index f061ad6..bb5b510 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > @@ -664,6 +664,121 @@ int psp_xgmi_initialize(struct psp_context *psp) > return ret; > } > > +int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id) { > + struct ta_xgmi_shared_memory *xgmi_cmd; > + int ret; > + > + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp- > >xgmi_context.xgmi_shared_buf; > + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); > + > + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; > + > + /* Invoke xgmi ta to get hive id */ > + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); > + if (ret) > + return ret; > + > + *hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id; > + > + return 0; > +} > + > +int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) { > + struct ta_xgmi_shared_memory *xgmi_cmd; > + int ret; > + > + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp- > >xgmi_context.xgmi_shared_buf; > + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); > + > + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; > + > + /* Invoke xgmi ta to get the node id */ > + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); > + if (ret) > + return ret; > + > + *node_id = xgmi_cmd->xgmi_out_message.get_node_id.node_id; > + > + return 0; > +} > + > +int psp_xgmi_get_topology_info(struct psp_context *psp, > +int number_devices, > +struct psp_xgmi_topology_info *topology) { > + struct ta_xgmi_shared_memory *xgmi_cmd; > + struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; > + struct ta_xgmi_cmd_get_topology_info_output > *topology_info_output; > + int i; > + int ret; > + > + if (!topology || topology->num_nodes > > TA_XGMI__MAX_CONNECTED_NODES) > + return -EINVAL; > + > + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp- > >xgmi_context.xgmi_shared_buf; > + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); > + > + /* Fill in the shared memory with topology information as input */ > + topology_info_input = &xgmi_cmd- > >xgmi_in_message.get_topology_info; > + xgmi_cmd->cmd_id = > TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; > + topology_info_input->num_nodes = number_devices; > + > + for (i = 0; i < topology_info_input->num_nodes; i++) { > + topology_info_input->nodes[i].node_id = topology- > >nodes[i].node_id; > + topology_info_input->nodes[i].num_hops = topology- > >nodes[i].num_hops; > + topology_info_input->nodes[i].is_sharing_enabled = > topology->nodes[i].is_sharing_enabled; > + topology_info_input->nodes[i].sdma_engine = topology- > >nodes[i].sdma_engine; > + } > + > + /* Invoke xgmi ta to get the topology information */ > + ret = psp_xgmi_invoke(psp, > TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); > + if (ret) > + return ret; > + > + /* Read the output topology information from the shared memory */ > + topology_info_output = &xgmi_cmd- > >xgmi_out_message.get_topology_info; > + topology->num_nodes = xgmi_cmd- > >xgmi_out_message.get_topology_info.num_nodes; > + for (i = 0; i < topology->num_nodes; i++) { > + topology->nodes[i].node_id = topology_info_output- > >nodes[i].node_id; > + topology->nodes[i].num_hops = topology_info_output- > >nodes[i].num_hops; > + topology->nodes[i].is_sharing_enabled = > topology_
RE: [PATCH] drm/amdgpu: use node_id and node_size to calcualte dram_base_address
[AMD Official Use Only - Internal Distribution Only] Reviewed-by: John Clements -Original Message- From: Hawking Zhang Sent: Wednesday, May 6, 2020 2:42 PM To: amd-gfx@lists.freedesktop.org; Deucher, Alexander ; Clements, John Cc: Zhang, Hawking Subject: [PATCH] drm/amdgpu: use node_id and node_size to calcualte dram_base_address physical_node_id * node_segment_size should be the dram_base_address for current gpu node in xgmi config Signed-off-by: Hawking Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_df.h | 3 -- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 27 ++-- drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 54 3 files changed, 2 insertions(+), 82 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h index 057f6ea..61a26c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h @@ -52,9 +52,6 @@ struct amdgpu_df_funcs { uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val); void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val, uint32_t ficadl_val, uint32_t ficadh_val); - uint64_t (*get_dram_base_addr)(struct amdgpu_device *adev, - uint32_t df_inst); - uint32_t (*get_df_inst_id)(struct amdgpu_device *adev); }; struct amdgpu_df { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 48c0ce1..90610b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -649,31 +649,8 @@ void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr) { - uint32_t df_inst_id; - uint64_t dram_base_addr = 0; - const struct amdgpu_df_funcs *df_funcs = adev->df.funcs; - - if ((!df_funcs) || - (!df_funcs->get_df_inst_id) || - (!df_funcs->get_dram_base_addr)) { - dev_warn(adev->dev, -"XGMI: relative phy_addr algorithm is not supported\n"); - return addr; - } - - if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) { - dev_warn(adev->dev, -"failed to disable DF-Cstate, DF register may not be accessible\n"); - return addr; - } - - df_inst_id = df_funcs->get_df_inst_id(adev); - dram_base_addr = df_funcs->get_dram_base_addr(adev, df_inst_id); - - if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_ALLOW)) - dev_warn(adev->dev, "failed to enable DF-Cstate\n"); - - return addr + dram_base_addr; + struct amdgpu_xgmi *xgmi = &adev->gmc.xgmi; + return (addr + xgmi->physical_node_id * xgmi->node_segment_size); } static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg) diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 5a1bd8e..a7b8292 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -686,58 +686,6 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev, } } -static uint64_t df_v3_6_get_dram_base_addr(struct amdgpu_device *adev, - uint32_t df_inst) -{ - uint32_t base_addr_reg_val = 0; - uint64_t base_addr = 0; - - base_addr_reg_val = RREG32_PCIE(smnDF_CS_UMC_AON0_DramBaseAddress0 + - df_inst * DF_3_6_SMN_REG_INST_DIST); - - if (REG_GET_FIELD(base_addr_reg_val, - DF_CS_UMC_AON0_DramBaseAddress0, - AddrRngVal) == 0) { - DRM_WARN("address range not valid"); - return 0; - } - - base_addr = REG_GET_FIELD(base_addr_reg_val, - DF_CS_UMC_AON0_DramBaseAddress0, - DramBaseAddr); - - return base_addr << 28; -} - -static uint32_t df_v3_6_get_df_inst_id(struct amdgpu_device *adev) -{ - uint32_t xgmi_node_id = 0; - uint32_t df_inst_id = 0; - - /* Walk through DF dst nodes to find current XGMI node */ - for (df_inst_id = 0; df_inst_id < DF_3_6_INST_CNT; df_inst_id++) { - - xgmi_node_id = RREG32_PCIE(smnDF_CS_UMC_AON0_DramLimitAddress0 + - df_inst_id * DF_3_6_SMN_REG_INST_DIST); - xgmi_node_id = REG_GET_FIELD(xgmi_node_id, -DF_CS_UMC_AON0_DramLimitAddress0, -DstFabricID); - - /* TODO: establish reason dest fabric id is offset by 7 */ - xgmi_node_id = xgmi_node_id >> 7; - - if (adev->gmc.xgmi.physical_
RE: [PATCH 1/4] drm/amdgpu: switch to common xgmi ta helpers
[AMD Public Use] The series is: Reviewed-by: Tao Zhou > -Original Message- > From: Hawking Zhang > Sent: 2020年5月6日 14:39 > To: amd-gfx@lists.freedesktop.org; Deucher, Alexander > ; Clements, John > ; Gao, Likun ; Chen, > Guchun ; Li, Dennis ; > Zhou1, Tao > Cc: Zhang, Hawking > Subject: [PATCH 1/4] drm/amdgpu: switch to common xgmi ta helpers > > get_hive_id/get_node_id/get_topology_info/set_topology_info > are common xgmi command supported by TA for all the ASICs that support > xgmi link. They should be implemented as common helper functions to avoid > duplicated code per IP generation > > Signed-off-by: Hawking Zhang > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 115 > ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 24 +++ > drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 121 > 3 files changed, 123 insertions(+), 137 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > index f061ad6..bb5b510 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > @@ -664,6 +664,121 @@ int psp_xgmi_initialize(struct psp_context *psp) > return ret; > } > > +int psp_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id) { > + struct ta_xgmi_shared_memory *xgmi_cmd; > + int ret; > + > + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp- > >xgmi_context.xgmi_shared_buf; > + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); > + > + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; > + > + /* Invoke xgmi ta to get hive id */ > + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); > + if (ret) > + return ret; > + > + *hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id; > + > + return 0; > +} > + > +int psp_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) { > + struct ta_xgmi_shared_memory *xgmi_cmd; > + int ret; > + > + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp- > >xgmi_context.xgmi_shared_buf; > + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); > + > + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; > + > + /* Invoke xgmi ta to get the node id */ > + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); > + if (ret) > + return ret; > + > + *node_id = xgmi_cmd->xgmi_out_message.get_node_id.node_id; > + > + return 0; > +} > + > +int psp_xgmi_get_topology_info(struct psp_context *psp, > +int number_devices, > +struct psp_xgmi_topology_info *topology) { > + struct ta_xgmi_shared_memory *xgmi_cmd; > + struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; > + struct ta_xgmi_cmd_get_topology_info_output > *topology_info_output; > + int i; > + int ret; > + > + if (!topology || topology->num_nodes > > TA_XGMI__MAX_CONNECTED_NODES) > + return -EINVAL; > + > + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp- > >xgmi_context.xgmi_shared_buf; > + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); > + > + /* Fill in the shared memory with topology information as input */ > + topology_info_input = &xgmi_cmd- > >xgmi_in_message.get_topology_info; > + xgmi_cmd->cmd_id = > TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; > + topology_info_input->num_nodes = number_devices; > + > + for (i = 0; i < topology_info_input->num_nodes; i++) { > + topology_info_input->nodes[i].node_id = topology- > >nodes[i].node_id; > + topology_info_input->nodes[i].num_hops = topology- > >nodes[i].num_hops; > + topology_info_input->nodes[i].is_sharing_enabled = > topology->nodes[i].is_sharing_enabled; > + topology_info_input->nodes[i].sdma_engine = topology- > >nodes[i].sdma_engine; > + } > + > + /* Invoke xgmi ta to get the topology information */ > + ret = psp_xgmi_invoke(psp, > TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); > + if (ret) > + return ret; > + > + /* Read the output topology information from the shared memory */ > + topology_info_output = &xgmi_cmd- > >xgmi_out_message.get_topology_info; > + topology->num_nodes = xgmi_cmd- > >xgmi_out_message.get_topology_info.num_nodes; > + for (i = 0; i < topology->num_nodes; i++) { > + topology->nodes[i].node_id = topology_info_output- > >nodes[i].node_id; > + topology->nodes[i].num_hops = topology_info_output- > >nodes[i].num_hops; > + topology->nodes[i].is_sharing_enabled = > topology_info_output->nodes[i].is_sharing_enabled; > + topology->nodes[i].sdma_engine = topology_info_output- > >nodes[i].sdma_engine; > + } > + > + return 0; > +} > + > +int psp_xgmi_set_topology_info(struct psp_context *psp, > +int number_devices, > +struct psp_xgmi_topology_info *topology)
Re: [PATCH 1/1] drm/amdgpu: Use GEM obj reference for KFD BOs
Am 06.05.20 um 02:59 schrieb Felix Kuehling: Releasing the AMDGPU BO ref directly leads to problems when BOs were exported as DMA bufs. Releasing the GEM reference makes sure that the AMDGPU/TTM BO is not freed too early. Also take a GEM reference when importing BOs from DMABufs to keep references to imported BOs balances properly. Signed-off-by: Felix Kuehling Tested-by: Alex Sierra Acked-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1247938b1ec1..da8b31a53291 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1354,7 +1354,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } /* Free the BO*/ - amdgpu_bo_unref(&mem->bo); + drm_gem_object_put_unlocked(&mem->bo->tbo.base); mutex_destroy(&mem->lock); kfree(mem); @@ -1699,7 +1699,8 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, | KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE | KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE; - (*mem)->bo = amdgpu_bo_ref(bo); + drm_gem_object_get(&bo->tbo.base); + (*mem)->bo = bo; (*mem)->va = va; (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset v4
Am 06.05.20 um 05:45 schrieb Zhao, Jiange: [AMD Official Use Only - Internal Distribution Only] Hi Christian, Hi Jiange, well that looks correct to me, but seems to be a bit to complicated. What exactly was wrong with version 3? (1) If you open amdgpu_autodump, use it and close it, then you can't open it again, because wait_for_completion_interruptible_timeout() would decrement amdgpu_autodump.dumping.done to 0, then .open() would always return failure except the first time. In this case we should probably just use complete_all() instead of just complete(). So that the struct complete stays in the completed state. (2) reset lock is not optimal in this case. Because usermode app would take any operation at any time and there are so many race conditions to solve. A dedicated lock would be simpler and clearer. I don't think that this will work. Using the reset lock is mandatory here or otherwise we always race between a new process opening the file and an ongoing GPU reset. Just imagine what happens when the process which waited for the GPU reset event doesn't do a dump, but just closes and immediately reopens the file while the last reset is still ongoing. What we could do here is using mutex_trylock() on the reset lock and return -EBUSY when a reset is ongoing. Or maybe better mutex_lock_interruptible(). Please completely drop this extra check. Waking up the queue and waiting for completion should always work when done right. This check is very necessary, because if there is no usermode app listening, the following wait_for_completion_interruptible_timeout() would wait until timeout anyway, which is 10 minutes for nothing. This is not what we wanted. See the wait_event_* documentation, exactly that's what you should never do. Instead just signal the struct completion with complete_all() directly after it is created. This way the wakeup is a no-op and waiting for the struct completion continues immediately. Regards, Christian. Jiange -Original Message- From: Koenig, Christian Sent: Wednesday, April 29, 2020 10:09 PM To: Pelloux-prayer, Pierre-eric ; Zhao, Jiange ; amd-gfx@lists.freedesktop.org Cc: Kuehling, Felix ; Deucher, Alexander ; Liu, Monk ; Zhang, Hawking Subject: Re: [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset v4 Am 29.04.20 um 16:04 schrieb Pierre-Eric Pelloux-Prayer: Hi Jiange, This version seems to work fine. Tested-by: Pierre-Eric Pelloux-Prayer On 29/04/2020 07:08, Zhao, Jiange wrote: [AMD Official Use Only - Internal Distribution Only] Hi all, I worked out the race condition and here is version 5. Please have a look. Jiange - - - - - - - - - - - - - - *From:* Zhao, Jiange *Sent:* Wednesday, April 29, 2020 1:06 PM *To:* amd-gfx@lists.freedesktop.org *Cc:* Koenig, Christian ; Kuehling, Felix ; Pelloux-prayer, Pierre-eric ; Deucher, Alexander ; Zhang, Hawking ; Liu, Monk ; Zhao, Jiange *Subject:* [PATCH] drm/amdgpu: Add autodump debugfs node for gpu reset v4 From: Jiange Zhao When GPU got timeout, it would notify an interested part of an opportunity to dump info before actual GPU reset. A usermode app would open 'autodump' node under debugfs system and poll() for readable/writable. When a GPU reset is due, amdgpu would notify usermode app through wait_queue_head and give it 10 minutes to dump info. After usermode app has done its work, this 'autodump' node is closed. On node closure, amdgpu gets to know the dump is done through the completion that is triggered in release(). There is no write or read callback because necessary info can be obtained through dmesg and umr. Messages back and forth between usermode app and amdgpu are unnecessary. v2: (1) changed 'registered' to 'app_listening' (2) add a mutex in open() to prevent race condition v3 (chk): grab the reset lock to avoid race in autodump_open, rename debugfs file to amdgpu_autodump, provide autodump_read as well, style and code cleanups v4
RE: [bug report] drm/amdgpu: add amdgpu_ras.c to support ras (v2)
[AMD Public Use] Hi Dan: Please check the following piece of code in amdgpu_ras_debugfs_ctrl_parse_data: if (op != -1) { if (amdgpu_ras_find_block_id_by_name(block_name, &block_id)) return -EINVAL; data->head.block = block_id; amdgpu_ras_find_block_id_by_name will return error directly if someone try to provide an invalid block_name intentionally via debugfs. Regards, Tao > -Original Message- > From: amd-gfx On Behalf Of Dan > Carpenter > Sent: 2020年5月5日 17:13 > To: Pan, Xinhui > Cc: amd-gfx@lists.freedesktop.org > Subject: [bug report] drm/amdgpu: add amdgpu_ras.c to support ras (v2) > > Hello xinhui pan, > > The patch c030f2e4166c: "drm/amdgpu: add amdgpu_ras.c to support ras > (v2)" from Oct 31, 2018, leads to the following static checker > warning: > > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c:620 > amdgpu_ras_feature_enable() > warn: uncapped user index 'ras_block_string[head->block]' > > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c >587 int amdgpu_ras_feature_enable(struct amdgpu_device *adev, >588 struct ras_common_if *head, bool enable) >589 { >590 struct amdgpu_ras *con = amdgpu_ras_get_context(adev); >591 union ta_ras_cmd_input info; >592 int ret; >593 >594 if (!con) >595 return -EINVAL; >596 >597 if (!enable) { >598 info.disable_features = (struct > ta_ras_disable_features_input) > { >599 .block_id = > amdgpu_ras_block_to_ta(head->block), >600 .error_type = > amdgpu_ras_error_to_ta(head->type), >601 }; >602 } else { >603 info.enable_features = (struct > ta_ras_enable_features_input) > { >604 .block_id = > amdgpu_ras_block_to_ta(head->block), >605 .error_type = > amdgpu_ras_error_to_ta(head->type), >606 }; >607 } >608 >609 /* Do not enable if it is not allowed. */ >610 WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, > head)); >611 /* Are we alerady in that state we are going to set? */ >612 if (!(!!enable ^ !!amdgpu_ras_is_feature_enabled(adev, head))) >613 return 0; >614 >615 if (!amdgpu_ras_intr_triggered()) { >616 ret = psp_ras_enable_features(&adev->psp, &info, > enable); >617 if (ret) { >618 amdgpu_ras_parse_status_code(adev, >619 enable ? > "enable":"disable", >620 > ras_block_str(head->block), > > ^^^ The head->block > value can be set to anything using debugfs. That's a problem because it > could easily lead to a kernel panic (which is > annoying) and also because these days we try to restrict what root can do. > >621 (enum > ta_ras_status)ret); >622 if (ret == TA_RAS_STATUS__RESET_NEEDED) >623 return -EAGAIN; >624 return -EINVAL; >625 } >626 } >627 >628 /* setup the obj */ >629 __amdgpu_ras_feature_enable(adev, head, enable); >630 >631 return 0; >632 } > > regards, > dan carpenter > ___ > amd-gfx mailing list > amd-gfx@lists.freedesktop.org > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.f > reedesktop.org%2Fmailman%2Flistinfo%2Famd- > gfx&data=02%7C01%7Ctao.zhou1%40amd.com%7Cd9925eca763149180 > ea508d7f0d47cfe%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C6 > 37242667754892396&sdata=k%2FMTgz9D1jIJGRFBu2Yuu6wuHP%2BPbR > vEcNJp87slIJE%3D&reserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/1] drm/amdgpu: Take a reference to an exported BO
Am 05.05.20 um 19:29 schrieb Felix Kuehling: What we could probably do to detect this is adding a BUG_ON() in TTMs release function and checking if the GEM reference count is really dead. The problem is, that we have to guess whether there are still any dmabuf references to the GEM BO. There is no way amdgpu can know that. You can't make amdgpu responsible for keeping a reference to the TTM BO while the GEM BO is still referenced by entities completely out of the control of amdgpu. That sounds like you don't understand how this works on the graphics side, so here is a brief overview once more. The last object still around is always the TTM BO, which can even be resurrected from the dead (kref_init() called again) if necessary for delayed delete. Then we have the GEM object which holds a reference to the TTM BO. This reference gets dropped when the GEM object gets destroyed. Then we optionally have the DMA-buf object for exported BOs which holds a reference to the GEM object and the driver. This construct guarantees that the GEM object is never destroyed nor the driver unloaded before the DMA-buf object referencing it is destroyed. Additional to that the reference from the GEM object to the TTM BO guarantees that the TTM BO is never destroyed before the GEM object is destroyed. Another weird thing I see is that amdgpu_gem_free_object calls amdgpu_bo_unref. That implies that the GEM object conceptually holds a reference to the amdpu/TTM BO. But that is not really the case. Amdgpu never takes that reference that GEM is supposed to own. If it did, we would leak all our memory because nobody would ever drop that reference. See amdgpu_bo_do_create(), the GEM object is initialized with drm_gem_private_object_init() with a reference count of 1. Then the TTM BO is initialized with ttm_bo_init_reserved() with a reference count of 1. In general there are two use cases here, the first one is userspace allocations with a GEM object handle. In this case the initial reference is owned by the GEM object and dropped in amdgpu_gem_free_object(). The other use case are kernel internal allocations like page tables and other general buffers. In this case the GEM object is ignored and the last TTM BO reference is dropped by calling amdgpu_bo_unref(). I think the correct solution is for amdgpu_bo_ref/unref to delegate its reference counting to drm_gem_object_get/put instead of ttm_bo_get/put. The amdgpu BO would hold one token reference to the TTM BO, which it can drop when the GEM BO refcount drops to 0. Finally, the amdgpu BO should only be freed once the TTM BO refcount also becomes 0. Just the other way around, but yes the long term plan should probably be to merge the two. I need a short term solution. Because I have a bug that causes a kernel oops with applications that are valid and correct, as far as I can tell. I'm thinking a solution that doesn't require major changes to the way TTM and GEM interact would put amdgpu in charge of coordinating the two. Unfortunately that would mean adding a third reference count in amdgpu_bo, in addition to the ones in TTM and GEM. The amdgpu BO would hold one token reference to each of the GEM and TTM BO. When amdgpu refcount goes to 0 it releases that GEM BO token reference. When the GEM BO refcount goes to 0, we get a callback amdgpu_gem_object_free. There we can drop the token reference to the TTM BO. Once the TTM BO reference goes to 0 we free the memory. Does this sound feasible? Well of hand it sounds like it makes the whole thing much more complicated without any gain. I probably still haven't understood what the core problem here is. Regards, Christian. Regards, Felix The difficult is currently we have a mismatch what locks could be taken when we drop the references. Regards, Christian. Regards, Felix Regards, Christian. Am 01.05.20 um 16:44 schrieb Felix Kuehling: [dropping my gmail address] We saw this backtrace showing the call chain while investigating a kernel oops caused by this issue on the DKMS branch with the KFD IPC API. It happens after a dma-buf file is released with fput: [ 1255.049330] BUG: kernel NULL pointer dereference, address: 051e [ 1255.049727] #PF: supervisor read access in kernel mode [ 1255.050092] #PF: error_code(0x) - not-present page [ 1255.050416] PGD 0 P4D 0 [ 1255.050736] Oops: [#1] SMP PTI [ 1255.051060] CPU: 27 PID: 2292 Comm: kworker/27:2 Tainted: G OE 5.3.0-46-generic #38~18.04.1-Ubuntu [ 1255.051400] Hardware name: Supermicro SYS-4029GP-TRT2/X11DPG-OT-CPU, BIOS 3.0a 02/26/2019 [ 1255.051752] Workqueue: events delayed_fput [ 1255.052111] RIP: 0010:drm_gem_object_put_unlocked+0x1c/0x70 [drm] [ 1255.052465] Code: 4d 80 c8 ee 0f 0b eb d8 66 0f 1f 44 00 00 0f 1f 44 00 00 48 85 ff 74 34 55 48 89 e5 41 54 53 48 89 fb 48 8b 7f 08 48 8b 47 20 <48> 83 b8 a0 00 00 00 00 74 1a 4c 8d 67 68 48 89 df 4c 89 e6
[PATCH -next] drm/amdgpu/navi10: fix unsigned comparison with 0
Fixes warning because size is uint32_t and can never be negtative drivers/gpu/drm/amd/amdgpu/../powerplay/navi10_ppt.c:1296:12: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits] if (size < 0) Reported-by: Hulk Robot Signed-off-by: ChenTao --- drivers/gpu/drm/amd/powerplay/navi10_ppt.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c index 2184d247a9f7..0c9be864d072 100644 --- a/drivers/gpu/drm/amd/powerplay/navi10_ppt.c +++ b/drivers/gpu/drm/amd/powerplay/navi10_ppt.c @@ -1293,8 +1293,6 @@ static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, u } if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) { - if (size < 0) - return -EINVAL; ret = smu_update_table(smu, SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT, -- 2.22.0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
RE: [PATCH 4/4] drm/amdgpu: switch to common rlc_autoload helper
[AMD Public Use] One spelling typo in the commit message, 'functon' should be 'function'. Apart from this, series is: Reviewed-by: Guchun Chen ' drop IP specific psp functon for rlc autoload since the autoload_supported ' Regards, Guchun -Original Message- From: Hawking Zhang Sent: Wednesday, May 6, 2020 2:39 PM To: amd-gfx@lists.freedesktop.org; Deucher, Alexander ; Clements, John ; Gao, Likun ; Chen, Guchun ; Li, Dennis ; Zhou1, Tao Cc: Zhang, Hawking Subject: [PATCH 4/4] drm/amdgpu: switch to common rlc_autoload helper drop IP specific psp functon for rlc autoload since the autoload_supported was introduced to mark ASICs that support rlc_autoload Signed-off-by: Hawking Zhang --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 3 --- drivers/gpu/drm/amd/amdgpu/psp_v11_0.c | 6 -- 3 files changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 19f09b4..5146542 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1646,7 +1646,7 @@ static int psp_np_fw_load(struct psp_context *psp) /* Start rlc autoload after psp recieved all the gfx firmware */ if (psp->autoload_supported && ucode->ucode_id == (amdgpu_sriov_vf(adev) ? AMDGPU_UCODE_ID_CP_MEC2 : AMDGPU_UCODE_ID_RLC_G)) { - ret = psp_rlc_autoload(psp); + ret = psp_rlc_autoload_start(psp); if (ret) { DRM_ERROR("Failed to start rlc autoload\n"); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 46bd85f..2a56ad9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -95,7 +95,6 @@ struct psp_funcs enum psp_ring_type ring_type); bool (*smu_reload_quirk)(struct psp_context *psp); int (*mode1_reset)(struct psp_context *psp); - int (*rlc_autoload_start)(struct psp_context *psp); int (*mem_training_init)(struct psp_context *psp); void (*mem_training_fini)(struct psp_context *psp); int (*mem_training)(struct psp_context *psp, uint32_t ops); @@ -307,8 +306,6 @@ struct amdgpu_psp_funcs { ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) #define psp_mode1_reset(psp) \ ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) -#define psp_rlc_autoload(psp) \ - ((psp)->funcs->rlc_autoload_start ? (psp)->funcs->rlc_autoload_start((psp)) : 0) #define psp_mem_training_init(psp) \ ((psp)->funcs->mem_training_init ? (psp)->funcs->mem_training_init((psp)) : 0) #define psp_mem_training_fini(psp) \ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index cfbf30a..1de89cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -524,11 +524,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp) return 0; } -static int psp_v11_0_rlc_autoload_start(struct psp_context *psp) -{ - return psp_rlc_autoload_start(psp); -} - static int psp_v11_0_memory_training_send_msg(struct psp_context *psp, int msg) { int ret; @@ -825,7 +820,6 @@ static const struct psp_funcs psp_v11_0_funcs = { .ring_stop = psp_v11_0_ring_stop, .ring_destroy = psp_v11_0_ring_destroy, .mode1_reset = psp_v11_0_mode1_reset, - .rlc_autoload_start = psp_v11_0_rlc_autoload_start, .mem_training_init = psp_v11_0_memory_training_init, .mem_training_fini = psp_v11_0_memory_training_fini, .mem_training = psp_v11_0_memory_training, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx