[PATCH] drm/amdgpu/display: Protect some functions with CONFIG_DRM_AMD_DC_DCN
Protect remove_hpo_dp_link_enc_from_ctx() and release_hpo_dp_link_enc() with CONFIG_DRM_AMD_DC_DCN as the functions are only called from code that is protected by CONFIG_DRM_AMD_DC_DCN. Fixes build fail with -Werror=unused-function. Fixes: 9b0e0d433f74 ("drm/amd/display: Add dependant changes for DCN32/321") Reported-by: Stephen Rothwell Signed-off-by: Alex Deucher Cc: Aurabindo Pillai --- drivers/gpu/drm/amd/display/dc/core/dc_resource.c | 4 1 file changed, 4 insertions(+) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c index b087452e4590..a098fd0cb240 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c @@ -1801,6 +1801,7 @@ static inline void retain_hpo_dp_link_enc( res_ctx->hpo_dp_link_enc_ref_cnts[enc_index]++; } +#if defined(CONFIG_DRM_AMD_DC_DCN) static inline void release_hpo_dp_link_enc( struct resource_context *res_ctx, int enc_index) @@ -1808,6 +1809,7 @@ static inline void release_hpo_dp_link_enc( ASSERT(res_ctx->hpo_dp_link_enc_ref_cnts[enc_index] > 0); res_ctx->hpo_dp_link_enc_ref_cnts[enc_index]--; } +#endif static bool add_hpo_dp_link_enc_to_ctx(struct resource_context *res_ctx, const struct resource_pool *pool, @@ -1832,6 +1834,7 @@ static bool add_hpo_dp_link_enc_to_ctx(struct resource_context *res_ctx, return pipe_ctx->link_res.hpo_dp_link_enc != NULL; } +#if defined(CONFIG_DRM_AMD_DC_DCN) static void remove_hpo_dp_link_enc_from_ctx(struct resource_context *res_ctx, struct pipe_ctx *pipe_ctx, struct dc_stream_state *stream) @@ -1845,6 +1848,7 @@ static void remove_hpo_dp_link_enc_from_ctx(struct resource_context *res_ctx, pipe_ctx->link_res.hpo_dp_link_enc = NULL; } } +#endif /* TODO: release audio object */ void update_audio_usage( -- 2.35.3
Re: [PATCH 1/1] drm/amdgpu: Update PDEs flush TLB if PDEs is moved
Am 2022-06-01 um 19:12 schrieb Philip Yang: Update PDEs, PTEs don't need flush TLB after updating mapping, this will remove the unnecessary TLB flush to reduce map to GPUs time. This description is unclear. I think what this change does is, flush TLBs when existing PDEs are updated (because a PTB or PDB moved). But it avoids unnecessary TLB flushes when new PDBs or PTBs are added to the page table, which commonly happens when memory is mapped for the first time. Regards, Felix Suggested-by: Christian König Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9596c22fded6..8cdfd09fd70d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -755,6 +755,10 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, goto error; list_for_each_entry(entry, >relocated, vm_status) { + /* vm_flush_needed after updating moved PDEs */ + if (entry->moved) + atomic64_inc(>tlb_seq); + r = amdgpu_vm_pde_update(, entry); if (r) goto error; @@ -764,9 +768,6 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (r) goto error; - /* vm_flush_needed after updating PDEs */ - atomic64_inc(>tlb_seq); - while (!list_empty(>relocated)) { entry = list_first_entry(>relocated, struct amdgpu_vm_bo_base,
[PATCH 1/1] drm/amdgpu: Update PDEs flush TLB if PDEs is moved
Update PDEs, PTEs don't need flush TLB after updating mapping, this will remove the unnecessary TLB flush to reduce map to GPUs time. Suggested-by: Christian König Signed-off-by: Philip Yang --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 9596c22fded6..8cdfd09fd70d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -755,6 +755,10 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, goto error; list_for_each_entry(entry, &vm->relocated, vm_status) { + /* vm_flush_needed after updating moved PDEs */ + if (entry->moved) + atomic64_inc(&vm->tlb_seq); + r = amdgpu_vm_pde_update(&params, entry); if (r) goto error; @@ -764,9 +768,6 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev, if (r) goto error; - /* vm_flush_needed after updating PDEs */ - atomic64_inc(&vm->tlb_seq); - while (!list_empty(&vm->relocated)) { entry = list_first_entry(&vm->relocated, struct amdgpu_vm_bo_base, -- 2.35.1
[PATCH v2] drm/amdkfd: Add available memory ioctl
From: Daniel Phillips Add a new KFD ioctl to return the largest possible memory size that can be allocated as a buffer object using kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same accept/reject criteria as that function so that allocating a new buffer object of the size returned by this new ioctl is guaranteed to succeed, barring races with other allocating tasks. This IOCTL will be used by libhsakmt: https://www.mail-archive.com/amd-gfx@lists.freedesktop.org/msg75743.html Signed-off-by: Daniel Phillips Signed-off-by: David Yat Sin --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 37 +-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 34 + include/uapi/linux/kfd_ioctl.h| 14 ++- 4 files changed, 80 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f8b9f27adcf5..0b0ab1de76ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 34ba9e776521..105af82d41a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -38,6 +38,12 @@ */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +/* + * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB + * BO chunk + */ +#define VRAM_ALLOCATION_ALIGN (1 << 21) + /* Impose 
limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; @@ -108,7 +114,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) static size_t amdgpu_amdkfd_acc_size(uint64_t size) { @@ -148,7 +154,12 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { system_mem_needed = acc_size; ttm_mem_needed = acc_size; - vram_needed = size; + + /* +* Conservatively round up the allocation requirement to 2 MB to avoid fragmentation +* caused by 4K allocations in the tail 2M BO chunk +*/ + vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { system_mem_needed = acc_size + size; ttm_mem_needed = acc_size; @@ -173,7 +184,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev->kfd.vram_used + vram_needed > -adev->gmc.real_vram_size - reserved_for_pt)) { +adev->gmc.real_vram_size - +atomic64_read(>vram_pin_size) - +reserved_for_pt)) { ret = -ENOMEM; goto release; } @@ -205,7 +218,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { kfd_mem_limit.system_mem_used -= acc_size; kfd_mem_limit.ttm_mem_used -= acc_size; - adev->kfd.vram_used -= size; + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= (acc_size + size); kfd_mem_limit.ttm_mem_used -= acc_size; @@ -1492,6 +1505,22 @@ int amdgpu_amdkfd_criu_resume(void *p) return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct 
amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t available; + + spin_lock(&kfd_mem_limit.mem_limit_lock); + available = adev->gmc.real_vram_size + - adev->kfd.vram_used + - atomic64_read(&adev->vram_pin_size) + - reserved_for_pt; +
Re: [PATCH] drm/amdkfd: Add available memory ioctl
On Wed, Jun 1, 2022 at 4:53 PM David Yat Sin wrote: > > From: Daniel Phillips > > Add a new KFD ioctl to return the largest possible memory size that > can be allocated as a buffer object using > kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same > accept/reject criteria as that function so that allocating a new > buffer object of the size returned by this new ioctl is guaranteed to > succeed, barring races with other allocating tasks. > > Signed-off-by: Daniel Phillips > Signed-off-by: David Yat Sin Got a link to the new UMD code which uses this? Please include that in the commit message. Thanks, Alex > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 + > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 37 +-- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 34 + > include/uapi/linux/kfd_ioctl.h| 14 ++- > 4 files changed, 80 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index f8b9f27adcf5..0b0ab1de76ca 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct > amdgpu_device *adev, > void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, > void *drm_priv); > uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); > +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); > int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > struct amdgpu_device *adev, uint64_t va, uint64_t size, > void *drm_priv, struct kgd_mem **mem, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 34ba9e776521..105af82d41a4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -38,6 +38,12 @@ > */ > #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 > > +/* > + * Align VRAM allocations to 2MB to 
avoid fragmentation caused by 4K > allocations in the tail 2MB > + * BO chunk > + */ > +#define VRAM_ALLOCATION_ALIGN (1 << 21) > + > /* Impose limit on how much memory KFD can use */ > static struct { > uint64_t max_system_mem_limit; > @@ -108,7 +114,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) > * compromise that should work in most cases without reserving too > * much memory for page tables unnecessarily (factor 16K, >> 14). > */ > -#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) > +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), > AMDGPU_VM_RESERVED_VRAM) > > static size_t amdgpu_amdkfd_acc_size(uint64_t size) > { > @@ -148,7 +154,12 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct > amdgpu_device *adev, > } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { > system_mem_needed = acc_size; > ttm_mem_needed = acc_size; > - vram_needed = size; > + > + /* > +* Conservatively round up the allocation requirement to 2 MB > to avoid fragmentation > +* caused by 4K allocations in the tail 2M BO chunk > +*/ > + vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); > } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { > system_mem_needed = acc_size + size; > ttm_mem_needed = acc_size; > @@ -173,7 +184,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct > amdgpu_device *adev, > (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > > kfd_mem_limit.max_ttm_mem_limit) || > (adev->kfd.vram_used + vram_needed > > -adev->gmc.real_vram_size - reserved_for_pt)) { > +adev->gmc.real_vram_size - > +atomic64_read(>vram_pin_size) - > +reserved_for_pt)) { > ret = -ENOMEM; > goto release; > } > @@ -205,7 +218,7 @@ static void unreserve_mem_limit(struct amdgpu_device > *adev, > } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { > kfd_mem_limit.system_mem_used -= acc_size; > kfd_mem_limit.ttm_mem_used -= acc_size; > - adev->kfd.vram_used -= size; > + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); > } else if (alloc_flag & 
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { > kfd_mem_limit.system_mem_used -= (acc_size + size); > kfd_mem_limit.ttm_mem_used -= acc_size; > @@ -1492,6 +1505,22 @@ int amdgpu_amdkfd_criu_resume(void *p) > return ret; > } > > +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) > +{ > + uint64_t reserved_for_pt = > + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); > + size_t
[PATCH] drm/amdkfd: Add available memory ioctl
From: Daniel Phillips Add a new KFD ioctl to return the largest possible memory size that can be allocated as a buffer object using kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same accept/reject criteria as that function so that allocating a new buffer object of the size returned by this new ioctl is guaranteed to succeed, barring races with other allocating tasks. Signed-off-by: Daniel Phillips Signed-off-by: David Yat Sin --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 37 +-- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 34 + include/uapi/linux/kfd_ioctl.h| 14 ++- 4 files changed, 80 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f8b9f27adcf5..0b0ab1de76ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct amdgpu_device *adev, uint64_t va, uint64_t size, void *drm_priv, struct kgd_mem **mem, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 34ba9e776521..105af82d41a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -38,6 +38,12 @@ */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +/* + * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB + * BO chunk + */ +#define VRAM_ALLOCATION_ALIGN (1 << 21) + /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; @@ -108,7 +114,7 @@ void 
amdgpu_amdkfd_reserve_system_mem(uint64_t size) * compromise that should work in most cases without reserving too * much memory for page tables unnecessarily (factor 16K, >> 14). */ -#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) static size_t amdgpu_amdkfd_acc_size(uint64_t size) { @@ -148,7 +154,12 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { system_mem_needed = acc_size; ttm_mem_needed = acc_size; - vram_needed = size; + + /* +* Conservatively round up the allocation requirement to 2 MB to avoid fragmentation +* caused by 4K allocations in the tail 2M BO chunk +*/ + vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { system_mem_needed = acc_size + size; ttm_mem_needed = acc_size; @@ -173,7 +184,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || (adev->kfd.vram_used + vram_needed > -adev->gmc.real_vram_size - reserved_for_pt)) { +adev->gmc.real_vram_size - +atomic64_read(>vram_pin_size) - +reserved_for_pt)) { ret = -ENOMEM; goto release; } @@ -205,7 +218,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { kfd_mem_limit.system_mem_used -= acc_size; kfd_mem_limit.ttm_mem_used -= acc_size; - adev->kfd.vram_used -= size; + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= (acc_size + size); kfd_mem_limit.ttm_mem_used -= acc_size; @@ -1492,6 +1505,22 @@ int amdgpu_amdkfd_criu_resume(void *p) return ret; } +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +{ + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); + size_t 
available; + + spin_lock(&kfd_mem_limit.mem_limit_lock); + available = adev->gmc.real_vram_size + - adev->kfd.vram_used + - atomic64_read(&adev->vram_pin_size) + - reserved_for_pt; + spin_unlock(&kfd_mem_limit.mem_limit_lock); + + return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN); +} + int
[PATCH v2] drm: Don't block HDR_OUTPUT_METADATA on unknown EOTF
The EDID of an HDR display defines EOTFs that are supported by the display and can be set in the HDR metadata infoframe. Userspace is expected to read the EDID and set an appropriate HDR_OUTPUT_METADATA. In drm_parse_hdr_metadata_block the kernel reads the supported EOTFs from the EDID and stores them in the drm_connector->hdr_sink_metadata. While doing so it also filters the EOTFs to the EOTFs the kernel knows about. When an HDR_OUTPUT_METADATA is set it then checks to make sure the EOTF is a supported EOTF. In cases where the kernel doesn't know about a new EOTF this check will fail, even if the EDID advertises support. Since it is expected that userspace reads the EDID to understand what the display supports it doesn't make sense for DRM to block an HDR_OUTPUT_METADATA if it contains an EOTF the kernel doesn't understand. This comes with the added benefit of future-proofing metadata support. If the spec defines a new EOTF there is no need to update DRM and an compositor can immediately make use of it. 
Fixes: https://gitlab.freedesktop.org/wayland/weston/-/issues/609 v2: Distinguish EOTFs defind in kernel and ones defined in EDID in the commit description (Pekka) Signed-off-by: Harry Wentland Cc: ppaala...@gmail.com Cc: sebastian.w...@redhat.com Cc: vpros...@amd.com Cc: Uma Shankar Cc: Ville Syrjälä Acked-by: Pekka Paalanen --- drivers/gpu/drm/drm_edid.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 12893e7be89b..223f96a72064 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -5691,10 +5691,8 @@ drm_hdmi_infoframe_set_hdr_metadata(struct hdmi_drm_infoframe *frame, /* Sink EOTF is Bit map while infoframe is absolute values */ if (!is_eotf_supported(hdr_metadata->hdmi_metadata_type1.eotf, - connector->hdr_sink_metadata.hdmi_type1.eotf)) { - DRM_DEBUG_KMS("EOTF Not Supported\n"); - return -EINVAL; - } + connector->hdr_sink_metadata.hdmi_type1.eotf)) + DRM_DEBUG_KMS("Unknown EOTF %d\n", hdr_metadata->hdmi_metadata_type1.eotf); err = hdmi_drm_infoframe_init(frame); if (err < 0) -- 2.36.1
Re: [PATCH] drm/amdgpu: delete duplicate condition in gfx_v11_0_soft_reset()
Applied. Thanks! Alex On Mon, May 30, 2022 at 7:42 AM Dan Carpenter wrote: > > We know that "grbm_soft_reset" is true because we're already inside an > if (grbm_soft_reset) condition. No need to test again. > > Signed-off-by: Dan Carpenter > --- > drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c | 24 +++- > 1 file changed, 11 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > index 8c0a3fc7aaa6..4bca63a346b4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c > @@ -4780,19 +4780,17 @@ static int gfx_v11_0_soft_reset(void *handle) > /* Disable MEC parsing/prefetching */ > gfx_v11_0_cp_compute_enable(adev, false); > > - if (grbm_soft_reset) { > - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); > - tmp |= grbm_soft_reset; > - dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); > - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); > - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); > - > - udelay(50); > - > - tmp &= ~grbm_soft_reset; > - WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); > - tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); > - } > + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); > + tmp |= grbm_soft_reset; > + dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp); > + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); > + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); > + > + udelay(50); > + > + tmp &= ~grbm_soft_reset; > + WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp); > + tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET); > > /* Wait a little for things to settle down */ > udelay(50); > -- > 2.35.1 >
Re: [kbuild] drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c:1433 amdgpu_discovery_get_vcn_info() error: buffer overflow 'adev->vcn.vcn_codec_disable_mask' 2 <= 3
On Fri, May 27, 2022 at 3:46 AM Dan Carpenter wrote: > > [ kbuild bot sent this warning on May 4 but I never heard back and it's > May 27 now so sending a duplicate warning is probably for the best. -dan] > > tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git > master > head: 7e284070abe53d448517b80493863595af4ab5f0 > commit: 622469c87fc3e6c90a980be3e2287d82bd55c977 drm/amdgpu/discovery: add a > function to parse the vcn info table > config: arc-randconfig-m031-20220524 > (https://download.01.org/0day-ci/archive/20220527/202205271546.ov14n2r8-...@intel.com/config > ) > compiler: arceb-elf-gcc (GCC) 11.3.0 > > If you fix the issue, kindly add following tag where applicable > Reported-by: kernel test robot > Reported-by: Dan Carpenter > > smatch warnings: > drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c:1433 > amdgpu_discovery_get_vcn_info() error: buffer overflow > 'adev->vcn.vcn_codec_disable_mask' 2 <= 3 > > vim +1433 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c > > 622469c87fc3e6 Alex Deucher 2022-03-30 1403 int > amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) > 622469c87fc3e6 Alex Deucher 2022-03-30 1404 { > 622469c87fc3e6 Alex Deucher 2022-03-30 1405struct binary_header *bhdr; > 622469c87fc3e6 Alex Deucher 2022-03-30 1406union vcn_info *vcn_info; > 622469c87fc3e6 Alex Deucher 2022-03-30 1407u16 offset; > 622469c87fc3e6 Alex Deucher 2022-03-30 1408int v; > 622469c87fc3e6 Alex Deucher 2022-03-30 1409 > 622469c87fc3e6 Alex Deucher 2022-03-30 1410if > (!adev->mman.discovery_bin) { > 622469c87fc3e6 Alex Deucher 2022-03-30 1411DRM_ERROR("ip > discovery uninitialized\n"); > 622469c87fc3e6 Alex Deucher 2022-03-30 1412return -EINVAL; > 622469c87fc3e6 Alex Deucher 2022-03-30 1413} > 622469c87fc3e6 Alex Deucher 2022-03-30 1414 > 622469c87fc3e6 Alex Deucher 2022-03-30 1415if (adev->vcn.num_vcn_inst > > VCN_INFO_TABLE_MAX_NUM_INSTANCES) { > > Capped to 4 > > 622469c87fc3e6 Alex Deucher 2022-03-30 1416dev_err(adev->dev, > "invalid vcn 
instances\n"); > 622469c87fc3e6 Alex Deucher 2022-03-30 1417return -EINVAL; > 622469c87fc3e6 Alex Deucher 2022-03-30 1418} > 622469c87fc3e6 Alex Deucher 2022-03-30 1419 > 622469c87fc3e6 Alex Deucher 2022-03-30 1420bhdr = (struct binary_header > *)adev->mman.discovery_bin; > 622469c87fc3e6 Alex Deucher 2022-03-30 1421offset = > le16_to_cpu(bhdr->table_list[VCN_INFO].offset); > 622469c87fc3e6 Alex Deucher 2022-03-30 1422 > 622469c87fc3e6 Alex Deucher 2022-03-30 1423if (!offset) { > 622469c87fc3e6 Alex Deucher 2022-03-30 1424dev_err(adev->dev, > "invalid vcn table offset\n"); > 622469c87fc3e6 Alex Deucher 2022-03-30 1425return -EINVAL; > 622469c87fc3e6 Alex Deucher 2022-03-30 1426} > 622469c87fc3e6 Alex Deucher 2022-03-30 1427 > 622469c87fc3e6 Alex Deucher 2022-03-30 1428vcn_info = (union vcn_info > *)(adev->mman.discovery_bin + offset); > 622469c87fc3e6 Alex Deucher 2022-03-30 1429 > 622469c87fc3e6 Alex Deucher 2022-03-30 1430switch > (le16_to_cpu(vcn_info->v1.header.version_major)) { > 622469c87fc3e6 Alex Deucher 2022-03-30 1431case 1: > 622469c87fc3e6 Alex Deucher 2022-03-30 1432for (v = 0; v < > adev->vcn.num_vcn_inst; v++) { > 622469c87fc3e6 Alex Deucher 2022-03-30 @1433 > adev->vcn.vcn_codec_disable_mask[v] = > > But this array doesn't have 4 elements Correct, but num_vcn_inst can't be larger than AMDGPU_MAX_VCN_INSTANCES (2) at the moment thanks to: https://patchwork.freedesktop.org/patch/486289/ Alex > > 622469c87fc3e6 Alex Deucher 2022-03-30 1434 > le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits); > 622469c87fc3e6 Alex Deucher 2022-03-30 1435} > 622469c87fc3e6 Alex Deucher 2022-03-30 1436break; > 622469c87fc3e6 Alex Deucher 2022-03-30 1437default: > 622469c87fc3e6 Alex Deucher 2022-03-30 1438dev_err(adev->dev, > 622469c87fc3e6 Alex Deucher 2022-03-30 1439"Unhandled > VCN info table %d.%d\n", > 622469c87fc3e6 Alex Deucher 2022-03-30 1440 > le16_to_cpu(vcn_info->v1.header.version_major), > 622469c87fc3e6 Alex Deucher 2022-03-30 1441 > 
le16_to_cpu(vcn_info->v1.header.version_minor)); > 622469c87fc3e6 Alex Deucher 2022-03-30 1442return -EINVAL; > 622469c87fc3e6 Alex Deucher 2022-03-30 1443} > 622469c87fc3e6 Alex Deucher 2022-03-30 1444return 0; > f39f5bb1c9d68d Xiaojie Yuan 2019-06-20 1445 } > > -- > 0-DAY CI Kernel Test Service > https://01.org/lkp > ___ > kbuild mailing list -- kbu...@lists.01.org > To unsubscribe send an email to kbuild-le...@lists.01.org >
RE: [PATCH v2] drm/amdgpu: Resolve RAS GFX error count issue v2
[AMD Official Use Only - General] Thanks, added the tag before pushing. Thanks, Candice -Original Message- From: Alex Deucher Sent: Thursday, June 2, 2022 2:02 AM To: Li, Candice Cc: amd-gfx list Subject: Re: [PATCH v2] drm/amdgpu: Resolve RAS GFX error count issue v2 On Wed, Jun 1, 2022 at 2:01 PM Alex Deucher wrote: > > On Wed, Jun 1, 2022 at 1:57 PM Candice Li wrote: > > > > Fix misleading indentation and add ras unsupported checking > > for gfx ras late init. > > > > Signed-off-by: Candice Li > > Reviewed-by: Alex Deucher Also, if this was a recent change, a Fixes: tag would be nice. Alex > > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +--- > > 1 file changed, 5 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > index 99c1a2d3dae84d..16699158e00d8c 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > @@ -599,13 +599,15 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device > > *adev, struct ras_common_if *r > > if (!amdgpu_persistent_edc_harvesting_supported(adev)) > > amdgpu_ras_reset_error_status(adev, > > AMDGPU_RAS_BLOCK__GFX); > > > > - r = amdgpu_ras_block_late_init(adev, ras_block); > > - if (r) > > - return r; > > + r = amdgpu_ras_block_late_init(adev, ras_block); > > + if (r) > > + return r; > > > > r = amdgpu_irq_get(adev, >gfx.cp_ecc_error_irq, 0); > > if (r) > > goto late_fini; > > + } else { > > + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); > > } > > > > return 0; > > -- > > 2.17.1 > >
Re: [PATCH v2] drm/amdgpu: Resolve RAS GFX error count issue v2
On Wed, Jun 1, 2022 at 2:01 PM Alex Deucher wrote: > > On Wed, Jun 1, 2022 at 1:57 PM Candice Li wrote: > > > > Fix misleading indentation and add ras unsupported checking > > for gfx ras late init. > > > > Signed-off-by: Candice Li > > Reviewed-by: Alex Deucher Also, if this was a recent change, a Fixes: tag would be nice. Alex > > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +--- > > 1 file changed, 5 insertions(+), 3 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > index 99c1a2d3dae84d..16699158e00d8c 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > > @@ -599,13 +599,15 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device > > *adev, struct ras_common_if *r > > if (!amdgpu_persistent_edc_harvesting_supported(adev)) > > amdgpu_ras_reset_error_status(adev, > > AMDGPU_RAS_BLOCK__GFX); > > > > - r = amdgpu_ras_block_late_init(adev, ras_block); > > - if (r) > > - return r; > > + r = amdgpu_ras_block_late_init(adev, ras_block); > > + if (r) > > + return r; > > > > r = amdgpu_irq_get(adev, >gfx.cp_ecc_error_irq, 0); > > if (r) > > goto late_fini; > > + } else { > > + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); > > } > > > > return 0; > > -- > > 2.17.1 > >
Re: [PATCH v2] drm/amdgpu: Resolve RAS GFX error count issue v2
On Wed, Jun 1, 2022 at 1:57 PM Candice Li wrote: > > Fix misleading indentation and add ras unsupported checking > for gfx ras late init. > > Signed-off-by: Candice Li Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +--- > 1 file changed, 5 insertions(+), 3 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index 99c1a2d3dae84d..16699158e00d8c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -599,13 +599,15 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device > *adev, struct ras_common_if *r > if (!amdgpu_persistent_edc_harvesting_supported(adev)) > amdgpu_ras_reset_error_status(adev, > AMDGPU_RAS_BLOCK__GFX); > > - r = amdgpu_ras_block_late_init(adev, ras_block); > - if (r) > - return r; > + r = amdgpu_ras_block_late_init(adev, ras_block); > + if (r) > + return r; > > r = amdgpu_irq_get(adev, >gfx.cp_ecc_error_irq, 0); > if (r) > goto late_fini; > + } else { > + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); > } > > return 0; > -- > 2.17.1 >
[PATCH v2] drm/amdgpu: Resolve RAS GFX error count issue v2
Fix misleading indentation and add ras unsupported checking for gfx ras late init. Signed-off-by: Candice Li --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 99c1a2d3dae84d..16699158e00d8c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -599,13 +599,15 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); - r = amdgpu_ras_block_late_init(adev, ras_block); - if (r) - return r; + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); if (r) goto late_fini; + } else { + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); } return 0; -- 2.17.1
Re: Explicit VM updates
On Wed, Jun 1, 2022 at 2:40 PM Christian König wrote: > > Hey guys, > > so today Bas came up with a new requirement regarding the explicit > synchronization to VM updates and a bunch of prototype patches. > > I've been thinking about that stuff for quite some time before, but to > be honest it's one of the most trickiest parts of the driver. > > So my current thinking is that we could potentially handle those > requirements like this: > > 1. We add some new EXPLICIT flag to context (or CS?) and VM IOCTL. This > way we either get the new behavior for the whole CS+VM or the old one, > but never both mixed. Any VM wide flag would be a concern because libdrm_amdgpu merges things into a single drm fd for potentially multiple driver (i.e. radv + amdvlk + radeonsi). I'm also not sure what you'd need a VM flag for? > > 2. When memory is unmapped we keep around the last unmap operation > inside the bo_va. > > 3. When memory is freed we add all the CS fences which could access that > memory + the last unmap operation as BOOKKEEP fences to the BO and as > mandatory sync fence to the VM. > > Memory freed either because of an eviction or because of userspace > closing the handle will be seen as a combination of unmap+free. > > > The result is the following semantic for userspace to avoid implicit > synchronization as much as possible: > > 1. When you allocate and map memory it is mandatory to either wait for > the mapping operation to complete or to add it as dependency for your CS. > If this isn't followed the application will run into CS faults > (that's what we pretty much already implemented). > > 2. When memory is freed you must unmap that memory first and then wait > for this unmap operation to complete before freeing the memory. > If this isn't followed the kernel will add a forcefully wait to the > next CS to block until the unmap is completed. > > 3. All VM operations requested by userspace will still be executed in > order, e.g. 
we can't run unmap + map in parallel or something like this. > > Is that something you guys can live with? As far as I can see it should > give you the maximum freedom possible, but is still doable. > > Regards, > Christian.
Re: Explicit VM updates
Am 2022-06-01 um 13:22 schrieb Christian König: Am 01.06.22 um 19:07 schrieb Felix Kuehling: Am 2022-06-01 um 12:29 schrieb Christian König: Am 01.06.22 um 17:05 schrieb Felix Kuehling: Am 2022-06-01 um 08:40 schrieb Christian König: Hey guys, so today Bas came up with a new requirement regarding the explicit synchronization to VM updates and a bunch of prototype patches. I've been thinking about that stuff for quite some time before, but to be honest it's one of the most trickiest parts of the driver. So my current thinking is that we could potentially handle those requirements like this: 1. We add some new EXPLICIT flag to context (or CS?) and VM IOCTL. This way we either get the new behavior for the whole CS+VM or the old one, but never both mixed. 2. When memory is unmapped we keep around the last unmap operation inside the bo_va. 3. When memory is freed we add all the CS fences which could access that memory + the last unmap operation as BOOKKEEP fences to the BO and as mandatory sync fence to the VM. Memory freed either because of an eviction or because of userspace closing the handle will be seen as a combination of unmap+free. The result is the following semantic for userspace to avoid implicit synchronization as much as possible: 1. When you allocate and map memory it is mandatory to either wait for the mapping operation to complete or to add it as dependency for your CS. If this isn't followed the application will run into CS faults (that's what we pretty much already implemented). This makes sense. 2. When memory is freed you must unmap that memory first and then wait for this unmap operation to complete before freeing the memory. If this isn't followed the kernel will add a forcefully wait to the next CS to block until the unmap is completed. This would work for now, but it won't work for user mode submission in the future. I find it weird that user mode needs to wait for the unmap. For user mode, unmap and free should always be asynchronous. 
I can't think of any good reasons to make user mode wait for the driver to clean up its stuff. Could the waiting be done in kernel mode instead? TTM already does delayed freeing if there are fences outstanding on a BO being freed. This should make it easy to delay freeing until the unmap is done without blocking the user mode thread. This is not about blocking, but synchronization dependencies. Then I must have misunderstood this sentence: "When memory is freed you must unmap that memory first and then wait for this unmap operation to complete before freeing the memory." If the pronoun "you" is the user mode driver, it means user mode must wait for kernel mode to finish unmapping memory before freeing it. Was that not what you meant? Ah, yes. The UMD must wait for the kernel to finish unmapping all the maps from the BO before it drops the handle of the BO and with that frees it. In other words the free is not waiting for the unmap to complete, but causes command submissions through the kernel to depend on the unmap. I guess I don't understand that dependency. The next command submission obviously cannot use the memory that was unmapped. But why does it need to synchronize with the unmap operation? Because of the necessary TLB flush, only after that one is executed we can be sure that nobody has access to the memory any more and actually free it. So freeing the memory has to wait for the TLB flush. Why does the next command submission need to wait? User mode submissions are completely unrelated to that. I mention user mode command submission because there is no way to enforce the synchronization you describe here on a user mode queue. So this approach is not very future proof. With user mode queues you need to wait for the work on the queue to finish anyway or otherwise you run into VM faults if you just unmap or free the memory. If the next command submission doesn't use the unmapped/freed memory, why does it need to wait for the TLB flush? 
If it is using the unmapped/freed memory, that's a user mode bug. But waiting for the TLB flush won't fix that. It will only turn a likely VM fault into a certain VM fault. The guarantee you need to give is, that the memory is not freed and reused by anyone else until the TLB flush is done. This dependency requires synchronization of the "free" operation with the TLB flush. It does not require synchronization with any future command submissions in the context that freed the memory. Regards, Felix The signal that TLB flush is completed comes from the MES in this case. Regards, Christian. Regards, Felix Regards, Christian. Regards, Felix 3. All VM operations requested by userspace will still be executed in order, e.g. we can't run unmap + map in parallel or something like this. Is that something you guys can live with? As far as I can see it should give you the
RE: [PATCH] drm/amdgpu: Resolve RAS GFX error count issue v2
[Public] Thanks for the review. To fix the indentation will require the else case to let amdgpu_ras_block_late_init can go to ras unsupported code path. That's why I want to keep them in one patch. I will update the commit message and coding style as you suggested. Thanks, Candice -Original Message- From: Alex Deucher Sent: Thursday, June 2, 2022 1:19 AM To: Li, Candice Cc: amd-gfx list Subject: Re: [PATCH] drm/amdgpu: Resolve RAS GFX error count issue v2 On Wed, Jun 1, 2022 at 1:10 PM Candice Li wrote: > > Fix misleading indentation > Might want to split this into two patches, one to fix the indentation and one to fix the missing function call. Also you should mention the missing function call in the else case. > Signed-off-by: Candice Li > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 + > 1 file changed, 5 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index 99c1a2d3dae84d..424990e1bec10c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -599,14 +599,15 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device > *adev, struct ras_common_if *r > if (!amdgpu_persistent_edc_harvesting_supported(adev)) > amdgpu_ras_reset_error_status(adev, > AMDGPU_RAS_BLOCK__GFX); > > - r = amdgpu_ras_block_late_init(adev, ras_block); > - if (r) > - return r; > + r = amdgpu_ras_block_late_init(adev, ras_block); > + if (r) > + return r; > > r = amdgpu_irq_get(adev, >gfx.cp_ecc_error_irq, 0); > if (r) > goto late_fini; > - } > + } else > + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); Coding style. The else case needs { } as well to match kernel coding style guidelines. Alex > > return 0; > late_fini: > -- > 2.17.1 >
Re: [PATCH] drm/amdgpu: enable tmz by default for GC 10.3.7
On Mon, May 30, 2022 at 10:27 PM Sunil Khatri wrote: > > Add IP GC 10.3.7 in the list of target to have > tmz enabled by default. > > Signed-off-by: Sunil Khatri Assuming this is validated and working, Reviewed-by: Alex Deucher > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > index 798c56214a23..aebc384531ac 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c > @@ -518,6 +518,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) > case IP_VERSION(9, 1, 0): > /* RENOIR looks like RAVEN */ > case IP_VERSION(9, 3, 0): > + /* GC 10.3.7 */ > + case IP_VERSION(10, 3, 7): > if (amdgpu_tmz == 0) { > adev->gmc.tmz_enabled = false; > dev_info(adev->dev, > @@ -540,8 +542,6 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) > case IP_VERSION(10, 3, 1): > /* YELLOW_CARP*/ > case IP_VERSION(10, 3, 3): > - /* GC 10.3.7 */ > - case IP_VERSION(10, 3, 7): > /* Don't enable it by default yet. > */ > if (amdgpu_tmz < 1) { > -- > 2.25.1 >
Re: Explicit VM updates
Am 01.06.22 um 19:07 schrieb Felix Kuehling: Am 2022-06-01 um 12:29 schrieb Christian König: Am 01.06.22 um 17:05 schrieb Felix Kuehling: Am 2022-06-01 um 08:40 schrieb Christian König: Hey guys, so today Bas came up with a new requirement regarding the explicit synchronization to VM updates and a bunch of prototype patches. I've been thinking about that stuff for quite some time before, but to be honest it's one of the most trickiest parts of the driver. So my current thinking is that we could potentially handle those requirements like this: 1. We add some new EXPLICIT flag to context (or CS?) and VM IOCTL. This way we either get the new behavior for the whole CS+VM or the old one, but never both mixed. 2. When memory is unmapped we keep around the last unmap operation inside the bo_va. 3. When memory is freed we add all the CS fences which could access that memory + the last unmap operation as BOOKKEEP fences to the BO and as mandatory sync fence to the VM. Memory freed either because of an eviction or because of userspace closing the handle will be seen as a combination of unmap+free. The result is the following semantic for userspace to avoid implicit synchronization as much as possible: 1. When you allocate and map memory it is mandatory to either wait for the mapping operation to complete or to add it as dependency for your CS. If this isn't followed the application will run into CS faults (that's what we pretty much already implemented). This makes sense. 2. When memory is freed you must unmap that memory first and then wait for this unmap operation to complete before freeing the memory. If this isn't followed the kernel will add a forcefully wait to the next CS to block until the unmap is completed. This would work for now, but it won't work for user mode submission in the future. I find it weird that user mode needs to wait for the unmap. For user mode, unmap and free should always be asynchronous. 
I can't think of any good reasons to make user mode wait for the driver to clean up its stuff. Could the waiting be done in kernel mode instead? TTM already does delayed freeing if there are fences outstanding on a BO being freed. This should make it easy to delay freeing until the unmap is done without blocking the user mode thread. This is not about blocking, but synchronization dependencies. Then I must have misunderstood this sentence: "When memory is freed you must unmap that memory first and then wait for this unmap operation to complete before freeing the memory." If the pronoun "you" is the user mode driver, it means user mode must wait for kernel mode to finish unmapping memory before freeing it. Was that not what you meant? Ah, yes. The UMD must wait for the kernel to finish unmapping all the maps from the BO before it drops the handle of the BO and with that frees it. In other words the free is not waiting for the unmap to complete, but causes command submissions through the kernel to depend on the unmap. I guess I don't understand that dependency. The next command submission obviously cannot use the memory that was unmapped. But why does it need to synchronize with the unmap operation? Because of the necessary TLB flush, only after that one is executed we can be sure that nobody has access to the memory any more and actually free it. User mode submissions are completely unrelated to that. I mention user mode command submission because there is no way to enforce the synchronization you describe here on a user mode queue. So this approach is not very future proof. With user mode queues you need to wait for the work on the queue to finish anyway or otherwise you run into VM faults if you just unmap or free the memory. The signal that TLB flush is completed comes from the MES in this case. Regards, Christian. Regards, Felix Regards, Christian. Regards, Felix 3. All VM operations requested by userspace will still be executed in order, e.g. 
we can't run unmap + map in parallel or something like this. Is that something you guys can live with? As far as I can see it should give you the maximum freedom possible, but is still doable. Regards, Christian.
Re: [PATCH] drm/amdgpu: Resolve RAS GFX error count issue v2
On Wed, Jun 1, 2022 at 1:10 PM Candice Li wrote: > > Fix misleading indentation > Might want to split this into two patches, one to fix the indentation and one to fix the missing function call. Also you should mention the missing function call in the else case. > Signed-off-by: Candice Li > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 + > 1 file changed, 5 insertions(+), 4 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index 99c1a2d3dae84d..424990e1bec10c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -599,14 +599,15 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device > *adev, struct ras_common_if *r > if (!amdgpu_persistent_edc_harvesting_supported(adev)) > amdgpu_ras_reset_error_status(adev, > AMDGPU_RAS_BLOCK__GFX); > > - r = amdgpu_ras_block_late_init(adev, ras_block); > - if (r) > - return r; > + r = amdgpu_ras_block_late_init(adev, ras_block); > + if (r) > + return r; > > r = amdgpu_irq_get(adev, >gfx.cp_ecc_error_irq, 0); > if (r) > goto late_fini; > - } > + } else > + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); Coding style. The else case needs { } as well to match kernel coding style guidelines. Alex > > return 0; > late_fini: > -- > 2.17.1 >
[PATCH] drm/amdgpu: Resolve RAS GFX error count issue v2
Fix misleading indentation Signed-off-by: Candice Li --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 99c1a2d3dae84d..424990e1bec10c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -599,14 +599,15 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); - r = amdgpu_ras_block_late_init(adev, ras_block); - if (r) - return r; + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0); if (r) goto late_fini; - } + } else + amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0); return 0; late_fini: -- 2.17.1
Re: Explicit VM updates
Am 2022-06-01 um 12:29 schrieb Christian König: Am 01.06.22 um 17:05 schrieb Felix Kuehling: Am 2022-06-01 um 08:40 schrieb Christian König: Hey guys, so today Bas came up with a new requirement regarding the explicit synchronization to VM updates and a bunch of prototype patches. I've been thinking about that stuff for quite some time before, but to be honest it's one of the most trickiest parts of the driver. So my current thinking is that we could potentially handle those requirements like this: 1. We add some new EXPLICIT flag to context (or CS?) and VM IOCTL. This way we either get the new behavior for the whole CS+VM or the old one, but never both mixed. 2. When memory is unmapped we keep around the last unmap operation inside the bo_va. 3. When memory is freed we add all the CS fences which could access that memory + the last unmap operation as BOOKKEEP fences to the BO and as mandatory sync fence to the VM. Memory freed either because of an eviction or because of userspace closing the handle will be seen as a combination of unmap+free. The result is the following semantic for userspace to avoid implicit synchronization as much as possible: 1. When you allocate and map memory it is mandatory to either wait for the mapping operation to complete or to add it as dependency for your CS. If this isn't followed the application will run into CS faults (that's what we pretty much already implemented). This makes sense. 2. When memory is freed you must unmap that memory first and then wait for this unmap operation to complete before freeing the memory. If this isn't followed the kernel will add a forcefully wait to the next CS to block until the unmap is completed. This would work for now, but it won't work for user mode submission in the future. I find it weird that user mode needs to wait for the unmap. For user mode, unmap and free should always be asynchronous. I can't think of any good reasons to make user mode wait for the driver to clean up its stuff. 
Could the waiting be done in kernel mode instead? TTM already does delayed freeing if there are fences outstanding on a BO being freed. This should make it easy to delay freeing until the unmap is done without blocking the user mode thread. This is not about blocking, but synchronization dependencies. Then I must have misunderstood this sentence: "When memory is freed you must unmap that memory first and then wait for this unmap operation to complete before freeing the memory." If the pronoun "you" is the user mode driver, it means user mode must wait for kernel mode to finish unmapping memory before freeing it. Was that not what you meant? In other words the free is not waiting for the unmap to complete, but causes command submissions through the kernel to depend on the unmap. I guess I don't understand that dependency. The next command submission obviously cannot use the memory that was unmapped. But why does it need to synchronize with the unmap operation? User mode submissions are completely unrelated to that. I mention user mode command submission because there is no way to enforce the synchronization you describe here on a user mode queue. So this approach is not very future proof. Regards, Felix Regards, Christian. Regards, Felix 3. All VM operations requested by userspace will still be executed in order, e.g. we can't run unmap + map in parallel or something like this. Is that something you guys can live with? As far as I can see it should give you the maximum freedom possible, but is still doable. Regards, Christian.
RE: [PATCH] drm/amdgpu: Correct if identation that causes GCC warning
[AMD Official Use Only - General] Hi Rodrigo, Sorry, my bad! Thanks for the fix, but only adjust the indentation will miss another code path for amdgpu_ras_block_late_init. Let me submit a new one to fix it. Thanks, Candice -Original Message- From: Siqueira, Rodrigo Sent: Wednesday, June 1, 2022 10:14 PM To: amd-gfx@lists.freedesktop.org Cc: Deucher, Alexander ; Li, Candice ; Zhang, Hawking Subject: [PATCH] drm/amdgpu: Correct if identation that causes GCC warning GCC is complaining about misleading indentation: error: this ‘if’ clause does not guard... [-Werror=misleading-indentation] 603 | if (r) This commit adjusts the commit indentation. Cc: Candice Li Cc: Hawking Zhang Signed-off-by: Rodrigo Siqueira --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 99c1a2d3dae8..e1f4c5f30645 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -599,9 +599,9 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); - r = amdgpu_ras_block_late_init(adev, ras_block); - if (r) - return r; + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; r = amdgpu_irq_get(adev, >gfx.cp_ecc_error_irq, 0); if (r) -- 2.25.1
Re: [PATCH 3/3] drm/amdkfd: Extend KFD device topology to surface peer-to-peer links
Am 2022-05-31 um 13:03 schrieb Ramesh Errabolu: Extend KFD device topology to surface peer-to-peer links among GPU devices connected over PCIe or xGMI. Enabling HSA_AMD_P2P is REQUIRED to surface peer-to-peer links. This patch needs more of an explanation. Old upstream KFD did not expose to user mode any P2P links or indirect links that go over two or more direct hops. Old versions of the Thunk used to make up their own P2P and indirect links without the information about peer-accessibility and chipset support available to the kernel mode driver. In this patch we expose P2P links in a new sysfs directory to provide more reliable P2P link information to user mode. Old versions of the Thunk will continue to work as before and ignore the new directory. This avoids conflicts between P2P links exposed by KFD and P2P links created by the Thunk itself. New versions of the Thunk will use only the P2P links provided in the new p2p_links directory, if it exists, or fall back to the old code path on older KFDs that don't expose p2p_links. See more comments inline. 
Signed-off-by: Ramesh Errabolu --- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 332 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 3 + 2 files changed, 333 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 8d50d207cf66..6bdc22d6f6ab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -40,6 +40,7 @@ #include "kfd_svm.h" #include "amdgpu_amdkfd.h" #include "amdgpu_ras.h" +#include "amdgpu.h" /* topology_device_list - Master list of all topology devices */ static struct list_head topology_device_list; @@ -148,6 +149,7 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) struct kfd_mem_properties *mem; struct kfd_cache_properties *cache; struct kfd_iolink_properties *iolink; + struct kfd_iolink_properties *p2plink; struct kfd_perf_properties *perf; list_del(>list); @@ -173,6 +175,13 @@ static void kfd_release_topology_device(struct kfd_topology_device *dev) kfree(iolink); } + while (dev->p2p_link_props.next != >p2p_link_props) { + p2plink = container_of(dev->p2p_link_props.next, + struct kfd_iolink_properties, list); + list_del(>list); + kfree(p2plink); + } + while (dev->perf_props.next != >perf_props) { perf = container_of(dev->perf_props.next, struct kfd_perf_properties, list); @@ -214,6 +223,7 @@ struct kfd_topology_device *kfd_create_topology_device( INIT_LIST_HEAD(>mem_props); INIT_LIST_HEAD(>cache_props); INIT_LIST_HEAD(>io_link_props); + INIT_LIST_HEAD(>p2p_link_props); INIT_LIST_HEAD(>perf_props); list_add_tail(>list, device_list); @@ -465,6 +475,8 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr, dev->node_props.caches_count); sysfs_show_32bit_prop(buffer, offs, "io_links_count", dev->node_props.io_links_count); + sysfs_show_32bit_prop(buffer, offs, "p2p_links_count", + dev->node_props.p2p_links_count); sysfs_show_32bit_prop(buffer, offs, "cpu_core_id_base", 
dev->node_props.cpu_core_id_base); sysfs_show_32bit_prop(buffer, offs, "simd_id_base", @@ -568,6 +580,7 @@ static void kfd_remove_sysfs_file(struct kobject *kobj, struct attribute *attr) static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) { + struct kfd_iolink_properties *p2plink; struct kfd_iolink_properties *iolink; struct kfd_cache_properties *cache; struct kfd_mem_properties *mem; @@ -585,6 +598,18 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) dev->kobj_iolink = NULL; } + if (dev->kobj_p2plink) { + list_for_each_entry(p2plink, >p2p_link_props, list) + if (p2plink->kobj) { + kfd_remove_sysfs_file(p2plink->kobj, + >attr); + p2plink->kobj = NULL; + } + kobject_del(dev->kobj_p2plink); + kobject_put(dev->kobj_p2plink); + dev->kobj_p2plink = NULL; + } + if (dev->kobj_cache) { list_for_each_entry(cache, >cache_props, list) if (cache->kobj) { @@ -631,6 +656,7 @@ static void kfd_remove_sysfs_node_entry(struct kfd_topology_device *dev) static int kfd_build_sysfs_node_entry(struct kfd_topology_device *dev, uint32_t id) { + struct kfd_iolink_properties
Re: Explicit VM updates
Bas has the problem that CS implicitly waits for VM updates. Currently when you unmap a BO the operation will only be executed after all the previously made CS are finished. Similar for mapping BOs. The next CS will only start after all the pending page table updates are completed. The mapping case was already handled by my prototype patch set, but the unmapping case still hurts a bit. This implicit sync between CS and map/unmap operations can really hurt the performance of applications which massively use PRTs. Regards, Christian. Am 01.06.22 um 18:27 schrieb Marek Olšák: Can you please summarize what this is about? Thanks, Marek On Wed, Jun 1, 2022 at 8:40 AM Christian König wrote: Hey guys, so today Bas came up with a new requirement regarding the explicit synchronization to VM updates and a bunch of prototype patches. I've been thinking about that stuff for quite some time before, but to be honest it's one of the most trickiest parts of the driver. So my current thinking is that we could potentially handle those requirements like this: 1. We add some new EXPLICIT flag to context (or CS?) and VM IOCTL. This way we either get the new behavior for the whole CS+VM or the old one, but never both mixed. 2. When memory is unmapped we keep around the last unmap operation inside the bo_va. 3. When memory is freed we add all the CS fences which could access that memory + the last unmap operation as BOOKKEEP fences to the BO and as mandatory sync fence to the VM. Memory freed either because of an eviction or because of userspace closing the handle will be seen as a combination of unmap+free. The result is the following semantic for userspace to avoid implicit synchronization as much as possible: 1. When you allocate and map memory it is mandatory to either wait for the mapping operation to complete or to add it as dependency for your CS. If this isn't followed the application will run into CS faults (that's what we pretty much already implemented). 2. 
When memory is freed you must unmap that memory first and then wait for this unmap operation to complete before freeing the memory. If this isn't followed the kernel will add a forcefully wait to the next CS to block until the unmap is completed. 3. All VM operations requested by userspace will still be executed in order, e.g. we can't run unmap + map in parallel or something like this. Is that something you guys can live with? As far as I can see it should give you the maximum freedom possible, but is still doable. Regards, Christian.
Re: Explicit VM updates
Am 01.06.22 um 17:05 schrieb Felix Kuehling: Am 2022-06-01 um 08:40 schrieb Christian König: Hey guys, so today Bas came up with a new requirement regarding the explicit synchronization to VM updates and a bunch of prototype patches. I've been thinking about that stuff for quite some time before, but to be honest it's one of the most trickiest parts of the driver. So my current thinking is that we could potentially handle those requirements like this: 1. We add some new EXPLICIT flag to context (or CS?) and VM IOCTL. This way we either get the new behavior for the whole CS+VM or the old one, but never both mixed. 2. When memory is unmapped we keep around the last unmap operation inside the bo_va. 3. When memory is freed we add all the CS fences which could access that memory + the last unmap operation as BOOKKEEP fences to the BO and as mandatory sync fence to the VM. Memory freed either because of an eviction or because of userspace closing the handle will be seen as a combination of unmap+free. The result is the following semantic for userspace to avoid implicit synchronization as much as possible: 1. When you allocate and map memory it is mandatory to either wait for the mapping operation to complete or to add it as dependency for your CS. If this isn't followed the application will run into CS faults (that's what we pretty much already implemented). This makes sense. 2. When memory is freed you must unmap that memory first and then wait for this unmap operation to complete before freeing the memory. If this isn't followed the kernel will add a forcefully wait to the next CS to block until the unmap is completed. This would work for now, but it won't work for user mode submission in the future. I find it weird that user mode needs to wait for the unmap. For user mode, unmap and free should always be asynchronous. I can't think of any good reasons to make user mode wait for the driver to clean up its stuff. Could the waiting be done in kernel mode instead? 
TTM already does delayed freeing if there are fences outstanding on a BO being freed. This should make it easy to delay freeing until the unmap is done without blocking the user mode thread. This is not about blocking, but synchronization dependencies. In other words the free is not waiting for the unmap to complete, but causes command submissions through the kernel to depend on the unmap. User mode submissions are completely unrelated to that. Regards, Christian. Regards, Felix 3. All VM operations requested by userspace will still be executed in order, e.g. we can't run unmap + map in parallel or something like this. Is that something you guys can live with? As far as I can see it should give you the maximum freedom possible, but is still doable. Regards, Christian.
Re: Explicit VM updates
Can you please summarize what this is about? Thanks, Marek On Wed, Jun 1, 2022 at 8:40 AM Christian König wrote: > Hey guys, > > so today Bas came up with a new requirement regarding the explicit > synchronization to VM updates and a bunch of prototype patches. > > I've been thinking about that stuff for quite some time before, but to > be honest it's one of the most trickiest parts of the driver. > > So my current thinking is that we could potentially handle those > requirements like this: > > 1. We add some new EXPLICIT flag to context (or CS?) and VM IOCTL. This > way we either get the new behavior for the whole CS+VM or the old one, > but never both mixed. > > 2. When memory is unmapped we keep around the last unmap operation > inside the bo_va. > > 3. When memory is freed we add all the CS fences which could access that > memory + the last unmap operation as BOOKKEEP fences to the BO and as > mandatory sync fence to the VM. > > Memory freed either because of an eviction or because of userspace > closing the handle will be seen as a combination of unmap+free. > > > The result is the following semantic for userspace to avoid implicit > synchronization as much as possible: > > 1. When you allocate and map memory it is mandatory to either wait for > the mapping operation to complete or to add it as dependency for your CS. > If this isn't followed the application will run into CS faults > (that's what we pretty much already implemented). > > 2. When memory is freed you must unmap that memory first and then wait > for this unmap operation to complete before freeing the memory. > If this isn't followed the kernel will add a forcefully wait to the > next CS to block until the unmap is completed. > > 3. All VM operations requested by userspace will still be executed in > order, e.g. we can't run unmap + map in parallel or something like this. > > Is that something you guys can live with? 
As far as I can see it should > give you the maximum freedom possible, but is still doable. > > Regards, > Christian. >
Re: [PATCH 2/3] drm/amdgpu: Add peer-to-peer support among PCIe connected AMD GPUs
Am 2022-05-31 um 13:01 schrieb Ramesh Errabolu: Add support for peer-to-peer communication, in both data and control planes, among AMD GPUs that are connected PCIe and have large BAR vBIOS. Please don't use the "control plane", "data plane" terminology here. This is not common usage in this context. Also the reference to large-BAR BIOSes is incorrect because BARs can be resized. More comments inline ... Support REQUIRES enablement of config HSA_AMD_P2P. Signed-off-by: Ramesh Errabolu --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 1 + .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 328 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 30 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 + 4 files changed, 307 insertions(+), 60 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index f8b9f27adcf5..5c00ea1df21c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -48,6 +48,7 @@ enum kfd_mem_attachment_type { KFD_MEM_ATT_SHARED, /* Share kgd_mem->bo or another attachment's */ KFD_MEM_ATT_USERPTR,/* SG bo to DMA map pages from a userptr bo */ KFD_MEM_ATT_DMABUF, /* DMAbuf to DMA map TTM BOs */ + KFD_MEM_ATT_SG /* Tag to DMA map SG BOs */ }; struct kfd_mem_attachment { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 34ba9e776521..c2af82317a03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -241,6 +241,42 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) kfree(bo->kfd_bo); } +/** + * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information + * about USERPTR or DOOREBELL or MMIO BO. + * @adev: Device for which dmamap BO is being created + * @mem: BO of peer device that is being DMA mapped. 
Provides parameters + * in building the dmamap BO + * @bo_out: Output parameter updated with handle of dmamap BO + */ +static int +create_dmamap_sg_bo(struct amdgpu_device *adev, +struct kgd_mem *mem, struct amdgpu_bo **bo_out) +{ + struct drm_gem_object *gem_obj; + int ret, align; + + ret = amdgpu_bo_reserve(mem->bo, false); + if (ret) + return ret; + + align = 1; + ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, align, + AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE, + ttm_bo_type_sg, mem->bo->tbo.base.resv, _obj); + + amdgpu_bo_unreserve(mem->bo); + + if (ret) { + pr_err("Error in creating DMA mappable SG BO on domain: %d\n", ret); + return -EINVAL; + } + + *bo_out = gem_to_amdgpu_bo(gem_obj); + (*bo_out)->parent = amdgpu_bo_ref(mem->bo); + return ret; +} + /* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's * reservation object. * @@ -481,6 +517,38 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) return pte_flags; } +/** + * create_sg_table() - Create an sg_table for a contiguous DMA addr range + * @addr: The starting address to point to + * @size: Size of memory area in bytes being pointed to + * + * Allocates an instance of sg_table and initializes it to point to memory + * area specified by input parameters. The address used to build is assumed + * to be DMA mapped, if needed. + * + * DOORBELL or MMIO BOs use only one scatterlist node in their sg_table + * because they are physically contiguous. 
+ * + * Return: Initialized instance of SG Table or NULL + */ +static struct sg_table *create_sg_table(uint64_t addr, uint32_t size) +{ + struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); + + if (!sg) + return NULL; + if (sg_alloc_table(sg, 1, GFP_KERNEL)) { + kfree(sg); + return NULL; + } + sg_dma_address(sg->sgl) = addr; + sg->sgl->length = size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = size; +#endif + return sg; +} + static int kfd_mem_dmamap_userptr(struct kgd_mem *mem, struct kfd_mem_attachment *attachment) @@ -545,6 +613,87 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment) return ttm_bo_validate(>tbo, >placement, ); } +/** + * kfd_mem_dmamap_sg_bo() - Create DMA mapped sg_table to access DOORBELL or MMIO BO + * @mem: SG BO of the DOORBELL or MMIO resource on the owning device + * @attachment: Virtual address attachment of the BO on accessing device + * + * An access request from the device that owns DOORBELL does not require DMA mapping. + * This is because the request doesn't go through PCIe root complex i.e. it instead + * loops back. The need to DMA map arises only when
Re: [PATCH 1/3] drm/amdkfd: Define config HSA_AMD_P2P to support peer-to-peer
Am 2022-05-31 um 13:02 schrieb Ramesh Errabolu: Extend current kernel config requirements of amdgpu by adding config HSA_AMD_P2P. Enabling HSA_AMD_P2P is REQUIRED to support peer-to-peer communication, in both data and control planes, among AMD GPU devices that are connected via PCIe and have large BAR vBIOS Signed-off-by: Ramesh Errabolu --- drivers/gpu/drm/amd/amdkfd/Kconfig | 8 1 file changed, 8 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index 8cc0a76ddf9f..26614f5f20ea 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -25,3 +25,11 @@ config HSA_AMD_SVM preemptions and one based on page faults. To enable page fault based memory management on most GFXv9 GPUs, set the module parameter amdgpu.noretry=0. + +config HSA_AMD_P2P + bool "HSA kernel driver support for peer-to-peer for AMD GPU devices" + depends on HSA_AMD && PCI_P2PDMA && DMABUF_MOVE_NOTIFY + help + Enable this if you want to access AMD GPU peer devices, in both data + and control planes, that are connected via PCIe and have large BAR vBIOS I have not seen the terms "data plane" and "control plane" used in the context of GPUs. As far as I can tell, this terminology is more common in the context network routing. I think it could cause confusion to introduce these terms without an explanation to users. The sentence "... if you want to access AMD GPU peer devices ..." seems to address someone writing an application. This help message is meant for users and admins building a kernel, who may want to run compute applications, not for compute application developers. I would also not mention large-BAR VBIOSes because the BAR can often be resized even with a small-BAR VBIOS. Therefore I would recommend an alternative text here that avoids uncommon terminology and addresses the concerns of users rather than application developers: Enable peer-to-peer (P2P) communication between AMD GPUs over the PCIe bus. 
This can improve performance of multi-GPU compute applications and libraries by enabling GPUs to access data directly in peer GPUs' memory without intermediate copies in system memory. This P2P feature is only enabled on compatible chipsets, and between GPUs with large memory BARs that expose the entire VRAM in PCI bus address space within the physical address limits of the GPUs. Regards, Felix +
Re: Explicit VM updates
Am 2022-06-01 um 08:40 schrieb Christian König: Hey guys, so today Bas came up with a new requirement regarding the explicit synchronization to VM updates and a bunch of prototype patches. I've been thinking about that stuff for quite some time before, but to be honest it's one of the most trickiest parts of the driver. So my current thinking is that we could potentially handle those requirements like this: 1. We add some new EXPLICIT flag to context (or CS?) and VM IOCTL. This way we either get the new behavior for the whole CS+VM or the old one, but never both mixed. 2. When memory is unmapped we keep around the last unmap operation inside the bo_va. 3. When memory is freed we add all the CS fences which could access that memory + the last unmap operation as BOOKKEEP fences to the BO and as mandatory sync fence to the VM. Memory freed either because of an eviction or because of userspace closing the handle will be seen as a combination of unmap+free. The result is the following semantic for userspace to avoid implicit synchronization as much as possible: 1. When you allocate and map memory it is mandatory to either wait for the mapping operation to complete or to add it as dependency for your CS. If this isn't followed the application will run into CS faults (that's what we pretty much already implemented). This makes sense. 2. When memory is freed you must unmap that memory first and then wait for this unmap operation to complete before freeing the memory. If this isn't followed the kernel will add a forcefully wait to the next CS to block until the unmap is completed. This would work for now, but it won't work for user mode submission in the future. I find it weird that user mode needs to wait for the unmap. For user mode, unmap and free should always be asynchronous. I can't think of any good reasons to make user mode wait for the driver to clean up its stuff. Could the waiting be done in kernel mode instead? 
TTM already does delayed freeing if there are fences outstanding on a BO being freed. This should make it easy to delay freeing until the unmap is done without blocking the user mode thread. Regards, Felix 3. All VM operations requested by userspace will still be executed in order, e.g. we can't run unmap + map in parallel or something like this. Is that something you guys can live with? As far as I can see it should give you the maximum freedom possible, but is still doable. Regards, Christian.
Re: (REGRESSION bisected) Re: amdgpu errors (VM fault / GPU fault detected) with 5.19 merge window snapshots
Am 01.06.22 um 16:55 schrieb Alex Deucher: On Fri, May 27, 2022 at 8:58 AM Michal Kubecek wrote: On Fri, May 27, 2022 at 11:00:39AM +0200, Michal Kubecek wrote: Hello, while testing 5.19 merge window snapshots (commits babf0bb978e3 and 7e284070abe5), I keep getting errors like below. I have not seen them with 5.18 final or older. [ 247.150333] gmc_v8_0_process_interrupt: 46 callbacks suppressed [ 247.150336] amdgpu :0c:00.0: amdgpu: GPU fault detected: 147 0x00020802 for process firefox pid 6101 thread firefox:cs0 pid 6116 [ 247.150339] amdgpu :0c:00.0: amdgpu: VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x00107800 [ 247.150340] amdgpu :0c:00.0: amdgpu: VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x0D008002 [ 247.150341] amdgpu :0c:00.0: amdgpu: VM fault (0x02, vmid 6, pasid 32780) at page 1079296, write from 'TC2' (0x54433200) (8) [...] [ 249.925909] amdgpu :0c:00.0: amdgpu: IH ring buffer overflow (0x000844C0, 0x4A00, 0x44D0) [ 250.434986] [drm] Fence fallback timer expired on ring sdma0 [ 466.621568] gmc_v8_0_process_interrupt: 122 callbacks suppressed [...] There does not seem to be any apparent immediate problem with graphics but when running commit babf0bb978e3, there seemed to be a noticeable lag in some operations, e.g. when moving a window or repainting large part of the terminal window in konsole (no idea if it's related). My GPU is Radeon Pro WX 2100 (1002:6995). What other information should I collect to help debugging the issue? Bisected to commit 5255e146c99a ("drm/amdgpu: rework TLB flushing"). There seem to be later commits depending on it so I did not test a revert on top of current mainline. @Christian Koenig, @Yang, Philip Any ideas? I think there were some fix ups for this. Maybe those just haven't hit the tree yet? I need to double check, but as far as I know we have fixed all the fallout. Could be that something didn't went upstream because it came to late for the merge window. Christian. 
Alex I should also mention that most commits tested as "bad" during the bisect did behave much worse than current mainline (errors starting as early as with sddm, visibly damaged screen content, sometimes even crashes). But all of them issued messages similar to those above into kernel log. Michal Kubecek
Re: [PATCH 3/3] drm/amdgpu/swsmu: use new register offsets for smu_cmn.c
Yes, this is a good change--abstracting the SMU messaging registers for ASICs. Reviewed-by: Luben Tuikov Regards, Luben On 2022-05-26 14:00, Alex Deucher wrote: > Use the per asic offsets so the we don't have to have > asic specific logic in the common code. > > Signed-off-by: Alex Deucher > --- > drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 77 +++--- > 1 file changed, 7 insertions(+), 70 deletions(-) > > diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c > b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c > index 5215ead4978f..53cd62ccab5d 100644 > --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c > +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c > @@ -37,40 +37,6 @@ > #undef pr_info > #undef pr_debug > > -/* > - * Although these are defined in each ASIC's specific header file. > - * They share the same definitions and values. That makes common > - * APIs for SMC messages issuing for all ASICs possible. > - */ > -#define mmMP1_SMN_C2PMSG_66 > 0x0282 > -#define mmMP1_SMN_C2PMSG_66_BASE_IDX > 0 > - > -#define mmMP1_SMN_C2PMSG_82 > 0x0292 > -#define mmMP1_SMN_C2PMSG_82_BASE_IDX > 0 > - > -#define mmMP1_SMN_C2PMSG_90 > 0x029a > -#define mmMP1_SMN_C2PMSG_90_BASE_IDX > 0 > - > -#define mmMP1_SMN_C2PMSG_66_V13_0_4 0x0282 > -#define mmMP1_SMN_C2PMSG_66_V13_0_4_BASE_IDX1 > - > -#define mmMP1_SMN_C2PMSG_82_V13_0_4 0x0292 > -#define mmMP1_SMN_C2PMSG_82_V13_0_4_BASE_IDX1 > - > -#define mmMP1_SMN_C2PMSG_90_V13_0_4 0x029a > -#define mmMP1_SMN_C2PMSG_90_V13_0_4_BASE_IDX 1 > - > -/* SMU 13.0.5 has its specific mailbox messaging registers */ > - > -#define mmMP1_C2PMSG_2 > (0xbee142 + 0xb0 / 4) > -#define mmMP1_C2PMSG_2_BASE_IDX > 0 > - > -#define mmMP1_C2PMSG_34 > (0xbee262 + 0xb0 / 4) > -#define mmMP1_C2PMSG_34_BASE_IDX > 0 > - > -#define mmMP1_C2PMSG_33 > (0xbee261 + 0xb0 / 4) > -#define mmMP1_C2PMSG_33_BASE_IDX > 0 > - > #define MP1_C2PMSG_90__CONTENT_MASK > 0xL > > #undef __SMU_DUMMY_MAP > @@ -99,12 +65,7 @@ static void smu_cmn_read_arg(struct smu_context *smu, > { > struct amdgpu_device *adev = 
smu->adev; > > - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 5)) > - *arg = RREG32_SOC15(MP1, 0, mmMP1_C2PMSG_34); > - else if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 4)) > - *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82_V13_0_4); > - else > - *arg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_82); > + *arg = RREG32(smu->param_reg); > } > > /* Redefine the SMU error codes here. > @@ -150,12 +111,7 @@ static u32 __smu_cmn_poll_stat(struct smu_context *smu) > u32 reg; > > for ( ; timeout > 0; timeout--) { > - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 5)) > - reg = RREG32_SOC15(MP1, 0, mmMP1_C2PMSG_33); > - else if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 4)) > - reg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90_V13_0_4); > - else > - reg = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_90); > + reg = RREG32(smu->resp_reg); > if ((reg & MP1_C2PMSG_90__CONTENT_MASK) != 0) > break; > > @@ -177,16 +133,8 @@ static void __smu_cmn_reg_print_error(struct smu_context > *smu, > > switch (reg_c2pmsg_90) { > case SMU_RESP_NONE: { > - if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 5)) { > - msg_idx = RREG32_SOC15(MP1, 0, mmMP1_C2PMSG_2); > - prm = RREG32_SOC15(MP1, 0, mmMP1_C2PMSG_34); > - } else if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, > 4)) { > - msg_idx = RREG32_SOC15(MP1, 0, > mmMP1_SMN_C2PMSG_66_V13_0_4); > - prm = RREG32_SOC15(MP1, 0, > mmMP1_SMN_C2PMSG_82_V13_0_4); > - } else { > - msg_idx = RREG32_SOC15(MP1, 0, mmMP1_SMN_C2PMSG_66); > - prm
Re: (REGRESSION bisected) Re: amdgpu errors (VM fault / GPU fault detected) with 5.19 merge window snapshots
On Fri, May 27, 2022 at 8:58 AM Michal Kubecek wrote: > > On Fri, May 27, 2022 at 11:00:39AM +0200, Michal Kubecek wrote: > > Hello, > > > > while testing 5.19 merge window snapshots (commits babf0bb978e3 and > > 7e284070abe5), I keep getting errors like below. I have not seen them > > with 5.18 final or older. > > > > > > [ 247.150333] gmc_v8_0_process_interrupt: 46 callbacks suppressed > > [ 247.150336] amdgpu :0c:00.0: amdgpu: GPU fault detected: 147 > > 0x00020802 for process firefox pid 6101 thread firefox:cs0 pid 6116 > > [ 247.150339] amdgpu :0c:00.0: amdgpu: > > VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x00107800 > > [ 247.150340] amdgpu :0c:00.0: amdgpu: > > VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x0D008002 > > [ 247.150341] amdgpu :0c:00.0: amdgpu: VM fault (0x02, vmid 6, pasid > > 32780) at page 1079296, write from 'TC2' (0x54433200) (8) > [...] > > [ 249.925909] amdgpu :0c:00.0: amdgpu: IH ring buffer overflow > > (0x000844C0, 0x4A00, 0x44D0) > > [ 250.434986] [drm] Fence fallback timer expired on ring sdma0 > > [ 466.621568] gmc_v8_0_process_interrupt: 122 callbacks suppressed > [...] > > > > > > There does not seem to be any apparent immediate problem with graphics > > but when running commit babf0bb978e3, there seemed to be a noticeable > > lag in some operations, e.g. when moving a window or repainting large > > part of the terminal window in konsole (no idea if it's related). > > > > My GPU is Radeon Pro WX 2100 (1002:6995). What other information should > > I collect to help debugging the issue? > > Bisected to commit 5255e146c99a ("drm/amdgpu: rework TLB flushing"). > There seem to be later commits depending on it so I did not test > a revert on top of current mainline. > @Christian Koenig, @Yang, Philip Any ideas? I think there were some fix ups for this. Maybe those just haven't hit the tree yet? 
Alex > I should also mention that most commits tested as "bad" during the > bisect did behave much worse than current mainline (errors starting as > early as with sddm, visibly damaged screen content, sometimes even > crashes). But all of them issued messages similar to those above into > kernel log. > > Michal Kubecek
Re: [PATCH] drm/amdgpu: fix scratch register access method in SRIOV
On Wed, Jun 1, 2022 at 3:27 AM ZhenGuo Yin wrote: > > The scratch register should be accessed through MMIO instead of RLCG > in SRIOV, since it being used in RLCG register access function. > > Fixes: 0e1314781b9c("drm/amdgpu: nuke dynamic gfx scratch reg allocation") Missing your signed-off-by. Alex > --- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index c5f46d264b23..8331e0c5e18e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -3784,7 +3784,7 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring > *ring) > unsigned i; > int r; > > - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); > + WREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0), 0xCAFEDEAD); > r = amdgpu_ring_alloc(ring, 3); > if (r) { > DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", > @@ -3799,7 +3799,7 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring > *ring) > amdgpu_ring_commit(ring); > > for (i = 0; i < adev->usec_timeout; i++) { > - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); > + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0)); > if (tmp == 0xDEADBEEF) > break; > if (amdgpu_emu_mode == 1) > -- > 2.35.1 >
[PATCH] drm/amdgpu: Correct if indentation that causes GCC warning
GCC is complaining about misleading indentation: error: this ‘if’ clause does not guard... [-Werror=misleading-indentation] 603 | if (r) This commit adjusts the code indentation. Cc: Candice Li Cc: Hawking Zhang Signed-off-by: Rodrigo Siqueira --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 99c1a2d3dae8..e1f4c5f30645 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -599,9 +599,9 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); - r = amdgpu_ras_block_late_init(adev, ras_block); - if (r) - return r; + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; r = amdgpu_irq_get(adev, >gfx.cp_ecc_error_irq, 0); if (r) -- 2.25.1
Re: [PATCH v10 0/4] Separate panel orientation property creating and value setting
On Tue, May 31, 2022 at 6:56 PM Hans de Goede wrote: > > Hi, > > On 5/30/22 13:34, Hsin-Yi Wang wrote: > > On Mon, May 30, 2022 at 4:53 PM Hans de Goede wrote: > >> > >> Hi, > >> > >> On 5/30/22 10:19, Hsin-Yi Wang wrote: > >>> Some drivers, eg. mtk_drm and msm_drm, rely on the panel to set the > >>> orientation. Panel calls drm_connector_set_panel_orientation() to create > >>> orientation property and sets the value. However, connector properties > >>> can't be created after drm_dev_register() is called. The goal is to > >>> separate the orientation property creation, so drm drivers can create it > >>> earlier before drm_dev_register(). > >> > >> Sorry for jumping in pretty late in the discussion (based on the v10 > >> I seem to have missed this before). > >> > >> This sounds to me like the real issue here is that drm_dev_register() > >> is getting called too early? > >> > > Right. > > > >> To me it seems sensible to delay calling drm_dev_register() and > >> thus allowing userspace to start detecting available displays + > >> features until after the panel has been probed. > >> > > > > Most panels set this value very late, in .get_modes callback (since it > > is when the connector is known), though the value was known during > > panel probe. > > Hmm I would expect the main drm/kms driver to register the drm_connector > object after probing the panel, right ? > > So maybe this is a problem with the panel API? How about adding > separate callback to the panel API to get the orientation, which the > main drm/kms driver can then call before registering the connector ? > > And then have the main drm/kms driver call > drm_connector_set_panel_orientation() with the returned orientation > on the connecter before registering it. 
> > The new get_orientation callback for the panel should of course > be optional (IOW amy be NULL), so we probably want a small > helper for drivers using panel (sub)drivers to take care of > the process of getting the panel orientation from the panel > (if supported) and then setting it on the connector. > Hi Hans, Thanks for the suggestion. I've sent a new version for this: https://patchwork.kernel.org/project/dri-devel/patch/20220601081823.1038797-2-hsi...@chromium.org/ Panel can implement the optional callback to return the orientation property, while drm/kms driver will call a drm API to get the value then they can call drm_connector_set_panel_orientation(). Panel .get_mode will still call drm_connector_set_panel_orientation() but now it will be a no-op as the value was set by drm/kms driver previously. This is similar to the small patch below: https://patchwork.kernel.org/project/linux-mediatek/patch/20220530113033.124072-1-hsi...@chromium.org/ But it's now using the panel API. > > > I think we can also let drm check if they have remote panel nodes: If > > there is a panel and the panel sets the orientation, let the drm read > > this value and set the property. Does this workflow sound reasonable? > > > > The corresponding patch to implement this: > > https://patchwork.kernel.org/project/linux-mediatek/patch/20220530113033.124072-1-hsi...@chromium.org/ > > That is a suprisingly small patch (which is good). I guess that > my suggestion to add a new panel driver callback to get > the orientation would be a bit bigget then this. Still I think > that that would be a bit cleaner, as it would also solve this > for cases where the orientation comes from the panel itself > (through say some EDID extenstion) rather then from devicetree. > > Still I think either way should be acceptable upstream. > > Opinions from other drm devs on the above are very much welcome! 
> > Your small patch nicely avoids the probe ordering problem, > so it is much better then this patch series. > > Regards, > > Hans > > > > > > > Thanks > > > >> I see a devicetree patch in this series, so I guess that the panel > >> is described in devicetree. Especially in the case of devicetree > >> I would expect the kernel to have enough info to do the right > >> thing and make sure the panel is probed before calling > >> drm_dev_register() ? > >> > >> Regards, > >> > >> Hans > >> > >> > >> > >> > >>> > >>> After this series, drm_connector_set_panel_orientation() works like > >>> before. It won't affect existing callers of > >>> drm_connector_set_panel_orientation(). The only difference is that > >>> some drm drivers can call drm_connector_init_panel_orientation_property() > >>> earlier. > >>> > >>> Hsin-Yi Wang (4): > >>> gpu: drm: separate panel orientation property creating and value > >>> setting > >>> drm/mediatek: init panel orientation property > >>> drm/msm: init panel orientation property > >>> arm64: dts: mt8183: Add panel rotation > >>> > >>> .../arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 1 + > >>> drivers/gpu/drm/drm_connector.c | 58 ++- > >>> drivers/gpu/drm/mediatek/mtk_dsi.c| 7 +++ > >>> drivers/gpu/drm/msm/dsi/dsi_manager.c | 4 ++ >
[PATCH] drm/amdgpu: fix refcount underflow in device reset
[why] A gfx job may be processed but not finished when reset begin from compute job timeout. drm_sched_resubmit_jobs_ext in sched_main assume submitted job unsignaled and always put parent fence. Resubmission for that job cause underflow. This fix is done in device reset to avoid changing drm sched_main. [how] Check if the job to submit has signaled and avoid submission if signaled in device reset for both advanced TDR and normal job resume. Signed-off-by: Yiqing Yao --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 72 -- 1 file changed, 41 insertions(+), 31 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f16f105a737b..29b307af97eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -4980,39 +4980,43 @@ static void amdgpu_device_recheck_guilty_jobs( /* for the real bad job, it will be resubmitted twice, adding a dma_fence_get * to make sure fence is balanced */ dma_fence_get(s_job->s_fence->parent); - drm_sched_resubmit_jobs_ext(>sched, 1); - ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout); - if (ret == 0) { /* timeout */ - DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n", - ring->sched.name, s_job->id); + /* avoid submission for signaled hw fence */ + if(!dma_fence_is_signaled(s_job->s_fence->parent)){ - /* set guilty */ - drm_sched_increase_karma(s_job); + drm_sched_resubmit_jobs_ext(>sched, 1); + + ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout); + if (ret == 0) { /* timeout */ + DRM_ERROR("Found the real bad job! 
ring:%s, job_id:%llx\n", + ring->sched.name, s_job->id); + + /* set guilty */ + drm_sched_increase_karma(s_job); retry: - /* do hw reset */ - if (amdgpu_sriov_vf(adev)) { - amdgpu_virt_fini_data_exchange(adev); - r = amdgpu_device_reset_sriov(adev, false); - if (r) - adev->asic_reset_res = r; - } else { - clear_bit(AMDGPU_SKIP_HW_RESET, - _context->flags); - r = amdgpu_do_asic_reset(device_list_handle, -reset_context); - if (r && r == -EAGAIN) - goto retry; - } + /* do hw reset */ + if (amdgpu_sriov_vf(adev)) { + amdgpu_virt_fini_data_exchange(adev); + r = amdgpu_device_reset_sriov(adev, false); + if (r) + adev->asic_reset_res = r; + } else { + clear_bit(AMDGPU_SKIP_HW_RESET, + _context->flags); + r = amdgpu_do_asic_reset(device_list_handle, + reset_context); + if (r && r == -EAGAIN) + goto retry; + } - /* -* add reset counter so that the following -* resubmitted job could flush vmid -*/ - atomic_inc(>gpu_reset_counter); - continue; + /* + * add reset counter so that the following + * resubmitted job could flush vmid + */ + atomic_inc(>gpu_reset_counter); + continue; + } } - /* got the hw fence, signal finished fence */ atomic_dec(ring->sched.score); dma_fence_put(s_job->s_fence->parent); @@ -5221,13 +5225,19 @@ int amdgpu_device_gpu_recover_imp(struct amdgpu_device *adev, for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = tmp_adev->rings[i]; - + struct drm_sched_job *s_job;
RE: [PATCH 10/10] drm/amdgpu: add gang submit frontend
[Public] Hi Christian, No other comments. With p->jobs[i] fixed, the test case worked. I have to clean up the code and send it for review. I wanted to add comparing the time with and without gang submission and fail test case if former is slow. I will do this later. I will send the test case for review first. Thank you, Yogesh -Original Message- From: Koenig, Christian Sent: Wednesday, June 1, 2022 5:42 PM To: Mohan Marimuthu, Yogesh ; Christian König ; amd-gfx@lists.freedesktop.org; Olsak, Marek Subject: Re: [PATCH 10/10] drm/amdgpu: add gang submit frontend Am 01.06.22 um 14:09 schrieb Mohan Marimuthu, Yogesh: > [SNIP] > - /* Make sure all BOs are remembered as writers */ > - amdgpu_bo_list_for_each_entry(e, p->bo_list) > + list_for_each_entry(e, >validated, tv.head) { > + > + /* Everybody except for the gang leader uses BOOKKEEP */ > + for (i = 0; i < (p->gang_size - 1); ++i) { > + dma_resv_add_fence(e->tv.bo->base.resv, > +>jobs[i]->base.s_fence->finished, > +DMA_RESV_USAGE_BOOKKEEP); > + } > + > + /* The gang leader as remembered as writer */ > e->tv.num_shared = 0; > + } > > > p->jobs[i] = NULL is done in previous loop. Now when running > >jobs[i]->base.s_fence->finished there is NULL pointer crash. Ah, yes good point. Going to fix that. Any more comments on this? Did you finished the test cases? Thanks, Christian.
Explicit VM updates
Hey guys, so today Bas came up with a new requirement regarding the explicit synchronization to VM updates and a bunch of prototype patches. I've been thinking about that stuff for quite some time before, but to be honest it's one of the most trickiest parts of the driver. So my current thinking is that we could potentially handle those requirements like this: 1. We add some new EXPLICIT flag to context (or CS?) and VM IOCTL. This way we either get the new behavior for the whole CS+VM or the old one, but never both mixed. 2. When memory is unmapped we keep around the last unmap operation inside the bo_va. 3. When memory is freed we add all the CS fences which could access that memory + the last unmap operation as BOOKKEEP fences to the BO and as mandatory sync fence to the VM. Memory freed either because of an eviction or because of userspace closing the handle will be seen as a combination of unmap+free. The result is the following semantic for userspace to avoid implicit synchronization as much as possible: 1. When you allocate and map memory it is mandatory to either wait for the mapping operation to complete or to add it as dependency for your CS. If this isn't followed the application will run into CS faults (that's what we pretty much already implemented). 2. When memory is freed you must unmap that memory first and then wait for this unmap operation to complete before freeing the memory. If this isn't followed the kernel will add a forcefully wait to the next CS to block until the unmap is completed. 3. All VM operations requested by userspace will still be executed in order, e.g. we can't run unmap + map in parallel or something like this. Is that something you guys can live with? As far as I can see it should give you the maximum freedom possible, but is still doable. Regards, Christian.
Re: [PATCH 10/10] drm/amdgpu: add gang submit frontend
Am 01.06.22 um 14:09 schrieb Mohan Marimuthu, Yogesh: [SNIP] - /* Make sure all BOs are remembered as writers */ - amdgpu_bo_list_for_each_entry(e, p->bo_list) + list_for_each_entry(e, >validated, tv.head) { + + /* Everybody except for the gang leader uses BOOKKEEP */ + for (i = 0; i < (p->gang_size - 1); ++i) { + dma_resv_add_fence(e->tv.bo->base.resv, + >jobs[i]->base.s_fence->finished, + DMA_RESV_USAGE_BOOKKEEP); + } + + /* The gang leader as remembered as writer */ e->tv.num_shared = 0; + } p->jobs[i] = NULL is done in previous loop. Now when running >jobs[i]->base.s_fence->finished there is NULL pointer crash. Ah, yes good point. Going to fix that. Any more comments on this? Did you finished the test cases? Thanks, Christian.
RE: [PATCH 10/10] drm/amdgpu: add gang submit frontend
[AMD Official Use Only - General] -Original Message- From: amd-gfx On Behalf Of Christian König Sent: Thursday, March 3, 2022 1:53 PM To: amd-gfx@lists.freedesktop.org; Olsak, Marek Cc: Koenig, Christian Subject: [PATCH 10/10] drm/amdgpu: add gang submit frontend Allows submitting jobs as gang which needs to run on multiple engines at the same time. All members of the gang get the same implicit, explicit and VM dependencies. So no gang member will start running until everything else is ready. The last job is considered the gang leader (usually a submission to the GFX ring) and used for signaling output dependencies. Each job is remembered individually as user of a buffer object, so there is no joining of work at the end. Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c| 244 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h| 9 +- drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 12 +- 3 files changed, 173 insertions(+), 92 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index c6541f7b8f54..7429e64919fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -69,6 +69,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, unsigned int *num_ibs) { struct drm_sched_entity *entity; + unsigned int i; int r; r = amdgpu_ctx_get_entity(p->ctx, chunk_ib->ip_type, @@ -83,11 +84,19 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p, return -EINVAL; /* Currently we don't support submitting to multiple entities */ - if (p->entity && p->entity != entity) + for (i = 0; i < p->gang_size; ++i) { + if (p->entities[i] == entity) + goto found; + } + + if (i == AMDGPU_CS_GANG_SIZE) return -EINVAL; - p->entity = entity; - ++(*num_ibs); + p->entities[i] = entity; + p->gang_size = i + 1; + +found: + ++(num_ibs[i]); return 0; } @@ -161,11 +170,12 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = 
p->filp->driver_priv; + unsigned int num_ibs[AMDGPU_CS_GANG_SIZE] = { }; struct amdgpu_vm *vm = >vm; uint64_t *chunk_array_user; uint64_t *chunk_array; - unsigned size, num_ibs = 0; uint32_t uf_offset = 0; + unsigned int size; int ret; int i; @@ -228,7 +238,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, if (size < sizeof(struct drm_amdgpu_cs_chunk_ib)) goto free_partial_kdata; - ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, _ibs); + ret = amdgpu_cs_p1_ib(p, p->chunks[i].kdata, num_ibs); if (ret) goto free_partial_kdata; break; @@ -265,21 +275,27 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, } } - ret = amdgpu_job_alloc(p->adev, num_ibs, >job, vm); - if (ret) - goto free_all_kdata; + if (!p->gang_size) + return -EINVAL; - ret = drm_sched_job_init(>job->base, p->entity, >vm); - if (ret) - goto free_all_kdata; + for (i = 0; i < p->gang_size; ++i) { + ret = amdgpu_job_alloc(p->adev, num_ibs[i], >jobs[i], vm); + if (ret) + goto free_all_kdata; + + ret = drm_sched_job_init(>jobs[i]->base, p->entities[i], +>vm); + if (ret) + goto free_all_kdata; + } - if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { + if (p->ctx->vram_lost_counter != p->jobs[0]->vram_lost_counter) { ret = -ECANCELED; goto free_all_kdata; } if (p->uf_entry.tv.bo) - p->job->uf_addr = uf_offset; + p->jobs[p->gang_size - 1]->uf_addr = uf_offset; kvfree(chunk_array); /* Use this opportunity to fill in task info for the vm */ @@ -301,22 +317,18 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, return ret; } -static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, - struct amdgpu_cs_chunk *chunk, - unsigned int *num_ibs, - unsigned int *ce_preempt, - unsigned int *de_preempt) +static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p, struct amdgpu_job *job, + struct amdgpu_ib *ib, struct amdgpu_cs_chunk *chunk, + unsigned int *ce_preempt, unsigned int *de_preempt) { - struct amdgpu_ring *ring =
Re: [PATCH] drm/amdgpu: fix scratch register access method in SRIOV
Am 01.06.22 um 09:27 schrieb ZhenGuo Yin: The scratch register should be accessed through MMIO instead of RLCG in SRIOV, since it being used in RLCG register access function. Fixes: 0e1314781b9c("drm/amdgpu: nuke dynamic gfx scratch reg allocation") Maybe better but the register offset into a local constant then. Apart from that looks good to me. Regards, Christian. --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c5f46d264b23..8331e0c5e18e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3784,7 +3784,7 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0), 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", @@ -3799,7 +3799,7 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0)); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1)
Re: [PATCH 2/2] drm/amdgpu/gmc11: enable AGP aperture
Am 26.05.22 um 19:58 schrieb Alex Deucher: Enable the AGP aperture on chips with GMC v11. Signed-off-by: Alex Deucher Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c | 7 --- drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c| 1 + drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c | 6 +++--- 4 files changed, 11 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c index 5eccaa2c7ca0..f99d7641bb21 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0.c @@ -154,10 +154,11 @@ static void gfxhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; - /* Disable AGP. */ + /* Program the AGP BAR */ WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0); - WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, 0); - WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, 0x00FF); + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + /* Program the system aperture low logical page number. 
*/ WREG32_SOC15(GC, 0, regGCMC_VM_SYSTEM_APERTURE_LOW_ADDR, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b6daa4146dd3..635103c7e2a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -654,6 +654,7 @@ static void gmc_v11_0_vram_gtt_location(struct amdgpu_device *adev, amdgpu_gmc_vram_location(adev, >gmc, base); amdgpu_gmc_gart_location(adev, mc); + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ adev->vm_manager.vram_base_offset = adev->mmhub.funcs->get_mc_fb_offset(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index bc11b2de37ae..4926fa82c1c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -169,10 +169,10 @@ static void mmhub_v3_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; uint32_t tmp; - /* Disable AGP. */ + /* Program the AGP BAR */ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0); - WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, 0); - WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, 0x00FF); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); if (!amdgpu_sriov_vf(adev)) { /* diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index 770be0a8f7ce..5e5b884d8357 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -162,10 +162,10 @@ static void mmhub_v3_0_2_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; uint32_t tmp; - /* Disable AGP. 
*/ + /* Program the AGP BAR */ WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BASE, 0); - WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, 0); - WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, 0x00FF); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, regMMMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); if (!amdgpu_sriov_vf(adev)) { /*
RE: [PATCH] drm/amdgpu: Resolve RAS GFX error count issue after cold boot on Arcturus
[AMD Official Use Only - General] Reviewed-by: Hawking Zhang Regards, Hawking -Original Message- From: amd-gfx On Behalf Of Candice Li Sent: Wednesday, June 1, 2022 18:01 To: amd-gfx@lists.freedesktop.org Cc: Li, Candice Subject: [PATCH] drm/amdgpu: Resolve RAS GFX error count issue after cold boot on Arcturus Adjust the sequence for ras late init and separate ras reset error status from query status. Signed-off-by: Candice Li --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 7 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 27 - 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ede2fa56f6c90d..99c1a2d3dae84d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -594,14 +594,15 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - r = amdgpu_ras_block_late_init(adev, ras_block); - if (r) - return r; if (amdgpu_ras_is_supported(adev, ras_block->block)) { if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + r = amdgpu_irq_get(adev, >gfx.cp_ecc_error_irq, 0); if (r) goto late_fini; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 31207f7eec0291..9c5e05ef8beb0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -197,6 +197,13 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, if (amdgpu_ras_query_error_status(obj->adev, )) return -EINVAL; + /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */ + if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + obj->adev->ip_versions[MP0_HWIP][0] != 
IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) + dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); + } + s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n", "ue", info.ue_count, "ce", info.ce_count); @@ -550,9 +557,10 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev, if (amdgpu_ras_query_error_status(obj->adev, )) return -EINVAL; - if (obj->adev->asic_type == CHIP_ALDEBARAN) { + if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) - DRM_WARN("Failed to reset error counter and error status"); + dev_warn(obj->adev->dev, "Failed to reset error counter and error +status"); } return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, @@ -1027,9 +1035,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, } } - if (!amdgpu_persistent_edc_harvesting_supported(adev)) - amdgpu_ras_reset_error_status(adev, info->head.block); - return 0; } @@ -1149,6 +1154,12 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, if (res) return res; + if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(adev, info.head.block)) + dev_warn(adev->dev, "Failed to reset error counter and error status"); + } + ce += info.ce_count; ue += info.ue_count; } @@ -1792,6 +1803,12 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) continue; amdgpu_ras_query_error_status(adev, ); + + if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(adev, info.head.block)) + dev_warn(adev->dev, "Failed to reset error counter and error status"); + } } } -- 2.17.1
[PATCH] drm/amdgpu: Resolve RAS GFX error count issue after cold boot on Arcturus
Adjust the sequence for ras late init and separate ras reset error status from query status. Signed-off-by: Candice Li --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 7 --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 27 - 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index ede2fa56f6c90d..99c1a2d3dae84d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -594,14 +594,15 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - r = amdgpu_ras_block_late_init(adev, ras_block); - if (r) - return r; if (amdgpu_ras_is_supported(adev, ras_block->block)) { if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + r = amdgpu_irq_get(adev, >gfx.cp_ecc_error_irq, 0); if (r) goto late_fini; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 31207f7eec0291..9c5e05ef8beb0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -197,6 +197,13 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf, if (amdgpu_ras_query_error_status(obj->adev, )) return -EINVAL; + /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */ + if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) + dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); + } + s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n", "ue", info.ue_count, "ce", info.ce_count); @@ -550,9 +557,10 @@ static ssize_t 
amdgpu_ras_sysfs_read(struct device *dev, if (amdgpu_ras_query_error_status(obj->adev, )) return -EINVAL; - if (obj->adev->asic_type == CHIP_ALDEBARAN) { + if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { if (amdgpu_ras_reset_error_status(obj->adev, info.head.block)) - DRM_WARN("Failed to reset error counter and error status"); + dev_warn(obj->adev->dev, "Failed to reset error counter and error status"); } return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count, @@ -1027,9 +1035,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, } } - if (!amdgpu_persistent_edc_harvesting_supported(adev)) - amdgpu_ras_reset_error_status(adev, info->head.block); - return 0; } @@ -1149,6 +1154,12 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, if (res) return res; + if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(adev, info.head.block)) + dev_warn(adev->dev, "Failed to reset error counter and error status"); + } + ce += info.ce_count; ue += info.ue_count; } @@ -1792,6 +1803,12 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) continue; amdgpu_ras_query_error_status(adev, ); + + if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) && + adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) { + if (amdgpu_ras_reset_error_status(adev, info.head.block)) + dev_warn(adev->dev, "Failed to reset error counter and error status"); + } } } -- 2.17.1
[PATCH] drm/amdgpu: fix scratch register access method in SRIOV
The scratch register should be accessed through MMIO instead of RLCG in SRIOV, since it being used in RLCG register access function. Fixes: 0e1314781b9c("drm/amdgpu: nuke dynamic gfx scratch reg allocation") --- drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index c5f46d264b23..8331e0c5e18e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -3784,7 +3784,7 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0), 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); if (r) { DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", @@ -3799,7 +3799,7 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring) amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0); + tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0)); if (tmp == 0xDEADBEEF) break; if (amdgpu_emu_mode == 1) -- 2.35.1
Re: [PATCH] drm/amdkfd: add pinned BOs to kfd_bo_list
Am 31.05.22 um 17:22 schrieb Felix Kuehling: Am 2022-05-31 um 04:34 schrieb Lang Yu: The kfd_bo_list is used to restore process BOs after evictions. As page tables could be destroyed during evictions, we should also update pinned BOs' page tables during restoring to make sure they are valid. So for pinned BOs, 1, Don't validate them, but update their page tables. 2, Don't add eviction fence for them. Signed-off-by: Lang Yu --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 39 ++- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 34ba9e776521..67c4bf1944d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1953,9 +1953,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, return -EINVAL; } - /* delete kgd_mem from kfd_bo_list to avoid re-validating - * this BO in BO's restoring after eviction. - */ mutex_lock(>process_info->lock); ret = amdgpu_bo_reserve(bo, true); @@ -1978,7 +1975,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, amdgpu_amdkfd_remove_eviction_fence( bo, mem->process_info->eviction_fence); - list_del_init(>validate_list.head); if (size) *size = amdgpu_bo_size(bo); @@ -2481,24 +2477,26 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) uint32_t domain = mem->domain; struct kfd_mem_attachment *attachment; - total_size += amdgpu_bo_size(bo); + if (!bo->tbo.pin_count) { I think this special case is not necessary. Validating pinned BOs that are already valid should have very low overhead. So adding a special case to avoid that is not really worth it. I would put this check into amdgpu_amdkfd_bo_validate(). Otherwise you get a nice error if a BO is pinned to VRAM and you try to validate it into GTT for example. On the other hand that error might be exactly what you want in this case. 
Anyway, with or without this, feel free to add an Acked-by: Christian König to this patch. Regards, Christian. Other than that, this patch looks good to me. Regards, Felix + total_size += amdgpu_bo_size(bo); - ret = amdgpu_amdkfd_bo_validate(bo, domain, false); - if (ret) { - pr_debug("Memory eviction: Validate BOs failed\n"); - failed_size += amdgpu_bo_size(bo); - ret = amdgpu_amdkfd_bo_validate(bo, - AMDGPU_GEM_DOMAIN_GTT, false); + ret = amdgpu_amdkfd_bo_validate(bo, domain, false); + if (ret) { + pr_debug("Memory eviction: Validate BOs failed\n"); + failed_size += amdgpu_bo_size(bo); + ret = amdgpu_amdkfd_bo_validate(bo, AMDGPU_GEM_DOMAIN_GTT, false); + if (ret) { + pr_debug("Memory eviction: Try again\n"); + goto validate_map_fail; + } + } + ret = amdgpu_sync_fence(_obj, bo->tbo.moving); if (ret) { - pr_debug("Memory eviction: Try again\n"); + pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); goto validate_map_fail; } } - ret = amdgpu_sync_fence(_obj, bo->tbo.moving); - if (ret) { - pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); - goto validate_map_fail; - } + list_for_each_entry(attachment, >attachments, list) { if (!attachment->is_mapped) continue; @@ -2544,10 +2542,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) /* Attach new eviction fence to all BOs */ list_for_each_entry(mem, _info->kfd_bo_list, - validate_list.head) + validate_list.head) { + if (mem->bo->tbo.pin_count) + continue; + amdgpu_bo_fence(mem->bo, _info->eviction_fence->base, true); - + } /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, _info->vm_list_head, vm_list_node) {
RE: [PATCH 0/3] Fix issues when unplug monitor under mst scenario
[AMD Official Use Only - General] Thanks for your time, Lyude : ) Regards, Wayne > -Original Message- > From: Lyude Paul > Sent: Wednesday, June 1, 2022 3:38 AM > To: Lin, Wayne ; amd-gfx@lists.freedesktop.org > Cc: Wentland, Harry ; Pillai, Aurabindo > ; Zuo, Jerry ; Siqueira, > Rodrigo > Subject: Re: [PATCH 0/3] Fix issues when unplung monitor under mst > scenario > > For the whole series: > > Acked-by: Lyude Paul > > This looks a lot better for sure :) > > On Tue, 2022-05-10 at 17:56 +0800, Wayne Lin wrote: > > This patch set is trying to resolve issues observed when unplug > > monitors under mst scenario. Revert few commits which cause side > > effects and seems no longer needed. And propose a patch to address the > > issue discussed within the thread: > > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fww > w. > > mail-archive.com%2Fdri- > devel%40lists.freedesktop.org%2Fmsg396300.html& > > > amp;data=05%7C01%7Cwayne.lin%40amd.com%7Ce9f99928cc0a4d2fd1cc08d > a433d1 > > > ce2%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C63789622700307 > 6525%7C > > > Unknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJB > TiI6Ik1h > > > aWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7Csdata=E75eUgMj3%2Fek > MsLUjpzBCfU > > Y45twQE5w44%2B2KyxzT6c%3Dreserved=0 > > > > --- > > > > Wayne Lin (3): > > Revert "drm/amd/display: Add flag to detect dpms force off during HPD" > > Revert "drm/amd/display: turn DPMS off on connector unplug" > > drm/amd/display: Release remote dc_sink under mst scenario > > > > .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 49 > > +++ > > .../gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h | 2 - > > .../amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 16 ++ > > .../amd/display/amdgpu_dm/amdgpu_dm_helpers.c | 18 +-- > > .../display/amdgpu_dm/amdgpu_dm_mst_types.c | 39 +- > - > > drivers/gpu/drm/amd/display/dc/core/dc.c | 13 - > > drivers/gpu/drm/amd/display/dc/dc_stream.h | 1 - > > 7 files changed, 46 insertions(+), 92 deletions(-) > > > > -- > Cheers, > 
Lyude Paul (she/her) > Software Engineer at Red Hat
Re: [PATCH v2] drm/amdkfd: add pinned BOs to kfd_bo_list
Am 2022-05-31 um 21:39 schrieb Lang Yu: The kfd_bo_list is used to restore process BOs after evictions. As page tables could be destroyed during evictions, we should also update pinned BOs' page tables during restoring to make sure they are valid. So for pinned BOs, 1, Validating them and update their page tables. 2, Don't add eviction fence for them. v2: - Don't handle pinned ones specially in BO validation.(Felix) Signed-off-by: Lang Yu Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 34ba9e776521..054e4a76ae2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1953,9 +1953,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, return -EINVAL; } - /* delete kgd_mem from kfd_bo_list to avoid re-validating -* this BO in BO's restoring after eviction. -*/ mutex_lock(>process_info->lock); ret = amdgpu_bo_reserve(bo, true); @@ -1978,7 +1975,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev, amdgpu_amdkfd_remove_eviction_fence( bo, mem->process_info->eviction_fence); - list_del_init(>validate_list.head); if (size) *size = amdgpu_bo_size(bo); @@ -2542,12 +2538,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) process_info->eviction_fence = new_fence; *ef = dma_fence_get(_fence->base); - /* Attach new eviction fence to all BOs */ + /* Attach new eviction fence to all BOs except pinned ones */ list_for_each_entry(mem, _info->kfd_bo_list, - validate_list.head) + validate_list.head) { + if (mem->bo->tbo.pin_count) + continue; + amdgpu_bo_fence(mem->bo, _info->eviction_fence->base, true); - + } /* Attach eviction fence to PD / PT BOs */ list_for_each_entry(peer_vm, _info->vm_list_head, vm_list_node) {