[PATCH] drm/amdgpu: Improve the MTYPE comments
Use words insteads of acronym for better understanding. Signed-off-by: Yong Zhao --- include/uapi/drm/amdgpu_drm.h | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index d65f9b4ba05c..0072ddb59747 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -564,15 +564,15 @@ struct drm_amdgpu_gem_op { #define AMDGPU_VM_MTYPE_MASK (0xf << 5) /* Default MTYPE. Pre-AI must use this. Recommended for newer ASICs. */ #define AMDGPU_VM_MTYPE_DEFAULT(0 << 5) -/* Use NC MTYPE instead of default MTYPE */ +/* Use Non Coherent MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_NC (1 << 5) -/* Use WC MTYPE instead of default MTYPE */ +/* Use Write Combine MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_WC (2 << 5) -/* Use CC MTYPE instead of default MTYPE */ +/* Use Cache Coherent MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_CC (3 << 5) -/* Use UC MTYPE instead of default MTYPE */ +/* Use UnCached MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_UC (4 << 5) -/* Use RW MTYPE instead of default MTYPE */ +/* Use Read Write MTYPE instead of default MTYPE */ #define AMDGPU_VM_MTYPE_RW (5 << 5) struct drm_amdgpu_gem_va { -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: Print CU information by default during initialization
This is convenient for multiple teams to obtain the information. Also, add device info by using dev_info(). Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 71ea56e220ae..423eed223aa5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3170,7 +3170,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto failed; } - DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", + dev_info(adev->dev, + "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", adev->gfx.config.max_shader_engines, adev->gfx.config.max_sh_per_se, adev->gfx.config.max_cu_per_sh, -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/3] drm/amdgpu: Print CU information by default during initialization
This is convenient for multiple teams to obtain the information. Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 71ea56e220ae..92b7a1ff1dc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3170,7 +3170,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto failed; } - DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", + DRM_INFO("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", adev->gfx.config.max_shader_engines, adev->gfx.config.max_sh_per_se, adev->gfx.config.max_cu_per_sh, -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/3] drm/amdkfd: Adjust three dmesg printings during initialization
Delete two printings which are not very useful, and change one from pr_info() to pr_debug(). Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c index de9f68d5c312..1009a3b8dcc2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c @@ -502,7 +502,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head *device_list, num_nodes = crat_table->num_domains; image_len = crat_table->length; - pr_info("Parsing CRAT table with %d nodes\n", num_nodes); + pr_debug("Parsing CRAT table with %d nodes\n", num_nodes); for (node_id = 0; node_id < num_nodes; node_id++) { top_dev = kfd_create_topology_device(device_list); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index 5db42814dd51..46dcf74ee2e0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -787,7 +787,6 @@ static int kfd_topology_update_sysfs(void) { int ret; - pr_info("Creating topology SYSFS entries\n"); if (!sys_props.kobj_topology) { sys_props.kobj_topology = kfd_alloc_struct(sys_props.kobj_topology); @@ -1048,7 +1047,6 @@ int kfd_topology_init(void) sys_props.generation_count++; kfd_update_system_properties(); kfd_debug_print_topology(); - pr_info("Finished initializing topology\n"); } else pr_err("Failed to update topology in sysfs ret=%d\n", ret); -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/3] drm/amdgpu: Adjust the SDMA doorbell info printing
Add more detail while turning off the printing by default, because it is very useful. Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 352cd9632770..c0ca9a8229e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1851,7 +1851,7 @@ static int sdma_v4_0_sw_init(void *handle) ring->ring_obj = NULL; ring->use_doorbell = true; - DRM_INFO("use_doorbell being set to: [%s]\n", + DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); /* doorbell size is 2 dwords, get DWORD offset */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index f7b2bcf3f293..764f455ada75 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1260,7 +1260,7 @@ static int sdma_v5_0_sw_init(void *handle) ring->ring_obj = NULL; ring->use_doorbell = true; - DRM_INFO("use_doorbell being set to: [%s]\n", + DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i, ring->use_doorbell?"true":"false"); ring->doorbell_index = (i == 0) ? -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Use pr_debug to print the message of reaching event limit
People are inclined to think of the previous pr_warn message as an error, so use pre_debug instead. Change-Id: I3ac565a2bd3b8d57345812104c872183898d237f Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 1f8365575b12..15476fca8fa6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -187,7 +187,7 @@ static int create_signal_event(struct file *devkfd, if (p->signal_mapped_size && p->signal_event_count == p->signal_mapped_size / 8) { if (!p->signal_event_limit_reached) { - pr_warn("Signal event wasn't created because limit was reached\n"); + pr_debug("Signal event wasn't created because limit was reached\n"); p->signal_event_limit_reached = true; } return -ENOSPC; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Consolidate duplicated bo alloc flags
ALLOC_MEM_FLAGS_* used are the same as the KFD_IOC_ALLOC_MEM_FLAGS_*, but they are interweavedly used in kernel driver, resulting in bad readability. For example, KFD_IOC_ALLOC_MEM_FLAGS_COHERENT is not referenced in kernel, and it functions implicitly in kernel through ALLOC_MEM_FLAGS_COHERENT, causing unnecessary confusion. Replace all occurrences of ALLOC_MEM_FLAGS_* with KFD_IOC_ALLOC_MEM_FLAGS_* to solve the problem. Change-Id: Iced6ed3698167296c97b14e7e4569883859d619c Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 6 ++-- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 29 ++- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 13 + .../gpu/drm/amd/include/kgd_kfd_interface.h | 21 -- 4 files changed, 27 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 726c91ab6761..abfbe89e805e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -29,6 +29,7 @@ #include #include #include "amdgpu_xgmi.h" +#include static const unsigned int compute_vmid_bitmap = 0xFF00; @@ -501,10 +502,11 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, metadata_size, _flags); if (flags) { *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? - ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; + KFD_IOC_ALLOC_MEM_FLAGS_VRAM + : KFD_IOC_ALLOC_MEM_FLAGS_GTT; if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) - *flags |= ALLOC_MEM_FLAGS_PUBLIC; + *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC; } out_put: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index e4481caed648..9dff792c9290 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -29,6 +29,7 @@ #include "amdgpu_vm.h" #include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" +#include /* BO flag to indicate a KFD userptr BO */ #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) @@ -400,18 +401,18 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) { struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); - bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT; + bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT; uint32_t mapping_flags; mapping_flags = AMDGPU_VM_PAGE_READABLE; - if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE) mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; - if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; switch (adev->asic_type) { case CHIP_ARCTURUS: - if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) { + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) mapping_flags |= coherent ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; @@ -1160,24 +1161,24 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( /* * Check on which domain to allocate BO */ - if (flags & ALLOC_MEM_FLAGS_VRAM) { + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; - alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? + alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - } else if (flags & ALLOC_MEM_FLAGS_GTT) { + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; - } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_CPU; alloc_flags = 0; if (!offset || !*offset) return -EINVAL; user_addr = untagged_addr(*offset); - } els
[PATCH] drm/amdgpu: Use better names to reflect it is CP MQD buffer
Add "CP" to AMDGPU_GEM_CREATE_MQD_GFX9 to indicate it is only for CP MQD. Change-Id: Ie69cd3ba477e4bac161ea5b20ec2919a35f3528e Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 7 +-- include/uapi/drm/amdgpu_drm.h | 2 +- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index bc2e72a66db9..726c91ab6761 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -224,7 +224,7 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr, bool mqd_gfx9) + void **cpu_ptr, bool cp_mqd_gfx9) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = NULL; @@ -240,8 +240,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, bp.type = ttm_bo_type_kernel; bp.resv = NULL; - if (mqd_gfx9) - bp.flags |= AMDGPU_GEM_CREATE_MQD_GFX9; + if (cp_mqd_gfx9) + bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9; r = amdgpu_bo_create(adev, , ); if (r) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index fca87bafe174..665db2353a78 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1043,7 +1043,7 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, if (amdgpu_bo_encrypted(abo)) flags |= AMDGPU_PTE_TMZ; - if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) { + if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) { uint64_t page_idx = 1; r = amdgpu_gart_bind(adev, gtt->offset, page_idx, @@ -1051,7 +1051,10 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, if (r) goto gart_bind_fail; - /* Patch mtype of the second part BO */ + /* The memory type of the first page defaults to UC. Now +* modify the memory type to NC from the second page of +* the BO onward. +*/ flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index eaf94a421901..1e59c0146531 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -130,7 +130,7 @@ extern "C" { /* Flag that indicates allocating MQD gart on GFX9, where the mtype * for the second page onward should be set to NC. */ -#define AMDGPU_GEM_CREATE_MQD_GFX9 (1 << 8) +#define AMDGPU_GEM_CREATE_CP_MQD_GFX9 (1 << 8) /* Flag that BO may contain sensitive data that must be wiped before * releasing the memory */ -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Consolidate duplicated bo alloc flags
ALLOC_MEM_FLAGS_* used are the same as the KFD_IOC_ALLOC_MEM_FLAGS_*, but they are interweavedly used in kernel driver, resulting in bad readability. For example, KFD_IOC_ALLOC_MEM_FLAGS_COHERENT is totally not referenced in kernel, and it functions in the kernel through ALLOC_MEM_FLAGS_COHERENT, causing unnecessary confusion. Replace all occurrences of ALLOC_MEM_FLAGS_* by KFD_IOC_ALLOC_MEM_FLAGS_* to solve the problem. Change-Id: Iced6ed3698167296c97b14e7e4569883859d619c Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 9 +++-- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 38 +++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 13 --- .../gpu/drm/amd/include/kgd_kfd_interface.h | 21 -- 4 files changed, 36 insertions(+), 45 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 726c91ab6761..affaa0d4b636 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -29,6 +29,7 @@ #include #include #include "amdgpu_xgmi.h" +#include static const unsigned int compute_vmid_bitmap = 0xFF00; @@ -500,11 +501,13 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, metadata_size, _flags); if (flags) { - *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? - ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; + if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) + *flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM; + else + *flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT; if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) - *flags |= ALLOC_MEM_FLAGS_PUBLIC; + *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC; } out_put: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index e4481caed648..c81fe7011e88 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -29,6 +29,7 @@ #include "amdgpu_vm.h" #include "amdgpu_amdkfd.h" #include "amdgpu_dma_buf.h" +#include /* BO flag to indicate a KFD userptr BO */ #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) @@ -400,18 +401,18 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) { struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); - bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT; + bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT; uint32_t mapping_flags; mapping_flags = AMDGPU_VM_PAGE_READABLE; - if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE) + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE) mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE; - if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE) + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE) mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; switch (adev->asic_type) { case CHIP_ARCTURUS: - if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) { + if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { if (bo_adev == adev) mapping_flags |= coherent ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; @@ -1160,24 +1161,24 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( /* * Check on which domain to allocate BO */ - if (flags & ALLOC_MEM_FLAGS_VRAM) { + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; - alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ? + alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - } else if (flags & ALLOC_MEM_FLAGS_GTT) { + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; - } else if (flags & ALLOC_MEM_FLAGS_USERPTR) { + } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_CPU; alloc_flag
[PATCH] drm/amdkfd: Add more comments on GFX9 user CP queue MQD workaround
Because too many things are involved in this workaround, we need more comments to avoid pitfalls. Change-Id: I5d7917296dd5f5edb45921118cf8e7d778d40de1 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 5 - .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c| 18 +++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 1947a326de57..10f6f4b21b44 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1041,7 +1041,10 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, if (r) goto gart_bind_fail; - /* Patch mtype of the second part BO */ + /* The memory type of the first page defaults to UC. Now +* modify the memory type to NC from the second page of +* the BO onward. +*/ flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 436b7f518979..5b11190ff6e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -87,9 +87,21 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, int retval; struct kfd_mem_obj *mqd_mem_obj = NULL; - /* From V9, for CWSR, the control stack is located on the next page -* boundary after the mqd, we will use the gtt allocation function -* instead of sub-allocation function. + /* For V9 only, due to a HW bug, the control stack of a user mode +* compute queue needs to be allocated just behind the page boundary +* of its regular MQD buffer. So we allocate an enlarged MQD buffer: +* the first page of the buffer serves as the regular MQD buffer +* purpose and the remaining is for control stack. Although the two +* parts are in the same buffer object, they need different memory +* types: MQD part needs UC (uncached) as usual, while control stack +* needs NC (non coherent), which is different from the UC type which +* is used when control stack is allocated in user space. +* +* Because of all those, we use the gtt allocation function instead +* of sub-allocation function for this enlarged MQD buffer. Moreover, +* in order to achieve two memory types in a single buffer object, we +* pass a special bo flag AMDGPU_GEM_CREATE_MQD_GFX9 to instruct +* amdgpu memory functions to do so. */ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Add more comments on GFX9 user CP queue MQD workaround
Because too many things are involved in this workaround, we need more comments to avoid pitfalls. Change-Id: I5d7917296dd5f5edb45921118cf8e7d778d40de1 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 - drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 17 ++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 660867cf2597..a6c8e4cfc051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1041,7 +1041,10 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, if (r) goto gart_bind_fail; - /* Patch mtype of the second part BO */ + /* The memory type of the first page defaults to UC. Now +* modify the memory type to NC from the second page of +* the BO onward. +*/ flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 436b7f518979..ff2e84872721 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -87,9 +87,20 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, int retval; struct kfd_mem_obj *mqd_mem_obj = NULL; - /* From V9, for CWSR, the control stack is located on the next page -* boundary after the mqd, we will use the gtt allocation function -* instead of sub-allocation function. + /* For V9 only, due to a HW bug, the control stack of a user mode +* compute queue needs to be allocated just behind the page boundary +* of its regular MQD buffer. So we allocate an enlarged MQD buffer: +* the first page of the buffer serves as the regular MQD buffer +* purpose and the remaining is for control stack. Although the two +* parts are in the same buffer object, they need different memory +* type: MQD part needs UC (uncached) as usual, while control stack +* needs NC (non coherent). +* +* Because of all those, we use the gtt allocation function instead +* of sub-allocation function for this enlarged MQD buffer. Moreover, +* in order to achieve two memory types in a single buffer object, we +* pass a special bo flag AMDGPU_GEM_CREATE_MQD_GFX9 to instruct +* amdgpu memory functions to do so. */ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Add more comments on GFX9 user CP queue MQD workaround
Because two many things are involved in this workaround, we need more comments to avoid pitfalls. Change-Id: I5d7917296dd5f5edb45921118cf8e7d778d40de1 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 5 - drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 16 +--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 660867cf2597..a6c8e4cfc051 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1041,7 +1041,10 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, if (r) goto gart_bind_fail; - /* Patch mtype of the second part BO */ + /* The memory type of the first page defaults to UC. Now +* modify the memory type to NC from the second page of +* the BO onward. +*/ flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK; flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 436b7f518979..06a3d9ead510 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -87,9 +87,19 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, int retval; struct kfd_mem_obj *mqd_mem_obj = NULL; - /* From V9, for CWSR, the control stack is located on the next page -* boundary after the mqd, we will use the gtt allocation function -* instead of sub-allocation function. + /* For V9 only, due to a HW bug, the control stack of user mode +* compute queues needs to be allocated just behind the page boundary +* of its MQD buffer. So we allocate a enlarged MQD buffer: the first +* page of which serves as the regular MQD buffer purpose. Although +* the two parts are in the same buffer object, they need different +* memory type: MQD part needs UC (uncached) as usual, while control +* stack needs NC (non coherent). +* +* Because of all those, we use the gtt allocation function instead +* of sub-allocation function for this enlarged MQD buffer. Moreover, +* in order to achieve two memory types in a single buffer object, we +* pass a special bo flag AMDGPU_GEM_CREATE_MQD_GFX9 to instruct +* amdgpu memory functions to do so. */ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdkfd: Make get_tile_config() generic
Given we can query all the asic specific information from amdgpu_gfx_config, we can make get_tile_config() generic. Change-Id: I1080fec4d50c51bc84bb49b0145f8fec50081fce Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h| 3 ++ .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 1 - .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 33 --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 26 --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 26 --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 23 - .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 2 -- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 22 + drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 +- .../gpu/drm/amd/include/kgd_kfd_interface.h | 4 --- 10 files changed, 26 insertions(+), 116 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index b0ad3be0b03f..13feb313e9b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -242,6 +242,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); +int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, + struct tile_config *config); + /* KGD2KFD callbacks */ int kgd2kfd_init(void); void kgd2kfd_exit(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 4bcc175a149d..d6549e5ea7e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -319,7 +319,6 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, - .get_tile_config = kgd_gfx_v9_get_tile_config, .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base, .get_hive_id = amdgpu_amdkfd_get_hive_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index ca91fffb8a36..4ec6d0c03201 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -42,38 +42,6 @@ enum hqd_dequeue_request_type { SAVE_WAVES }; -/* Because of REG_GET_FIELD() being used, we put this function in the - * asic specific file. - */ -static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, - struct tile_config *config) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - - config->gb_addr_config = adev->gfx.config.gb_addr_config; -#if 0 -/* TODO - confirm REG_GET_FIELD x2, should be OK as is... but - * MC_ARB_RAMCFG register doesn't exist on Vega10 - initial amdgpu - * changes commented out related code, doing the same here for now but - * need to sync with Ken et al - */ - config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, - MC_ARB_RAMCFG, NOOFBANK); - config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg, - MC_ARB_RAMCFG, NOOFRANKS); -#endif - - config->tile_config_ptr = adev->gfx.config.tile_mode_array; - config->num_tile_configs = - ARRAY_SIZE(adev->gfx.config.tile_mode_array); - config->macro_tile_config_ptr = - adev->gfx.config.macrotile_mode_array; - config->num_macro_tile_configs = - ARRAY_SIZE(adev->gfx.config.macrotile_mode_array); - - return 0; -} - static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd) { return (struct amdgpu_device *)kgd; @@ -805,7 +773,6 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .address_watch_get_offset = kgd_address_watch_get_offset, .get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info, - .get_tile_config = amdgpu_amdkfd_get_tile_config, .set_vm_context_page_table_base = set_vm_context_page_table_base, .get_hive_id = amdgpu_amdkfd_get_hive_id, .get_unique_id = amdgpu_amdkfd_get_unique_id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 8f052e98a3c6..0b7e78748540 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -84,31 +84,6 @@ union TCP_WATCH_CNTL_BITS { float f32All; }; -/* Because of REG_GET_FIELD() being used, we put this function in the - * asic specific file. - */ -static int get_tile_config(struct kgd_dev *kgd, - str
[PATCH 1/2] drm/amdgpu: Add num_banks and num_ranks to gfx config structure
The two members will be used by KFD later. Change-Id: I36a605e359b242f2fe546fb67f8e402c48a62342 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 5 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 5 + 3 files changed, 12 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ca17ffb01301..37ba05b63b2a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -151,6 +151,8 @@ struct amdgpu_gfx_config { unsigned num_gpus; unsigned multi_gpu_tile_size; unsigned mc_arb_ramcfg; + unsigned num_banks; + unsigned num_ranks; unsigned gb_addr_config; unsigned num_rbs; unsigned gs_vgt_table_depth; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 8f20a5dd44fe..172905dadf9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -4338,6 +4338,11 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; + adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); + adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; adev->gfx.config.mem_max_burst_length_bytes = 256; if (adev->flags & AMD_IS_APU) { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index fa245973de12..e63f98b2d389 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -1820,6 +1820,11 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG); mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg; + adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFBANK); + adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg, + MC_ARB_RAMCFG, NOOFRANKS); + adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes; adev->gfx.config.mem_max_burst_length_bytes = 256; if (adev->flags & AMD_IS_APU) { -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: Increase timout on emulator to tenfold instead of twice
Since emulators are slower, sometime some operations like flushing tlb through FM need more than twice the regular timout of 100ms, so increase the timeout to 1s on emulators. Change-Id: Idf1ff571dd2fe69d3a236d916262ad65f86c44e8 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 28a86752567e..8ef8a49b9255 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2834,7 +2834,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; if (amdgpu_emu_mode == 1) - adev->usec_timeout *= 2; + adev->usec_timeout *= 10; adev->gmc.gart_size = 512 * 1024 * 1024; adev->accel_working = false; adev->num_rings = 0; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 5/6] drm/amdkfd: Delete excessive printings
Those printings are duplicated or useless. Change-Id: I88fbe8f5748bbd0a20bcf1f6ca67b9dde99733fe Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index a3c44d88314b..958275db3f55 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -297,8 +297,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd_mgr; int retval; - print_queue(q); - dqm_lock(dqm); if (dqm->total_queue_count >= max_num_of_queues_per_device) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c604a2ede3f5..3bfa5c8d9654 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -257,7 +257,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, pqn->q = q; pqn->kq = NULL; retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd); - pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; @@ -278,7 +277,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, pqn->q = q; pqn->kq = NULL; retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd); - pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; case KFD_QUEUE_TYPE_DIQ: @@ -299,7 +297,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n", + pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n", pqm->process->pasid, type, retval); goto err_create_queue; } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/6] drm/amdkfd: Rename queue_count to active_queue_count
The name is easier to understand the code. Change-Id: I9064dab1d022e02780023131f940fff578a06b72 Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 38 +-- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 +- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 4 +- .../amd/amdkfd/kfd_process_queue_manager.c| 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 80d22bf702e8..7ef9b89f5c70 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -359,7 +359,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, list_add(>list, >queues_list); qpd->queue_count++; if (q->properties.is_active) - dqm->queue_count++; + dqm->active_queue_count++; if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; @@ -494,7 +494,7 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, } qpd->queue_count--; if (q->properties.is_active) - dqm->queue_count--; + dqm->active_queue_count--; return retval; } @@ -563,13 +563,13 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) /* * check active state vs. the previous state and modify * counter accordingly. map_queues_cpsch uses the -* dqm->queue_count to determine whether a new runlist must be +* dqm->active_queue_count to determine whether a new runlist must be * uploaded. */ if (q->properties.is_active && !prev_active) - dqm->queue_count++; + dqm->active_queue_count++; else if (!q->properties.is_active && prev_active) - dqm->queue_count--; + dqm->active_queue_count--; if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = map_queues_cpsch(dqm); @@ -618,7 +618,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = false; - dqm->queue_count--; + dqm->active_queue_count--; if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) continue; @@ -662,7 +662,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = false; - dqm->queue_count--; + dqm->active_queue_count--; } retval = execute_queues_cpsch(dqm, qpd->is_debug ? @@ -731,7 +731,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = true; - dqm->queue_count++; + dqm->active_queue_count++; if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) continue; @@ -786,7 +786,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = true; - dqm->queue_count++; + dqm->active_queue_count++; } retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); @@ -899,7 +899,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); - dqm->queue_count = dqm->next_pipe_to_allocate = 0; + dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; dqm->sdma_queue_count = 0; dqm->xgmi_sdma_queue_count = 0; @@ -924,7 +924,7 @@ static void uninitialize(struct device_queue_manager *dqm) { int i; - WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0); + WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); kfree(dqm->allocated_queues); for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) @@ -1064,7 +1064,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); - dqm->queue_count = dqm->processes_count = 0; + dqm->active_queue_count = dqm->processes_count = 0;
[PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package submissions
The previous SDMA queue counting was wrong. In addition, after confirming with MEC firmware team, we understands that only one unmap queue package, instead of one unmap queue package for CP and each SDMA engine, is needed, which results in much simpler driver code. Change-Id: I84fd2f7e63d6b7f664580b425a78d3e995ce9abc Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 79 ++- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 - .../amd/amdkfd/kfd_process_queue_manager.c| 16 ++-- 3 files changed, 29 insertions(+), 68 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 958275db3f55..692abfd2088a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) return dqm->dev->device_info->num_xgmi_sdma_engines; } +static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) +{ + return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm); +} + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { return dqm->dev->device_info->num_sdma_engines @@ -375,11 +380,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.is_active) increment_queue_count(dqm, q->properties.type); - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) - dqm->xgmi_sdma_queue_count++; - /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -460,15 +460,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) deallocate_hqd(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - dqm->sdma_queue_count--; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { - dqm->xgmi_sdma_queue_count--; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) deallocate_sdma_queue(dqm, q); - } else { + else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); return -EINVAL; @@ -915,8 +913,6 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(>queues); dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; dqm->active_cp_queue_count = 0; - dqm->sdma_queue_count = 0; - dqm->xgmi_sdma_queue_count = 0; for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); @@ -981,8 +977,11 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, int bit; if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - if (dqm->sdma_bitmap == 0) + if (dqm->sdma_bitmap == 0) { + pr_err("No more SDMA queue to allocate\n"); return -ENOMEM; + } + bit = __ffs64(dqm->sdma_bitmap); dqm->sdma_bitmap &= ~(1ULL << bit); q->sdma_id = bit; @@ -991,8 +990,10 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm); } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { - if (dqm->xgmi_sdma_bitmap == 0) + if (dqm->xgmi_sdma_bitmap == 0) { + pr_err("No more XGMI SDMA queue to allocate\n"); return -ENOMEM; + } bit = __ffs64(dqm->xgmi_sdma_bitmap); dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); q->sdma_id = bit; @@ -1081,8 +1082,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) INIT_LIST_HEAD(>queues); dqm->active_queue_count = dqm->processes_count = 0; dqm->active_cp_queue_count = 0; - dqm->sdma_queue_count = 0; - dqm->xgmi_sdma_queue_count = 0; + dqm->active_runlist = false;
[PATCH 2/6] drm/amdkfd: Avoid ambiguity by indicating it's cp queue
The queues represented in queue_bitmap are only CP queues. Change-Id: I7e6a75de39718d7c4da608166b85b9377d06d1b3 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 ++-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c| 12 ++-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h| 2 +- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c| 2 +- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 8609287620ea..ebe4b8f88e79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -126,7 +126,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) /* this is going to have a few of the MSBs set that we need to * clear */ - bitmap_complement(gpu_resources.queue_bitmap, + bitmap_complement(gpu_resources.cp_queue_bitmap, adev->gfx.mec.queue_bitmap, KGD_MAX_QUEUES); @@ -137,7 +137,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) - clear_bit(i, gpu_resources.queue_bitmap); + clear_bit(i, gpu_resources.cp_queue_bitmap); amdgpu_doorbell_get_kfd_info(adev, _resources.doorbell_physical_address, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 7ef9b89f5c70..973581c2b401 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -78,14 +78,14 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) /* queue is available for KFD usage if bit is 1 */ for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) if (test_bit(pipe_offset + i, - dqm->dev->shared_resources.queue_bitmap)) + dqm->dev->shared_resources.cp_queue_bitmap)) return true; return false; } -unsigned int get_queues_num(struct device_queue_manager *dqm) +unsigned int get_cp_queues_num(struct device_queue_manager *dqm) { - return bitmap_weight(dqm->dev->shared_resources.queue_bitmap, + return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap, KGD_MAX_QUEUES); } @@ -908,7 +908,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) if (test_bit(pipe_offset + queue, -dqm->dev->shared_resources.queue_bitmap)) + dqm->dev->shared_resources.cp_queue_bitmap)) dqm->allocated_queues[pipe] |= 1 << queue; } @@ -1029,7 +1029,7 @@ static int set_sched_resources(struct device_queue_manager *dqm) mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) / dqm->dev->shared_resources.num_pipe_per_mec; - if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap)) + if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap)) continue; /* only acquire queues from the first MEC */ @@ -1979,7 +1979,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { if (!test_bit(pipe_offset + queue, - dqm->dev->shared_resources.queue_bitmap)) + dqm->dev->shared_resources.cp_queue_bitmap)) continue; r = dqm->dev->kfd2kgd->hqd_dump( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index ee3400e92c30..3f0fb0d28c01 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -219,7 +219,7 @@ void device_queue_manager_init_v10_navi10( struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manag
[PATCH 3/6] drm/amdkfd: Count active CP queues directly
The previous code of calculating active CP queues is problematic if some SDMA queues are inactive. Fix that by counting CP queues directly. Change-Id: I5ffaa75a95cbebc984558199ba2f3db6909c52a9 Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 47 +-- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 1 + .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 +- 3 files changed, 35 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 973581c2b401..a3c44d88314b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -132,6 +132,22 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, qpd->sh_mem_bases); } +void increment_queue_count(struct device_queue_manager *dqm, + enum kfd_queue_type type) +{ + dqm->active_queue_count++; + if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) + dqm->active_cp_queue_count++; +} + +void decrement_queue_count(struct device_queue_manager *dqm, + enum kfd_queue_type type) +{ + dqm->active_queue_count--; + if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) + dqm->active_cp_queue_count--; +} + static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) { struct kfd_dev *dev = qpd->dqm->dev; @@ -359,7 +375,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, list_add(>list, >queues_list); qpd->queue_count++; if (q->properties.is_active) - dqm->active_queue_count++; + increment_queue_count(dqm, q->properties.type); if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; @@ -494,7 +510,7 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, } qpd->queue_count--; if (q->properties.is_active) - dqm->active_queue_count--; + decrement_queue_count(dqm, q->properties.type); return retval; } @@ -567,9 +583,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) * uploaded. */ if (q->properties.is_active && !prev_active) - dqm->active_queue_count++; + increment_queue_count(dqm, q->properties.type); else if (!q->properties.is_active && prev_active) - dqm->active_queue_count--; + decrement_queue_count(dqm, q->properties.type); if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = map_queues_cpsch(dqm); @@ -618,7 +634,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = false; - dqm->active_queue_count--; + decrement_queue_count(dqm, q->properties.type); if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) continue; @@ -662,7 +678,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = false; - dqm->active_queue_count--; + decrement_queue_count(dqm, q->properties.type); } retval = execute_queues_cpsch(dqm, qpd->is_debug ? @@ -731,7 +747,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = true; - dqm->active_queue_count++; + increment_queue_count(dqm, q->properties.type); if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) continue; @@ -786,7 +802,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = true; - dqm->active_queue_count++; + increment_queue_count(dqm, q->properties.type); } retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); @@ -900,6 +916,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); dqm->active_queue_count = d
[PATCH 4/6] drm/amdkfd: Fix a memory leak in queue creation error handling
When the queue creation failed, some resources were not freed. Fix it. Change-Id: Ia24b6ad31528dceddfd4d1c58bb1d22c35d3eabf Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index b62ee2e3344a..c604a2ede3f5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -329,6 +329,9 @@ int pqm_create_queue(struct process_queue_manager *pqm, return retval; err_create_queue: + uninit_queue(q); + if (kq) + kernel_queue_uninit(kq, false); kfree(pqn); err_allocate_pqn: /* check if queues list is empty unregister process from device */ -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 5/6] drm/amdkfd: Only count active sdma queues
Please disregard the patch 5 and 6, as I have a new version for them. Yong On 2020-02-05 6:39 p.m., Yong Zhao wrote: One minor fix added. Yong On 2020-02-05 6:28 p.m., Yong Zhao wrote: The sdma_queue_count was only used for inferring whether we should unmap SDMA queues under HWS mode. In contrast, We mapped active queues rather than all in map_queues_cpsch(). In order to match the map and unmap for SDMA queues, we should just count active SDMA queues. Meanwhile, rename sdma_queue_count to active_sdma_queue_count to reflect the new usage. Change-Id: I9f1c3305dad044a3c779ec0730fcf7554050de8b Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 54 --- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 5 +- .../amd/amdkfd/kfd_process_queue_manager.c | 16 +++--- 3 files changed, 31 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 064108cf493b..cf77b866054a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -138,6 +138,10 @@ void increment_queue_count(struct device_queue_manager *dqm, dqm->active_queue_count++; if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count++; + else if (type == KFD_QUEUE_TYPE_SDMA) + dqm->active_sdma_queue_count++; + else if (type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->active_xgmi_sdma_queue_count++; } void decrement_queue_count(struct device_queue_manager *dqm, @@ -146,6 +150,10 @@ void decrement_queue_count(struct device_queue_manager *dqm, dqm->active_queue_count--; if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count--; + else if (type == KFD_QUEUE_TYPE_SDMA) + dqm->active_sdma_queue_count--; + else if (type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->active_xgmi_sdma_queue_count--; } static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) @@ -377,11 +385,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.is_active) increment_queue_count(dqm, q->properties.type); - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) - dqm->xgmi_sdma_queue_count++; - /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -462,15 +465,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) deallocate_hqd(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - dqm->sdma_queue_count--; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { - dqm->xgmi_sdma_queue_count--; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) deallocate_sdma_queue(dqm, q); - } else { + else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); return -EINVAL; @@ -916,8 +917,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; - dqm->sdma_queue_count = 0; - dqm->xgmi_sdma_queue_count = 0; + dqm->active_sdma_queue_count = 0; + dqm->active_xgmi_sdma_queue_count = 0; for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); @@ -1081,8 +1082,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); dqm->active_queue_count = dqm->processes_count = 0; - dqm->sdma_queue_count = 0; - dqm->xgmi_sdma_queue_count = 0; + dqm->active_sdma_queue_count = 0; + dqm->active_xgmi_sdma_queue_count = 0; dqm->active_runlist = false; dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); @@ -1254,11 +1255,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, list_add(>list, >queues_list); qpd->queue_count++; - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; -
[PATCH 1/3] drm/amdkfd: Delete excessive printings
Those printings are duplicated or useless. Change-Id: I88fbe8f5748bbd0a20bcf1f6ca67b9dde99733fe Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index a3c44d88314b..958275db3f55 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -297,8 +297,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd_mgr; int retval; - print_queue(q); - dqm_lock(dqm); if (dqm->total_queue_count >= max_num_of_queues_per_device) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index c604a2ede3f5..3bfa5c8d9654 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -257,7 +257,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, pqn->q = q; pqn->kq = NULL; retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd); - pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; @@ -278,7 +277,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, pqn->q = q; pqn->kq = NULL; retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd); - pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; case KFD_QUEUE_TYPE_DIQ: @@ -299,7 +297,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n", + pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n", pqm->process->pasid, type, retval); goto err_create_queue; } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/3] drm/amdkfd: Fix bugs in SDMA queues mapping in HWS mode
The sdma_queue_count was only used for inferring whether we should unmap SDMA queues under HWS mode. In contrast, We only mapped active queues rather than all in map_queues_cpsch(). In order to match the map and unmap for SDMA queues, we should just count active SDMA queues. Moreover, previously in execute_queues_cpsch(), we determined whether to unmap SDMA queues based on active_sdma_queue_count. However, its value only reflectd the "to be mapped" SDMA queue count, rather than the "mapped" count, which actually should be used. For example, if there is a SDMA queue mapped and the application is destroying it, when the driver reaches unmap_queues_cpsch(), active_sdma_queue_count is already 0, so unmap_sdma_queues() won't be triggered, which is a bug. Fix the issue by recording whether we should call unmap_sdma_queues() in next execute_queues_cpsch() before mapping all queues. An optimization is also made. Previously whenever unmapping SDMA queues, the code would send one unmapping packet for each SDMA engine to CP firmware regardless whether there are SDMA queues mapped on that engine. By introducing used_sdma_engines_bitmap, which is calculated during mapping, we can just send only necessary engines during unmapping. Change-Id: I84fd2f7e63d6b7f664580b425a78d3e995ce9abc Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 131 +- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 4 +- .../amd/amdkfd/kfd_process_queue_manager.c| 16 +-- 3 files changed, 71 insertions(+), 80 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 958275db3f55..3ca660acaa1d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm) return dqm->dev->device_info->num_xgmi_sdma_engines; } +static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) +{ + return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm); +} + unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) { return dqm->dev->device_info->num_sdma_engines @@ -133,19 +138,27 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, } void increment_queue_count(struct device_queue_manager *dqm, - enum kfd_queue_type type) + struct queue *q) { + enum kfd_queue_type type = q->properties.type; + dqm->active_queue_count++; if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count++; + else + dqm->used_queues_on_sdma[q->properties.sdma_engine_id]++; } void decrement_queue_count(struct device_queue_manager *dqm, - enum kfd_queue_type type) + struct queue *q) { + enum kfd_queue_type type = q->properties.type; + dqm->active_queue_count--; if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count--; + else + dqm->used_queues_on_sdma[q->properties.sdma_engine_id]--; } static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) @@ -373,12 +386,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, list_add(>list, >queues_list); qpd->queue_count++; if (q->properties.is_active) - increment_queue_count(dqm, q->properties.type); - - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) - dqm->xgmi_sdma_queue_count++; + increment_queue_count(dqm, q); /* * Unconditionally increment this counter, regardless of the queue's @@ -460,15 +468,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) deallocate_hqd(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - dqm->sdma_queue_count--; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { - dqm->xgmi_sdma_queue_count--; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) deallocate_sdma_queue(dqm, q); - } else { + else { pr_de
[PATCH 2/3] drm/amdgpu: Use MAX_SDMA_ENGINE_NUM instead of a number
MAX_SDMA_ENGINE_NUM will be used in more than one place. Change-Id: I99c84086ee62612b373c547a9d29bc4a69e7c72e Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h| 2 +- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h index 3fa18003d4d6..9d41d983a40f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -52,7 +52,7 @@ struct amdgpu_doorbell_index { uint32_t userqueue_end; uint32_t gfx_ring0; uint32_t gfx_ring1; - uint32_t sdma_engine[8]; + uint32_t sdma_engine[MAX_SDMA_ENGINE_NUM]; uint32_t ih; union { struct { diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 55750890b73f..3709d3603fb0 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h @@ -35,6 +35,7 @@ struct pci_dev; #define KGD_MAX_QUEUES 128 +#define MAX_SDMA_ENGINE_NUM 8 struct kfd_dev; struct kgd_dev; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 5/6] drm/amdkfd: Only count active sdma queues
One minor fix added. Yong On 2020-02-05 6:28 p.m., Yong Zhao wrote: The sdma_queue_count was only used for inferring whether we should unmap SDMA queues under HWS mode. In contrast, We mapped active queues rather than all in map_queues_cpsch(). In order to match the map and unmap for SDMA queues, we should just count active SDMA queues. Meanwhile, rename sdma_queue_count to active_sdma_queue_count to reflect the new usage. Change-Id: I9f1c3305dad044a3c779ec0730fcf7554050de8b Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 54 --- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 5 +- .../amd/amdkfd/kfd_process_queue_manager.c| 16 +++--- 3 files changed, 31 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 064108cf493b..cf77b866054a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -138,6 +138,10 @@ void increment_queue_count(struct device_queue_manager *dqm, dqm->active_queue_count++; if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count++; + else if (type == KFD_QUEUE_TYPE_SDMA) + dqm->active_sdma_queue_count++; + else if (type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->active_xgmi_sdma_queue_count++; } void decrement_queue_count(struct device_queue_manager *dqm, @@ -146,6 +150,10 @@ void decrement_queue_count(struct device_queue_manager *dqm, dqm->active_queue_count--; if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count--; + else if (type == KFD_QUEUE_TYPE_SDMA) + dqm->active_sdma_queue_count--; + else if (type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->active_xgmi_sdma_queue_count--; } static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) @@ -377,11 +385,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.is_active) increment_queue_count(dqm, q->properties.type); - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) - dqm->xgmi_sdma_queue_count++; - /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -462,15 +465,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) deallocate_hqd(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - dqm->sdma_queue_count--; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { - dqm->xgmi_sdma_queue_count--; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) deallocate_sdma_queue(dqm, q); - } else { + else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); return -EINVAL; @@ -916,8 +917,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; - dqm->sdma_queue_count = 0; - dqm->xgmi_sdma_queue_count = 0; + dqm->active_sdma_queue_count = 0; + dqm->active_xgmi_sdma_queue_count = 0; for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); @@ -1081,8 +1082,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); dqm->active_queue_count = dqm->processes_count = 0; - dqm->sdma_queue_count = 0; - dqm->xgmi_sdma_queue_count = 0; + dqm->active_sdma_queue_count = 0; + dqm->active_xgmi_sdma_queue_count = 0; dqm->active_runlist = false; dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); @@ -1254,11 +1255,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, list_add(>list, >queues_list); qpd->que
[PATCH 6/6] drm/amdkfd: Delete excessive printings
Those printings are duplicated or useless. Change-Id: I88fbe8f5748bbd0a20bcf1f6ca67b9dde99733fe Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 -- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index cf77b866054a..3bfdc9b251b3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -305,8 +305,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, struct mqd_manager *mqd_mgr; int retval; - print_queue(q); - dqm_lock(dqm); if (dqm->total_queue_count >= max_num_of_queues_per_device) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 941b5876f19f..cf11f4dce98a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -253,7 +253,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, pqn->q = q; pqn->kq = NULL; retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd); - pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; @@ -274,7 +273,6 @@ int pqm_create_queue(struct process_queue_manager *pqm, pqn->q = q; pqn->kq = NULL; retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd); - pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; case KFD_QUEUE_TYPE_DIQ: @@ -295,7 +293,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, } if (retval != 0) { - pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n", + pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n", pqm->process->pasid, type, retval); goto err_create_queue; } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 4/6] drm/amdkfd: Fix a memory leak in queue creation error handling
When the queue creation is failed, some resources were not freed. Fix it. Change-Id: Ia24b6ad31528dceddfd4d1c58bb1d22c35d3eabf Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index b62ee2e3344a..c604a2ede3f5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -329,6 +329,9 @@ int pqm_create_queue(struct process_queue_manager *pqm, return retval; err_create_queue: + uninit_queue(q); + if (kq) + kernel_queue_uninit(kq, false); kfree(pqn); err_allocate_pqn: /* check if queues list is empty unregister process from device */ -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/6] drm/amdkfd: Avoid ambiguity by indicating it's cp queue
The queues represented in queue_bitmap are only CP queues. Change-Id: I7e6a75de39718d7c4da608166b85b9377d06d1b3 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 4 ++-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c| 12 ++-- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h| 2 +- drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_topology.c| 2 +- drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 2 +- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 8609287620ea..ebe4b8f88e79 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -126,7 +126,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) /* this is going to have a few of the MSBs set that we need to * clear */ - bitmap_complement(gpu_resources.queue_bitmap, + bitmap_complement(gpu_resources.cp_queue_bitmap, adev->gfx.mec.queue_bitmap, KGD_MAX_QUEUES); @@ -137,7 +137,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i) - clear_bit(i, gpu_resources.queue_bitmap); + clear_bit(i, gpu_resources.cp_queue_bitmap); amdgpu_doorbell_get_kfd_info(adev, _resources.doorbell_physical_address, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 7ef9b89f5c70..973581c2b401 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -78,14 +78,14 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) /* queue is available for KFD usage if bit is 1 */ for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) if (test_bit(pipe_offset + i, - dqm->dev->shared_resources.queue_bitmap)) + dqm->dev->shared_resources.cp_queue_bitmap)) return true; return false; } -unsigned int get_queues_num(struct device_queue_manager *dqm) +unsigned int get_cp_queues_num(struct device_queue_manager *dqm) { - return bitmap_weight(dqm->dev->shared_resources.queue_bitmap, + return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap, KGD_MAX_QUEUES); } @@ -908,7 +908,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) if (test_bit(pipe_offset + queue, -dqm->dev->shared_resources.queue_bitmap)) + dqm->dev->shared_resources.cp_queue_bitmap)) dqm->allocated_queues[pipe] |= 1 << queue; } @@ -1029,7 +1029,7 @@ static int set_sched_resources(struct device_queue_manager *dqm) mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) / dqm->dev->shared_resources.num_pipe_per_mec; - if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap)) + if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap)) continue; /* only acquire queues from the first MEC */ @@ -1979,7 +1979,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data) for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { if (!test_bit(pipe_offset + queue, - dqm->dev->shared_resources.queue_bitmap)) + dqm->dev->shared_resources.cp_queue_bitmap)) continue; r = dqm->dev->kfd2kgd->hqd_dump( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index ee3400e92c30..3f0fb0d28c01 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -219,7 +219,7 @@ void device_queue_manager_init_v10_navi10( struct device_queue_manager_asic_ops *asic_ops); void program_sh_mem_settings(struct device_queue_manag
[PATCH 5/6] drm/amdkfd: Only count active sdma queues
The sdma_queue_count was only used for inferring whether we should unmap SDMA queues under HWS mode. In contrast, We mapped active queues rather than all in map_queues_cpsch(). In order to match the map and unmap for SDMA queues, we should just count active SDMA queues. Meanwhile, rename sdma_queue_count to active_sdma_queue_count to reflect the new usage. Change-Id: I9f1c3305dad044a3c779ec0730fcf7554050de8b Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 54 --- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 5 +- .../amd/amdkfd/kfd_process_queue_manager.c| 16 +++--- 3 files changed, 31 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 064108cf493b..cf77b866054a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -138,6 +138,10 @@ void increment_queue_count(struct device_queue_manager *dqm, dqm->active_queue_count++; if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count++; + else if (type == KFD_QUEUE_TYPE_SDMA) + dqm->active_sdma_queue_count++; + else if (type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->active_xgmi_sdma_queue_count++; } void decrement_queue_count(struct device_queue_manager *dqm, @@ -146,6 +150,10 @@ void decrement_queue_count(struct device_queue_manager *dqm, dqm->active_queue_count--; if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) dqm->active_cp_queue_count--; + else if (type == KFD_QUEUE_TYPE_SDMA) + dqm->active_sdma_queue_count--; + else if (type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->active_xgmi_sdma_queue_count--; } static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) @@ -377,11 +385,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (q->properties.is_active) increment_queue_count(dqm, q->properties.type); - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) - dqm->xgmi_sdma_queue_count++; - /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. @@ -462,15 +465,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; - if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { + if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) deallocate_hqd(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { - dqm->sdma_queue_count--; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) deallocate_sdma_queue(dqm, q); - } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { - dqm->xgmi_sdma_queue_count--; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) deallocate_sdma_queue(dqm, q); - } else { + else { pr_debug("q->properties.type %d is invalid\n", q->properties.type); return -EINVAL; @@ -916,8 +917,8 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; - dqm->sdma_queue_count = 0; - dqm->xgmi_sdma_queue_count = 0; + dqm->active_sdma_queue_count = 0; + dqm->active_xgmi_sdma_queue_count = 0; for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { int pipe_offset = pipe * get_queues_per_pipe(dqm); @@ -1081,8 +1082,8 @@ static int initialize_cpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); dqm->active_queue_count = dqm->processes_count = 0; - dqm->sdma_queue_count = 0; - dqm->xgmi_sdma_queue_count = 0; + dqm->active_sdma_queue_count = 0; + dqm->active_xgmi_sdma_queue_count = 0; dqm->active_runlist = false; dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm)); dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm)); @@ -1254,11 +1255,6 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, list_add(>list, >queues_list); qpd->queue_count++; - if (q->properties.type == KFD_QUEUE_TYPE_S
[PATCH 3/6] drm/amdkfd: Count active CP queues directly
The previous code of calculating active CP queues is problematic if some SDMA queues are inactive. Fix that by counting CP queues directly. Change-Id: I5ffaa75a95cbebc984558199ba2f3db6909c52a9 Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 45 +-- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 1 + .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 +- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 973581c2b401..064108cf493b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -132,6 +132,22 @@ void program_sh_mem_settings(struct device_queue_manager *dqm, qpd->sh_mem_bases); } +void increment_queue_count(struct device_queue_manager *dqm, + enum kfd_queue_type type) +{ + dqm->active_queue_count++; + if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) + dqm->active_cp_queue_count++; +} + +void decrement_queue_count(struct device_queue_manager *dqm, + enum kfd_queue_type type) +{ + dqm->active_queue_count--; + if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ) + dqm->active_cp_queue_count--; +} + static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) { struct kfd_dev *dev = qpd->dqm->dev; @@ -359,7 +375,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, list_add(>list, >queues_list); qpd->queue_count++; if (q->properties.is_active) - dqm->active_queue_count++; + increment_queue_count(dqm, q->properties.type); if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; @@ -494,7 +510,7 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, } qpd->queue_count--; if (q->properties.is_active) - dqm->active_queue_count--; + decrement_queue_count(dqm, q->properties.type); return retval; } @@ -567,9 +583,9 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) * uploaded. */ if (q->properties.is_active && !prev_active) - dqm->active_queue_count++; + increment_queue_count(dqm, q->properties.type); else if (!q->properties.is_active && prev_active) - dqm->active_queue_count--; + decrement_queue_count(dqm, q->properties.type); if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = map_queues_cpsch(dqm); @@ -618,7 +634,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = false; - dqm->active_queue_count--; + decrement_queue_count(dqm, q->properties.type); if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) continue; @@ -662,7 +678,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = false; - dqm->active_queue_count--; + decrement_queue_count(dqm, q->properties.type); } retval = execute_queues_cpsch(dqm, qpd->is_debug ? @@ -731,7 +747,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = true; - dqm->active_queue_count++; + increment_queue_count(dqm, q->properties.type); if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) continue; @@ -786,7 +802,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = true; - dqm->active_queue_count++; + increment_queue_count(dqm, q->properties.type); } retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); @@ -1158,7 +1174,7 @@ static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, dqm->total_queue_count); list_add(>list, >
[PATCH 1/6] drm/amdkfd: Rename queue_count to active_queue_count
The name is easier to understand the code. Change-Id: I9064dab1d022e02780023131f940fff578a06b72 Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 38 +-- .../drm/amd/amdkfd/kfd_device_queue_manager.h | 2 +- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 4 +- .../amd/amdkfd/kfd_process_queue_manager.c| 2 +- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 80d22bf702e8..7ef9b89f5c70 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -359,7 +359,7 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, list_add(>list, >queues_list); qpd->queue_count++; if (q->properties.is_active) - dqm->queue_count++; + dqm->active_queue_count++; if (q->properties.type == KFD_QUEUE_TYPE_SDMA) dqm->sdma_queue_count++; @@ -494,7 +494,7 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, } qpd->queue_count--; if (q->properties.is_active) - dqm->queue_count--; + dqm->active_queue_count--; return retval; } @@ -563,13 +563,13 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q) /* * check active state vs. the previous state and modify * counter accordingly. map_queues_cpsch uses the -* dqm->queue_count to determine whether a new runlist must be +* dqm->active_queue_count to determine whether a new runlist must be * uploaded. */ if (q->properties.is_active && !prev_active) - dqm->queue_count++; + dqm->active_queue_count++; else if (!q->properties.is_active && prev_active) - dqm->queue_count--; + dqm->active_queue_count--; if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) retval = map_queues_cpsch(dqm); @@ -618,7 +618,7 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = false; - dqm->queue_count--; + dqm->active_queue_count--; if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) continue; @@ -662,7 +662,7 @@ static int evict_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = false; - dqm->queue_count--; + dqm->active_queue_count--; } retval = execute_queues_cpsch(dqm, qpd->is_debug ? @@ -731,7 +731,7 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( q->properties.type)]; q->properties.is_active = true; - dqm->queue_count++; + dqm->active_queue_count++; if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) continue; @@ -786,7 +786,7 @@ static int restore_process_queues_cpsch(struct device_queue_manager *dqm, continue; q->properties.is_active = true; - dqm->queue_count++; + dqm->active_queue_count++; } retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); @@ -899,7 +899,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); - dqm->queue_count = dqm->next_pipe_to_allocate = 0; + dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; dqm->sdma_queue_count = 0; dqm->xgmi_sdma_queue_count = 0; @@ -924,7 +924,7 @@ static void uninitialize(struct device_queue_manager *dqm) { int i; - WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0); + WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); kfree(dqm->allocated_queues); for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) @@ -1064,7 +1064,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm) mutex_init(>lock_hidden); INIT_LIST_HEAD(>queues); - dqm->queue_count = dqm->processes_count = 0; + dqm->active_queue_count = dqm->processes_count = 0;
Re: [PATCH] drm/amdkfd: Fix a bug in SDMA RLC queue counting under HWS mode
True. It is a bug too. I am looking into it. Yong On 2020-01-30 5:51 p.m., Felix Kuehling wrote: On 2020-01-30 17:29, Yong Zhao wrote: The sdma_queue_count increment should be done before execute_queues_cpsch(), which calls pm_calc_rlib_size() where sdma_queue_count is used to calculate whether over_subscription is triggered. With the previous code, when a SDMA queue is created, compute_queue_count in pm_calc_rlib_size() is one more than the actual compute queue number, because the queue_count has been incremented while sdma_queue_count has not. This patch fixes that. Change-Id: I20353e657efd505353d0dd9f7eb2fab5085e7202 Signed-off-by: Yong Zhao Reviewed-by: Felix Kuehling But I took a look at pm_calc_rlib_size. I don't think subtracting dqm->sdma_queue_count from dqm->queue_count is not quite correct, because sdma_queue_count counts all SDMA queues, while queue_count only counts active queues. So an application that creates inactive SDMA queues will also create errors here. We probably need to count active compute and active SDMA queues separately in DQM to fix this properly. Regards, Felix --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2870553a2ce0..80d22bf702e8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1237,16 +1237,18 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, list_add(>list, >queues_list); qpd->queue_count++; + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; + if (q->properties.is_active) { dqm->queue_count++; retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) - dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Fix a bug in SDMA RLC queue counting under HWS mode
The sdma_queue_count increment should be done before execute_queues_cpsch(), which calls pm_calc_rlib_size() where sdma_queue_count is used to calculate whether over_subscription is triggered. With the previous code, when a SDMA queue is created, compute_queue_count in pm_calc_rlib_size() is one more than the actual compute queue number, because the queue_count has been incremented while sdma_queue_count has not. This patch fixes that. Change-Id: I20353e657efd505353d0dd9f7eb2fab5085e7202 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 2870553a2ce0..80d22bf702e8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1237,16 +1237,18 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, list_add(>list, >queues_list); qpd->queue_count++; + + if (q->properties.type == KFD_QUEUE_TYPE_SDMA) + dqm->sdma_queue_count++; + else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) + dqm->xgmi_sdma_queue_count++; + if (q->properties.is_active) { dqm->queue_count++; retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); } - if (q->properties.type == KFD_QUEUE_TYPE_SDMA) - dqm->sdma_queue_count++; - else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) - dqm->xgmi_sdma_queue_count++; /* * Unconditionally increment this counter, regardless of the queue's * type or whether the queue is active. -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore
Reviewed-by: Yong Zhao On 2020-01-17 8:37 p.m., Felix Kuehling wrote: Use a more meaningful variable name for the invalidation request that is distinct from the tmp variable that gets overwritten when acquiring the invalidation semaphore. Fixes: 00f607f38d82 ("drm/amdgpu: invalidate mmhub semaphore workaround in gmc9/gmc10") Signed-off-by: Felix Kuehling --- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 8 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 86f4ffe408e7..d914555e1212 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -262,7 +262,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, { bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub); struct amdgpu_vmhub *hub = >vmhub[vmhub]; - u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type); + u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type); + u32 tmp; /* Use register 17 for GART */ const unsigned eng = 17; unsigned int i; @@ -289,7 +290,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); /* * Issue a dummy read to wait for the ACK register to be cleared diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 54bdc1786ab1..6d95de1413c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -476,13 +476,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, { bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub); const unsigned eng = 17; - u32 j, tmp; + u32 j, inv_req, tmp; struct amdgpu_vmhub *hub; BUG_ON(vmhub >= adev->num_vmhubs); hub = >vmhub[vmhub]; - tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); + inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type); /* This is necessary for a HW workaround under SRIOV as well * as GFXOFF under bare metal @@ -493,7 +493,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, uint32_t req = hub->vm_inv_eng0_req + eng; uint32_t ack = hub->vm_inv_eng0_ack + eng; - amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid); return; } @@ -521,7 +521,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Timeout waiting for sem acquire in VM flush!\n"); } - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req); /* * Issue a dummy read to wait for the ACK register to be cleared ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Fix a compiling warning
The warning was introduced in commit b4267f2e687ff5e0402ab915c9a4d47f9a4eb73e Author: Yong Zhao Date: Fri Dec 13 11:31:48 2019 -0500 drm/amdkfd: Improve function get_sdma_rlc_reg_offset() Change-Id: I87da4f1ad8a190327a4a71f0ff78812cb942d6e0 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 2b26925623eb..f9011a07cb90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -71,7 +71,7 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base; + uint32_t sdma_engine_reg_base = 0; uint32_t sdma_rlc_reg_offset; switch (engine_id) { -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Add a message when SW scheduler is used
SW scheduler is previously called non HW scheduler, or non HWS. This message is useful when triaging issues from dmesg. Change-Id: I625518c88c043df5f60409d1ca520e7fc032251f Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d7eb6ac37f62..2870553a2ce0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -934,6 +934,7 @@ static void uninitialize(struct device_queue_manager *dqm) static int start_nocpsch(struct device_queue_manager *dqm) { + pr_info("SW scheduler is used"); init_interrupts(dqm); if (dqm->dev->device_info->asic_family == CHIP_HAWAII) -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Improve function get_sdma_rlc_reg_offset()
The SOC15_REG_OFFSET() macro needs to dereference adev->reg_offset[IP] pointer, which is NULL when there are fewer than 8 sdma engines. Avoid that by not initializing the array regardless. Change-Id: Iabae9bff7546b344720905d5d4a5cfc066a79d25 Signed-off-by: Yong Zhao --- .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 65 --- 1 file changed, 43 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 3c119407dc34..2b26925623eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -71,32 +71,53 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base[8] = { - SOC15_REG_OFFSET(SDMA0, 0, -mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA1, 0, -mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA2, 0, -mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA3, 0, -mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA4, 0, -mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA5, 0, -mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA6, 0, -mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA7, 0, -mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL - }; - - uint32_t retval = sdma_engine_reg_base[engine_id] + uint32_t sdma_engine_reg_base; + uint32_t sdma_rlc_reg_offset; + + switch (engine_id) { + case 0: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + case 1: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL; + break; + case 2: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + break; + case 3: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL; + break; + case 4: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0, + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL; + break; + case 5: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0, + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL; + break; + case 6: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0, + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL; + break; + case 7: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0, + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL; + break; + default: + break; + } + + sdma_rlc_reg_offset = sdma_engine_reg_base + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, - queue_id, retval); + queue_id, sdma_rlc_reg_offset); - return retval; + return sdma_rlc_reg_offset; } static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock
One comment inline. On 2020-01-02 4:11 p.m., Alex Sierra wrote: [Why] Avoid reclaim filesystem while eviction lock is held called from MMU notifier. [How] Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked. Using memalloc_nofs_save / memalloc_nofs_restore API. Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c Signed-off-by: Alex Sierra --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 6 +++- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b999b67ff57a..d6aba4f9df74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -82,6 +82,32 @@ struct amdgpu_prt_cb { struct dma_fence_cb cb; }; +/** + * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS + * happens while holding this lock anywhere to prevent deadlocks when + * an MMU notifier runs in reclaim-FS context. + */ +static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm) +{ + mutex_lock(>eviction_lock); + vm->saved_flags = memalloc_nofs_save(); [yz] I feel memalloc_nofs_save() should be called before mutex_lock(). Not too sure though. +} + +static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm) +{ + if (mutex_trylock(>eviction_lock)) { + vm->saved_flags = memalloc_nofs_save(); + return 1; + } + return 0; +} + +static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm) +{ + memalloc_nofs_restore(vm->saved_flags); + mutex_unlock(>eviction_lock); +} + /** * amdgpu_vm_level_shift - return the addr shift for each level * @@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } } - mutex_lock(>eviction_lock); + amdgpu_vm_eviction_lock(vm); vm->evicting = false; - mutex_unlock(>eviction_lock); + amdgpu_vm_eviction_unlock(vm); return 0; } @@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; - mutex_lock(>eviction_lock); + amdgpu_vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; goto error_unlock; @@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(, fence); error_unlock: - mutex_unlock(>eviction_lock); + amdgpu_vm_eviction_unlock(vm); return r; } @@ -2537,18 +2563,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return false; /* Try to block ongoing updates */ - if (!mutex_trylock(_base->vm->eviction_lock)) + if (!amdgpu_vm_eviction_trylock(bo_base->vm)) return false; /* Don't evict VM page tables while they are updated */ if (!dma_fence_is_signaled(bo_base->vm->last_direct) || !dma_fence_is_signaled(bo_base->vm->last_delayed)) { - mutex_unlock(_base->vm->eviction_lock); + amdgpu_vm_eviction_unlock(bo_base->vm); return false; } bo_base->vm->evicting = true; - mutex_unlock(_base->vm->eviction_lock); + amdgpu_vm_eviction_unlock(bo_base->vm); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 100547f094ff..c21a36bebc0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,6 +30,7 @@ #include #include #include +#include #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -242,9 +243,12 @@ struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; - /* Lock to prevent eviction while we are updating page tables */ + /* Lock to prevent eviction while we are updating page tables +* use vm_eviction_lock/unlock(vm) +*/ struct mutexeviction_lock; boolevicting; + unsigned intsaved_flags; /* BOs who needs a validation */ struct list_headevicted; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
See one inline comment. Other than that: Acked-by: Yong Zhao On 2020-01-02 4:11 p.m., Alex Sierra wrote: This can be used directly from amdgpu and amdkfd to invalidate TLB through pasid. It supports gmc v7, v8, v9 and v10. Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490 Signed-off-by: Alex Sierra --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 87 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 + drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 ++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 89 + 5 files changed, 249 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index b499a3de8bb6..b6413a56f546 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type); + /* flush the vm tlb via pasid */ + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type, bool all_hub); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -216,6 +219,9 @@ struct amdgpu_gmc { }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ + ((adev), (pasid), (type), (allhub))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f5725336a5f2..11a2252e60f6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -30,6 +30,8 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include "gc/gc_10_1_0_sh_mask.h" #include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_offset.h" #include "dcn/dcn_2_0_0_offset.h" #include "dcn/dcn_2_0_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" @@ -37,6 +39,7 @@ #include "navi10_enum.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "nbio_v2_3.h" @@ -234,6 +237,48 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, (!amdgpu_sriov_vf(adev))); } +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( + struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) ++ vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + +static int gmc_v10_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + signed long r; + uint32_t seq; + struct amdgpu_ring *ring = >gfx.kiq.ring; + + spin_lock(>gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); + amdgpu_fence_emit_polling(ring, ); + amdgpu_ring_commit(ring); + spin_unlock(>gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -380,6 +425,47 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t v
Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
On 2019-12-20 6:50 p.m., Yong Zhao wrote: Inline. On 2019-12-20 4:35 p.m., Felix Kuehling wrote: On 2019-12-20 1:24, Alex Sierra wrote: [Why] TLB flush method has been deprecated using kfd2kgd interface. This implementation is now on the amdgpu_amdkfd API. [How] TLB flush functions now implemented in amdgpu_amdkfd. Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7 Signed-off-by: Alex Sierra Looks good to me. See my comment about the TODO inline. --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 -- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d3da9dde4ee1..b7f6e70c5762 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + /* TODO: condition missing for FAMILY above NV */ I'm not sure what's missing here. NV and above don't need any special treatment. Since SDMA is connected to GFXHUB on NV, only the GFXHUB needs to be flushed. Regards, Felix + if (adev->family == AMDGPU_FAMILY_AI) { + int i; + + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + } else { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + } This if else can be unified by for (i = 0; i < adev->num_vmhubs; i++) amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + + return 0; +} + +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint32_t flush_type = 0; + bool all_hub = false; + + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; + + if (adev->family == AMDGPU_FAMILY_AI) + all_hub = true; + + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); The all_hub parameter can be inferred from num_vmhubs in flush_gpu_tlb_pasid(), so it can be optimized out here. +} + bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 069d5d230810..47b0f2957d1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid); +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 536a153ac9a4..25b90f70aecd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -32,6 +32,7 @@ #include #include #include "amdgpu_amdkfd.h" +#include "amdgpu.h" struct mm_struct; @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, void kfd_flush_tlb(struct kfd_process_device *pdd) { struct kfd_dev *dev = pdd->dev; - const struct kfd2kgd_calls *f2g = dev->kfd2kgd; if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { /* Nothing to flush until a VMID is assigned, which * only happens when the first queue is created. */ if (pdd->qpd.vmid) - f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid); + amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd, + pdd->qpd.vmid); } else { - f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid); + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, + pdd->process->pasid); } } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7Cyong.zhao%40amd.com%7C3a33649d2a804998d00408d785a7776f%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124827007059728sdata=fNTunmAJObxfgbJBlNWWXu
Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid
On 2019-12-20 1:24 a.m., Alex Sierra wrote: This can be used directly from amdgpu and amdkfd to invalidate TLB through pasid. It supports gmc v7, v8, v9 and v10. Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490 Signed-off-by: Alex Sierra --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6 ++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 81 drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c | 33 ++ drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c | 34 ++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 84 + 5 files changed, 238 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index b499a3de8bb6..b6413a56f546 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid, uint32_t vmhub, uint32_t flush_type); + /* flush the vm tlb via pasid */ + int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid, + uint32_t flush_type, bool all_hub); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -216,6 +219,9 @@ struct amdgpu_gmc { }; #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type))) +#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \ + ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \ + ((adev), (pasid), (type), (allhub))) #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) #define amdgpu_gmc_map_mtype(adev, flags) (adev)->gmc.gmc_funcs->map_mtype((adev),(flags)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f5725336a5f2..b1a5408a8d7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -30,6 +30,8 @@ #include "hdp/hdp_5_0_0_sh_mask.h" #include "gc/gc_10_1_0_sh_mask.h" #include "mmhub/mmhub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_sh_mask.h" +#include "athub/athub_2_0_0_offset.h" #include "dcn/dcn_2_0_0_offset.h" #include "dcn/dcn_2_0_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" @@ -37,6 +39,7 @@ #include "navi10_enum.h" #include "soc15.h" +#include "soc15d.h" #include "soc15_common.h" #include "nbio_v2_3.h" @@ -234,6 +237,48 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct amdgpu_device *adev, (!amdgpu_sriov_vf(adev))); } +static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info( + struct amdgpu_device *adev, + uint8_t vmid, uint16_t *p_pasid) +{ + uint32_t value; + + value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) ++ vmid); + *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK; + + return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK); +} + +static int gmc_v10_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev, + uint16_t pasid, uint32_t flush_type, + bool all_hub) +{ + signed long r; + uint32_t seq; + struct amdgpu_ring *ring = >gfx.kiq.ring; + + spin_lock(>gfx.kiq.ring_lock); + amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/ + amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0)); + amdgpu_ring_write(ring, + PACKET3_INVALIDATE_TLBS_DST_SEL(1) | + PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) | + PACKET3_INVALIDATE_TLBS_PASID(pasid) | + PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type)); + amdgpu_fence_emit_polling(ring, ); + amdgpu_ring_commit(ring); + spin_unlock(>gfx.kiq.ring_lock); + + r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout); + if (r < 1) { + DRM_ERROR("wait for kiq fence error: %ld.\n", r); + return -ETIME; + } + + return 0; +} + /* * GART * VMID 0 is the physical GPU addresses as used by the kernel. @@ -380,6 +425,41 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); } +/** + * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid + * + * @adev: amdgpu_device pointer + * @pasid: pasid to be flush + * + * Flush the TLB for the requested pasid. + */
Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd
Inline. On 2019-12-20 4:35 p.m., Felix Kuehling wrote: On 2019-12-20 1:24, Alex Sierra wrote: [Why] TLB flush method has been deprecated using kfd2kgd interface. This implementation is now on the amdgpu_amdkfd API. [How] TLB flush functions now implemented in amdgpu_amdkfd. Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7 Signed-off-by: Alex Sierra Looks good to me. See my comment about the TODO inline. --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 -- 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index d3da9dde4ee1..b7f6e70c5762 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + /* TODO: condition missing for FAMILY above NV */ I'm not sure what's missing here. NV and above don't need any special treatment. Since SDMA is connected to GFXHUB on NV, only the GFXHUB needs to be flushed. Regards, Felix + if (adev->family == AMDGPU_FAMILY_AI) { + int i; + + for (i = 0; i < adev->num_vmhubs; i++) + amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + } else { + amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0); + } This if else can be unified by for (i = 0; i < adev->num_vmhubs; i++) amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0); + + return 0; +} + +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + uint32_t flush_type = 0; + bool all_hub = false; + + if (adev->gmc.xgmi.num_physical_nodes && + adev->asic_type == CHIP_VEGA20) + flush_type = 2; + + if (adev->family == AMDGPU_FAMILY_AI) + all_hub = true; + + return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub); +} + bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 069d5d230810..47b0f2957d1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t *ib_cmd, uint32_t ib_len); void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle); bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd); +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t vmid); +int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t pasid); bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 536a153ac9a4..25b90f70aecd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -32,6 +32,7 @@ #include #include #include "amdgpu_amdkfd.h" +#include "amdgpu.h" struct mm_struct; @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, void kfd_flush_tlb(struct kfd_process_device *pdd) { struct kfd_dev *dev = pdd->dev; - const struct kfd2kgd_calls *f2g = dev->kfd2kgd; if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { /* Nothing to flush until a VMID is assigned, which * only happens when the first queue is created. */ if (pdd->qpd.vmid) - f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid); + amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd, + pdd->qpd.vmid); } else { - f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid); + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd, + pdd->process->pasid); } } ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7Cyong.zhao%40amd.com%7C196afdae9b69425e58aa08d78594927a%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124745521063472sdata=5ZZNtq%2FyOZX%2BdQrG1ydoidOU90ZrbWGz9tnuycEg4F4%3Dreserved=0 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock
One style comment inline. Yong On 2019-12-20 1:24 a.m., Alex Sierra wrote: [Why] Avoid reclaim filesystem while eviction lock is held called from MMU notifier. [How] Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked. Using memalloc_nofs_save / memalloc_nofs_restore API. Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c Signed-off-by: Alex Sierra --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 28 +- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b999b67ff57a..b36daa6230fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -678,9 +678,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, } } - mutex_lock(>eviction_lock); + vm_eviction_lock(vm); vm->evicting = false; - mutex_unlock(>eviction_lock); + vm_eviction_unlock(vm); return 0; } @@ -1559,7 +1559,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; - mutex_lock(>eviction_lock); + vm_eviction_lock(vm); if (vm->evicting) { r = -EBUSY; goto error_unlock; @@ -1576,7 +1576,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, r = vm->update_funcs->commit(, fence); error_unlock: - mutex_unlock(>eviction_lock); + vm_eviction_unlock(vm); return r; } @@ -2537,18 +2537,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) return false; /* Try to block ongoing updates */ - if (!mutex_trylock(_base->vm->eviction_lock)) + if (!vm_eviction_trylock(bo_base->vm)) return false; /* Don't evict VM page tables while they are updated */ if (!dma_fence_is_signaled(bo_base->vm->last_direct) || !dma_fence_is_signaled(bo_base->vm->last_delayed)) { - mutex_unlock(_base->vm->eviction_lock); + vm_eviction_unlock(bo_base->vm); return false; } bo_base->vm->evicting = true; - mutex_unlock(_base->vm->eviction_lock); + vm_eviction_unlock(bo_base->vm); return true; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 100547f094ff..d35aa76469ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,6 +30,7 @@ #include #include #include +#include #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -242,9 +243,12 @@ struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; - /* Lock to prevent eviction while we are updating page tables */ + /* Lock to prevent eviction while we are updating page tables +* use vm_eviction_lock/unlock(vm) +*/ struct mutexeviction_lock; boolevicting; + unsigned intsaved_flags; [yz] The tabs should be used here instead of spaces. /* BOs who needs a validation */ struct list_headevicted; @@ -436,4 +440,26 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo); +/* vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS + * happens while holding this lock anywhere to prevent deadlocks when + * an MMU notifier runs in reclaim-FS context. + */ +static inline void vm_eviction_lock(struct amdgpu_vm *vm) +{ + mutex_lock(>eviction_lock); + vm->saved_flags = memalloc_nofs_save(); +} +static inline int vm_eviction_trylock(struct amdgpu_vm *vm) +{ + if (mutex_trylock(>eviction_lock)) { + vm->saved_flags = memalloc_nofs_save(); + return 1; + } + return 0; +} +static inline void vm_eviction_unlock(struct amdgpu_vm *vm) +{ + memalloc_nofs_restore(vm->saved_flags); + mutex_unlock(>eviction_lock); +} #endif ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Improve function get_sdma_rlc_reg_offset()
This prevents the NULL pointer access when there are fewer than 8 sdma engines. Change-Id: Iabae9bff7546b344720905d5d4a5cfc066a79d25 Signed-off-by: Yong Zhao --- .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 64 --- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 3c119407dc34..2ad088f10493 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -71,32 +71,52 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device *adev, unsigned int engine_id, unsigned int queue_id) { - uint32_t sdma_engine_reg_base[8] = { - SOC15_REG_OFFSET(SDMA0, 0, -mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA1, 0, -mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA2, 0, -mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA3, 0, -mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA4, 0, -mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA5, 0, -mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA6, 0, -mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL, - SOC15_REG_OFFSET(SDMA7, 0, -mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL - }; - - uint32_t retval = sdma_engine_reg_base[engine_id] + uint32_t sdma_engine_reg_base; + uint32_t sdma_rlc_reg_offset; + + switch (engine_id) { + case 0: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0, + mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL; + break; + case 1: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0, + mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL; + break; + case 2: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0, + mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL; + break; + case 3: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0, + mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL; + break; + case 4: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0, + mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL; + break; + case 5: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0, + mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL; + break; + case 6: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0, + mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL; + break; + case 7: + sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0, + mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL; + break; + + } + + sdma_rlc_reg_offset = sdma_engine_reg_base + queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL); pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id, - queue_id, retval); + queue_id, sdma_rlc_reg_offset); - return retval; + return sdma_rlc_reg_offset; } static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd, -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdkfd: Add Arcturus specific set_vm_context_page_table_base()
The first one was already fixed and pushed a week ago. Regards, Yong On 2019-12-12 7:25 p.m., Felix Kuehling wrote: I agree with Christian's comments on patch 1. With those fixed, the series is Reviewed-by: Felix Kuehling Regards, Felix On 2019-12-02 20:42, Yong Zhao wrote: Since Arcturus has it own function pointer, we can move Arcturus specific logic to there rather than leaving it entangled with other GFX9 chips. Change-Id: I7df7c004a0c8ac0616ded0e65144670df50f92a7 Signed-off-by: Yong Zhao --- .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 20 ++- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 14 +++-- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 2 -- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index b6713e0ed1b2..3c119407dc34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -46,6 +46,8 @@ #include "soc15.h" #include "soc15d.h" #include "amdgpu_amdkfd_gfx_v9.h" +#include "gfxhub_v1_0.h" +#include "mmhub_v9_4.h" #define HQD_N_REGS 56 #define DUMP_REG(addr) do { \ @@ -258,6 +260,22 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } +static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base); + + gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); +} + const struct kfd2kgd_calls arcturus_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -277,7 +295,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, - .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base, .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 6f1a4676ddde..e7861f0ef415 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -40,7 +40,6 @@ #include "soc15d.h" #include "mmhub_v1_0.h" #include "gfxhub_v1_0.h" -#include "mmhub_v9_4.h" enum hqd_dequeue_request_type { @@ -758,8 +757,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, return 0; } -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, + uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -769,14 +768,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi return; } - /* TODO: take advantage of per-process address space size. For - * now, all processes share the same address space size, like - * on GFX8 and older. - */ - if (adev->asic_type == CHIP_ARCTURUS) { - mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base); - } else - mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); + mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index d9e9ad22b2bd..02b1426d17d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -57,8 +57,6 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, uint8_t vmid, uint16_t *p_pasid); -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, _
[PATCH] drm/amdgpu: Add CU info print log
The log will be useful for easily getting the CU info on various emulation models or ASICs. Change-Id: Ic1c914938aa3445d8dbfdf6a237bc1d58b0d5267 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8992506541d8..c778b6db5e42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3032,6 +3032,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto failed; } + DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se, + adev->gfx.config.max_cu_per_sh, + adev->gfx.cu_info.number); + adev->accel_working = true; amdgpu_vm_check_compute_bug(adev); -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: Add CU info print log
The log will be useful for easily getting the CU info on various emulation models or ASICs. Change-Id: Ic1c914938aa3445d8dbfdf6a237bc1d58b0d5267 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 6 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 6 ++ drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++ 7 files changed, 22 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8992506541d8..df9732510012 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1529,6 +1529,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); + + DRM_INFO("gpu_info: SE %d, SH per SE %d, CU per SH %d\n", + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se, + adev->gfx.config.max_cu_per_sh); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gpu_info_fw->gc_num_tccs); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index f95092741c38..8001a067700c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -388,6 +388,12 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev) adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) + le32_to_cpu(gc_info->gc_num_wgp1_per_sa)); adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se); + + DRM_INFO("discovery: SE %d, SH per SE %d, CU per SH %d\n", + adev->gfx.config.max_shader_engines, + adev->gfx.config.max_sh_per_se, + adev->gfx.config.max_cu_per_sh); + adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se); adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c); adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 8cdef79de9d4..a26892e71680 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -5432,6 +5432,8 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_mask = ao_cu_mask; cu_info->simd_per_cu = NUM_SIMD_PER_CU; + DRM_INFO("active_cu_number: %d\n", cu_info->number); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 95bb2422b27c..bb05a94690d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -3620,6 +3620,8 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev) cu_info->number = active_cu_number; cu_info->ao_cu_mask = ao_cu_mask; + + DRM_INFO("active_cu_number: %d\n", cu_info->number); } const struct amdgpu_ip_block_version gfx_v6_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 648d767d14e7..6d16216d5c7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5159,6 +5159,8 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev) cu_info->max_scratch_slots_per_cu = 32; cu_info->wave_front_size = 64; cu_info->lds_size = 64; + + DRM_INFO("active_cu_number: %d\n", cu_info->number); } const struct amdgpu_ip_block_version gfx_v7_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 8b9f440688ed..1073eb5c3cec 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -7141,6 +7141,8 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev) cu_info->max_scratch_slots_per_cu = 32; cu_info->wave_front_size = 64; cu_info->lds_size = 64; + + DRM_INFO("active_cu_number: %d\n
Re: [PATCH] drm/amdgpu/gfx: Improvement on EDC GPR workarounds
Not sure whether we should add the issue ticket info here. Reviewed-by: Yong Zhao On 2019-12-03 3:45 p.m., James Zhu wrote: SPI limits total CS waves in flight per SE to no more than 32 * num_cu and we need to stuff 40 waves on a CU to completely clean the SGPR. This is accomplished in the WR by cleaning the SE in two steps, half of the CU per step. Signed-off-by: James Zhu --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 83 ++- 1 file changed, 63 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 6a251a3..147c08f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3938,24 +3938,37 @@ static const struct soc15_reg_entry vgpr_init_regs[] = { { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x100 }, /* CU_GROUP_COUNT=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x17f }, /* VGPRS=15 (256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x40 }, /* 64KB LDS */ }; -static const struct soc15_reg_entry sgpr_init_regs[] = { - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x100 }, /* CU_GROUP_COUNT=1 */ - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 }, +static const struct soc15_reg_entry sgpr1_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x00ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x00ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x00ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x00ff }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, +}; + +static const struct soc15_reg_entry sgpr2_init_regs[] = { + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xff00 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x000 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 }, { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 }, - { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 GPRS) */ + { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */ { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 }, }; @@ -4065,7 +4078,9 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) total_size = ((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4; total_size += - ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4; + ((ARRAY_SIZE(sgpr1_init_regs) * 3) + 4 + 5 + 2) * 4; + total_size += + ((ARRAY_SIZE(sgpr2_init_regs) * 3) + 4 + 5 + 2) * 4; total_size = ALIGN(total_size, 256); vgpr_offset = total_size; total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256); @@ -4108,7 +4123,35 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) /* write dispatch packet */ ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); - ib.ptr[ib.length_dw++] = 256; /* x */ + ib.ptr[ib.length_dw++] = 0x40*2; /* x */ + ib.ptr[ib.length_dw++] = 1; /* y */ + ib.ptr[ib.length_dw++] = 1; /* z */ + ib.ptr[ib.length_dw
[PATCH] drm/amdkfd: Contain MMHUB number in mmhub_v9_4_setup_vm_pt_regs()
Adjust the exposed function prototype so that the caller does not need to know the MMHUB number. Change-Id: I4420d1715984f703954f074682b075fc59e2a330 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 ++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h | 8 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 14 -- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h | 2 ++ 4 files changed, 16 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 47c853ef1051..6f1a4676ddde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -40,7 +40,7 @@ #include "soc15d.h" #include "mmhub_v1_0.h" #include "gfxhub_v1_0.h" -#include "gmc_v9_0.h" +#include "mmhub_v9_4.h" enum hqd_dequeue_request_type { @@ -774,9 +774,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi * on GFX8 and older. */ if (adev->asic_type == CHIP_ARCTURUS) { - /* Two MMHUBs */ - mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base); - mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base); + mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base); } else mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h index 971c0840358f..49e8be761214 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h @@ -36,12 +36,4 @@ extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; - -/* amdgpu_amdkfd*.c */ -void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t value); -void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t value); -void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, - uint32_t vmid, uint64_t value); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 8599bfdb9a9e..d9301e80522a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -54,7 +54,7 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) return base; } -void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, +static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid, uint32_t vmid, uint64_t value) { /* two registers distance between mmVML2VC0_VM_CONTEXT0_* to @@ -80,7 +80,7 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, { uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); - mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base); + mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, @@ -101,6 +101,16 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, (u32)(adev->gmc.gart_end >> 44)); } +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) +{ + int i; + + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) + mmhub_v9_4_setup_hubid_vm_pt_regs(adev, i, vmid, + page_table_base); +} + static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, int hubid) { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h index 354a4b7e875b..1b979773776c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h @@ -34,5 +34,7 @@ void mmhub_v9_4_init(struct amdgpu_device *adev); int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdkfd: Contain MMHUB number in the implementation
Adjust the exposed function prototype so that the caller does not need to know the MMHUB number. Change-Id: I4420d1715984f703954f074682b075fc59e2a330 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 6 ++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h | 8 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 13 +++-- drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h | 2 ++ 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 47c853ef1051..6f1a4676ddde 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -40,7 +40,7 @@ #include "soc15d.h" #include "mmhub_v1_0.h" #include "gfxhub_v1_0.h" -#include "gmc_v9_0.h" +#include "mmhub_v9_4.h" enum hqd_dequeue_request_type { @@ -774,9 +774,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi * on GFX8 and older. */ if (adev->asic_type == CHIP_ARCTURUS) { - /* Two MMHUBs */ - mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base); - mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base); + mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base); } else mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h index 971c0840358f..49e8be761214 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h @@ -36,12 +36,4 @@ extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; - -/* amdgpu_amdkfd*.c */ -void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t value); -void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, - uint64_t value); -void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, - uint32_t vmid, uint64_t value); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index 8599bfdb9a9e..0b621bf8bbd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -54,7 +54,7 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev) return base; } -void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid, +static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int hubid, uint32_t vmid, uint64_t value) { /* two registers distance between mmVML2VC0_VM_CONTEXT0_* to @@ -80,7 +80,7 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, { uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); - mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base); + mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base); WREG32_SOC15_OFFSET(MMHUB, 0, mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, @@ -101,6 +101,15 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct amdgpu_device *adev, (u32)(adev->gmc.gart_end >> 44)); } +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) +{ + int i; + for (i = 0; i < MMHUB_NUM_INSTANCES; i++) { + mmhub_v9_4_setup_hubid_vm_pt_regs(adev, i, vmid, page_table_base); + } +} + static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev, int hubid) { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h index 354a4b7e875b..1b979773776c 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h @@ -34,5 +34,7 @@ void mmhub_v9_4_init(struct amdgpu_device *adev); int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags); +void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdkfd: Add Arcturus specific set_vm_context_page_table_base()
Since Arcturus has it own function pointer, we can move Arcturus specific logic to there rather than leaving it entangled with other GFX9 chips. Change-Id: I7df7c004a0c8ac0616ded0e65144670df50f92a7 Signed-off-by: Yong Zhao --- .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 20 ++- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 14 +++-- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h | 2 -- 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index b6713e0ed1b2..3c119407dc34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -46,6 +46,8 @@ #include "soc15.h" #include "soc15d.h" #include "amdgpu_amdkfd_gfx_v9.h" +#include "gfxhub_v1_0.h" +#include "mmhub_v9_4.h" #define HQD_N_REGS 56 #define DUMP_REG(addr) do {\ @@ -258,6 +260,22 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd, return 0; } +static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, + uint64_t page_table_base) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + + if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { + pr_err("trying to set page table base for wrong VMID %u\n", + vmid); + return; + } + + mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base); + + gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); +} + const struct kfd2kgd_calls arcturus_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -277,7 +295,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .get_tile_config = kgd_gfx_v9_get_tile_config, - .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, + .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base, .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs, .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid, .get_hive_id = amdgpu_amdkfd_get_hive_id, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 6f1a4676ddde..e7861f0ef415 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -40,7 +40,6 @@ #include "soc15d.h" #include "mmhub_v1_0.h" #include "gfxhub_v1_0.h" -#include "mmhub_v9_4.h" enum hqd_dequeue_request_type { @@ -758,8 +757,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, return 0; } -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base) +static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, + uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -769,14 +768,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmi return; } - /* TODO: take advantage of per-process address space size. For -* now, all processes share the same address space size, like -* on GFX8 and older. -*/ - if (adev->asic_type == CHIP_ARCTURUS) { - mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base); - } else - mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); + mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index d9e9ad22b2bd..02b1426d17d1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -57,8 +57,6 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd, uint8_t vmid, uint16_t *p_pasid); -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint64_t page_table_base); int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd, -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdkfd: Remove duplicate functions update_mqd_hiq()
Pushed. Thanks! Yong On 2019-11-21 11:21 p.m., Liu, Zhan wrote: Looks good to me. Reviewed-by: Zhan Liu -Original Message- From: amd-gfx On Behalf Of Yong Zhao Sent: 2019/November/21, Thursday 4:25 PM To: amd-gfx@lists.freedesktop.org Cc: Zhao, Yong Subject: [PATCH] drm/amdkfd: Remove duplicate functions update_mqd_hiq() The functions are the same as update_mqd(). Change-Id: Ic8d8f23cdde6b7806ab766ddf3d71fa668cca5fb Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 16 ++-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 16 ++-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 4 3 files changed, 4 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 8d21325b5cbb..7832ec6e480b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -282,18 +282,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; } -static void update_mqd_hiq(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct v10_compute_mqd *m; - - update_mqd(mm, mqd, q); - - /* TODO: what's the point? update_mqd already does this. */ - m = get_mqd(mqd); - m->cp_hqd_vmid = q->vmid; -} - static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -422,7 +410,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v10_compute_mqd); @@ - 436,7 +424,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v10_compute_mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index df77d67ec9aa..aa9010995eaf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -325,18 +325,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; } -static void update_mqd_hiq(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct v9_mqd *m; - - update_mqd(mm, mqd, q); - - /* TODO: what's the point? update_mqd already does this. */ - m = get_mqd(mqd); - m->cp_hqd_vmid = q->vmid; -} - static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -462,7 +450,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v9_mqd); @@ -475,7 +463,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v9_mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 3b6b5671964c..a5e8ff1e5945 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -312,11 +312,7 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, static void update_mqd_hiq(struct mqd_manager *mm, void *mqd, struct queue_propertie
[PATCH 1/2] drm/amdkfd: Rename pm_release_ib() to pm_destroy_runlist_ib()
Its counterparty is called pm_create_runlist_ib(). The new name makes it easier to navigate in the code. Accordingly, Add rl_ to the variable names to indicate it is runlist. Change-Id: Id63bfebeb8a5ed6aaefbebe98858d84724fd26be Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c| 18 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 6 +++--- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f7f6df40875e..510f2d1bb8bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1355,7 +1355,7 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, if (retval) return retval; - pm_release_ib(>packets); + pm_destroy_runlist_ib(>packets); dqm->active_runlist = false; return retval; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 6cabed06ef5d..4a9433257428 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -98,15 +98,15 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, mutex_lock(>lock); retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size, - >ib_buffer_obj); + >rl_ib_obj); if (retval) { pr_err("Failed to allocate runlist IB\n"); goto out; } - *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr; - *rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr; + *(void **)rl_buffer = pm->rl_ib_obj->cpu_ptr; + *rl_gpu_buffer = pm->rl_ib_obj->gpu_addr; memset(*rl_buffer, 0, *rl_buffer_size); pm->allocated = true; @@ -138,7 +138,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, return retval; *rl_size_bytes = alloc_size_bytes; - pm->ib_size_bytes = alloc_size_bytes; + pm->rl_ib_size_bytes = alloc_size_bytes; pr_debug("Building runlist ib process count: %d queues count %d\n", pm->dqm->processes_count, pm->dqm->queue_count); @@ -149,7 +149,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm, /* build map process packet */ if (proccesses_mapped >= pm->dqm->processes_count) { pr_debug("Not enough space left in runlist IB\n"); - pm_release_ib(pm); + pm_destroy_runlist_ib(pm); return -ENOMEM; } @@ -337,7 +337,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) fail_acquire_packet_buffer: mutex_unlock(>lock); fail_create_runlist_ib: - pm_release_ib(pm); + pm_destroy_runlist_ib(pm); return retval; } @@ -401,11 +401,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, return retval; } -void pm_release_ib(struct packet_manager *pm) +void pm_destroy_runlist_ib(struct packet_manager *pm) { mutex_lock(>lock); if (pm->allocated) { - kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj); + kfd_gtt_sa_free(pm->dqm->dev, pm->rl_ib_obj); pm->allocated = false; } mutex_unlock(>lock); @@ -425,7 +425,7 @@ int pm_debugfs_runlist(struct seq_file *m, void *data) } seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4, -pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false); +pm->rl_ib_obj->cpu_ptr, pm->rl_ib_size_bytes, false); out: mutex_unlock(>lock); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 514896bef99a..389cda7c8f1a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -937,8 +937,8 @@ struct packet_manager { struct kernel_queue *priv_queue; struct mutex lock; bool allocated; - struct kfd_mem_obj *ib_buffer_obj; - unsigned int ib_size_bytes; + struct kfd_mem_obj *rl_ib_obj; + unsigned int rl_ib_size_bytes; bool is_over_subscription; const struct packet_manager_funcs *pmf; @@ -989,7 +989,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, uint32_t filter_param, bool reset, unsigned int sdma_engine); -void pm_release_ib(struct packet_manager *pm); +void pm_destroy_runlist_ib(
[PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()
This is consistent with the calling sequence in unmap_queues_cpsch(). Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e Signed-off-by: Yong Zhao --- .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++-- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 20 +-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 7 ++- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 510f2d1bb8bb..fd7d90136b94 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct device_queue_manager *dqm) static int map_queues_cpsch(struct device_queue_manager *dqm) { int retval; + uint64_t rl_ib_gpu_addr; + size_t rl_ib_size; if (!dqm->sched_running) return 0; @@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct device_queue_manager *dqm) if (dqm->active_runlist) return 0; - retval = pm_send_runlist(>packets, >queues); + retval = pm_create_runlist_ib(>packets, >queues, + _ib_gpu_addr, _ib_size); + if (retval) + goto fail_create_runlist_ib; + + pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr); + + retval = pm_send_runlist(>packets, >queues, + rl_ib_gpu_addr, rl_ib_size); pr_debug("%s sent runlist\n", __func__); if (retval) { pr_err("failed to execute runlist\n"); - return retval; + goto fail_create_runlist_ib; } dqm->active_runlist = true; return retval; + +fail_create_runlist_ib: + pm_destroy_runlist_ib(>packets); + return retval; } /* dqm->lock mutex has to be locked before calling this function */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 4a9433257428..6ec54e9f9392 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -116,7 +116,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm, return retval; } -static int pm_create_runlist_ib(struct packet_manager *pm, +int pm_create_runlist_ib(struct packet_manager *pm, struct list_head *queues, uint64_t *rl_gpu_addr, size_t *rl_size_bytes) @@ -149,7 +149,6 @@ static int pm_create_runlist_ib(struct packet_manager *pm, /* build map process packet */ if (proccesses_mapped >= pm->dqm->processes_count) { pr_debug("Not enough space left in runlist IB\n"); - pm_destroy_runlist_ib(pm); return -ENOMEM; } @@ -299,20 +298,13 @@ int pm_send_set_resources(struct packet_manager *pm, return retval; } -int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) +int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues, + uint64_t rl_ib_gpu_addr, size_t rl_ib_size) { - uint64_t rl_gpu_ib_addr; uint32_t *rl_buffer; - size_t rl_ib_size, packet_size_dwords; + size_t packet_size_dwords; int retval; - retval = pm_create_runlist_ib(pm, dqm_queues, _gpu_ib_addr, - _ib_size); - if (retval) - goto fail_create_runlist_ib; - - pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr); - packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t); mutex_lock(>lock); @@ -321,7 +313,7 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) if (retval) goto fail_acquire_packet_buffer; - retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr, + retval = pm->pmf->runlist(pm, rl_buffer, rl_ib_gpu_addr, rl_ib_size / sizeof(uint32_t), false); if (retval) goto fail_create_runlist; @@ -336,8 +328,6 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues) kq_rollback_packet(pm->priv_queue); fail_acquire_packet_buffer: mutex_unlock(>lock); -fail_create_runlist_ib: - pm_destroy_runlist_ib(pm); return retval; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 389cda7c8f1a..6accb605b9f0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -980,7 +980,8 @@ int pm_init(struct packet_manager *pm, struct d
[PATCH] drm/amdkfd: Remove duplicate functions update_mqd_hiq()
The functions are the same as update_mqd(). Change-Id: Ic8d8f23cdde6b7806ab766ddf3d71fa668cca5fb Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 16 ++-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 16 ++-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 4 3 files changed, 4 insertions(+), 32 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 8d21325b5cbb..7832ec6e480b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -282,18 +282,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; } -static void update_mqd_hiq(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct v10_compute_mqd *m; - - update_mqd(mm, mqd, q); - - /* TODO: what's the point? update_mqd already does this. */ - m = get_mqd(mqd); - m->cp_hqd_vmid = q->vmid; -} - static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -422,7 +410,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v10_compute_mqd); @@ -436,7 +424,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v10_compute_mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index df77d67ec9aa..aa9010995eaf 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -325,18 +325,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT; } -static void update_mqd_hiq(struct mqd_manager *mm, void *mqd, - struct queue_properties *q) -{ - struct v9_mqd *m; - - update_mqd(mm, mqd, q); - - /* TODO: what's the point? update_mqd already does this. */ - m = get_mqd(mqd); - m->cp_hqd_vmid = q->vmid; -} - static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) @@ -462,7 +450,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd_hiq_sdma; mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v9_mqd); @@ -475,7 +463,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; - mqd->update_mqd = update_mqd_hiq; + mqd->update_mqd = update_mqd; mqd->destroy_mqd = destroy_mqd; mqd->is_occupied = is_occupied; mqd->mqd_size = sizeof(struct v9_mqd); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 3b6b5671964c..a5e8ff1e5945 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -312,11 +312,7 @@ static void init_mqd_hiq(struct mqd_manager *mm, void **mqd, static void update_mqd_hiq(struct mqd_manager *mm, void *mqd, struct queue_properties *q) { - struct vi_mqd *m; __update_mqd(mm, mqd, q, MTYPE_UC, 0); - - m = get_mqd(mqd); - m->cp_hqd_vmid = q->vmid; } static void init_mqd_sdma(struct mqd_manager *mm, void **mqd, -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: DIQ should not use HIQ way to allocate memory
In the mqd_diq_sdma buffer, there should be only one HIQ mqd. All DIQs should be allocate using the regular way. Change-Id: Ibf3eb33604d0ec30501c244228cdb3b24615b699 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index b08694ec65d7..19f0fe547c57 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -400,7 +400,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, #endif break; case KFD_MQD_TYPE_DIQ: - mqd->allocate_mqd = allocate_hiq_mqd; + mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 5a0e30441be8..8d21325b5cbb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -432,7 +432,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, pr_debug("%s@%i\n", __func__, __LINE__); break; case KFD_MQD_TYPE_DIQ: - mqd->allocate_mqd = allocate_hiq_mqd; + mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index bdbcea22ad12..df77d67ec9aa 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -471,7 +471,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, #endif break; case KFD_MQD_TYPE_DIQ: - mqd->allocate_mqd = allocate_hiq_mqd; + mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index c9e1151b5a57..3b6b5671964c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -452,7 +452,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, #endif break; case KFD_MQD_TYPE_DIQ: - mqd->allocate_mqd = allocate_hiq_mqd; + mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd_hiq; mqd->free_mqd = free_mqd; mqd->load_mqd = load_mqd; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Delete KFD_MQD_TYPE_COMPUTE
It is the same as KFD_MQD_TYPE_CP, so delete it. As a result, we will have one less mqd mananger per device. Change-Id: Iaa98fc17be06b216de7a826c3577f44bc0536b4c Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 1 - drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +-- 6 files changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index b42f34ef2b5c..f7f6df40875e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1595,7 +1595,7 @@ static int get_wave_state(struct device_queue_manager *dqm, goto dqm_unlock; } - mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; + mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; if (!mqd_mgr->get_wave_state) { r = -EINVAL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 28876aceb14b..b08694ec65d7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -374,7 +374,6 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type, switch (type) { case KFD_MQD_TYPE_CP: - case KFD_MQD_TYPE_COMPUTE: mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd; mqd->free_mqd = free_mqd; @@ -442,7 +441,7 @@ struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type, mqd = mqd_manager_init_cik(type, dev); if (!mqd) return NULL; - if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) + if (type == KFD_MQD_TYPE_CP) mqd->update_mqd = update_mqd_hawaii; return mqd; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 579c5ffcfa79..5a0e30441be8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -401,7 +401,6 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, switch (type) { case KFD_MQD_TYPE_CP: - case KFD_MQD_TYPE_COMPUTE: pr_debug("%s@%i\n", __func__, __LINE__); mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index 22a819c888d8..bdbcea22ad12 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -444,7 +444,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type, switch (type) { case KFD_MQD_TYPE_CP: - case KFD_MQD_TYPE_COMPUTE: mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd; mqd->free_mqd = free_mqd; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index 7d144f56f421..c9e1151b5a57 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -425,7 +425,6 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type, switch (type) { case KFD_MQD_TYPE_CP: - case KFD_MQD_TYPE_COMPUTE: mqd->allocate_mqd = allocate_mqd; mqd->init_mqd = init_mqd; mqd->free_mqd = free_mqd; @@ -494,7 +493,7 @@ struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type, mqd = mqd_manager_init_vi(type, dev); if (!mqd) return NULL; - if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE)) + if (type == KFD_MQD_TYPE_CP) mqd->update_mqd = update_mqd_tonga; return mqd; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 1049759dc6bb..514896bef99a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -508,8 +508,7 @@ struct queue { * Please read the kfd_mqd_manager.h description. */ enum KFD_MQD_TYPE { - KFD_MQD_TYPE_COMPUTE = 0, /* for no cp scheduling */ - KFD_MQD_TYPE_HIQ, /* for hiq */ + KFD_MQD_TYPE_HIQ = 0, /* for hiq */ KFD_MQD_TYPE_CP,/* for cp queues and diq */ KFD_MQD_TYPE_SDMA, /* for sdma queues */
[PATCH] drm/amdkfd: Rename kfd_kernel_queue_*.c to kfd_packet_manager_*.c
After the recent cleanup, the functionalities provided by the previous kfd_kernel_queue_*.c are actually all packet manager related. So rename them to reflect that. Change-Id: I6544ccb38da827c747544c0787aa949df20edbb0 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/Makefile | 4 ++-- .../amdkfd/{kfd_kernel_queue_v9.c => kfd_packet_manager_v9.c} | 0 .../amdkfd/{kfd_kernel_queue_vi.c => kfd_packet_manager_vi.c} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_v9.c => kfd_packet_manager_v9.c} (100%) rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_vi.c => kfd_packet_manager_vi.c} (100%) diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index f93a16372325..61474627a32c 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -38,9 +38,9 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v10.o \ $(AMDKFD_PATH)/kfd_kernel_queue.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \ $(AMDKFD_PATH)/kfd_packet_manager.o \ + $(AMDKFD_PATH)/kfd_packet_manager_vi.o \ + $(AMDKFD_PATH)/kfd_packet_manager_v9.o \ $(AMDKFD_PATH)/kfd_process_queue_manager.o \ $(AMDKFD_PATH)/kfd_device_queue_manager.o \ $(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c similarity index 100% rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c similarity index 100% rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Rename kfd_kernel_queue_*.c to kfd_packet_manager_*.c
After the recent cleanup, the functionalities provided by the previous kfd_kernel_queue_*.c are actually all packet manager related. So rename them to reflect that. Change-Id: I6544ccb38da827c747544c0787aa949df20edbb0 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/Makefile | 4 +-- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 4 +-- ...nel_queue_v9.c => kfd_packet_manager_ai.c} | 26 +-- ...nel_queue_vi.c => kfd_packet_manager_vi.c} | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 4 +-- 5 files changed, 20 insertions(+), 20 deletions(-) rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_v9.c => kfd_packet_manager_ai.c} (94%) rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_vi.c => kfd_packet_manager_vi.c} (99%) diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index f93a16372325..55bfecf04239 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -38,9 +38,9 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v10.o \ $(AMDKFD_PATH)/kfd_kernel_queue.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \ $(AMDKFD_PATH)/kfd_packet_manager.o \ + $(AMDKFD_PATH)/kfd_packet_manager_vi.o \ + $(AMDKFD_PATH)/kfd_packet_manager_ai.o \ $(AMDKFD_PATH)/kfd_process_queue_manager.o \ $(AMDKFD_PATH)/kfd_device_queue_manager.o \ $(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c index 6cabed06ef5d..cc945a2acd66 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c @@ -233,7 +233,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_POLARIS11: case CHIP_POLARIS12: case CHIP_VEGAM: - pm->pmf = _vi_pm_funcs; + pm->pmf = _pm_funcs_vi; break; case CHIP_VEGA10: case CHIP_VEGA12: @@ -244,7 +244,7 @@ int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm) case CHIP_NAVI10: case CHIP_NAVI12: case CHIP_NAVI14: - pm->pmf = _v9_pm_funcs; + pm->pmf = _pm_funcs_ai; break; default: WARN(1, "Unexpected ASIC family %u", diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c similarity index 94% rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c index 2de01009f1b6..713530cd9760 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c @@ -27,7 +27,7 @@ #include "kfd_pm4_opcodes.h" #include "gc/gc_10_1_0_sh_mask.h" -static int pm_map_process_v9(struct packet_manager *pm, +static int pm_map_process_ai(struct packet_manager *pm, uint32_t *buffer, struct qcm_process_device *qpd) { struct pm4_mes_map_process *packet; @@ -73,7 +73,7 @@ static int pm_map_process_v9(struct packet_manager *pm, return 0; } -static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, +static int pm_runlist_ai(struct packet_manager *pm, uint32_t *buffer, uint64_t ib, size_t ib_size_in_dwords, bool chain) { struct pm4_mes_runlist *packet; @@ -111,7 +111,7 @@ static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer, return 0; } -static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer, +static int pm_set_resources_ai(struct packet_manager *pm, uint32_t *buffer, struct scheduling_resources *res) { struct pm4_mes_set_resources *packet; @@ -139,7 +139,7 @@ static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer, return 0; } -static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, +static int pm_map_queues_ai(struct packet_manager *pm, uint32_t *buffer, struct queue *q, bool is_static) { struct pm4_mes_map_queues *packet; @@ -206,7 +206,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer, return 0; } -static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer, +static int pm_unmap_queues_ai(struct packet_manager *pm, uint32_t *buffer, enum kfd_queue_type type, enum kfd_unmap_queues_filter filter, uint32_t filter_param, bool reset, @@ -282,7 +282,7 @@ static int pm_unmap_queues_v9(struct
Re: [PATCH] drm/amdkfd: Rename kfd_kernel_queue_*.c to kfd_packet_manager_*.c
Oh, I did not realize the part inside of the file. I think v9->ai is better, because the packet format header uses ai. Also v9 will give people an impression of gfx9. Yong On 2019-11-13 5:19 p.m., Felix Kuehling wrote: On 2019-11-13 5:09 p.m., Yong Zhao wrote: After the recent cleanup, the functionalities provided by the previous kfd_kernel_queue_*.c are actually all packet manager related. So rename them to reflect that. Change-Id: I6544ccb38da827c747544c0787aa949df20edbb0 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/Makefile | 4 ++-- .../amdkfd/{kfd_kernel_queue_v9.c => kfd_packet_manager_ai.c} | 0 .../amdkfd/{kfd_kernel_queue_vi.c => kfd_packet_manager_vi.c} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_v9.c => kfd_packet_manager_ai.c} (100%) rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_vi.c => kfd_packet_manager_vi.c} (100%) diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index f93a16372325..55bfecf04239 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -38,9 +38,9 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v10.o \ $(AMDKFD_PATH)/kfd_kernel_queue.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \ $(AMDKFD_PATH)/kfd_packet_manager.o \ + $(AMDKFD_PATH)/kfd_packet_manager_vi.o \ + $(AMDKFD_PATH)/kfd_packet_manager_ai.o \ This naming convention is inconsistent with the rest of KFD. We use _v9, not _ai. Also the function s inside this file are named _v9. If we decide to change that naming convention, it should not be accidental and piece-meal. It should be deliberate and comprehensive. Regards, Felix $(AMDKFD_PATH)/kfd_process_queue_manager.o \ $(AMDKFD_PATH)/kfd_device_queue_manager.o \ $(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c similarity index 100% rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c similarity index 100% rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Rename kfd_kernel_queue_*.c to kfd_packet_manager_*.c
After the recent cleanup, the functionalities provided by the previous kfd_kernel_queue_*.c are actually all packet manager related. So rename them to reflect that. Change-Id: I6544ccb38da827c747544c0787aa949df20edbb0 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/Makefile | 4 ++-- .../amdkfd/{kfd_kernel_queue_v9.c => kfd_packet_manager_ai.c} | 0 .../amdkfd/{kfd_kernel_queue_vi.c => kfd_packet_manager_vi.c} | 0 3 files changed, 2 insertions(+), 2 deletions(-) rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_v9.c => kfd_packet_manager_ai.c} (100%) rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_vi.c => kfd_packet_manager_vi.c} (100%) diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index f93a16372325..55bfecf04239 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -38,9 +38,9 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v10.o \ $(AMDKFD_PATH)/kfd_kernel_queue.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \ $(AMDKFD_PATH)/kfd_packet_manager.o \ + $(AMDKFD_PATH)/kfd_packet_manager_vi.o \ + $(AMDKFD_PATH)/kfd_packet_manager_ai.o \ $(AMDKFD_PATH)/kfd_process_queue_manager.o \ $(AMDKFD_PATH)/kfd_device_queue_manager.o \ $(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c similarity index 100% rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c similarity index 100% rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Eliminate unnecessary kernel queue function pointers
Up to this point, those functions are all the same for all ASICs, so no need to call them by functions pointers. Removing the function pointers will greatly increase the code readablity. If there is ever need for those function pointers, we can add it back then. Change-Id: I9515fdece70110067cda66e2d24d6768b4846c2f Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 8 ++--- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 30 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 34 +-- .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 28 +++ 4 files changed, 41 insertions(+), 59 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index 1d33c4f25263..27bcc5b472f6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -72,11 +72,11 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, * The receive packet buff will be sitting on the Indirect Buffer * and in the PQ we put the IB packet + sync packet(s). */ - status = kq->ops.acquire_packet_buffer(kq, + status = kq_acquire_packet_buffer(kq, pq_packets_size_in_bytes / sizeof(uint32_t), _packet_buff); if (status) { - pr_err("acquire_packet_buffer failed\n"); + pr_err("kq_acquire_packet_buffer failed\n"); return status; } @@ -115,7 +115,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, if (status) { pr_err("Failed to allocate GART memory\n"); - kq->ops.rollback_packet(kq); + kq_rollback_packet(kq); return status; } @@ -151,7 +151,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev, rm_packet->data_lo = QUEUESTATE__ACTIVE; - kq->ops.submit_packet(kq); + kq_submit_packet(kq); /* Wait till CP writes sync code: */ status = amdkfd_fence_wait_timeout( diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 59ee9053498c..2d56dc534459 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -34,7 +34,10 @@ #define PM4_COUNT_ZERO (((1 << 15) - 1) << 16) -static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, +/* Initialize a kernel queue, including allocations of GART memory + * needed for the queue. + */ +static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev, enum kfd_queue_type type, unsigned int queue_size) { struct queue_properties prop; @@ -88,7 +91,7 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->pq_gpu_addr = kq->pq->gpu_addr; /* For CIK family asics, kq->eop_mem is not needed */ - if (dev->device_info->asic_family > CHIP_HAWAII) { + if (dev->device_info->asic_family > CHIP_MULLINS) { retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, >eop_mem); if (retval != 0) goto err_eop_allocate_vidmem; @@ -191,7 +194,8 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, } -static void uninitialize(struct kernel_queue *kq) +/* Uninitialize a kernel queue and free all its memory usages. */ +static void kq_uninitialize(struct kernel_queue *kq) { if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ) kq->mqd_mgr->destroy_mqd(kq->mqd_mgr, @@ -220,7 +224,7 @@ static void uninitialize(struct kernel_queue *kq) uninit_queue(kq->queue); } -static int acquire_packet_buffer(struct kernel_queue *kq, +int kq_acquire_packet_buffer(struct kernel_queue *kq, size_t packet_size_in_dwords, unsigned int **buffer_ptr) { size_t available_size; @@ -281,7 +285,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq, return -ENOMEM; } -static void submit_packet(struct kernel_queue *kq) +void kq_submit_packet(struct kernel_queue *kq) { #ifdef DEBUG int i; @@ -304,7 +308,7 @@ static void submit_packet(struct kernel_queue *kq) } } -static void rollback_packet(struct kernel_queue *kq) +void kq_rollback_packet(struct kernel_queue *kq) { if (kq->dev->device_info->doorbell_size == 8) { kq->pending_wptr64 = *kq->wptr64_kernel; @@ -324,13 +328,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, if (!kq) return NULL; - kq->ops.initialize = initialize; - kq->ops.uninitialize = uninitialize; - kq->ops.acquire_packet_buffer = acquire_packet_buffer; - kq->ops.submit_packet = submit_packet; - kq->ops.rollb
Re: [PATCH 2/2] drm/amdkfd: Eliminate ops_asic_specific in kernel queue
I will change it to CHIP_MULLINS. Yes , I also spotted the kq->ops cleanup, will send it out shortly. Regards, Yong On 2019-11-13 2:31 p.m., Felix Kuehling wrote: See one comment inline. With that fixed, the series is Reviewed-by: Felix Kuehling I could think of more follow-up cleanup while you're at it: 1. Can you see any reason why the kq->ops need to be function pointers. Looks to me like they are the same for all kernel queues, so those functions could be called without the pointer indirection. 2. The only think left in the ASIC-specific kfd_kernel_queue_*.c files is the PM4 packet writer functions that are called by the kfd_packet_manager. It may make sense to rename them to reflect that. Maybe kfd_packet_manager_*.c Regards, Felix On 2019-11-12 5:18 p.m., Yong Zhao wrote: The ops_asic_specific function pointers are actually quite generic after using a simple if condition. Eliminate it by code refactoring. Change-Id: Icb891289cca31acdbe2d2eea76a426f1738b9c08 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 63 --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 4 -- .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 36 --- .../gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 48 -- 4 files changed, 26 insertions(+), 125 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index a750b1d110eb..59ee9053498c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -87,9 +87,17 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->pq_kernel_addr = kq->pq->cpu_ptr; kq->pq_gpu_addr = kq->pq->gpu_addr; - retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size); - if (!retval) - goto err_eop_allocate_vidmem; + /* For CIK family asics, kq->eop_mem is not needed */ + if (dev->device_info->asic_family > CHIP_HAWAII) { This is not the correct condition to distinguish GFXv7 (CIK) vs v8 (VI). CHIP_MULLINS comes after Hawaii, but it is also GFXv7 (CIK), even though KFD current doesn't support it. + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, >eop_mem); + if (retval != 0) + goto err_eop_allocate_vidmem; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; + kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; + + memset(kq->eop_kernel_addr, 0, PAGE_SIZE); + } retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), >rptr_mem); @@ -200,7 +208,12 @@ static void uninitialize(struct kernel_queue *kq) kfd_gtt_sa_free(kq->dev, kq->rptr_mem); kfd_gtt_sa_free(kq->dev, kq->wptr_mem); - kq->ops_asic_specific.uninitialize(kq); + + /* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free() + * is able to handle NULL properly. + */ + kfd_gtt_sa_free(kq->dev, kq->eop_mem); + kfd_gtt_sa_free(kq->dev, kq->pq); kfd_release_kernel_doorbell(kq->dev, kq->queue->properties.doorbell_ptr); @@ -280,8 +293,15 @@ static void submit_packet(struct kernel_queue *kq) } pr_debug("\n"); #endif - - kq->ops_asic_specific.submit_packet(kq); + if (kq->dev->device_info->doorbell_size == 8) { + *kq->wptr64_kernel = kq->pending_wptr64; + write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, + kq->pending_wptr64); + } else { + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); + } } static void rollback_packet(struct kernel_queue *kq) @@ -310,42 +330,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, kq->ops.submit_packet = submit_packet; kq->ops.rollback_packet = rollback_packet; - switch (dev->device_info->asic_family) { - case CHIP_KAVERI: - case CHIP_HAWAII: - case CHIP_CARRIZO: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - kernel_queue_init_vi(>ops_asic_specific); - break; - - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - kernel_queue_init_v9(>ops_asic_specific); - break; - default: - WARN(1, "Unexpected ASIC family %u", - dev->device_info->asic_family); - goto out_free; - } - if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) return kq; pr_err("Failed to init kernel que
[PATCH 1/2] drm/amdkfd: Merge CIK kernel queue functions into VI
The only difference that CIK kernel queue functions are different from VI is avoid allocating eop_mem. We can achieve that by using a if condition. Change-Id: Iea9cbc82f603ff008a906c5ee32325ddcd02d963 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/Makefile | 1 - drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 7 +-- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 1 - .../gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 53 --- .../gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 7 +++ 5 files changed, 9 insertions(+), 60 deletions(-) delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index 017a8b7156da..f93a16372325 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -38,7 +38,6 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \ $(AMDKFD_PATH)/kfd_mqd_manager_v10.o \ $(AMDKFD_PATH)/kfd_kernel_queue.o \ - $(AMDKFD_PATH)/kfd_kernel_queue_cik.o \ $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \ $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \ $(AMDKFD_PATH)/kfd_packet_manager.o \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index 0d966408ea87..a750b1d110eb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -311,6 +311,8 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, kq->ops.rollback_packet = rollback_packet; switch (dev->device_info->asic_family) { + case CHIP_KAVERI: + case CHIP_HAWAII: case CHIP_CARRIZO: case CHIP_TONGA: case CHIP_FIJI: @@ -321,11 +323,6 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, kernel_queue_init_vi(>ops_asic_specific); break; - case CHIP_KAVERI: - case CHIP_HAWAII: - kernel_queue_init_cik(>ops_asic_specific); - break; - case CHIP_VEGA10: case CHIP_VEGA12: case CHIP_VEGA20: diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index a7116a939029..a9a35897d8b7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -99,7 +99,6 @@ struct kernel_queue { struct list_headlist; }; -void kernel_queue_init_cik(struct kernel_queue_ops *ops); void kernel_queue_init_vi(struct kernel_queue_ops *ops); void kernel_queue_init_v9(struct kernel_queue_ops *ops); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c deleted file mode 100644 index 19e54acb4125.. --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright 2014 Advanced Micro Devices, Inc. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - */ - -#include "kfd_kernel_queue.h" - -static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, - enum kfd_queue_type type, unsigned int queue_size); -static void uninitialize_cik(struct kernel_queue *kq); -static void submit_packet_cik(struct kernel_queue *kq); - -void kernel_queue_init_cik(struct kernel_queue_ops *ops) -{ - ops->initialize = initialize_cik; - ops->uninitialize = uninitialize_cik; - ops->submit_packet = submit_packet_cik; -} - -static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev, - enum kfd_queue_type type, unsigned int queue_size) -{ - return true; -} - -static void uninitialize_cik(struct kernel_queue *kq) -{ -} - -static void sub
[PATCH 2/2] drm/amdkfd: Eliminate ops_asic_specific in kernel queue
The ops_asic_specific function pointers are actually quite generic after using a simple if condition. Eliminate it by code refactoring. Change-Id: Icb891289cca31acdbe2d2eea76a426f1738b9c08 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 63 --- drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 4 -- .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c | 36 --- .../gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c | 48 -- 4 files changed, 26 insertions(+), 125 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c index a750b1d110eb..59ee9053498c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c @@ -87,9 +87,17 @@ static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev, kq->pq_kernel_addr = kq->pq->cpu_ptr; kq->pq_gpu_addr = kq->pq->gpu_addr; - retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size); - if (!retval) - goto err_eop_allocate_vidmem; + /* For CIK family asics, kq->eop_mem is not needed */ + if (dev->device_info->asic_family > CHIP_HAWAII) { + retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, >eop_mem); + if (retval != 0) + goto err_eop_allocate_vidmem; + + kq->eop_gpu_addr = kq->eop_mem->gpu_addr; + kq->eop_kernel_addr = kq->eop_mem->cpu_ptr; + + memset(kq->eop_kernel_addr, 0, PAGE_SIZE); + } retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel), >rptr_mem); @@ -200,7 +208,12 @@ static void uninitialize(struct kernel_queue *kq) kfd_gtt_sa_free(kq->dev, kq->rptr_mem); kfd_gtt_sa_free(kq->dev, kq->wptr_mem); - kq->ops_asic_specific.uninitialize(kq); + + /* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free() +* is able to handle NULL properly. +*/ + kfd_gtt_sa_free(kq->dev, kq->eop_mem); + kfd_gtt_sa_free(kq->dev, kq->pq); kfd_release_kernel_doorbell(kq->dev, kq->queue->properties.doorbell_ptr); @@ -280,8 +293,15 @@ static void submit_packet(struct kernel_queue *kq) } pr_debug("\n"); #endif - - kq->ops_asic_specific.submit_packet(kq); + if (kq->dev->device_info->doorbell_size == 8) { + *kq->wptr64_kernel = kq->pending_wptr64; + write_kernel_doorbell64(kq->queue->properties.doorbell_ptr, + kq->pending_wptr64); + } else { + *kq->wptr_kernel = kq->pending_wptr; + write_kernel_doorbell(kq->queue->properties.doorbell_ptr, + kq->pending_wptr); + } } static void rollback_packet(struct kernel_queue *kq) @@ -310,42 +330,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, kq->ops.submit_packet = submit_packet; kq->ops.rollback_packet = rollback_packet; - switch (dev->device_info->asic_family) { - case CHIP_KAVERI: - case CHIP_HAWAII: - case CHIP_CARRIZO: - case CHIP_TONGA: - case CHIP_FIJI: - case CHIP_POLARIS10: - case CHIP_POLARIS11: - case CHIP_POLARIS12: - case CHIP_VEGAM: - kernel_queue_init_vi(>ops_asic_specific); - break; - - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - case CHIP_RAVEN: - case CHIP_RENOIR: - case CHIP_ARCTURUS: - case CHIP_NAVI10: - case CHIP_NAVI12: - case CHIP_NAVI14: - kernel_queue_init_v9(>ops_asic_specific); - break; - default: - WARN(1, "Unexpected ASIC family %u", -dev->device_info->asic_family); - goto out_free; - } - if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE)) return kq; pr_err("Failed to init kernel queue\n"); -out_free: kfree(kq); return NULL; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h index a9a35897d8b7..475e9499c0af 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h @@ -66,7 +66,6 @@ struct kernel_queue_ops { struct kernel_queue { struct kernel_queue_ops ops; - struct kernel_queue_ops ops_asic_specific; /* data */ struct kfd_dev *dev; @@ -99,7 +98,4 @@ struct kernel_queue { struct list_headlist; }; -void kernel_queue_init_vi(struct kernel_q
Re: [PATCH 2/2] drm/amdkfd: Stop using GFP_NOIO explicitly for GFX10
Hi Felix, See one thing inline I am not too sure. Yong On 2019-11-12 4:30 p.m., Felix Kuehling wrote: On 2019-11-12 4:26 p.m., Yong Zhao wrote: Adapt the change from 1cd106ecfc1f04 The change is: drm/amdkfd: Stop using GFP_NOIO explicitly This is no longer needed with the memalloc_nofs_save/restore in dqm_lock/unlock Change-Id: I42450b2c149d2b1842be99a8f355c829a0079e7c Signed-off-by: Yong Zhao The series is Reviewed-by: Felix Kuehling --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 46ddb33b624a..579c5ffcfa79 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -393,7 +393,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index be27ff01cdb8..22a819c888d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -92,7 +92,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, * instead of sub-allocation function. */ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { - mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); [yz] This should be kept probably. With the latest code, allocate_mqd() is called outside of the dqm. So now the situation is different from the original one. if (!mqd_mem_obj) return NULL; retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdkfd: Use QUEUE_IS_ACTIVE macro in mqd v10
This is done for other GFX in commit bb2d2128a54c4. Port it to GFX10. Change-Id: I9e04872be3af0e90f5f6930226896b1ea545f3d9 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 11 ++- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 954dc8ac4ff1..46ddb33b624a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -213,10 +213,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, update_cu_mask(mm, mqd, q); set_priority(m, q); - q->is_active = (q->queue_size > 0 && - q->queue_address != 0 && - q->queue_percent > 0 && - !q->is_evicted); + q->is_active = QUEUE_IS_ACTIVE(*q); } static int destroy_mqd(struct mqd_manager *mm, void *mqd, @@ -348,11 +345,7 @@ static void update_mqd_sdma(struct mqd_manager *mm, void *mqd, m->sdma_queue_id = q->sdma_queue_id; m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT; - - q->is_active = (q->queue_size > 0 && - q->queue_address != 0 && - q->queue_percent > 0 && - !q->is_evicted); + q->is_active = QUEUE_IS_ACTIVE(*q); } /* -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdkfd: Stop using GFP_NOIO explicitly for GFX10
Adapt the change from 1cd106ecfc1f04 The change is: drm/amdkfd: Stop using GFP_NOIO explicitly This is no longer needed with the memalloc_nofs_save/restore in dqm_lock/unlock Change-Id: I42450b2c149d2b1842be99a8f355c829a0079e7c Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 46ddb33b624a..579c5ffcfa79 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -393,7 +393,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE type, if (WARN_ON(type >= KFD_MQD_TYPE_MAX)) return NULL; - mqd = kzalloc(sizeof(*mqd), GFP_NOIO); + mqd = kzalloc(sizeof(*mqd), GFP_KERNEL); if (!mqd) return NULL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index be27ff01cdb8..22a819c888d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -92,7 +92,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, * instead of sub-allocation function. */ if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) { - mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO); + mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL); if (!mqd_mem_obj) return NULL; retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd, -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 3/3] drm/amdkfd: Fix a bug when calculating save_area_used_size
workgroup context data writes from m->cp_hqd_cntl_stack_size, so we should deduct it when calculating the used size. Change-Id: I5252e25662c3b8221f451c39115bf084d1911eae Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c index d3380c5bdbde..3a2ee1f01aae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c @@ -302,7 +302,8 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - m->cp_hqd_cntl_stack_offset; - *save_area_used_size = m->cp_hqd_wg_state_offset; + *save_area_used_size = m->cp_hqd_wg_state_offset - + m->cp_hqd_cntl_stack_size;; if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size)) return -EFAULT; -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/3] drm/amdkfd: Implement queue priority controls for gfx10
Ported from gfx9. Change-Id: I388dc7c609ed724a6d600840f8e7317d9c2c877d Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 10 +++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 4a236b2c2354..4884cd6c65ce 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -66,6 +66,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd, m->compute_static_thread_mgmt_se3); } +static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q) +{ + m->cp_hqd_pipe_priority = pipe_priority_map[q->priority]; + m->cp_hqd_queue_priority = q->priority; +} + static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd, struct queue_properties *q) { @@ -109,9 +115,6 @@ static void init_mqd(struct mqd_manager *mm, void **mqd, 1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT | 10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT; - m->cp_hqd_pipe_priority = 1; - m->cp_hqd_queue_priority = 15; - if (q->format == KFD_QUEUE_FORMAT_AQL) { m->cp_hqd_aql_control = 1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT; @@ -208,6 +211,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd, m->cp_hqd_ctx_save_control = 0; update_cu_mask(mm, mqd, q); + set_priority(m, q); q->is_active = (q->queue_size > 0 && q->queue_address != 0 && -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/3] drm/amdkfd: Update get_wave_state() for GFX10
Given control stack is now in the userspace context save restore area on GFX10, the same as GFX8, it is not needed to copy it back to userspace. Change-Id: I063ddc3026eefa57713ec47b466a90f9bf9d49b8 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c index 4884cd6c65ce..954dc8ac4ff1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c @@ -251,18 +251,22 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd, { struct v10_compute_mqd *m; - /* Control stack is located one page after MQD. */ - void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE); - m = get_mqd(mqd); + /* Control stack is written backwards, while workgroup context data +* is written forwards. Both starts from m->cp_hqd_cntl_stack_size. +* Current position is at m->cp_hqd_cntl_stack_offset and +* m->cp_hqd_wg_state_offset, respectively. +*/ *ctl_stack_used_size = m->cp_hqd_cntl_stack_size - m->cp_hqd_cntl_stack_offset; *save_area_used_size = m->cp_hqd_wg_state_offset - m->cp_hqd_cntl_stack_size; - if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size)) - return -EFAULT; + /* Control stack is not copied to user mode for GFXv10 because +* it's part of the context save area that is already +* accessible to user mode +*/ return 0; } -- 2.17.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 2/2] drm/amdkfd: Avoid using doorbell_off as offset in process doorbell pages
The NULL pointer is not an issue, because for DIQ, the if (q) condition, which guards the section but is now shown, will never be satisfied. Anyway, I still added the NULL pointer check. With that, I have pushed the change. Yong On 2019-11-11 3:51 p.m., Felix Kuehling wrote: On 2019-11-11 15:43, Felix Kuehling wrote: On 2019-11-01 16:10, Zhao, Yong wrote: dorbell_off in the queue properties is mainly used for the doorbell dw offset in pci bar. We should not set it to the doorbell byte offset in process doorbell pages. This makes the code much easier to read. I kind of agree. I think what's confusing is that the queue_properties structure is used for two different purposes. 1. For storing queue properties provided by user mode through KFD ioctls 2. A subset of struct queue passed to mqd_manager and elsewhere (that's why some driver state is creeping into it) Maybe a follow-up could cleanly separate the queue properties from the queue driver state. That would probably change some internal interfaces to use struct queue instead of queue_properties. Anyway, this patch is Reviewed-by: Felix Kuehling I pointed out a missing NULL pointer check inline near the end of the patch. I should have mentioned it here. Please fix that before you submit. Thanks, Felix Change-Id: I553045ff9fcb3676900c92d10426f2ceb3660005 Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 ++-- drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h| 3 ++- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 8 ++-- 4 files changed, 15 insertions(+), 10 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index d9e36dbf13d5..b91993753b82 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -258,6 +258,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, unsigned int queue_id; struct kfd_process_device *pdd; struct queue_properties q_properties; + uint32_t doorbell_offset_in_process = 0; memset(_properties, 0, sizeof(struct queue_properties)); @@ -286,7 +287,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, p->pasid, dev->id); - err = pqm_create_queue(>pqm, dev, filep, _properties, _id); + err = pqm_create_queue(>pqm, dev, filep, _properties, _id, + _offset_in_process); if (err != 0) goto err_create_queue; @@ -298,12 +300,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id); args->doorbell_offset <<= PAGE_SHIFT; if (KFD_IS_SOC15(dev->device_info->asic_family)) - /* On SOC15 ASICs, doorbell allocation must be -* per-device, and independent from the per-process -* queue_id. Return the doorbell offset within the -* doorbell aperture to user mode. + /* On SOC15 ASICs, include the doorbell offset within the +* process doorbell frame, which could be 1 page or 2 pages. */ - args->doorbell_offset |= q_properties.doorbell_off; + args->doorbell_offset |= doorbell_offset_in_process; mutex_unlock(>mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c index d59f2cd056c6..1d33c4f25263 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c @@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev) properties.type = KFD_QUEUE_TYPE_DIQ; status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL, - , ); + , , NULL); if (status) { pr_err("Failed to create DIQ\n"); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 7c561c98f2e2..66bae8f2dad1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -907,7 +907,8 @@ int pqm_create_queue(struct process_queue_manager *pqm, struct kfd_dev *dev, struct file *f, struct queue_properties *properties, - unsigned int *qid); + unsigned int *qid, + uint32_t *p_doorbell_offset_in_process); int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, struct queue_properties *p); diff --gi
Re: [PATCH 1/2] drm/amdkfd: Use better name to indicate the offset is in dwords
ping On 2019-11-01 4:10 p.m., Zhao, Yong wrote: Change-Id: I75da23bba90231762cf58da3170f5bb77ece45ed Signed-off-by: Yong Zhao --- .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 14 +++--- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 8 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 984c2f2b24b6..4503fb26fe5b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -170,7 +170,7 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q) } q->properties.doorbell_off = - kfd_doorbell_id_to_offset(dev, q->process, + kfd_get_doorbell_dw_offset_from_bar(dev, q->process, q->doorbell_id); return 0; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index ebe79bf00145..f904355c44a1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -91,7 +91,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd) kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + doorbell_start_offset; - kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32); + kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32); kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, kfd_doorbell_process_slice(kfd)); @@ -103,8 +103,8 @@ int kfd_doorbell_init(struct kfd_dev *kfd) pr_debug("doorbell base == 0x%08lX\n", (uintptr_t)kfd->doorbell_base); - pr_debug("doorbell_id_offset == 0x%08lX\n", - kfd->doorbell_id_offset); + pr_debug("doorbell_base_dw_offset == 0x%08lX\n", + kfd->doorbell_base_dw_offset); pr_debug("doorbell_process_limit == 0x%08lX\n", doorbell_process_limit); @@ -185,7 +185,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, * Calculating the kernel doorbell offset using the first * doorbell page. */ - *doorbell_off = kfd->doorbell_id_offset + inx; + *doorbell_off = kfd->doorbell_base_dw_offset + inx; pr_debug("Get kernel queue doorbell\n" " doorbell offset == 0x%08X\n" @@ -225,17 +225,17 @@ void write_kernel_doorbell64(void __iomem *db, u64 value) } } -unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd, +unsigned int kfd_get_doorbell_dw_offset_from_bar(struct kfd_dev *kfd, struct kfd_process *process, unsigned int doorbell_id) { /* -* doorbell_id_offset accounts for doorbells taken by KGD. +* doorbell_base_dw_offset accounts for doorbells taken by KGD. * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to * the process's doorbells. The offset returned is in dword * units regardless of the ASIC-dependent doorbell size. */ - return kfd->doorbell_id_offset + + return kfd->doorbell_base_dw_offset + process->doorbell_index * kfd_doorbell_process_slice(kfd) / sizeof(u32) + doorbell_id * kfd->device_info->doorbell_size / sizeof(u32); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 62db4d20ed32..7c561c98f2e2 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -238,9 +238,9 @@ struct kfd_dev { * KFD. It is aligned for mapping * into user mode */ - size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell -* to HW doorbell, GFX reserved some -* at the start) + size_t doorbell_base_dw_offset; /* Doorbell dword offset (from KFD +* doorbell to PCI doorbell bar, +* GFX reserved some at the start) */ u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells * page used by kernel queue @@ -821,7 +821,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); u32 read_kernel_doorbell(u32 __iomem *db); void write_
Re: [PATCH] drm/amdkfd: Rename create_cp_queue() to init_user_queue()
ping On 2019-11-01 4:12 p.m., Zhao, Yong wrote: create_cp_queue() could also work with SDMA queues, so we should rename it. Change-Id: I76cbaed8fa95dd9062d786cbc1dd037ff041da9d Signed-off-by: Yong Zhao --- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 48185d2957e9..ebb2f69b438c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -162,7 +162,7 @@ void pqm_uninit(struct process_queue_manager *pqm) pqm->queue_slot_bitmap = NULL; } -static int create_cp_queue(struct process_queue_manager *pqm, +static int init_user_queue(struct process_queue_manager *pqm, struct kfd_dev *dev, struct queue **q, struct queue_properties *q_properties, struct file *f, unsigned int qid) @@ -251,7 +251,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; } - retval = create_cp_queue(pqm, dev, , properties, f, *qid); + retval = init_user_queue(pqm, dev, , properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; @@ -272,7 +272,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; } - retval = create_cp_queue(pqm, dev, , properties, f, *qid); + retval = init_user_queue(pqm, dev, , properties, f, *qid); if (retval != 0) goto err_create_queue; pqn->q = q; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: Update MMHUB power gating register settings
Okay, pushed. Thanks. On 2018-01-11 01:20 PM, Eric Huang wrote: The fix makes sense to me. Acked-by: Eric Huang <jinhuieric.hu...@amd.com> On 2018-01-11 01:00 PM, Felix Kuehling wrote: [+Eric] Acked-by: Felix Kuehling <felix.kuehl...@amd.com> I'm not familiar with the details of what this does. I'm hoping Eric can also review this with more power-management experience. Regards, Felix On 2018-01-10 03:10 PM, Yong Zhao wrote: The new register settings are needed to fix a tlb invalidation issue when MMHUB power gating is turned on for Raven. Change-Id: I846befbb2fcbddf40ca4ecbdc06da1cd442e3554 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 61 ++--- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index ffd5b7e..bdf94c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -272,21 +272,21 @@ static const struct pctl_data pctl0_data[] = { {0x11, 0x6a684}, {0x19, 0xea68e}, {0x29, 0xa69e}, - {0x2b, 0x34a6c0}, - {0x61, 0x83a707}, - {0xe6, 0x8a7a4}, - {0xf0, 0x1a7b8}, - {0xf3, 0xfa7cc}, - {0x104, 0x17a7dd}, - {0x11d, 0xa7dc}, - {0x11f, 0x12a7f5}, - {0x133, 0xa808}, - {0x135, 0x12a810}, - {0x149, 0x7a82c} + {0x2b, 0x0010a6c0}, + {0x3d, 0x83a707}, + {0xc2, 0x8a7a4}, + {0xcc, 0x1a7b8}, + {0xcf, 0xfa7cc}, + {0xe0, 0x17a7dd}, + {0xf9, 0xa7dc}, + {0xfb, 0x12a7f5}, + {0x10f, 0xa808}, + {0x111, 0x12a810}, + {0x125, 0x7a82c} }; #define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data)) -#define PCTL0_RENG_EXEC_END_PTR 0x151 +#define PCTL0_RENG_EXEC_END_PTR 0x12d #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 #define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833 @@ -385,10 +385,9 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return; + /** pctl0 **/ pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC); pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE); - pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC); - pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); /* Light sleep must be disabled before writing to pctl0 registers */ pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; @@ -402,12 +401,13 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) pctl0_data[i].data); } - /* Set the reng execute end ptr for pctl0 */ - pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, - PCTL0_RENG_EXECUTE, - RENG_EXECUTE_END_PTR, - PCTL0_RENG_EXEC_END_PTR); - WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); + /* Re-enable light sleep */ + pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; + WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); + + /** pctl1 **/ + pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC); + pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); /* Light sleep must be disabled before writing to pctl1 registers */ pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; @@ -421,20 +421,25 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) pctl1_data[i].data); } + /* Re-enable light sleep */ + pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; + WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); + + mmhub_v1_0_power_gating_write_save_ranges(adev); + + /* Set the reng execute end ptr for pctl0 */ + pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, + PCTL0_RENG_EXECUTE, + RENG_EXECUTE_END_PTR, + PCTL0_RENG_EXEC_END_PTR); + WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); + /* Set the reng execute end ptr for pctl1 */ pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, PCTL1_RENG_EXECUTE, RENG_EXECUTE_END_PTR, PCTL1_RENG_EXEC_END_PTR); WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); - - mmhub_v1_0_power_gating_write_save_ranges(adev); - - /* Re-enable light sleep */ - pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; - WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); - pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; - WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); } void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: Update MMHUB power gating register settings
The new register settings are needed to fix a tlb invalidation issue when MMHUB power gating is turned on for Raven. Change-Id: I846befbb2fcbddf40ca4ecbdc06da1cd442e3554 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 61 ++--- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index ffd5b7e..bdf94c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -272,21 +272,21 @@ static const struct pctl_data pctl0_data[] = { {0x11, 0x6a684}, {0x19, 0xea68e}, {0x29, 0xa69e}, - {0x2b, 0x34a6c0}, - {0x61, 0x83a707}, - {0xe6, 0x8a7a4}, - {0xf0, 0x1a7b8}, - {0xf3, 0xfa7cc}, - {0x104, 0x17a7dd}, - {0x11d, 0xa7dc}, - {0x11f, 0x12a7f5}, - {0x133, 0xa808}, - {0x135, 0x12a810}, - {0x149, 0x7a82c} + {0x2b, 0x0010a6c0}, + {0x3d, 0x83a707}, + {0xc2, 0x8a7a4}, + {0xcc, 0x1a7b8}, + {0xcf, 0xfa7cc}, + {0xe0, 0x17a7dd}, + {0xf9, 0xa7dc}, + {0xfb, 0x12a7f5}, + {0x10f, 0xa808}, + {0x111, 0x12a810}, + {0x125, 0x7a82c} }; #define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data)) -#define PCTL0_RENG_EXEC_END_PTR 0x151 +#define PCTL0_RENG_EXEC_END_PTR 0x12d #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 #define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833 @@ -385,10 +385,9 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return; + /** pctl0 **/ pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC); pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE); - pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC); - pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); /* Light sleep must be disabled before writing to pctl0 registers */ pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; @@ -402,12 +401,13 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) pctl0_data[i].data); } - /* Set the reng execute end ptr for pctl0 */ - pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, - PCTL0_RENG_EXECUTE, - RENG_EXECUTE_END_PTR, - PCTL0_RENG_EXEC_END_PTR); - WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); + /* Re-enable light sleep */ + pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; + WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); + + /** pctl1 **/ + pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC); + pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); /* Light sleep must be disabled before writing to pctl1 registers */ pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; @@ -421,20 +421,25 @@ void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) pctl1_data[i].data); } + /* Re-enable light sleep */ + pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; + WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); + + mmhub_v1_0_power_gating_write_save_ranges(adev); + + /* Set the reng execute end ptr for pctl0 */ + pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, + PCTL0_RENG_EXECUTE, + RENG_EXECUTE_END_PTR, + PCTL0_RENG_EXEC_END_PTR); + WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); + /* Set the reng execute end ptr for pctl1 */ pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, PCTL1_RENG_EXECUTE, RENG_EXECUTE_END_PTR, PCTL1_RENG_EXEC_END_PTR); WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); - - mmhub_v1_0_power_gating_write_save_ranges(adev); - - /* Re-enable light sleep */ - pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; - WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); - pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; - WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); } void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdkfd: avoid calling execute_queues_cpsch() when destroying an unactive queue
Signed-off-by: Yong Zhao <yong.z...@amd.com> Reviewed-by: Oak Zeng <oak.z...@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index acfb121..b21285a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1013,13 +1013,13 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, list_del(>list); qpd->queue_count--; - if (q->properties.is_active) + if (q->properties.is_active) { dqm->queue_count--; - - retval = execute_queues_cpsch(dqm, + retval = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); - if (retval == -ETIME) - qpd->reset_wavefronts = true; + if (retval == -ETIME) + qpd->reset_wavefronts = true; + } mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdkfd: Fix return value 0 when execute_queues_cpsch actually fails
Signed-off-by: Yong Zhao <yong.z...@amd.com> Reviewed-by: Oak Zeng <oak.z...@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index d0693fd..acfb121 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1033,7 +1033,7 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm, mutex_unlock(>lock); - return 0; + return retval; failed: failed_try_destroy_debugged_queue: -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: Fix a bug that vm size is wrong on Raven
Change-Id: Id522c1cbadb8c069720f4e64a31cff42cd014733 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 709587d..93500e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2534,7 +2534,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, uint64_t tmp; /* adjust vm size first */ - if (amdgpu_vm_size != -1) { + if (amdgpu_vm_size != -1 && adev->asic_type != CHIP_RAVEN) { unsigned max_size = 1 << (max_bits - 30); vm_size = amdgpu_vm_size; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: Fix a bug that vm size is wrong on Raven
Hi Christian, I don't know much about the background. But according to my experiments, as long as we change the vm size to 64G, ATC memory access on Raven will fall apart. How should deal with that or can you come up with a fix? Regards, Yong On 2017-12-14 03:47 AM, Christian König wrote: NAK, that really circumvents the intention of the patch to adjust the number of levels based on the vm_size. Christian. Am 14.12.2017 um 03:25 schrieb Yong Zhao: Change-Id: Id522c1cbadb8c069720f4e64a31cff42cd014733 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 709587d..3b9eb1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2534,7 +2534,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, uint64_t tmp; /* adjust vm size first */ - if (amdgpu_vm_size != -1) { + if (amdgpu_vm_size != -1 && max_level == 1) { unsigned max_size = 1 << (max_bits - 30); vm_size = amdgpu_vm_size; ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: Fix a bug that vm size is wrong on Raven
Change-Id: Id522c1cbadb8c069720f4e64a31cff42cd014733 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 709587d..3b9eb1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2534,7 +2534,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size, uint64_t tmp; /* adjust vm size first */ - if (amdgpu_vm_size != -1) { + if (amdgpu_vm_size != -1 && max_level == 1) { unsigned max_size = 1 << (max_bits - 30); vm_size = amdgpu_vm_size; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Delete a useless parameter from create_queue function pointer
Signed-off-by: Yong Zhao <yong.z...@amd.com> Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 13 +++-- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 6 ++ 3 files changed, 6 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 8447810..81ec7bb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -149,8 +149,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, - int *allocated_vmid) + struct qcm_process_device *qpd) { int retval; @@ -170,7 +169,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (retval) goto out_unlock; } - *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; q->properties.tba_addr = qpd->tba_addr; @@ -184,10 +182,8 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, retval = -EINVAL; if (retval) { - if (list_empty(>queues_list)) { + if (list_empty(>queues_list)) deallocate_vmid(dqm, qpd, q); - *allocated_vmid = 0; - } goto out_unlock; } @@ -812,16 +808,13 @@ static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, } static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, int *allocate_vmid) + struct qcm_process_device *qpd) { int retval; struct mqd_manager *mqd; retval = 0; - if (allocate_vmid) - *allocate_vmid = 0; - mutex_lock(>lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index 8752edf..c61b693 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -84,8 +84,7 @@ struct device_process_node { struct device_queue_manager_ops { int (*create_queue)(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, - int *allocate_vmid); + struct qcm_process_device *qpd); int (*destroy_queue)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index eeb7726..fbfa274 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -201,8 +201,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd, - >properties.vmid); + retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd); pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; @@ -222,8 +221,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd, - >properties.vmid); + retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd); pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Delete a useless parameter from create_queue function pointer
Change-Id: Ia5c74ad567c30e206ed804b204fdf8a0f8a75a19 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 14 -- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +-- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 53a66e8..1df1123 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -144,8 +144,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, - int *allocated_vmid) + struct qcm_process_device *qpd) { int retval; @@ -165,7 +164,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (retval) goto out_unlock; } - *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) @@ -176,10 +174,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, retval = -EINVAL; if (retval) { - if (list_empty(>queues_list)) { + if (list_empty(>queues_list)) deallocate_vmid(dqm, qpd, q); - *allocated_vmid = 0; - } + goto out_unlock; } @@ -788,16 +785,13 @@ static void select_sdma_engine_id(struct queue *q) } static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, int *allocate_vmid) + struct qcm_process_device *qpd) { int retval; struct mqd_manager *mqd; retval = 0; - if (allocate_vmid) - *allocate_vmid = 0; - mutex_lock(>lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index faf820a..449407a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -84,8 +84,7 @@ struct device_process_node { struct device_queue_manager_ops { int (*create_queue)(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, - int *allocate_vmid); + struct qcm_process_device *qpd); int (*destroy_queue)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 03bec76..1e7bcae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -199,8 +199,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd, - >properties.vmid); + retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd); pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdkfd: Delete a useless parameter from create_queue function pointer
Change-Id: Ia5c74ad567c30e206ed804b204fdf8a0f8a75a19 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 14 -- drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 3 +-- drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +-- 3 files changed, 6 insertions(+), 14 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index 53a66e8..1df1123 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -144,8 +144,7 @@ static void deallocate_vmid(struct device_queue_manager *dqm, static int create_queue_nocpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, - int *allocated_vmid) + struct qcm_process_device *qpd) { int retval; @@ -165,7 +164,6 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, if (retval) goto out_unlock; } - *allocated_vmid = qpd->vmid; q->properties.vmid = qpd->vmid; if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) @@ -176,10 +174,9 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm, retval = -EINVAL; if (retval) { - if (list_empty(>queues_list)) { + if (list_empty(>queues_list)) deallocate_vmid(dqm, qpd, q); - *allocated_vmid = 0; - } + goto out_unlock; } @@ -788,16 +785,13 @@ static void select_sdma_engine_id(struct queue *q) } static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, int *allocate_vmid) + struct qcm_process_device *qpd) { int retval; struct mqd_manager *mqd; retval = 0; - if (allocate_vmid) - *allocate_vmid = 0; - mutex_lock(>lock); if (dqm->total_queue_count >= max_num_of_queues_per_device) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h index faf820a..449407a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h @@ -84,8 +84,7 @@ struct device_process_node { struct device_queue_manager_ops { int (*create_queue)(struct device_queue_manager *dqm, struct queue *q, - struct qcm_process_device *qpd, - int *allocate_vmid); + struct qcm_process_device *qpd); int (*destroy_queue)(struct device_queue_manager *dqm, struct qcm_process_device *qpd, diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 03bec76..1e7bcae 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -199,8 +199,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, goto err_create_queue; pqn->q = q; pqn->kq = NULL; - retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd, - >properties.vmid); + retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd); pr_debug("DQM returned %d for create_queue\n", retval); print_queue(q); break; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: Set the correct value for PDEs/PTEs of ATC memory on Raven
From: Yong Zhao <yong.z...@amd.com> Without the additional bits set in PDEs/PTEs, the ATC memory access would have failed on Raven. Change-Id: I28429ef6d39cdb01dc6f17fea4264ee22d7121d4 Signed-off-by: Yong Zhao <yong.z...@amd.com> Acked-by: Alex Deucher <alexander.deuc...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 10 ++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bca9eeb..d98d58a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -328,9 +328,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, AMDGPU_GEM_CREATE_SHADOW); if (vm->pte_support_ats) { - init_value = AMDGPU_PTE_SYSTEM; + init_value = AMDGPU_PTE_DEFAULT_ATC; if (level != adev->vm_manager.num_level - 1) init_value |= AMDGPU_PDE_PTE; + } /* walk over the address space and allocate the page tables */ @@ -2017,7 +2018,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, list_del(>list); if (vm->pte_support_ats) - init_pte_value = AMDGPU_PTE_SYSTEM; + init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, mapping->start, mapping->last, @@ -2629,7 +2630,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (adev->asic_type == CHIP_RAVEN) { vm->pte_support_ats = true; - init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE; + init_pde_value = AMDGPU_PTE_DEFAULT_ATC + | AMDGPU_PDE_PTE; + } } else vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 66efbc2..5d0cfc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -73,6 +73,16 @@ struct amdgpu_bo_list_entry; #define AMDGPU_PTE_MTYPE(a)((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) +/* For Raven */ +#define AMDGPU_MTYPE_CC 2 + +#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ +| AMDGPU_PTE_SNOOPED\ +| AMDGPU_PTE_EXECUTABLE \ +| AMDGPU_PTE_READABLE \ +| AMDGPU_PTE_WRITEABLE \ +| AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC)) + /* How to programm VM fault handling */ #define AMDGPU_VM_FAULT_STOP_NEVER 0 #define AMDGPU_VM_FAULT_STOP_FIRST 1 -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH] drm/amdgpu: Set the correct value for PDEs/PTEs of ATC memory
Hi Christian, Do you have a minute to review this change? Yong On 2017-09-27 02:46 PM, Alex Deucher wrote: On Tue, Sep 26, 2017 at 7:30 PM, Yong Zhao <yong.z...@amd.com> wrote: From: Yong Zhao <yong.z...@amd.com> Without the additional bits set in PDEs/PTEs, the ATC memory access would have failed. Change-Id: I28429ef6d39cdb01dc6f17fea4264ee22d7121d4 Signed-off-by: Yong Zhao <yong.z...@amd.com> Glad we finally got this sorted. Acked-by: Alex Deucher <alexander.deuc...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8fcc743..c848b7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -33,6 +33,8 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +#include "vega10/vega10_enum.h" + /* * PASID manager * @@ -108,6 +110,13 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, #undef START #undef LAST +#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ + | AMDGPU_PTE_SNOOPED\ + | AMDGPU_PTE_EXECUTABLE \ + | AMDGPU_PTE_READABLE \ + | AMDGPU_PTE_WRITEABLE \ + | AMDGPU_PTE_MTYPE(MTYPE_CC)) + /* Local structure. Encapsulate some VM table update parameters to reduce * the number of function parameters */ @@ -328,9 +337,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, AMDGPU_GEM_CREATE_SHADOW); if (vm->pte_support_ats) { - init_value = AMDGPU_PTE_SYSTEM; + init_value = AMDGPU_PTE_DEFAULT_ATC; if (level != adev->vm_manager.num_level - 1) init_value |= AMDGPU_PDE_PTE; + } /* walk over the address space and allocate the page tables */ @@ -2017,7 +2027,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, list_del(>list); if (vm->pte_support_ats) - init_pte_value = AMDGPU_PTE_SYSTEM; + init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, mapping->start, mapping->last, @@ -2627,7 +2637,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (adev->asic_type == CHIP_RAVEN) { vm->pte_support_ats = true; - init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE; + init_pde_value = AMDGPU_PTE_DEFAULT_ATC + | AMDGPU_PDE_PTE; + } } else vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH] drm/amdgpu: Set the correct value for PDEs/PTEs of ATC memory
From: Yong Zhao <yong.z...@amd.com> Without the additional bits set in PDEs/PTEs, the ATC memory access would have failed. Change-Id: I28429ef6d39cdb01dc6f17fea4264ee22d7121d4 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 8fcc743..c848b7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -33,6 +33,8 @@ #include "amdgpu.h" #include "amdgpu_trace.h" +#include "vega10/vega10_enum.h" + /* * PASID manager * @@ -108,6 +110,13 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, #undef START #undef LAST +#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ + | AMDGPU_PTE_SNOOPED\ + | AMDGPU_PTE_EXECUTABLE \ + | AMDGPU_PTE_READABLE \ + | AMDGPU_PTE_WRITEABLE \ + | AMDGPU_PTE_MTYPE(MTYPE_CC)) + /* Local structure. Encapsulate some VM table update parameters to reduce * the number of function parameters */ @@ -328,9 +337,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, AMDGPU_GEM_CREATE_SHADOW); if (vm->pte_support_ats) { - init_value = AMDGPU_PTE_SYSTEM; + init_value = AMDGPU_PTE_DEFAULT_ATC; if (level != adev->vm_manager.num_level - 1) init_value |= AMDGPU_PDE_PTE; + } /* walk over the address space and allocate the page tables */ @@ -2017,7 +2027,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, list_del(>list); if (vm->pte_support_ats) - init_pte_value = AMDGPU_PTE_SYSTEM; + init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, mapping->start, mapping->last, @@ -2627,7 +2637,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (adev->asic_type == CHIP_RAVEN) { vm->pte_support_ats = true; - init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE; + init_pde_value = AMDGPU_PTE_DEFAULT_ATC + | AMDGPU_PDE_PTE; + } } else vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 2/2] drm/amdgpu: Add copy_pte_num_dw member in amdgpu_vm_pte_funcs
Use it to replace the hard coded value in amdgpu_vm_bo_update_mapping(). Change-Id: I85d89d401b8dbcf01ca9c55c281e552db874fde5 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h| 4 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/si_dma.c| 2 ++ 7 files changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8708476..e7de600 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -296,10 +296,14 @@ struct amdgpu_buffer_funcs { /* provided by hw blocks that can write ptes, e.g., sdma */ struct amdgpu_vm_pte_funcs { + /* number of dw to reserve per operation */ + unsignedcopy_pte_num_dw; + /* copy pte entries from GART */ void (*copy_pte)(struct amdgpu_ib *ib, uint64_t pe, uint64_t src, unsigned count); + /* write pte one entry at a time with addr mapping */ void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe, uint64_t value, unsigned count, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 28d16781..8fcc743 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1597,7 +1597,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (pages_addr) { /* copy commands needed */ - ndw += ncmds * 7; + ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; /* and also PTEs */ ndw += nptes * 2; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c64dcd1..60cecd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1387,7 +1387,9 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { + .copy_pte_num_dw = 7, .copy_pte = cik_sdma_vm_copy_pte, + .write_pte = cik_sdma_vm_write_pte, .set_max_nums_pte_pde = 0x1f >> 3, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index c05eb74..acdee3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1324,7 +1324,9 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { + .copy_pte_num_dw = 7, .copy_pte = sdma_v2_4_vm_copy_pte, + .write_pte = sdma_v2_4_vm_write_pte, .set_max_nums_pte_pde = 0x1f >> 3, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 2079340..72f31cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1748,7 +1748,9 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { + .copy_pte_num_dw = 7, .copy_pte = sdma_v3_0_vm_copy_pte, + .write_pte = sdma_v3_0_vm_write_pte, /* not 0x3f due to HW limitation */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 798fc2d23..7bf25271 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1714,7 +1714,9 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { + .copy_pte_num_dw = 7, .copy_pte = sdma_v4_0_vm_copy_pte, + .write_pte = sdma_v4_0_vm_write_pte, .set_max_nums_pte_pde = 0x40 >> 3, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index adb6ae7..3fa2fbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -887,7 +887,9 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev) } static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { + .copy_pte_num_dw = 5, .copy_pte = si_dma_vm_copy_pte, + .write_pte = si_dma_vm_write_pte, .set_max_nums_pte_pde = 0x8 >> 3, -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
[PATCH 1/2] drm/amdgpu: Fix a bug in amdgpu_fill_buffer()
When max_bytes is not 8 bytes aligned and bo size is larger than max_bytes, the last 8 bytes in a ttm node may be left unchanged. For example, on pre SDMA 4.0, max_bytes = 0x1f, and the bo size is 0x20, the problem will happen. In order to fix the problem, we separately store the max nums of PTEs/PDEs a single operation can set in amdgpu_vm_pte_funcs structure, rather than inferring it from bytes limit of SDMA constant fill, i.e. fill_max_bytes. Together with the fix, we replace the hard code value "10" in amdgpu_vm_bo_update_mapping() with the corresponding values from structure amdgpu_vm_pte_funcs. Change-Id: I37c588a57cb63f1a8251fb5ead2eff4b39e047c9 Signed-off-by: Yong Zhao <yong.z...@amd.com> Reviewed-by: Christian König <christian.koe...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 +++-- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 3 +++ drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 3 +++ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 3 +++ drivers/gpu/drm/amd/amdgpu/si_dma.c | 3 +++ 8 files changed, 30 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index e45e6e9..8708476 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -304,6 +304,13 @@ struct amdgpu_vm_pte_funcs { void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe, uint64_t value, unsigned count, uint32_t incr); + + /* maximum nums of PTEs/PDEs in a single operation */ + uint32_tset_max_nums_pte_pde; + + /* number of dw to reserve per operation */ + unsignedset_pte_pde_num_dw; + /* for linear pte/pde updates without addr mapping */ void (*set_pte_pde)(struct amdgpu_ib *ib, uint64_t pe, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 0e5f78f..bd43268 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1527,8 +1527,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - /* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/ - uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; + uint32_t max_bytes = 8 * + adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct drm_mm_node *mm_node; @@ -1560,8 +1560,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, ++mm_node; } - /* 10 double words for each SDMA_OP_PTEPDE cmd */ - num_dw = num_loops * 10; + /* num of dwords for each SDMA_OP_PTEPDE cmd */ + num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; /* for IB padding */ num_dw += 64; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 6c11332..28d16781 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1606,10 +1606,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, } else { /* set page commands needed */ - ndw += ncmds * 10; + ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw; /* extra commands for begin/end fragments */ - ndw += 2 * 10 * adev->vm_manager.fragment_size; + ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw + * adev->vm_manager.fragment_size; params.func = amdgpu_vm_do_set_ptes; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index f508f4d..c64dcd1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1389,6 +1389,9 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev) static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { .copy_pte = cik_sdma_vm_copy_pte, .write_pte = cik_sdma_vm_write_pte, + + .set_max_nums_pte_pde = 0x1f >> 3, + .set_pte_pde_num_dw = 10, .set_pte_pde = cik_sdma_vm_set_pte_pde, }; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index f2d0710..c05eb74 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1326,6 +1326,9 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev) static const struct amdgpu_vm_pte_func
[PATCH 1/2] drm/amdgpu: Correct bytes limit for SDMA 3.0 copy and fill
Change-Id: I10fc5efbc303056c5c5c4dc4f4dd2c3186595a91 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 728c0d8..4e7fe07 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1730,11 +1730,11 @@ static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib, } static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = { - .copy_max_bytes = 0x1f, + .copy_max_bytes = 0x3fffe0, /* not 0x3f due to HW limitation*/ .copy_num_dw = 7, .emit_copy_buffer = sdma_v3_0_emit_copy_buffer, - .fill_max_bytes = 0x1f, + .fill_max_bytes = 0x3fffe0, /* not 0x3f due to HW limitation*/ .fill_num_dw = 5, .emit_fill_buffer = sdma_v3_0_emit_fill_buffer, }; -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 1/2] drm/amdgpu: Add a parameter to amdgpu_bo_create()
Got it. On 2017-07-28 05:01 AM, Christian König wrote: Am 27.07.2017 um 21:48 schrieb Yong Zhao: The parameter init_value contains the value to which we initialized VRAM bo when AMDGPU_GEM_CREATE_VRAM_CLEARED flag is set. Change-Id: I9ef3b9dd3ca9b98cc25dd2eaff68fbe1129c3e3c Signed-off-by: Yong Zhao <yong.z...@amd.com> I'm about to push a cleanup which removes a bunch of references to amdgpu_bo_create(), so don't be surprised when you need to rebase your patch once more before pushing. The patch is Reviewed-by: Christian König <christian.koe...@amd.com> and that rebase should only require you to remove changes, so feel free to push it after the rebase. Christian. --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 12 +--- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h| 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c| 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 20 files changed, 43 insertions(+), 34 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 2292c77..6d2bd80 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -185,7 +185,8 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, return -ENOMEM; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, - AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); + AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0, + &(*mem)->bo); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 2fb299a..63ec1e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -81,7 +81,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, n = AMDGPU_BENCHMARK_ITERATIONS; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, - NULL, ); + NULL, 0, ); if (r) { goto out_cleanup; } @@ -94,7 +94,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, goto out_cleanup; } r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, - NULL, ); + NULL, 0, ); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 3d41cd4..343cdf9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -124,7 +124,7 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE, true, domain, flags, NULL, , NULL, - ); + 0, ); if (ret) { DRM_ERROR("(%d) bo create failed\n", ret); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fe6783e..cf81f9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -344,7 +344,7 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device *adev) PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, - NULL, NULL, >vram_scratch.robj); + NULL, NULL, 0, >vram_scratch.robj); if (r) { return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 124b237..8cd79dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/g
[PATCH 2/2] drm/amdgpu: Support IOMMU on Raven
We achieved that by setting the PTEs to 2 (the SYSTEM bit is set) when the corresponding addresses are not occupied by gpu driver allocated buffers. Change-Id: I995c11c7a25bdaf7a16700d9e08a8fe287d49417 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 18 -- drivers/gpu/drm/amd/amdgpu/amdgpu_object.h| 6 -- drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c| 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 27 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h| 3 +++ drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 10 +- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 21 files changed, 70 insertions(+), 42 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 2292c77..fce2fa5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -185,7 +185,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, return -ENOMEM; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, -AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo); +AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, &(*mem)->bo, 0); if (r) { dev_err(adev->dev, "failed to allocate BO for amdkfd (%d)\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 2fb299a..56445ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -81,7 +81,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, n = AMDGPU_BENCHMARK_ITERATIONS; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL, -NULL, ); +NULL, , 0); if (r) { goto out_cleanup; } @@ -94,7 +94,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size, goto out_cleanup; } r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL, -NULL, ); +NULL, , 0); if (r) { goto out_cleanup; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 3d41cd4..ce19419 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -124,7 +124,7 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device *cgs_device, ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE, true, domain, flags, NULL, , NULL, - ); + , 0); if (ret) { DRM_ERROR("(%d) bo create failed\n", ret); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fe6783e..d8cd14f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -344,7 +344,7 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device *adev) PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM, AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, -NULL, NULL, >vram_scratch.robj); +NULL, NULL, >vram_scratch.robj, 0); if (r) { return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index 124b237..a6d7f55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -141,7 +141,7 @@ int amdgpu
[PATCH 1/2] drm/amdgpu: Add support for filling a buffer with 64 bit value
That function will be used later to support setting a page table block with 64 bit value. Change-Id: Ib142ebd4163d6e23670a3f0ceed536d59133b942 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 19 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 4d2a454..6ab30da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1509,11 +1509,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, } int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, - struct reservation_object *resv, - struct dma_fence **fence) + uint64_t src_data, + struct reservation_object *resv, + struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + /* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/ uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -1545,7 +1546,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, num_pages -= mm_node->size; ++mm_node; } - num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; + + /* 10 double words for each SDMA_OP_PTEPDE cmd */ + num_dw = num_loops * 10; /* for IB padding */ num_dw += 64; @@ -1570,12 +1573,16 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint32_t byte_count = mm_node->size << PAGE_SHIFT; uint64_t dst_addr; + WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8"); + dst_addr = amdgpu_mm_node_addr(>tbo, mm_node, >tbo.mem); while (byte_count) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - amdgpu_emit_fill_buffer(adev, >ibs[0], src_data, - dst_addr, cur_size_in_bytes); + amdgpu_vm_set_pte_pde(adev, >ibs[0], + dst_addr, 0, + cur_size_in_bytes >> 3, 0, + src_data); dst_addr += cur_size_in_bytes; byte_count -= cur_size_in_bytes; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index f137c24..0e2399f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -73,7 +73,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush); int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, + uint64_t src_data, struct reservation_object *resv, struct dma_fence **fence); -- 2.7.4 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Changes for enabling ATS support from PTE
Hi there, Attached are two patches made to amdgpu in order to support ATS on Raven. Please review them. Regards, Yong >From 0657ddb14a16d1b809c419b51e805287fb6a9989 Mon Sep 17 00:00:00 2001 From: Yong Zhao <yong.z...@amd.com> Date: Thu, 20 Jul 2017 18:44:10 -0400 Subject: [PATCH 1/2] drm/amdgpu: Add support for filling a buffer with 64 bit value That function will be used later to support setting a page table block with 64 bit value. Change-Id: Ib142ebd4163d6e23670a3f0ceed536d59133b942 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 38 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h| 2 +- 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 21e0814..4dfec57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -416,7 +416,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev, bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) { struct dma_fence *fence; - r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, ); + r = amdgpu_fill_buffer(bo, 0, 4, bo->tbo.resv, ); if (unlikely(r)) goto fail_unreserve; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 7820e81..99db4aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -1611,11 +1611,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, } int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, - struct reservation_object *resv, - struct dma_fence **fence) + uint64_t src_data, unsigned int word_size, + struct reservation_object *resv, + struct dma_fence **fence) { struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + /* max_bytes applies to both SDMA_OP_CONST_FILL and SDMA_OP_PTEPDE */ uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes; struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; @@ -1647,7 +1648,17 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, num_pages -= mm_node->size; ++mm_node; } - num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; + + switch (word_size) { + case 4: + num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw; + break; + case 8: /* 10 double words for each SDMA_OP_PTEPDE cmd */ + num_dw = num_loops * 10; + break; + default: + return -EINVAL; + } /* for IB padding */ num_dw += 64; @@ -1676,8 +1687,23 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, while (byte_count) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - amdgpu_emit_fill_buffer(adev, >ibs[0], src_data, - dst_addr, cur_size_in_bytes); + switch (word_size) { + case 4: /* only take the lower 32 bits of src_data */ +amdgpu_emit_fill_buffer(adev, >ibs[0], + (uint32_t)src_data, dst_addr, + cur_size_in_bytes); +break; + case 8: +WARN_ONCE(cur_size_in_bytes & 0x7, + "size should be a multiple of 8"); +amdgpu_vm_set_pte_pde(adev, >ibs[0], + dst_addr, 0, + cur_size_in_bytes >> 3, 0, + src_data); +break; + } + + dst_addr += cur_size_in_bytes; byte_count -= cur_size_in_bytes; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index a22e430..067e5e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -73,7 +73,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush); int amdgpu_fill_buffer(struct amdgpu_bo *bo, - uint32_t src_data, + uint64_t src_data, unsigned int word_size, struct reservation_object *resv, struct dma_fence **fence); -- 2.7.4 >From e0bb154b8ae014989e88a40f19379eec9a8b Mon Sep 17 00:00:00 2001 From: Yong Zhao <yong.z...@amd.com> Date: Thu, 20 Jul 2017 18:49:09 -0400 Subject: [PATCH 2/2] drm/amdgpu: Support IOMMU on Raven We achieved that by setting the PTEs to 2 (the SYSTEM bit is set) when the corresponding addresses are not occupied by gpu driver allocated buffers. Change-Id: I995c11c7a25bdaf7a16700d9e08a8fe287d49417 Signed-off-by: Yong Zhao <yong.z...@amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c| 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 30 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 8 include/uapi/drm/amdgpu_drm.h | 2 ++ 5 files changed, 39 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 9182def..433a90e
Re: [PATCH 06/12] drm/amdgpu: Correctly establish the suspend/resume hook for amdkfd
Hi Alex, As far as I know, we never tested suspend/resume on the setting you mentioned. Theoretically it should work. When I read the code now, I was wondering whether we should stop kfd before amdgpu_bo_evict_vram() and amdgpu_fence_driver_suspend(). If that's not needed, it may make more sense to stick to the previous design which kept the kfd suspend/resume inside your IP block suspend/resume. Regards, Yong On 2017-07-06 05:06 PM, Alex Deucher wrote: On Mon, Jul 3, 2017 at 5:11 PM, Felix Kuehling <felix.kuehl...@amd.com> wrote: From: Yong Zhao <yong.z...@amd.com> Signed-off-by: Yong Zhao <yong.z...@amd.com> Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com> Does this work properly for multiple GPUs? E.g., if one is suspended and another is not? E.g., PX laptops where we runtime suspend the dGPU while the APU is still running. Alex --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++ drivers/gpu/drm/amd/amdgpu/cik.c | 9 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5b1220f..bc69b9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -56,6 +56,8 @@ #include #include "amdgpu_vf_error.h" +#include "amdgpu_amdkfd.h" + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); @@ -2397,6 +2399,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) drm_modeset_unlock_all(dev); } + amdgpu_amdkfd_suspend(adev); + /* unpin the front buffers and cursors */ list_for_each_entry(crtc, >mode_config.crtc_list, head) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); @@ -2537,6 +2541,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } } } + r = amdgpu_amdkfd_resume(adev); + if (r) + return r; /* blat the mode back in */ if (fbcon) { diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 6ce9f80..00639bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1825,21 +1825,14 @@ static int cik_common_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_amdkfd_suspend(adev); - return cik_common_hw_fini(adev); } static int cik_common_resume(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = cik_common_hw_init(adev); - if (r) - return r; - - return amdgpu_amdkfd_resume(adev); + return cik_common_hw_init(adev); } static bool cik_common_is_idle(void *handle) -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx
Re: [PATCH 06/12] drm/amdgpu: Correctly establish the suspend/resume hook for amdkfd
On 2017-07-06 05:44 PM, Alex Deucher wrote: On Thu, Jul 6, 2017 at 5:33 PM, Yong Zhao <yong.z...@amd.com> wrote: Hi Alex, As far as I know, we never tested suspend/resume on the setting you mentioned. Theoretically it should work. Are the kfd s/r entry points global or per GPU? If you have two GPUs and you suspend one, will it suspend the entire kfd? I'm fine with the change, it's no worse than the current situation. Mostly just curious. kfd s/r is per GPU. If we suspend only one out of two GPUs, the other one will keep working. When I read the code now, I was wondering whether we should stop kfd before amdgpu_bo_evict_vram() and amdgpu_fence_driver_suspend(). If that's not needed, it may make more sense to stick to the previous design which kept the kfd suspend/resume inside your IP block suspend/resume. I think it makes more sense to put the kfd calls in the common device s/r code rather than in the soc specific ip functions. Change is: Reviewed-by: Alex Deucher <alexander.deuc...@amd.com> Regards, Yong On 2017-07-06 05:06 PM, Alex Deucher wrote: On Mon, Jul 3, 2017 at 5:11 PM, Felix Kuehling <felix.kuehl...@amd.com> wrote: From: Yong Zhao <yong.z...@amd.com> Signed-off-by: Yong Zhao <yong.z...@amd.com> Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com> Does this work properly for multiple GPUs? E.g., if one is suspended and another is not? E.g., PX laptops where we runtime suspend the dGPU while the APU is still running. Alex --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++ drivers/gpu/drm/amd/amdgpu/cik.c | 9 + 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 5b1220f..bc69b9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -56,6 +56,8 @@ #include #include "amdgpu_vf_error.h" +#include "amdgpu_amdkfd.h" + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); @@ -2397,6 +2399,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) drm_modeset_unlock_all(dev); } + amdgpu_amdkfd_suspend(adev); + /* unpin the front buffers and cursors */ list_for_each_entry(crtc, >mode_config.crtc_list, head) { struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc); @@ -2537,6 +2541,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) } } } + r = amdgpu_amdkfd_resume(adev); + if (r) + return r; /* blat the mode back in */ if (fbcon) { diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 6ce9f80..00639bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1825,21 +1825,14 @@ static int cik_common_suspend(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_amdkfd_suspend(adev); - return cik_common_hw_fini(adev); } static int cik_common_resume(void *handle) { - int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = cik_common_hw_init(adev); - if (r) - return r; - - return amdgpu_amdkfd_resume(adev); + return cik_common_hw_init(adev); } static bool cik_common_is_idle(void *handle) -- 1.9.1 ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx ___ amd-gfx mailing list amd-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/amd-gfx