from:"Yong Zhao"

[PATCH] drm/amdgpu: Improve the MTYPE comments

2020-05-26 Thread Yong Zhao

Use words instead of acronyms for better understanding.

Signed-off-by: Yong Zhao 
---
 include/uapi/drm/amdgpu_drm.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index d65f9b4ba05c..0072ddb59747 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -564,15 +564,15 @@ struct drm_amdgpu_gem_op {
 #define AMDGPU_VM_MTYPE_MASK   (0xf << 5)
 /* Default MTYPE. Pre-AI must use this.  Recommended for newer ASICs. */
 #define AMDGPU_VM_MTYPE_DEFAULT(0 << 5)
-/* Use NC MTYPE instead of default MTYPE */
+/* Use Non Coherent MTYPE instead of default MTYPE */
 #define AMDGPU_VM_MTYPE_NC (1 << 5)
-/* Use WC MTYPE instead of default MTYPE */
+/* Use Write Combine MTYPE instead of default MTYPE */
 #define AMDGPU_VM_MTYPE_WC (2 << 5)
-/* Use CC MTYPE instead of default MTYPE */
+/* Use Cache Coherent MTYPE instead of default MTYPE */
 #define AMDGPU_VM_MTYPE_CC (3 << 5)
-/* Use UC MTYPE instead of default MTYPE */
+/* Use UnCached MTYPE instead of default MTYPE */
 #define AMDGPU_VM_MTYPE_UC (4 << 5)
-/* Use RW MTYPE instead of default MTYPE */
+/* Use Read Write MTYPE instead of default MTYPE */
 #define AMDGPU_VM_MTYPE_RW (5 << 5)
 
 struct drm_amdgpu_gem_va {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Print CU information by default during initialization

2020-04-17 Thread Yong Zhao

This is convenient for multiple teams to obtain the information. Also,
add device info by using dev_info().

Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 71ea56e220ae..423eed223aa5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3170,7 +3170,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
 
-   DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+   dev_info(adev->dev,
+   "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
adev->gfx.config.max_shader_engines,
adev->gfx.config.max_sh_per_se,
adev->gfx.config.max_cu_per_sh,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/3] drm/amdgpu: Print CU information by default during initialization

2020-04-17 Thread Yong Zhao

This is convenient for multiple teams to obtain the information.

Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 71ea56e220ae..92b7a1ff1dc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3170,7 +3170,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
 
-   DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+   DRM_INFO("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
adev->gfx.config.max_shader_engines,
adev->gfx.config.max_sh_per_se,
adev->gfx.config.max_cu_per_sh,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdkfd: Adjust three dmesg printings during initialization

2020-04-17 Thread Yong Zhao

Delete two printings which are not very useful, and change one from
pr_info() to pr_debug().

Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index de9f68d5c312..1009a3b8dcc2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -502,7 +502,7 @@ int kfd_parse_crat_table(void *crat_image, struct list_head 
*device_list,
num_nodes = crat_table->num_domains;
image_len = crat_table->length;
 
-   pr_info("Parsing CRAT table with %d nodes\n", num_nodes);
+   pr_debug("Parsing CRAT table with %d nodes\n", num_nodes);
 
for (node_id = 0; node_id < num_nodes; node_id++) {
top_dev = kfd_create_topology_device(device_list);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 5db42814dd51..46dcf74ee2e0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -787,7 +787,6 @@ static int kfd_topology_update_sysfs(void)
 {
int ret;
 
-   pr_info("Creating topology SYSFS entries\n");
if (!sys_props.kobj_topology) {
sys_props.kobj_topology =
kfd_alloc_struct(sys_props.kobj_topology);
@@ -1048,7 +1047,6 @@ int kfd_topology_init(void)
sys_props.generation_count++;
kfd_update_system_properties();
kfd_debug_print_topology();
-   pr_info("Finished initializing topology\n");
} else
pr_err("Failed to update topology in sysfs ret=%d\n", ret);
 
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/3] drm/amdgpu: Adjust the SDMA doorbell info printing

2020-04-17 Thread Yong Zhao

Add more detail while turning off the printing by default, because it
is very useful.

Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 352cd9632770..c0ca9a8229e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1851,7 +1851,7 @@ static int sdma_v4_0_sw_init(void *handle)
ring->ring_obj = NULL;
ring->use_doorbell = true;
 
-   DRM_INFO("use_doorbell being set to: [%s]\n",
+   DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
 
/* doorbell size is 2 dwords, get DWORD offset */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index f7b2bcf3f293..764f455ada75 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -1260,7 +1260,7 @@ static int sdma_v5_0_sw_init(void *handle)
ring->ring_obj = NULL;
ring->use_doorbell = true;
 
-   DRM_INFO("use_doorbell being set to: [%s]\n",
+   DRM_DEBUG("SDMA %d use_doorbell being set to: [%s]\n", i,
ring->use_doorbell?"true":"false");
 
ring->doorbell_index = (i == 0) ?
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Use pr_debug to print the message of reaching event limit

2020-03-09 Thread Yong Zhao

People are inclined to think of the previous pr_warn message as an
error, so use pr_debug instead.

Change-Id: I3ac565a2bd3b8d57345812104c872183898d237f
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 1f8365575b12..15476fca8fa6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -187,7 +187,7 @@ static int create_signal_event(struct file *devkfd,
if (p->signal_mapped_size &&
p->signal_event_count == p->signal_mapped_size / 8) {
if (!p->signal_event_limit_reached) {
-   pr_warn("Signal event wasn't created because limit was 
reached\n");
+   pr_debug("Signal event wasn't created because limit was 
reached\n");
p->signal_event_limit_reached = true;
}
return -ENOSPC;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Consolidate duplicated bo alloc flags

2020-03-06 Thread Yong Zhao

The ALLOC_MEM_FLAGS_* values are the same as the KFD_IOC_ALLOC_MEM_FLAGS_*
values, but the two sets are mixed together in the kernel driver, which
hurts readability. For example, KFD_IOC_ALLOC_MEM_FLAGS_COHERENT is not
referenced in the kernel, and it takes effect only implicitly through
ALLOC_MEM_FLAGS_COHERENT, causing unnecessary confusion.

Replace all occurrences of ALLOC_MEM_FLAGS_* with
KFD_IOC_ALLOC_MEM_FLAGS_* to solve the problem.

Change-Id: Iced6ed3698167296c97b14e7e4569883859d619c
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  6 ++--
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 29 ++-
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 13 +
 .../gpu/drm/amd/include/kgd_kfd_interface.h   | 21 --
 4 files changed, 27 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 726c91ab6761..abfbe89e805e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include "amdgpu_xgmi.h"
+#include 
 
 static const unsigned int compute_vmid_bitmap = 0xFF00;
 
@@ -501,10 +502,11 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, 
int dma_buf_fd,
   metadata_size, _flags);
if (flags) {
*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
-   ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT;
+   KFD_IOC_ALLOC_MEM_FLAGS_VRAM
+   : KFD_IOC_ALLOC_MEM_FLAGS_GTT;
 
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
-   *flags |= ALLOC_MEM_FLAGS_PUBLIC;
+   *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
}
 
 out_put:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e4481caed648..9dff792c9290 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -29,6 +29,7 @@
 #include "amdgpu_vm.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_dma_buf.h"
+#include 
 
 /* BO flag to indicate a KFD userptr BO */
 #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
@@ -400,18 +401,18 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct 
amdgpu_sync *sync)
 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 {
struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
-   bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
+   bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
uint32_t mapping_flags;
 
mapping_flags = AMDGPU_VM_PAGE_READABLE;
-   if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
+   if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
-   if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
+   if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
 
switch (adev->asic_type) {
case CHIP_ARCTURUS:
-   if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
+   if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
if (bo_adev == adev)
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
@@ -1160,24 +1161,24 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/*
 * Check on which domain to allocate BO
 */
-   if (flags & ALLOC_MEM_FLAGS_VRAM) {
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
-   alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
+   alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
-   } else if (flags & ALLOC_MEM_FLAGS_GTT) {
+   } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
-   } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
+   } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
alloc_flags = 0;
if (!offset || !*offset)
return -EINVAL;
user_addr = untagged_addr(*offset);
-   } els

[PATCH] drm/amdgpu: Use better names to reflect it is CP MQD buffer

2020-03-04 Thread Yong Zhao

Add "CP" to AMDGPU_GEM_CREATE_MQD_GFX9 to indicate it is only for CP MQD.

Change-Id: Ie69cd3ba477e4bac161ea5b20ec2919a35f3528e
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 6 +++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 7 +--
 include/uapi/drm/amdgpu_drm.h  | 2 +-
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index bc2e72a66db9..726c91ab6761 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -224,7 +224,7 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
 
 int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
-   void **cpu_ptr, bool mqd_gfx9)
+   void **cpu_ptr, bool cp_mqd_gfx9)
 {
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
@@ -240,8 +240,8 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t 
size,
bp.type = ttm_bo_type_kernel;
bp.resv = NULL;
 
-   if (mqd_gfx9)
-   bp.flags |= AMDGPU_GEM_CREATE_MQD_GFX9;
+   if (cp_mqd_gfx9)
+   bp.flags |= AMDGPU_GEM_CREATE_CP_MQD_GFX9;
 
r = amdgpu_bo_create(adev, , );
if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index fca87bafe174..665db2353a78 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1043,7 +1043,7 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
if (amdgpu_bo_encrypted(abo))
flags |= AMDGPU_PTE_TMZ;
 
-   if (abo->flags & AMDGPU_GEM_CREATE_MQD_GFX9) {
+   if (abo->flags & AMDGPU_GEM_CREATE_CP_MQD_GFX9) {
uint64_t page_idx = 1;
 
r = amdgpu_gart_bind(adev, gtt->offset, page_idx,
@@ -1051,7 +1051,10 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
if (r)
goto gart_bind_fail;
 
-   /* Patch mtype of the second part BO */
+   /* The memory type of the first page defaults to UC. Now
+* modify the memory type to NC from the second page of
+* the BO onward.
+*/
flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
 
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index eaf94a421901..1e59c0146531 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -130,7 +130,7 @@ extern "C" {
 /* Flag that indicates allocating MQD gart on GFX9, where the mtype
  * for the second page onward should be set to NC.
  */
-#define AMDGPU_GEM_CREATE_MQD_GFX9 (1 << 8)
+#define AMDGPU_GEM_CREATE_CP_MQD_GFX9  (1 << 8)
 /* Flag that BO may contain sensitive data that must be wiped before
  * releasing the memory
  */
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Consolidate duplicated bo alloc flags

2020-03-04 Thread Yong Zhao

The ALLOC_MEM_FLAGS_* values are the same as the KFD_IOC_ALLOC_MEM_FLAGS_*
values, but the two sets are mixed together in the kernel driver, which
hurts readability. For example, KFD_IOC_ALLOC_MEM_FLAGS_COHERENT is not
referenced in the kernel at all, and it takes effect only through
ALLOC_MEM_FLAGS_COHERENT, causing unnecessary confusion.

Replace all occurrences of ALLOC_MEM_FLAGS_* with
KFD_IOC_ALLOC_MEM_FLAGS_* to solve the problem.

Change-Id: Iced6ed3698167296c97b14e7e4569883859d619c
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  9 +++--
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 38 +++
 drivers/gpu/drm/amd/amdkfd/kfd_process.c  | 13 ---
 .../gpu/drm/amd/include/kgd_kfd_interface.h   | 21 --
 4 files changed, 36 insertions(+), 45 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 726c91ab6761..affaa0d4b636 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include "amdgpu_xgmi.h"
+#include 
 
 static const unsigned int compute_vmid_bitmap = 0xFF00;
 
@@ -500,11 +501,13 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, 
int dma_buf_fd,
r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
   metadata_size, _flags);
if (flags) {
-   *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
-   ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT;
+   if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM)
+   *flags = KFD_IOC_ALLOC_MEM_FLAGS_VRAM;
+   else
+   *flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT;
 
if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
-   *flags |= ALLOC_MEM_FLAGS_PUBLIC;
+   *flags |= KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC;
}
 
 out_put:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index e4481caed648..c81fe7011e88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -29,6 +29,7 @@
 #include "amdgpu_vm.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_dma_buf.h"
+#include 
 
 /* BO flag to indicate a KFD userptr BO */
 #define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
@@ -400,18 +401,18 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct 
amdgpu_sync *sync)
 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 {
struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
-   bool coherent = mem->alloc_flags & ALLOC_MEM_FLAGS_COHERENT;
+   bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
uint32_t mapping_flags;
 
mapping_flags = AMDGPU_VM_PAGE_READABLE;
-   if (mem->alloc_flags & ALLOC_MEM_FLAGS_WRITABLE)
+   if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
-   if (mem->alloc_flags & ALLOC_MEM_FLAGS_EXECUTABLE)
+   if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
 
switch (adev->asic_type) {
case CHIP_ARCTURUS:
-   if (mem->alloc_flags & ALLOC_MEM_FLAGS_VRAM) {
+   if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
if (bo_adev == adev)
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
@@ -1160,24 +1161,24 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
/*
 * Check on which domain to allocate BO
 */
-   if (flags & ALLOC_MEM_FLAGS_VRAM) {
+   if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM;
alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE;
-   alloc_flags |= (flags & ALLOC_MEM_FLAGS_PUBLIC) ?
+   alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ?
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED :
AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
-   } else if (flags & ALLOC_MEM_FLAGS_GTT) {
+   } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_flags = 0;
-   } else if (flags & ALLOC_MEM_FLAGS_USERPTR) {
+   } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
alloc_flag

[PATCH] drm/amdkfd: Add more comments on GFX9 user CP queue MQD workaround

2020-03-04 Thread Yong Zhao

Because too many things are involved in this workaround, we need more
comments to avoid pitfalls.

Change-Id: I5d7917296dd5f5edb45921118cf8e7d778d40de1
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c|  5 -
 .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c| 18 +++---
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1947a326de57..10f6f4b21b44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1041,7 +1041,10 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
if (r)
goto gart_bind_fail;
 
-   /* Patch mtype of the second part BO */
+   /* The memory type of the first page defaults to UC. Now
+* modify the memory type to NC from the second page of
+* the BO onward.
+*/
flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 436b7f518979..5b11190ff6e6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -87,9 +87,21 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
int retval;
struct kfd_mem_obj *mqd_mem_obj = NULL;
 
-   /* From V9,  for CWSR, the control stack is located on the next page
-* boundary after the mqd, we will use the gtt allocation function
-* instead of sub-allocation function.
+   /* For V9 only, due to a HW bug, the control stack of a user mode
+* compute queue needs to be allocated just behind the page boundary
+* of its regular MQD buffer. So we allocate an enlarged MQD buffer:
+* the first page of the buffer serves as the regular MQD buffer
+* purpose and the remaining is for control stack. Although the two
+* parts are in the same buffer object, they need different memory
+* types: MQD part needs UC (uncached) as usual, while control stack
+* needs NC (non coherent), which is different from the UC type which
+* is used when control stack is allocated in user space.
+*
+* Because of all those, we use the gtt allocation function instead
+* of sub-allocation function for this enlarged MQD buffer. Moreover,
+* in order to achieve two memory types in a single buffer object, we
+* pass a special bo flag AMDGPU_GEM_CREATE_MQD_GFX9 to instruct
+* amdgpu memory functions to do so.
 */
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Add more comments on GFX9 user CP queue MQD workaround

2020-02-26 Thread Yong Zhao

Because too many things are involved in this workaround, we need more
comments to avoid pitfalls.

Change-Id: I5d7917296dd5f5edb45921118cf8e7d778d40de1
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  5 -
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 17 ++---
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 660867cf2597..a6c8e4cfc051 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1041,7 +1041,10 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
if (r)
goto gart_bind_fail;
 
-   /* Patch mtype of the second part BO */
+   /* The memory type of the first page defaults to UC. Now
+* modify the memory type to NC from the second page of
+* the BO onward.
+*/
flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 436b7f518979..ff2e84872721 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -87,9 +87,20 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
int retval;
struct kfd_mem_obj *mqd_mem_obj = NULL;
 
-   /* From V9,  for CWSR, the control stack is located on the next page
-* boundary after the mqd, we will use the gtt allocation function
-* instead of sub-allocation function.
+   /* For V9 only, due to a HW bug, the control stack of a user mode
+* compute queue needs to be allocated just behind the page boundary
+* of its regular MQD buffer. So we allocate an enlarged MQD buffer:
+* the first page of the buffer serves as the regular MQD buffer
+* purpose and the remaining is for control stack. Although the two
+* parts are in the same buffer object, they need different memory
+* type: MQD part needs UC (uncached) as usual, while control stack
+* needs NC (non coherent).
+*
+* Because of all those, we use the gtt allocation function instead
+* of sub-allocation function for this enlarged MQD buffer. Moreover,
+* in order to achieve two memory types in a single buffer object, we
+* pass a special bo flag AMDGPU_GEM_CREATE_MQD_GFX9 to instruct
+* amdgpu memory functions to do so.
 */
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Add more comments on GFX9 user CP queue MQD workaround

2020-02-26 Thread Yong Zhao

Because too many things are involved in this workaround, we need more
comments to avoid pitfalls.

Change-Id: I5d7917296dd5f5edb45921118cf8e7d778d40de1
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  5 -
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 16 +---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 660867cf2597..a6c8e4cfc051 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1041,7 +1041,10 @@ int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
if (r)
goto gart_bind_fail;
 
-   /* Patch mtype of the second part BO */
+   /* The memory type of the first page defaults to UC. Now
+* modify the memory type to NC from the second page of
+* the BO onward.
+*/
flags &= ~AMDGPU_PTE_MTYPE_VG10_MASK;
flags |= AMDGPU_PTE_MTYPE_VG10(AMDGPU_MTYPE_NC);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 436b7f518979..06a3d9ead510 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -87,9 +87,19 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
int retval;
struct kfd_mem_obj *mqd_mem_obj = NULL;
 
-   /* From V9,  for CWSR, the control stack is located on the next page
-* boundary after the mqd, we will use the gtt allocation function
-* instead of sub-allocation function.
+   /* For V9 only, due to a HW bug, the control stack of user mode
+* compute queues needs to be allocated just behind the page boundary
+* of its MQD buffer. So we allocate a enlarged MQD buffer: the first
+* page of which serves as the regular MQD buffer purpose. Although
+* the two parts are in the same buffer object, they need different
+* memory type: MQD part needs UC (uncached) as usual, while control
+* stack needs NC (non coherent).
+*
+* Because of all those, we use the gtt allocation function instead
+* of sub-allocation function for this enlarged MQD buffer. Moreover,
+* in order to achieve two memory types in a single buffer object, we
+* pass a special bo flag AMDGPU_GEM_CREATE_MQD_GFX9 to instruct
+* amdgpu memory functions to do so.
 */
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdkfd: Make get_tile_config() generic

2020-02-26 Thread Yong Zhao

Given we can query all the asic specific information from amdgpu_gfx_config,
we can make get_tile_config() generic.

Change-Id: I1080fec4d50c51bc84bb49b0145f8fec50081fce
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h|  3 ++
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   |  1 -
 .../drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c| 33 ---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 26 ---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 26 ---
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 23 -
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 --
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 22 +
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c  |  2 +-
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  4 ---
 10 files changed, 26 insertions(+), 116 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index b0ad3be0b03f..13feb313e9b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -242,6 +242,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
 void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
 
+int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
+   struct tile_config *config);
+
 /* KGD2KFD callbacks */
 int kgd2kfd_init(void);
 void kgd2kfd_exit(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 4bcc175a149d..d6549e5ea7e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -319,7 +319,6 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.address_watch_get_offset = kgd_gfx_v9_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
-   .get_tile_config = kgd_gfx_v9_get_tile_config,
.set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
index ca91fffb8a36..4ec6d0c03201 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
@@ -42,38 +42,6 @@ enum hqd_dequeue_request_type {
SAVE_WAVES
 };
 
-/* Because of REG_GET_FIELD() being used, we put this function in the
- * asic specific file.
- */
-static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
-   struct tile_config *config)
-{
-   struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
-
-   config->gb_addr_config = adev->gfx.config.gb_addr_config;
-#if 0
-/* TODO - confirm REG_GET_FIELD x2, should be OK as is... but
- * MC_ARB_RAMCFG register doesn't exist on Vega10 - initial amdgpu
- * changes commented out related code, doing the same here for now but
- * need to sync with Ken et al
- */
-   config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
-   MC_ARB_RAMCFG, NOOFBANK);
-   config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
-   MC_ARB_RAMCFG, NOOFRANKS);
-#endif
-
-   config->tile_config_ptr = adev->gfx.config.tile_mode_array;
-   config->num_tile_configs =
-   ARRAY_SIZE(adev->gfx.config.tile_mode_array);
-   config->macro_tile_config_ptr =
-   adev->gfx.config.macrotile_mode_array;
-   config->num_macro_tile_configs =
-   ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
-
-   return 0;
-}
-
 static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
 {
return (struct amdgpu_device *)kgd;
@@ -805,7 +773,6 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_info =
get_atc_vmid_pasid_mapping_info,
-   .get_tile_config = amdgpu_amdkfd_get_tile_config,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
.get_unique_id = amdgpu_amdkfd_get_unique_id,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 8f052e98a3c6..0b7e78748540 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -84,31 +84,6 @@ union TCP_WATCH_CNTL_BITS {
float f32All;
 };
 
-/* Because of REG_GET_FIELD() being used, we put this function in the
- * asic specific file.
- */
-static int get_tile_config(struct kgd_dev *kgd,
-   str

[PATCH 1/2] drm/amdgpu: Add num_banks and num_ranks to gfx config structure

2020-02-26 Thread Yong Zhao

The two members will be used by KFD later.

Change-Id: I36a605e359b242f2fe546fb67f8e402c48a62342
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c   | 5 +
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c   | 5 +
 3 files changed, 12 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index ca17ffb01301..37ba05b63b2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -151,6 +151,8 @@ struct amdgpu_gfx_config {
unsigned num_gpus;
unsigned multi_gpu_tile_size;
unsigned mc_arb_ramcfg;
+   unsigned num_banks;
+   unsigned num_ranks;
unsigned gb_addr_config;
unsigned num_rbs;
unsigned gs_vgt_table_depth;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 8f20a5dd44fe..172905dadf9a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -4338,6 +4338,11 @@ static void gfx_v7_0_gpu_early_init(struct amdgpu_device 
*adev)
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
 
+   adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
+   MC_ARB_RAMCFG, NOOFBANK);
+   adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
+   MC_ARB_RAMCFG, NOOFRANKS);
+
adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
adev->gfx.config.mem_max_burst_length_bytes = 256;
if (adev->flags & AMD_IS_APU) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index fa245973de12..e63f98b2d389 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1820,6 +1820,11 @@ static int gfx_v8_0_gpu_early_init(struct amdgpu_device 
*adev)
adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
 
+   adev->gfx.config.num_banks = REG_GET_FIELD(mc_arb_ramcfg,
+   MC_ARB_RAMCFG, NOOFBANK);
+   adev->gfx.config.num_ranks = REG_GET_FIELD(mc_arb_ramcfg,
+   MC_ARB_RAMCFG, NOOFRANKS);
+
adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
adev->gfx.config.mem_max_burst_length_bytes = 256;
if (adev->flags & AMD_IS_APU) {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Increase timeout on emulator to tenfold instead of twice

2020-02-25 Thread Yong Zhao

Since emulators are slower, some operations, such as flushing the TLB
through FM, sometimes need more than twice the regular timeout of 100ms,
so increase the timeout to 1s on emulators.

Change-Id: Idf1ff571dd2fe69d3a236d916262ad65f86c44e8
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 28a86752567e..8ef8a49b9255 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2834,7 +2834,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
if (amdgpu_emu_mode == 1)
-   adev->usec_timeout *= 2;
+   adev->usec_timeout *= 10;
adev->gmc.gart_size = 512 * 1024 * 1024;
adev->accel_working = false;
adev->num_rings = 0;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 5/6] drm/amdkfd: Delete excessive printings

2020-02-24 Thread Yong Zhao

Those printings are duplicated or useless.

Change-Id: I88fbe8f5748bbd0a20bcf1f6ca67b9dde99733fe
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 2 --
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +---
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a3c44d88314b..958275db3f55 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -297,8 +297,6 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
struct mqd_manager *mqd_mgr;
int retval;
 
-   print_queue(q);
-
dqm_lock(dqm);
 
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index c604a2ede3f5..3bfa5c8d9654 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -257,7 +257,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd);
-   pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
 
@@ -278,7 +277,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd);
-   pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
case KFD_QUEUE_TYPE_DIQ:
@@ -299,7 +297,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
 
if (retval != 0) {
-   pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n",
+   pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
pqm->process->pasid, type, retval);
goto err_create_queue;
}
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/6] drm/amdkfd: Rename queue_count to active_queue_count

2020-02-24 Thread Yong Zhao

The new name makes the code easier to understand.

Change-Id: I9064dab1d022e02780023131f940fff578a06b72
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 38 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |  4 +-
 .../amd/amdkfd/kfd_process_queue_manager.c|  2 +-
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 80d22bf702e8..7ef9b89f5c70 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -359,7 +359,7 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
list_add(>list, >queues_list);
qpd->queue_count++;
if (q->properties.is_active)
-   dqm->queue_count++;
+   dqm->active_queue_count++;
 
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
dqm->sdma_queue_count++;
@@ -494,7 +494,7 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
}
qpd->queue_count--;
if (q->properties.is_active)
-   dqm->queue_count--;
+   dqm->active_queue_count--;
 
return retval;
 }
@@ -563,13 +563,13 @@ static int update_queue(struct device_queue_manager *dqm, 
struct queue *q)
/*
 * check active state vs. the previous state and modify
 * counter accordingly. map_queues_cpsch uses the
-* dqm->queue_count to determine whether a new runlist must be
+* dqm->active_queue_count to determine whether a new runlist must be
 * uploaded.
 */
if (q->properties.is_active && !prev_active)
-   dqm->queue_count++;
+   dqm->active_queue_count++;
else if (!q->properties.is_active && prev_active)
-   dqm->queue_count--;
+   dqm->active_queue_count--;
 
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = map_queues_cpsch(dqm);
@@ -618,7 +618,7 @@ static int evict_process_queues_nocpsch(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = false;
-   dqm->queue_count--;
+   dqm->active_queue_count--;
 
if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
continue;
@@ -662,7 +662,7 @@ static int evict_process_queues_cpsch(struct 
device_queue_manager *dqm,
continue;
 
q->properties.is_active = false;
-   dqm->queue_count--;
+   dqm->active_queue_count--;
}
retval = execute_queues_cpsch(dqm,
qpd->is_debug ?
@@ -731,7 +731,7 @@ static int restore_process_queues_nocpsch(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = true;
-   dqm->queue_count++;
+   dqm->active_queue_count++;
 
if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
continue;
@@ -786,7 +786,7 @@ static int restore_process_queues_cpsch(struct 
device_queue_manager *dqm,
continue;
 
q->properties.is_active = true;
-   dqm->queue_count++;
+   dqm->active_queue_count++;
}
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -899,7 +899,7 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
 
mutex_init(>lock_hidden);
INIT_LIST_HEAD(>queues);
-   dqm->queue_count = dqm->next_pipe_to_allocate = 0;
+   dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
dqm->sdma_queue_count = 0;
dqm->xgmi_sdma_queue_count = 0;
 
@@ -924,7 +924,7 @@ static void uninitialize(struct device_queue_manager *dqm)
 {
int i;
 
-   WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
+   WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
 
kfree(dqm->allocated_queues);
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
@@ -1064,7 +1064,7 @@ static int initialize_cpsch(struct device_queue_manager 
*dqm)
 
mutex_init(>lock_hidden);
INIT_LIST_HEAD(>queues);
-   dqm->queue_count = dqm->processes_count = 0;
+   dqm->active_queue_count = dqm->processes_count = 0;

[PATCH 6/6] drm/amdkfd: Delete unnecessary unmap queue package submissions

2020-02-24 Thread Yong Zhao

The previous SDMA queue counting was wrong. In addition, after confirming
with the MEC firmware team, we understand that only one unmap queue package,
instead of one unmap queue package for CP and each SDMA engine, is needed,
which results in much simpler driver code.

Change-Id: I84fd2f7e63d6b7f664580b425a78d3e995ce9abc
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 79 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 -
 .../amd/amdkfd/kfd_process_queue_manager.c| 16 ++--
 3 files changed, 29 insertions(+), 68 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 958275db3f55..692abfd2088a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct 
device_queue_manager *dqm)
return dqm->dev->device_info->num_xgmi_sdma_engines;
 }
 
+static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
+{
+   return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
+}
+
 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
 {
return dqm->dev->device_info->num_sdma_engines
@@ -375,11 +380,6 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
if (q->properties.is_active)
increment_queue_count(dqm, q->properties.type);
 
-   if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-   dqm->sdma_queue_count++;
-   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
-   dqm->xgmi_sdma_queue_count++;
-
/*
 * Unconditionally increment this counter, regardless of the queue's
 * type or whether the queue is active.
@@ -460,15 +460,13 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
 
-   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
deallocate_hqd(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   dqm->sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   dqm->xgmi_sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
-   } else {
+   else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
return -EINVAL;
@@ -915,8 +913,6 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
dqm->active_cp_queue_count = 0;
-   dqm->sdma_queue_count = 0;
-   dqm->xgmi_sdma_queue_count = 0;
 
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -981,8 +977,11 @@ static int allocate_sdma_queue(struct device_queue_manager 
*dqm,
int bit;
 
if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   if (dqm->sdma_bitmap == 0)
+   if (dqm->sdma_bitmap == 0) {
+   pr_err("No more SDMA queue to allocate\n");
return -ENOMEM;
+   }
+
bit = __ffs64(dqm->sdma_bitmap);
dqm->sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
@@ -991,8 +990,10 @@ static int allocate_sdma_queue(struct device_queue_manager 
*dqm,
q->properties.sdma_queue_id = q->sdma_id /
get_num_sdma_engines(dqm);
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   if (dqm->xgmi_sdma_bitmap == 0)
+   if (dqm->xgmi_sdma_bitmap == 0) {
+   pr_err("No more XGMI SDMA queue to allocate\n");
return -ENOMEM;
+   }
bit = __ffs64(dqm->xgmi_sdma_bitmap);
dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
q->sdma_id = bit;
@@ -1081,8 +1082,7 @@ static int initialize_cpsch(struct device_queue_manager 
*dqm)
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = dqm->processes_count = 0;
dqm->active_cp_queue_count = 0;
-   dqm->sdma_queue_count = 0;
-   dqm->xgmi_sdma_queue_count = 0;
+
dqm->active_runlist = false;

[PATCH 2/6] drm/amdkfd: Avoid ambiguity by indicating it's cp queue

2020-02-24 Thread Yong Zhao

The queues represented in queue_bitmap are only CP queues.

Change-Id: I7e6a75de39718d7c4da608166b85b9377d06d1b3
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  4 ++--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c| 12 ++--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h|  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c|  2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h  |  2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8609287620ea..ebe4b8f88e79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -126,7 +126,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
/* this is going to have a few of the MSBs set that we need to
 * clear
 */
-   bitmap_complement(gpu_resources.queue_bitmap,
+   bitmap_complement(gpu_resources.cp_queue_bitmap,
  adev->gfx.mec.queue_bitmap,
  KGD_MAX_QUEUES);
 
@@ -137,7 +137,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
-   clear_bit(i, gpu_resources.queue_bitmap);
+   clear_bit(i, gpu_resources.cp_queue_bitmap);
 
amdgpu_doorbell_get_kfd_info(adev,
_resources.doorbell_physical_address,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 7ef9b89f5c70..973581c2b401 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -78,14 +78,14 @@ static bool is_pipe_enabled(struct device_queue_manager 
*dqm, int mec, int pipe)
/* queue is available for KFD usage if bit is 1 */
for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
if (test_bit(pipe_offset + i,
- dqm->dev->shared_resources.queue_bitmap))
+ dqm->dev->shared_resources.cp_queue_bitmap))
return true;
return false;
 }
 
-unsigned int get_queues_num(struct device_queue_manager *dqm)
+unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
 {
-   return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
+   return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
KGD_MAX_QUEUES);
 }
 
@@ -908,7 +908,7 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
 
for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
if (test_bit(pipe_offset + queue,
-dqm->dev->shared_resources.queue_bitmap))
+
dqm->dev->shared_resources.cp_queue_bitmap))
dqm->allocated_queues[pipe] |= 1 << queue;
}
 
@@ -1029,7 +1029,7 @@ static int set_sched_resources(struct 
device_queue_manager *dqm)
mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
/ dqm->dev->shared_resources.num_pipe_per_mec;
 
-   if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
+   if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
continue;
 
/* only acquire queues from the first MEC */
@@ -1979,7 +1979,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
 
for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
if (!test_bit(pipe_offset + queue,
- dqm->dev->shared_resources.queue_bitmap))
+ 
dqm->dev->shared_resources.cp_queue_bitmap))
continue;
 
r = dqm->dev->kfd2kgd->hqd_dump(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index ee3400e92c30..3f0fb0d28c01 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -219,7 +219,7 @@ void device_queue_manager_init_v10_navi10(
struct device_queue_manager_asic_ops *asic_ops);
 void program_sh_mem_settings(struct device_queue_manag

[PATCH 3/6] drm/amdkfd: Count active CP queues directly

2020-02-24 Thread Yong Zhao

The previous code for calculating active CP queues is problematic if
some SDMA queues are inactive. Fix that by counting CP queues directly.

Change-Id: I5ffaa75a95cbebc984558199ba2f3db6909c52a9
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 47 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  1 +
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |  3 +-
 3 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 973581c2b401..a3c44d88314b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -132,6 +132,22 @@ void program_sh_mem_settings(struct device_queue_manager 
*dqm,
qpd->sh_mem_bases);
 }
 
+void increment_queue_count(struct device_queue_manager *dqm,
+   enum kfd_queue_type type)
+{
+   dqm->active_queue_count++;
+   if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+   dqm->active_cp_queue_count++;
+}
+
+void decrement_queue_count(struct device_queue_manager *dqm,
+   enum kfd_queue_type type)
+{
+   dqm->active_queue_count--;
+   if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+   dqm->active_cp_queue_count--;
+}
+
 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 {
struct kfd_dev *dev = qpd->dqm->dev;
@@ -359,7 +375,7 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
list_add(>list, >queues_list);
qpd->queue_count++;
if (q->properties.is_active)
-   dqm->active_queue_count++;
+   increment_queue_count(dqm, q->properties.type);
 
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
dqm->sdma_queue_count++;
@@ -494,7 +510,7 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
}
qpd->queue_count--;
if (q->properties.is_active)
-   dqm->active_queue_count--;
+   decrement_queue_count(dqm, q->properties.type);
 
return retval;
 }
@@ -567,9 +583,9 @@ static int update_queue(struct device_queue_manager *dqm, 
struct queue *q)
 * uploaded.
 */
if (q->properties.is_active && !prev_active)
-   dqm->active_queue_count++;
+   increment_queue_count(dqm, q->properties.type);
else if (!q->properties.is_active && prev_active)
-   dqm->active_queue_count--;
+   decrement_queue_count(dqm, q->properties.type);
 
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = map_queues_cpsch(dqm);
@@ -618,7 +634,7 @@ static int evict_process_queues_nocpsch(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = false;
-   dqm->active_queue_count--;
+   decrement_queue_count(dqm, q->properties.type);
 
if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
continue;
@@ -662,7 +678,7 @@ static int evict_process_queues_cpsch(struct 
device_queue_manager *dqm,
continue;
 
q->properties.is_active = false;
-   dqm->active_queue_count--;
+   decrement_queue_count(dqm, q->properties.type);
}
retval = execute_queues_cpsch(dqm,
qpd->is_debug ?
@@ -731,7 +747,7 @@ static int restore_process_queues_nocpsch(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = true;
-   dqm->active_queue_count++;
+   increment_queue_count(dqm, q->properties.type);
 
if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
continue;
@@ -786,7 +802,7 @@ static int restore_process_queues_cpsch(struct 
device_queue_manager *dqm,
continue;
 
q->properties.is_active = true;
-   dqm->active_queue_count++;
+   increment_queue_count(dqm, q->properties.type);
}
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -900,6 +916,7 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
mutex_init(>lock_hidden);
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = d

[PATCH 4/6] drm/amdkfd: Fix a memory leak in queue creation error handling

2020-02-24 Thread Yong Zhao

When the queue creation failed, some resources were not freed. Fix it.

Change-Id: Ia24b6ad31528dceddfd4d1c58bb1d22c35d3eabf
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index b62ee2e3344a..c604a2ede3f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -329,6 +329,9 @@ int pqm_create_queue(struct process_queue_manager *pqm,
return retval;
 
 err_create_queue:
+   uninit_queue(q);
+   if (kq)
+   kernel_queue_uninit(kq, false);
kfree(pqn);
 err_allocate_pqn:
/* check if queues list is empty unregister process from device */
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 5/6] drm/amdkfd: Only count active sdma queues

2020-02-05 Thread Yong Zhao


Please disregard patches 5 and 6, as I have a new version of them.

Yong

On 2020-02-05 6:39 p.m., Yong Zhao wrote:

One minor fix added.

Yong

On 2020-02-05 6:28 p.m., Yong Zhao wrote:

The sdma_queue_count was only used for inferring whether we should
unmap SDMA queues under HWS mode. In contrast, we mapped active queues
rather than all queues in map_queues_cpsch(). In order to match the map
and unmap for SDMA queues, we should just count active SDMA queues.
Meanwhile, rename sdma_queue_count to active_sdma_queue_count to reflect
the new usage.

Change-Id: I9f1c3305dad044a3c779ec0730fcf7554050de8b
Signed-off-by: Yong Zhao 
---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 54 ---
  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  5 +-
  .../amd/amdkfd/kfd_process_queue_manager.c    | 16 +++---
  3 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index 064108cf493b..cf77b866054a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -138,6 +138,10 @@ void increment_queue_count(struct 
device_queue_manager *dqm,

  dqm->active_queue_count++;
  if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
  dqm->active_cp_queue_count++;
+    else if (type == KFD_QUEUE_TYPE_SDMA)
+    dqm->active_sdma_queue_count++;
+    else if (type == KFD_QUEUE_TYPE_SDMA_XGMI)
+    dqm->active_xgmi_sdma_queue_count++;
  }
    void decrement_queue_count(struct device_queue_manager *dqm,
@@ -146,6 +150,10 @@ void decrement_queue_count(struct 
device_queue_manager *dqm,

  dqm->active_queue_count--;
  if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
  dqm->active_cp_queue_count--;
+    else if (type == KFD_QUEUE_TYPE_SDMA)
+    dqm->active_sdma_queue_count--;
+    else if (type == KFD_QUEUE_TYPE_SDMA_XGMI)
+    dqm->active_xgmi_sdma_queue_count--;
  }
    static int allocate_doorbell(struct qcm_process_device *qpd, 
struct queue *q)
@@ -377,11 +385,6 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,

  if (q->properties.is_active)
  increment_queue_count(dqm, q->properties.type);
  -    if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-    dqm->sdma_queue_count++;
-    else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
-    dqm->xgmi_sdma_queue_count++;
-
  /*
   * Unconditionally increment this counter, regardless of the 
queue's

   * type or whether the queue is active.
@@ -462,15 +465,13 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,

  mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
  q->properties.type)];
  -    if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+    if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
  deallocate_hqd(dqm, q);
-    } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-    dqm->sdma_queue_count--;
+    else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
  deallocate_sdma_queue(dqm, q);
-    } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-    dqm->xgmi_sdma_queue_count--;
+    else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
  deallocate_sdma_queue(dqm, q);
-    } else {
+    else {
  pr_debug("q->properties.type %d is invalid\n",
  q->properties.type);
  return -EINVAL;
@@ -916,8 +917,8 @@ static int initialize_nocpsch(struct 
device_queue_manager *dqm)

  mutex_init(>lock_hidden);
  INIT_LIST_HEAD(>queues);
  dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
-    dqm->sdma_queue_count = 0;
-    dqm->xgmi_sdma_queue_count = 0;
+    dqm->active_sdma_queue_count = 0;
+    dqm->active_xgmi_sdma_queue_count = 0;
    for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
  int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -1081,8 +1082,8 @@ static int initialize_cpsch(struct 
device_queue_manager *dqm)

  mutex_init(>lock_hidden);
  INIT_LIST_HEAD(>queues);
  dqm->active_queue_count = dqm->processes_count = 0;
-    dqm->sdma_queue_count = 0;
-    dqm->xgmi_sdma_queue_count = 0;
+    dqm->active_sdma_queue_count = 0;
+    dqm->active_xgmi_sdma_queue_count = 0;
  dqm->active_runlist = false;
  dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
  dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - 
get_num_xgmi_sdma_queues(dqm));
@@ -1254,11 +1255,6 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,

  list_add(>list, >queues_list);
  qpd->queue_count++;
  -    if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-    dqm->sdma_queue_count++;
-

[PATCH 1/3] drm/amdkfd: Delete excessive printings

2020-02-05 Thread Yong Zhao

Those printings are duplicated or useless.

Change-Id: I88fbe8f5748bbd0a20bcf1f6ca67b9dde99733fe
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 2 --
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +---
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index a3c44d88314b..958275db3f55 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -297,8 +297,6 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
struct mqd_manager *mqd_mgr;
int retval;
 
-   print_queue(q);
-
dqm_lock(dqm);
 
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index c604a2ede3f5..3bfa5c8d9654 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -257,7 +257,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd);
-   pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
 
@@ -278,7 +277,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd);
-   pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
case KFD_QUEUE_TYPE_DIQ:
@@ -299,7 +297,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
 
if (retval != 0) {
-   pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n",
+   pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
pqm->process->pasid, type, retval);
goto err_create_queue;
}
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/3] drm/amdkfd: Fix bugs in SDMA queues mapping in HWS mode

2020-02-05 Thread Yong Zhao

The sdma_queue_count was only used for inferring whether we should
unmap SDMA queues under HWS mode. In contrast, we only mapped active
queues rather than all queues in map_queues_cpsch(). In order to match
the map and unmap for SDMA queues, we should just count active SDMA
queues.

Moreover, previously in execute_queues_cpsch(), we determined whether
to unmap SDMA queues based on active_sdma_queue_count. However, its
value only reflected the "to be mapped" SDMA queue count, rather than
the "mapped" count, which actually should be used. For example, if
there is an SDMA queue mapped and the application is destroying it,
when the driver reaches unmap_queues_cpsch(), active_sdma_queue_count
is already 0, so unmap_sdma_queues() won't be triggered, which is a bug.
Fix the issue by recording whether we should call unmap_sdma_queues()
in the next execute_queues_cpsch() before mapping all queues.

An optimization is also made. Previously, whenever unmapping SDMA queues,
the code would send one unmapping packet for each SDMA engine to CP
firmware regardless of whether there are SDMA queues mapped on that engine.
By introducing used_sdma_engines_bitmap, which is calculated during
mapping, we can send unmapping packets only for the engines that
actually have queues mapped.

Change-Id: I84fd2f7e63d6b7f664580b425a78d3e995ce9abc
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 131 +-
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |   4 +-
 .../amd/amdkfd/kfd_process_queue_manager.c|  16 +--
 3 files changed, 71 insertions(+), 80 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 958275db3f55..3ca660acaa1d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -109,6 +109,11 @@ static unsigned int get_num_xgmi_sdma_engines(struct 
device_queue_manager *dqm)
return dqm->dev->device_info->num_xgmi_sdma_engines;
 }
 
+static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
+{
+   return get_num_sdma_engines(dqm) + get_num_xgmi_sdma_engines(dqm);
+}
+
 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
 {
return dqm->dev->device_info->num_sdma_engines
@@ -133,19 +138,27 @@ void program_sh_mem_settings(struct device_queue_manager 
*dqm,
 }
 
 void increment_queue_count(struct device_queue_manager *dqm,
-   enum kfd_queue_type type)
+   struct queue *q)
 {
+   enum kfd_queue_type type = q->properties.type;
+
dqm->active_queue_count++;
if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count++;
+   else
+   dqm->used_queues_on_sdma[q->properties.sdma_engine_id]++;
 }
 
 void decrement_queue_count(struct device_queue_manager *dqm,
-   enum kfd_queue_type type)
+   struct queue *q)
 {
+   enum kfd_queue_type type = q->properties.type;
+
dqm->active_queue_count--;
if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count--;
+   else
+   dqm->used_queues_on_sdma[q->properties.sdma_engine_id]--;
 }
 
 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
@@ -373,12 +386,7 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
list_add(>list, >queues_list);
qpd->queue_count++;
if (q->properties.is_active)
-   increment_queue_count(dqm, q->properties.type);
-
-   if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-   dqm->sdma_queue_count++;
-   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
-   dqm->xgmi_sdma_queue_count++;
+   increment_queue_count(dqm, q);
 
/*
 * Unconditionally increment this counter, regardless of the queue's
@@ -460,15 +468,13 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
 
-   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
deallocate_hqd(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   dqm->sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   dqm->xgmi_sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
-   } else {
+   else {
pr_de

[PATCH 2/3] drm/amdgpu: Use MAX_SDMA_ENGINE_NUM instead of a number

2020-02-05 Thread Yong Zhao

MAX_SDMA_ENGINE_NUM will be used in more than one place.

Change-Id: I99c84086ee62612b373c547a9d29bc4a69e7c72e
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h| 2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
index 3fa18003d4d6..9d41d983a40f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
@@ -52,7 +52,7 @@ struct amdgpu_doorbell_index {
uint32_t userqueue_end;
uint32_t gfx_ring0;
uint32_t gfx_ring1;
-   uint32_t sdma_engine[8];
+   uint32_t sdma_engine[MAX_SDMA_ENGINE_NUM];
uint32_t ih;
union {
struct {
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h 
b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 55750890b73f..3709d3603fb0 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -35,6 +35,7 @@
 struct pci_dev;
 
 #define KGD_MAX_QUEUES 128
+#define MAX_SDMA_ENGINE_NUM 8
 
 struct kfd_dev;
 struct kgd_dev;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 5/6] drm/amdkfd: Only count active sdma queues

2020-02-05 Thread Yong Zhao


One minor fix added.

Yong

On 2020-02-05 6:28 p.m., Yong Zhao wrote:

The sdma_queue_count was only used for inferring whether we should
unmap SDMA queues under HWS mode. In contrast, we mapped active queues
rather than all in map_queues_cpsch(). In order to match the map and unmap
for SDMA queues, we should just count active SDMA queues. Meanwhile,
rename sdma_queue_count to active_sdma_queue_count to reflect the new
usage.

Change-Id: I9f1c3305dad044a3c779ec0730fcf7554050de8b
Signed-off-by: Yong Zhao 
---
  .../drm/amd/amdkfd/kfd_device_queue_manager.c | 54 ---
  .../drm/amd/amdkfd/kfd_device_queue_manager.h |  5 +-
  .../amd/amdkfd/kfd_process_queue_manager.c| 16 +++---
  3 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 064108cf493b..cf77b866054a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -138,6 +138,10 @@ void increment_queue_count(struct device_queue_manager 
*dqm,
dqm->active_queue_count++;
if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count++;
+   else if (type == KFD_QUEUE_TYPE_SDMA)
+   dqm->active_sdma_queue_count++;
+   else if (type == KFD_QUEUE_TYPE_SDMA_XGMI)
+   dqm->active_xgmi_sdma_queue_count++;
  }
  
  void decrement_queue_count(struct device_queue_manager *dqm,

@@ -146,6 +150,10 @@ void decrement_queue_count(struct device_queue_manager 
*dqm,
dqm->active_queue_count--;
if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count--;
+   else if (type == KFD_QUEUE_TYPE_SDMA)
+   dqm->active_sdma_queue_count--;
+   else if (type == KFD_QUEUE_TYPE_SDMA_XGMI)
+   dqm->active_xgmi_sdma_queue_count--;
  }
  
  static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)

@@ -377,11 +385,6 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
if (q->properties.is_active)
increment_queue_count(dqm, q->properties.type);
  
-	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)

-   dqm->sdma_queue_count++;
-   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
-   dqm->xgmi_sdma_queue_count++;
-
/*
 * Unconditionally increment this counter, regardless of the queue's
 * type or whether the queue is active.
@@ -462,15 +465,13 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
  
-	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {

+   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
deallocate_hqd(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   dqm->sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   dqm->xgmi_sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
-   } else {
+   else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
return -EINVAL;
@@ -916,8 +917,8 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
mutex_init(>lock_hidden);
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
-   dqm->sdma_queue_count = 0;
-   dqm->xgmi_sdma_queue_count = 0;
+   dqm->active_sdma_queue_count = 0;
+   dqm->active_xgmi_sdma_queue_count = 0;
  
  	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {

int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -1081,8 +1082,8 @@ static int initialize_cpsch(struct device_queue_manager 
*dqm)
mutex_init(>lock_hidden);
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = dqm->processes_count = 0;
-   dqm->sdma_queue_count = 0;
-   dqm->xgmi_sdma_queue_count = 0;
+   dqm->active_sdma_queue_count = 0;
+   dqm->active_xgmi_sdma_queue_count = 0;
dqm->active_runlist = false;
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -1254,11 +1255,6 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,
list_add(>list, >queues_list);
qpd->que

[PATCH 6/6] drm/amdkfd: Delete excessive printings

2020-02-05 Thread Yong Zhao

Those printings are duplicated or useless.

Change-Id: I88fbe8f5748bbd0a20bcf1f6ca67b9dde99733fe
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 2 --
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 4 +---
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index cf77b866054a..3bfdc9b251b3 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -305,8 +305,6 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
struct mqd_manager *mqd_mgr;
int retval;
 
-   print_queue(q);
-
dqm_lock(dqm);
 
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 941b5876f19f..cf11f4dce98a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -253,7 +253,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd);
-   pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
 
@@ -274,7 +273,6 @@ int pqm_create_queue(struct process_queue_manager *pqm,
pqn->q = q;
pqn->kq = NULL;
retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd);
-   pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
case KFD_QUEUE_TYPE_DIQ:
@@ -295,7 +293,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
}
 
if (retval != 0) {
-   pr_err("Pasid 0x%x DQM create queue %d failed. ret %d\n",
+   pr_err("Pasid 0x%x DQM create queue type %d failed. ret %d\n",
pqm->process->pasid, type, retval);
goto err_create_queue;
}
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 4/6] drm/amdkfd: Fix a memory leak in queue creation error handling

2020-02-05 Thread Yong Zhao

When queue creation failed, some resources were not freed. Fix it.

Change-Id: Ia24b6ad31528dceddfd4d1c58bb1d22c35d3eabf
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index b62ee2e3344a..c604a2ede3f5 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -329,6 +329,9 @@ int pqm_create_queue(struct process_queue_manager *pqm,
return retval;
 
 err_create_queue:
+   uninit_queue(q);
+   if (kq)
+   kernel_queue_uninit(kq, false);
kfree(pqn);
 err_allocate_pqn:
/* check if queues list is empty unregister process from device */
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/6] drm/amdkfd: Avoid ambiguity by indicating it's cp queue

2020-02-05 Thread Yong Zhao

The queues represented in queue_bitmap are only CP queues.

Change-Id: I7e6a75de39718d7c4da608166b85b9377d06d1b3
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c   |  4 ++--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c| 12 ++--
 .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h|  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c   |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_topology.c|  2 +-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h  |  2 +-
 7 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 8609287620ea..ebe4b8f88e79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -126,7 +126,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
/* this is going to have a few of the MSBs set that we need to
 * clear
 */
-   bitmap_complement(gpu_resources.queue_bitmap,
+   bitmap_complement(gpu_resources.cp_queue_bitmap,
  adev->gfx.mec.queue_bitmap,
  KGD_MAX_QUEUES);
 
@@ -137,7 +137,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
-   clear_bit(i, gpu_resources.queue_bitmap);
+   clear_bit(i, gpu_resources.cp_queue_bitmap);
 
amdgpu_doorbell_get_kfd_info(adev,
_resources.doorbell_physical_address,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 7ef9b89f5c70..973581c2b401 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -78,14 +78,14 @@ static bool is_pipe_enabled(struct device_queue_manager 
*dqm, int mec, int pipe)
/* queue is available for KFD usage if bit is 1 */
for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
if (test_bit(pipe_offset + i,
- dqm->dev->shared_resources.queue_bitmap))
+ dqm->dev->shared_resources.cp_queue_bitmap))
return true;
return false;
 }
 
-unsigned int get_queues_num(struct device_queue_manager *dqm)
+unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
 {
-   return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
+   return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
KGD_MAX_QUEUES);
 }
 
@@ -908,7 +908,7 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
 
for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
if (test_bit(pipe_offset + queue,
-dqm->dev->shared_resources.queue_bitmap))
+
dqm->dev->shared_resources.cp_queue_bitmap))
dqm->allocated_queues[pipe] |= 1 << queue;
}
 
@@ -1029,7 +1029,7 @@ static int set_sched_resources(struct 
device_queue_manager *dqm)
mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
/ dqm->dev->shared_resources.num_pipe_per_mec;
 
-   if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
+   if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
continue;
 
/* only acquire queues from the first MEC */
@@ -1979,7 +1979,7 @@ int dqm_debugfs_hqds(struct seq_file *m, void *data)
 
for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
if (!test_bit(pipe_offset + queue,
- dqm->dev->shared_resources.queue_bitmap))
+ 
dqm->dev->shared_resources.cp_queue_bitmap))
continue;
 
r = dqm->dev->kfd2kgd->hqd_dump(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index ee3400e92c30..3f0fb0d28c01 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -219,7 +219,7 @@ void device_queue_manager_init_v10_navi10(
struct device_queue_manager_asic_ops *asic_ops);
 void program_sh_mem_settings(struct device_queue_manag

[PATCH 5/6] drm/amdkfd: Only count active sdma queues

2020-02-05 Thread Yong Zhao

The sdma_queue_count was only used for inferring whether we should
unmap SDMA queues under HWS mode. In contrast, we mapped active queues
rather than all in map_queues_cpsch(). In order to match the map and unmap
for SDMA queues, we should just count active SDMA queues. Meanwhile,
rename sdma_queue_count to active_sdma_queue_count to reflect the new
usage.

Change-Id: I9f1c3305dad044a3c779ec0730fcf7554050de8b
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 54 ---
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  5 +-
 .../amd/amdkfd/kfd_process_queue_manager.c| 16 +++---
 3 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 064108cf493b..cf77b866054a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -138,6 +138,10 @@ void increment_queue_count(struct device_queue_manager 
*dqm,
dqm->active_queue_count++;
if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count++;
+   else if (type == KFD_QUEUE_TYPE_SDMA)
+   dqm->active_sdma_queue_count++;
+   else if (type == KFD_QUEUE_TYPE_SDMA_XGMI)
+   dqm->active_xgmi_sdma_queue_count++;
 }
 
 void decrement_queue_count(struct device_queue_manager *dqm,
@@ -146,6 +150,10 @@ void decrement_queue_count(struct device_queue_manager 
*dqm,
dqm->active_queue_count--;
if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
dqm->active_cp_queue_count--;
+   else if (type == KFD_QUEUE_TYPE_SDMA)
+   dqm->active_sdma_queue_count--;
+   else if (type == KFD_QUEUE_TYPE_SDMA_XGMI)
+   dqm->active_xgmi_sdma_queue_count--;
 }
 
 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
@@ -377,11 +385,6 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
if (q->properties.is_active)
increment_queue_count(dqm, q->properties.type);
 
-   if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-   dqm->sdma_queue_count++;
-   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
-   dqm->xgmi_sdma_queue_count++;
-
/*
 * Unconditionally increment this counter, regardless of the queue's
 * type or whether the queue is active.
@@ -462,15 +465,13 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
 
-   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
+   if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
deallocate_hqd(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
-   dqm->sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
deallocate_sdma_queue(dqm, q);
-   } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
-   dqm->xgmi_sdma_queue_count--;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
deallocate_sdma_queue(dqm, q);
-   } else {
+   else {
pr_debug("q->properties.type %d is invalid\n",
q->properties.type);
return -EINVAL;
@@ -916,8 +917,8 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
mutex_init(>lock_hidden);
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
-   dqm->sdma_queue_count = 0;
-   dqm->xgmi_sdma_queue_count = 0;
+   dqm->active_sdma_queue_count = 0;
+   dqm->active_xgmi_sdma_queue_count = 0;
 
for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -1081,8 +1082,8 @@ static int initialize_cpsch(struct device_queue_manager 
*dqm)
mutex_init(>lock_hidden);
INIT_LIST_HEAD(>queues);
dqm->active_queue_count = dqm->processes_count = 0;
-   dqm->sdma_queue_count = 0;
-   dqm->xgmi_sdma_queue_count = 0;
+   dqm->active_sdma_queue_count = 0;
+   dqm->active_xgmi_sdma_queue_count = 0;
dqm->active_runlist = false;
dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -1254,11 +1255,6 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,
list_add(>list, >queues_list);
qpd->queue_count++;
 
-   if (q->properties.type == KFD_QUEUE_TYPE_S

[PATCH 3/6] drm/amdkfd: Count active CP queues directly

2020-02-05 Thread Yong Zhao

The previous code of calculating active CP queues is problematic if
some SDMA queues are inactive. Fix that by counting CP queues directly.

Change-Id: I5ffaa75a95cbebc984558199ba2f3db6909c52a9
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 45 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  1 +
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |  3 +-
 3 files changed, 33 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 973581c2b401..064108cf493b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -132,6 +132,22 @@ void program_sh_mem_settings(struct device_queue_manager 
*dqm,
qpd->sh_mem_bases);
 }
 
+void increment_queue_count(struct device_queue_manager *dqm,
+   enum kfd_queue_type type)
+{
+   dqm->active_queue_count++;
+   if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+   dqm->active_cp_queue_count++;
+}
+
+void decrement_queue_count(struct device_queue_manager *dqm,
+   enum kfd_queue_type type)
+{
+   dqm->active_queue_count--;
+   if (type == KFD_QUEUE_TYPE_COMPUTE || type == KFD_QUEUE_TYPE_DIQ)
+   dqm->active_cp_queue_count--;
+}
+
 static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
 {
struct kfd_dev *dev = qpd->dqm->dev;
@@ -359,7 +375,7 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
list_add(>list, >queues_list);
qpd->queue_count++;
if (q->properties.is_active)
-   dqm->active_queue_count++;
+   increment_queue_count(dqm, q->properties.type);
 
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
dqm->sdma_queue_count++;
@@ -494,7 +510,7 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
}
qpd->queue_count--;
if (q->properties.is_active)
-   dqm->active_queue_count--;
+   decrement_queue_count(dqm, q->properties.type);
 
return retval;
 }
@@ -567,9 +583,9 @@ static int update_queue(struct device_queue_manager *dqm, 
struct queue *q)
 * uploaded.
 */
if (q->properties.is_active && !prev_active)
-   dqm->active_queue_count++;
+   increment_queue_count(dqm, q->properties.type);
else if (!q->properties.is_active && prev_active)
-   dqm->active_queue_count--;
+   decrement_queue_count(dqm, q->properties.type);
 
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = map_queues_cpsch(dqm);
@@ -618,7 +634,7 @@ static int evict_process_queues_nocpsch(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = false;
-   dqm->active_queue_count--;
+   decrement_queue_count(dqm, q->properties.type);
 
if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
continue;
@@ -662,7 +678,7 @@ static int evict_process_queues_cpsch(struct 
device_queue_manager *dqm,
continue;
 
q->properties.is_active = false;
-   dqm->active_queue_count--;
+   decrement_queue_count(dqm, q->properties.type);
}
retval = execute_queues_cpsch(dqm,
qpd->is_debug ?
@@ -731,7 +747,7 @@ static int restore_process_queues_nocpsch(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = true;
-   dqm->active_queue_count++;
+   increment_queue_count(dqm, q->properties.type);
 
if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
continue;
@@ -786,7 +802,7 @@ static int restore_process_queues_cpsch(struct 
device_queue_manager *dqm,
continue;
 
q->properties.is_active = true;
-   dqm->active_queue_count++;
+   increment_queue_count(dqm, q->properties.type);
}
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -1158,7 +1174,7 @@ static int create_kernel_queue_cpsch(struct 
device_queue_manager *dqm,
dqm->total_queue_count);
 
list_add(>list, >

[PATCH 1/6] drm/amdkfd: Rename queue_count to active_queue_count

2020-02-05 Thread Yong Zhao

The new name makes the code easier to understand.

Change-Id: I9064dab1d022e02780023131f940fff578a06b72
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 38 +--
 .../drm/amd/amdkfd/kfd_device_queue_manager.h |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |  4 +-
 .../amd/amdkfd/kfd_process_queue_manager.c|  2 +-
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 80d22bf702e8..7ef9b89f5c70 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -359,7 +359,7 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
list_add(>list, >queues_list);
qpd->queue_count++;
if (q->properties.is_active)
-   dqm->queue_count++;
+   dqm->active_queue_count++;
 
if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
dqm->sdma_queue_count++;
@@ -494,7 +494,7 @@ static int destroy_queue_nocpsch_locked(struct 
device_queue_manager *dqm,
}
qpd->queue_count--;
if (q->properties.is_active)
-   dqm->queue_count--;
+   dqm->active_queue_count--;
 
return retval;
 }
@@ -563,13 +563,13 @@ static int update_queue(struct device_queue_manager *dqm, 
struct queue *q)
/*
 * check active state vs. the previous state and modify
 * counter accordingly. map_queues_cpsch uses the
-* dqm->queue_count to determine whether a new runlist must be
+* dqm->active_queue_count to determine whether a new runlist must be
 * uploaded.
 */
if (q->properties.is_active && !prev_active)
-   dqm->queue_count++;
+   dqm->active_queue_count++;
else if (!q->properties.is_active && prev_active)
-   dqm->queue_count--;
+   dqm->active_queue_count--;
 
if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
retval = map_queues_cpsch(dqm);
@@ -618,7 +618,7 @@ static int evict_process_queues_nocpsch(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = false;
-   dqm->queue_count--;
+   dqm->active_queue_count--;
 
if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
continue;
@@ -662,7 +662,7 @@ static int evict_process_queues_cpsch(struct 
device_queue_manager *dqm,
continue;
 
q->properties.is_active = false;
-   dqm->queue_count--;
+   dqm->active_queue_count--;
}
retval = execute_queues_cpsch(dqm,
qpd->is_debug ?
@@ -731,7 +731,7 @@ static int restore_process_queues_nocpsch(struct 
device_queue_manager *dqm,
mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
q->properties.type)];
q->properties.is_active = true;
-   dqm->queue_count++;
+   dqm->active_queue_count++;
 
if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
continue;
@@ -786,7 +786,7 @@ static int restore_process_queues_cpsch(struct 
device_queue_manager *dqm,
continue;
 
q->properties.is_active = true;
-   dqm->queue_count++;
+   dqm->active_queue_count++;
}
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
@@ -899,7 +899,7 @@ static int initialize_nocpsch(struct device_queue_manager 
*dqm)
 
mutex_init(>lock_hidden);
INIT_LIST_HEAD(>queues);
-   dqm->queue_count = dqm->next_pipe_to_allocate = 0;
+   dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
dqm->sdma_queue_count = 0;
dqm->xgmi_sdma_queue_count = 0;
 
@@ -924,7 +924,7 @@ static void uninitialize(struct device_queue_manager *dqm)
 {
int i;
 
-   WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);
+   WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);
 
kfree(dqm->allocated_queues);
for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
@@ -1064,7 +1064,7 @@ static int initialize_cpsch(struct device_queue_manager 
*dqm)
 
mutex_init(>lock_hidden);
INIT_LIST_HEAD(>queues);
-   dqm->queue_count = dqm->processes_count = 0;
+   dqm->active_queue_count = dqm->processes_count = 0;

Re: [PATCH] drm/amdkfd: Fix a bug in SDMA RLC queue counting under HWS mode

2020-01-30 Thread Yong Zhao


True. It is a bug too. I am looking into it.

Yong

On 2020-01-30 5:51 p.m., Felix Kuehling wrote:

On 2020-01-30 17:29, Yong Zhao wrote:

The sdma_queue_count increment should be done before
execute_queues_cpsch(), which calls pm_calc_rlib_size() where
sdma_queue_count is used to calculate whether over_subscription is
triggered.

With the previous code, when a SDMA queue is created,
compute_queue_count in pm_calc_rlib_size() is one more than the
actual compute queue number, because the queue_count has been
incremented while sdma_queue_count has not. This patch fixes that.

Change-Id: I20353e657efd505353d0dd9f7eb2fab5085e7202
Signed-off-by: Yong Zhao 


Reviewed-by: Felix Kuehling 

But I took a look at pm_calc_rlib_size. I don't think subtracting 
dqm->sdma_queue_count from dqm->queue_count is quite correct, 
because sdma_queue_count counts all SDMA queues, while queue_count 
only counts active queues. So an application that creates inactive 
SDMA queues will also create errors here. We probably need to count 
active compute and active SDMA queues separately in DQM to fix this 
properly.


Regards,
  Felix



---
  drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 ++
  1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c

index 2870553a2ce0..80d22bf702e8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1237,16 +1237,18 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,

    list_add(>list, >queues_list);
  qpd->queue_count++;
+
+    if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+    dqm->sdma_queue_count++;
+    else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+    dqm->xgmi_sdma_queue_count++;
+
  if (q->properties.is_active) {
  dqm->queue_count++;
  retval = execute_queues_cpsch(dqm,
  KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
  }
  -    if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-    dqm->sdma_queue_count++;
-    else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
-    dqm->xgmi_sdma_queue_count++;
  /*
   * Unconditionally increment this counter, regardless of the 
queue's

   * type or whether the queue is active.

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Fix a bug in SDMA RLC queue counting under HWS mode

2020-01-30 Thread Yong Zhao

The sdma_queue_count increment should be done before
execute_queues_cpsch(), which calls pm_calc_rlib_size() where
sdma_queue_count is used to calculate whether over_subscription is
triggered.

With the previous code, when a SDMA queue is created,
compute_queue_count in pm_calc_rlib_size() is one more than the
actual compute queue number, because the queue_count has been
incremented while sdma_queue_count has not. This patch fixes that.

Change-Id: I20353e657efd505353d0dd9f7eb2fab5085e7202
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 2870553a2ce0..80d22bf702e8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1237,16 +1237,18 @@ static int create_queue_cpsch(struct 
device_queue_manager *dqm, struct queue *q,
 
list_add(>list, >queues_list);
qpd->queue_count++;
+
+   if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
+   dqm->sdma_queue_count++;
+   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
+   dqm->xgmi_sdma_queue_count++;
+
if (q->properties.is_active) {
dqm->queue_count++;
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
}
 
-   if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
-   dqm->sdma_queue_count++;
-   else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
-   dqm->xgmi_sdma_queue_count++;
/*
 * Unconditionally increment this counter, regardless of the queue's
 * type or whether the queue is active.
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/3] drm/amdgpu: Fix TLB invalidation request when using semaphore

2020-01-20 Thread Yong Zhao


Reviewed-by: Yong Zhao 

On 2020-01-17 8:37 p.m., Felix Kuehling wrote:

Use a more meaningful variable name for the invalidation request
that is distinct from the tmp variable that gets overwritten when
acquiring the invalidation semaphore.

Fixes: 00f607f38d82 ("drm/amdgpu: invalidate mmhub semaphore workaround in 
gmc9/gmc10")
Signed-off-by: Felix Kuehling 
---
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 5 +++--
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 8 
  2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 86f4ffe408e7..d914555e1212 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -262,7 +262,8 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
  {
bool use_semaphore = gmc_v10_0_use_invalidate_semaphore(adev, vmhub);
struct amdgpu_vmhub *hub = >vmhub[vmhub];
-   u32 tmp = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+   u32 inv_req = gmc_v10_0_get_invalidate_req(vmid, flush_type);
+   u32 tmp;
/* Use register 17 for GART */
const unsigned eng = 17;
unsigned int i;
@@ -289,7 +290,7 @@ static void gmc_v10_0_flush_vm_hub(struct amdgpu_device 
*adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM 
flush!\n");
}
  
-	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);

+   WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
  
  	/*

 * Issue a dummy read to wait for the ACK register to be cleared
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 54bdc1786ab1..6d95de1413c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -476,13 +476,13 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
  {
bool use_semaphore = gmc_v9_0_use_invalidate_semaphore(adev, vmhub);
const unsigned eng = 17;
-   u32 j, tmp;
+   u32 j, inv_req, tmp;
struct amdgpu_vmhub *hub;
  
  	BUG_ON(vmhub >= adev->num_vmhubs);
  
  	hub = >vmhub[vmhub];

-   tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type);
+   inv_req = gmc_v9_0_get_invalidate_req(vmid, flush_type);
  
  	/* This is necessary for a HW workaround under SRIOV as well

 * as GFXOFF under bare metal
@@ -493,7 +493,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
uint32_t req = hub->vm_inv_eng0_req + eng;
uint32_t ack = hub->vm_inv_eng0_ack + eng;
  
-		amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp,

+   amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, inv_req,
1 << vmid);
return;
}
@@ -521,7 +521,7 @@ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
DRM_ERROR("Timeout waiting for sem acquire in VM 
flush!\n");
}
  
-	WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);

+   WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, inv_req);
  
  	/*

 * Issue a dummy read to wait for the ACK register to be cleared

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Fix a compiling warning

2020-01-10 Thread Yong Zhao

The warning was introduced in

commit b4267f2e687ff5e0402ab915c9a4d47f9a4eb73e
Author: Yong Zhao 
Date:   Fri Dec 13 11:31:48 2019 -0500

drm/amdkfd: Improve function get_sdma_rlc_reg_offset()

Change-Id: I87da4f1ad8a190327a4a71f0ff78812cb942d6e0
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 2b26925623eb..f9011a07cb90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -71,7 +71,7 @@ static uint32_t get_sdma_rlc_reg_offset(struct amdgpu_device 
*adev,
unsigned int engine_id,
unsigned int queue_id)
 {
-   uint32_t sdma_engine_reg_base;
+   uint32_t sdma_engine_reg_base = 0;
uint32_t sdma_rlc_reg_offset;
 
switch (engine_id) {
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Add a message when SW scheduler is used

2020-01-10 Thread Yong Zhao

The SW scheduler was previously called the non-HW scheduler, or non-HWS.
This message is useful when triaging issues from dmesg.

Change-Id: I625518c88c043df5f60409d1ca520e7fc032251f
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d7eb6ac37f62..2870553a2ce0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -934,6 +934,7 @@ static void uninitialize(struct device_queue_manager *dqm)
 
 static int start_nocpsch(struct device_queue_manager *dqm)
 {
+   pr_info("SW scheduler is used");
init_interrupts(dqm);

if (dqm->dev->device_info->asic_family == CHIP_HAWAII)
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Improve function get_sdma_rlc_reg_offset()

2020-01-07 Thread Yong Zhao

The SOC15_REG_OFFSET() macro needs to dereference the adev->reg_offset[IP]
pointer, which is NULL when there are fewer than 8 SDMA engines. Avoid
that by computing the register base only for the requested engine instead
of unconditionally initializing an array covering all 8 engines.

Change-Id: Iabae9bff7546b344720905d5d4a5cfc066a79d25
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 65 ---
 1 file changed, 43 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 3c119407dc34..2b26925623eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -71,32 +71,53 @@ static uint32_t get_sdma_rlc_reg_offset(struct 
amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
 {
-   uint32_t sdma_engine_reg_base[8] = {
-   SOC15_REG_OFFSET(SDMA0, 0,
-mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA1, 0,
-mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA2, 0,
-mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA3, 0,
-mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA4, 0,
-mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA5, 0,
-mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA6, 0,
-mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA7, 0,
-mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL
-   };
-
-   uint32_t retval = sdma_engine_reg_base[engine_id]
+   uint32_t sdma_engine_reg_base;
+   uint32_t sdma_rlc_reg_offset;
+
+   switch (engine_id) {
+   case 0:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+   mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+   break;
+   case 1:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+   mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
+   break;
+   case 2:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
+   mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+   break;
+   case 3:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
+   mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
+   break;
+   case 4:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
+   mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
+   break;
+   case 5:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
+   mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
+   break;
+   case 6:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
+   mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
+   break;
+   case 7:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
+   mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
+   break;
+   default:
+   break;
+   }
+
+   sdma_rlc_reg_offset = sdma_engine_reg_base
+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
 
pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
-   queue_id, retval);
+   queue_id, sdma_rlc_reg_offset);
 
-   return retval;
+   return sdma_rlc_reg_offset;
 }
 
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock

2020-01-02 Thread Yong Zhao


One comment inline.

On 2020-01-02 4:11 p.m., Alex Sierra wrote:

[Why]
Avoid filesystem reclaim while the eviction lock is held, because the
lock can be taken from an MMU notifier.

[How]
Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
Using memalloc_nofs_save / memalloc_nofs_restore API.

Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
Signed-off-by: Alex Sierra 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 40 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  6 +++-
  2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b999b67ff57a..d6aba4f9df74 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -82,6 +82,32 @@ struct amdgpu_prt_cb {
struct dma_fence_cb cb;
  };
  
+/**

+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+   mutex_lock(>eviction_lock);
+   vm->saved_flags = memalloc_nofs_save();
[yz] I feel memalloc_nofs_save() should be called before mutex_lock(). 
Not too sure though.

+}
+
+static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+   if (mutex_trylock(>eviction_lock)) {
+   vm->saved_flags = memalloc_nofs_save();
+   return 1;
+   }
+   return 0;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+   memalloc_nofs_restore(vm->saved_flags);
+   mutex_unlock(>eviction_lock);
+}
+
  /**
   * amdgpu_vm_level_shift - return the addr shift for each level
   *
@@ -678,9 +704,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
}
}
  
-	mutex_lock(>eviction_lock);

+   amdgpu_vm_eviction_lock(vm);
vm->evicting = false;
-   mutex_unlock(>eviction_lock);
+   amdgpu_vm_eviction_unlock(vm);
  
  	return 0;

  }
@@ -1559,7 +1585,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_KFD;
  
-	mutex_lock(>eviction_lock);

+   amdgpu_vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
goto error_unlock;
@@ -1576,7 +1602,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
r = vm->update_funcs->commit(, fence);
  
  error_unlock:

-   mutex_unlock(>eviction_lock);
+   amdgpu_vm_eviction_unlock(vm);
return r;
  }
  
@@ -2537,18 +2563,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)

return false;
  
  	/* Try to block ongoing updates */

-   if (!mutex_trylock(_base->vm->eviction_lock))
+   if (!amdgpu_vm_eviction_trylock(bo_base->vm))
return false;
  
  	/* Don't evict VM page tables while they are updated */

if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
!dma_fence_is_signaled(bo_base->vm->last_delayed)) {
-   mutex_unlock(_base->vm->eviction_lock);
+   amdgpu_vm_eviction_unlock(bo_base->vm);
return false;
}
  
  	bo_base->vm->evicting = true;

-   mutex_unlock(_base->vm->eviction_lock);
+   amdgpu_vm_eviction_unlock(bo_base->vm);
return true;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

index 100547f094ff..c21a36bebc0c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -30,6 +30,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "amdgpu_sync.h"

  #include "amdgpu_ring.h"
@@ -242,9 +243,12 @@ struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached   va;
  
-	/* Lock to prevent eviction while we are updating page tables */

+   /* Lock to prevent eviction while we are updating page tables
+* use vm_eviction_lock/unlock(vm)
+*/
struct mutexeviction_lock;
boolevicting;
+   unsigned intsaved_flags;
  
  	/* BOs who needs a validation */

struct list_headevicted;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid

2020-01-02 Thread Yong Zhao


See one inline comment. Other than that:

Acked-by: Yong Zhao 

On 2020-01-02 4:11 p.m., Alex Sierra wrote:

This can be used directly from amdgpu and amdkfd to invalidate
TLB through pasid.
It supports gmc v7, v8, v9 and v10.

Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
Signed-off-by: Alex Sierra 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 87 
  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 +
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 89 +
  5 files changed, 249 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index b499a3de8bb6..b6413a56f546 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
/* flush the vm tlb via mmio */
void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type);
+   /* flush the vm tlb via pasid */
+   int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+   uint32_t flush_type, bool all_hub);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
   uint64_t pd_addr);
@@ -216,6 +219,9 @@ struct amdgpu_gmc {
  };
  
  #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))

+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+   ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+   ((adev), (pasid), (type), (allhub)))
  #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) 
(r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
  #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) 
(r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
  #define amdgpu_gmc_map_mtype(adev, flags) 
(adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index f5725336a5f2..11a2252e60f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
  #include "hdp/hdp_5_0_0_sh_mask.h"
  #include "gc/gc_10_1_0_sh_mask.h"
  #include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
  #include "dcn/dcn_2_0_0_offset.h"
  #include "dcn/dcn_2_0_0_sh_mask.h"
  #include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
  #include "navi10_enum.h"
  
  #include "soc15.h"

+#include "soc15d.h"
  #include "soc15_common.h"
  
  #include "nbio_v2_3.h"

@@ -234,6 +237,48 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct 
amdgpu_device *adev,
(!amdgpu_sriov_vf(adev)));
  }
  
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(

+   struct amdgpu_device *adev,
+   uint8_t vmid, uint16_t *p_pasid)
+{
+   uint32_t value;
+
+   value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
++ vmid);
+   *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+   return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static int gmc_v10_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
+   uint16_t pasid, uint32_t flush_type,
+   bool all_hub)
+{
+   signed long r;
+   uint32_t seq;
+   struct amdgpu_ring *ring = >gfx.kiq.ring;
+
+   spin_lock(>gfx.kiq.ring_lock);
+   amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
+   amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+   amdgpu_ring_write(ring,
+   PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+   PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+   PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+   PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+   amdgpu_fence_emit_polling(ring, );
+   amdgpu_ring_commit(ring);
+   spin_unlock(>gfx.kiq.ring_lock);
+
+   r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+   if (r < 1) {
+   DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+   return -ETIME;
+   }
+
+   return 0;
+}
+
  /*
   * GART
   * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -380,6 +425,47 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t v

Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd

2019-12-20 Thread Yong Zhao



On 2019-12-20 6:50 p.m., Yong Zhao wrote:

Inline.

On 2019-12-20 4:35 p.m., Felix Kuehling wrote:

On 2019-12-20 1:24, Alex Sierra wrote:

[Why]
The TLB flush method that used the kfd2kgd interface has been deprecated.
The implementation now lives in the amdgpu_amdkfd API.

[How]
TLB flush functions now implemented in amdgpu_amdkfd.

Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
Signed-off-by: Alex Sierra 


Looks good to me. See my comment about the TODO inline.



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 
++

  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
  drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 --
  3 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index d3da9dde4ee1..b7f6e70c5762 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct 
amdgpu_device *adev, u32 vmid)

  return false;
  }
  +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, 
uint16_t vmid)

+{
+    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+    /* TODO: condition missing for FAMILY above NV */


I'm not sure what's missing here. NV and above don't need any special 
treatment. Since SDMA is connected to GFXHUB on NV, only the GFXHUB 
needs to be flushed.


Regards,
  Felix



+    if (adev->family == AMDGPU_FAMILY_AI) {
+    int i;
+
+    for (i = 0; i < adev->num_vmhubs; i++)
+    amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+    } else {
+    amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+    }


This if else can be unified by

for (i = 0; i < adev->num_vmhubs; i++)

    amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);


+
+    return 0;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t 
pasid)

+{
+    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+    uint32_t flush_type = 0;
+    bool all_hub = false;
+
+    if (adev->gmc.xgmi.num_physical_nodes &&
+    adev->asic_type == CHIP_VEGA20)
+    flush_type = 2;
+
+    if (adev->family == AMDGPU_FAMILY_AI)
+    all_hub = true;
+
+    return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, 
all_hub);
The all_hub parameter can be inferred from num_vmhubs in 
flush_gpu_tlb_pasid(), so it can be optimized out here.

+}
+
  bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
  {
  struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index 069d5d230810..47b0f2957d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, 
enum kgd_engine_type engine,

  uint32_t *ib_cmd, uint32_t ib_len);
  void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
  bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t 
vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t 
pasid);
    bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 
vmid);
  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c

index 536a153ac9a4..25b90f70aecd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -32,6 +32,7 @@
  #include 
  #include 
  #include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
    struct mm_struct;
  @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev 
*dev, struct kfd_process *process,

  void kfd_flush_tlb(struct kfd_process_device *pdd)
  {
  struct kfd_dev *dev = pdd->dev;
-    const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
    if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
  /* Nothing to flush until a VMID is assigned, which
   * only happens when the first queue is created.
   */
  if (pdd->qpd.vmid)
-    f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+    amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+    pdd->qpd.vmid);
  } else {
-    f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+    amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+    pdd->process->pasid);
  }
  }

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7Cyong.zhao%40amd.com%7C3a33649d2a804998d00408d785a7776f%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124827007059728sdata=fNTunmAJObxfgbJBlNWWXu

Re: [PATCH 2/5] drm/amdgpu: export function to flush TLB via pasid

2019-12-20 Thread Yong Zhao




On 2019-12-20 1:24 a.m., Alex Sierra wrote:

This can be used directly from amdgpu and amdkfd to invalidate
TLB through pasid.
It supports gmc v7, v8, v9 and v10.

Change-Id: I6563a8eba2e42d1a67fa2547156c20da41d1e490
Signed-off-by: Alex Sierra 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h |  6 ++
  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c  | 81 
  drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c   | 33 ++
  drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c   | 34 ++
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c   | 84 +
  5 files changed, 238 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index b499a3de8bb6..b6413a56f546 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -92,6 +92,9 @@ struct amdgpu_gmc_funcs {
/* flush the vm tlb via mmio */
void (*flush_gpu_tlb)(struct amdgpu_device *adev, uint32_t vmid,
uint32_t vmhub, uint32_t flush_type);
+   /* flush the vm tlb via pasid */
+   int (*flush_gpu_tlb_pasid)(struct amdgpu_device *adev, uint16_t pasid,
+   uint32_t flush_type, bool all_hub);
/* flush the vm tlb via ring */
uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid,
   uint64_t pd_addr);
@@ -216,6 +219,9 @@ struct amdgpu_gmc {
  };
  
  #define amdgpu_gmc_flush_gpu_tlb(adev, vmid, vmhub, type) ((adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (vmhub), (type)))

+#define amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, type, allhub) \
+   ((adev)->gmc.gmc_funcs->flush_gpu_tlb_pasid \
+   ((adev), (pasid), (type), (allhub)))
  #define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) 
(r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr))
  #define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) 
(r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid))
  #define amdgpu_gmc_map_mtype(adev, flags) 
(adev)->gmc.gmc_funcs->map_mtype((adev),(flags))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index f5725336a5f2..b1a5408a8d7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -30,6 +30,8 @@
  #include "hdp/hdp_5_0_0_sh_mask.h"
  #include "gc/gc_10_1_0_sh_mask.h"
  #include "mmhub/mmhub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_sh_mask.h"
+#include "athub/athub_2_0_0_offset.h"
  #include "dcn/dcn_2_0_0_offset.h"
  #include "dcn/dcn_2_0_0_sh_mask.h"
  #include "oss/osssys_5_0_0_offset.h"
@@ -37,6 +39,7 @@
  #include "navi10_enum.h"
  
  #include "soc15.h"

+#include "soc15d.h"
  #include "soc15_common.h"
  
  #include "nbio_v2_3.h"

@@ -234,6 +237,48 @@ static bool gmc_v10_0_use_invalidate_semaphore(struct 
amdgpu_device *adev,
(!amdgpu_sriov_vf(adev)));
  }
  
+static bool gmc_v10_0_get_atc_vmid_pasid_mapping_info(

+   struct amdgpu_device *adev,
+   uint8_t vmid, uint16_t *p_pasid)
+{
+   uint32_t value;
+
+   value = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
++ vmid);
+   *p_pasid = value & ATC_VMID0_PASID_MAPPING__PASID_MASK;
+
+   return !!(value & ATC_VMID0_PASID_MAPPING__VALID_MASK);
+}
+
+static int gmc_v10_0_invalidate_tlbs_with_kiq(struct amdgpu_device *adev,
+   uint16_t pasid, uint32_t flush_type,
+   bool all_hub)
+{
+   signed long r;
+   uint32_t seq;
+   struct amdgpu_ring *ring = >gfx.kiq.ring;
+
+   spin_lock(>gfx.kiq.ring_lock);
+   amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
+   amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
+   amdgpu_ring_write(ring,
+   PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
+   PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
+   PACKET3_INVALIDATE_TLBS_PASID(pasid) |
+   PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
+   amdgpu_fence_emit_polling(ring, );
+   amdgpu_ring_commit(ring);
+   spin_unlock(>gfx.kiq.ring_lock);
+
+   r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+   if (r < 1) {
+   DRM_ERROR("wait for kiq fence error: %ld.\n", r);
+   return -ETIME;
+   }
+
+   return 0;
+}
+
  /*
   * GART
   * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -380,6 +425,41 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device 
*adev, uint32_t vmid,
DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
  }
  
+/**

+ * gmc_v10_0_flush_gpu_tlb_pasid - tlb flush via pasid
+ *
+ * @adev: amdgpu_device pointer
+ * @pasid: pasid to be flush
+ *
+ * Flush the TLB for the requested pasid.
+ */

Re: [PATCH 3/5] drm/amdgpu: GPU TLB flush API moved to amdgpu_amdkfd

2019-12-20 Thread Yong Zhao


Inline.

On 2019-12-20 4:35 p.m., Felix Kuehling wrote:

On 2019-12-20 1:24, Alex Sierra wrote:

[Why]
The TLB flush method that used the kfd2kgd interface has been deprecated.
The implementation now lives in the amdgpu_amdkfd API.

[How]
TLB flush functions now implemented in amdgpu_amdkfd.

Change-Id: Ic51cccdfe6e71288d78da772b6e1b6ced72f8ef7
Signed-off-by: Alex Sierra 


Looks good to me. See my comment about the TODO inline.



---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
  drivers/gpu/drm/amd/amdkfd/kfd_process.c   |  8 --
  3 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index d3da9dde4ee1..b7f6e70c5762 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -634,6 +634,38 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct 
amdgpu_device *adev, u32 vmid)

  return false;
  }
  +int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t 
vmid)

+{
+    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+    /* TODO: condition missing for FAMILY above NV */


I'm not sure what's missing here. NV and above don't need any special 
treatment. Since SDMA is connected to GFXHUB on NV, only the GFXHUB 
needs to be flushed.


Regards,
  Felix



+    if (adev->family == AMDGPU_FAMILY_AI) {
+    int i;
+
+    for (i = 0; i < adev->num_vmhubs; i++)
+    amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);
+    } else {
+    amdgpu_gmc_flush_gpu_tlb(adev, vmid, AMDGPU_GFXHUB_0, 0);
+    }


This if else can be unified by

for (i = 0; i < adev->num_vmhubs; i++)

    amdgpu_gmc_flush_gpu_tlb(adev, vmid, i, 0);


+
+    return 0;
+}
+
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t 
pasid)

+{
+    struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+    uint32_t flush_type = 0;
+    bool all_hub = false;
+
+    if (adev->gmc.xgmi.num_physical_nodes &&
+    adev->asic_type == CHIP_VEGA20)
+    flush_type = 2;
+
+    if (adev->family == AMDGPU_FAMILY_AI)
+    all_hub = true;
+
+    return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, 
all_hub);

+}
+
  bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
  {
  struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h

index 069d5d230810..47b0f2957d1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -136,6 +136,8 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, 
enum kgd_engine_type engine,

  uint32_t *ib_cmd, uint32_t ib_len);
  void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
  bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
+int amdgpu_amdkfd_flush_gpu_tlb_vmid(struct kgd_dev *kgd, uint16_t 
vmid);
+int amdgpu_amdkfd_flush_gpu_tlb_pasid(struct kgd_dev *kgd, uint16_t 
pasid);
    bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 
vmid);
  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process.c

index 536a153ac9a4..25b90f70aecd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -32,6 +32,7 @@
  #include 
  #include 
  #include "amdgpu_amdkfd.h"
+#include "amdgpu.h"
    struct mm_struct;
  @@ -1152,16 +1153,17 @@ int kfd_reserved_mem_mmap(struct kfd_dev 
*dev, struct kfd_process *process,

  void kfd_flush_tlb(struct kfd_process_device *pdd)
  {
  struct kfd_dev *dev = pdd->dev;
-    const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
    if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
  /* Nothing to flush until a VMID is assigned, which
   * only happens when the first queue is created.
   */
  if (pdd->qpd.vmid)
-    f2g->invalidate_tlbs_vmid(dev->kgd, pdd->qpd.vmid);
+    amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->kgd,
+    pdd->qpd.vmid);
  } else {
-    f2g->invalidate_tlbs(dev->kgd, pdd->process->pasid);
+    amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->kgd,
+    pdd->process->pasid);
  }
  }

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfxdata=02%7C01%7Cyong.zhao%40amd.com%7C196afdae9b69425e58aa08d78594927a%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637124745521063472sdata=5ZZNtq%2FyOZX%2BdQrG1ydoidOU90ZrbWGz9tnuycEg4F4%3Dreserved=0 


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/5] drm/amdgpu: Avoid reclaim fs while eviction lock

2019-12-20 Thread Yong Zhao


One style comment inline.

Yong

On 2019-12-20 1:24 a.m., Alex Sierra wrote:

[Why]
Avoid filesystem reclaim while the eviction lock is held, because the
lock can be taken from an MMU notifier.

[How]
Setting PF_MEMALLOC_NOFS flags while eviction mutex is locked.
Using memalloc_nofs_save / memalloc_nofs_restore API.

Change-Id: I5531c9337836e7d4a430df3f16dcc82888e8018c
Signed-off-by: Alex Sierra 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 ++---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 28 +-
  2 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b999b67ff57a..b36daa6230fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -678,9 +678,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, 
struct amdgpu_vm *vm,
}
}
  
-	mutex_lock(&vm->eviction_lock);

+   vm_eviction_lock(vm);
vm->evicting = false;
-   mutex_unlock(&vm->eviction_lock);
+   vm_eviction_unlock(vm);
  
  	return 0;

  }
@@ -1559,7 +1559,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_KFD;
  
-	mutex_lock(&vm->eviction_lock);

+   vm_eviction_lock(vm);
if (vm->evicting) {
r = -EBUSY;
goto error_unlock;
@@ -1576,7 +1576,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
r = vm->update_funcs->commit(&params, fence);
  
  error_unlock:

-   mutex_unlock(&vm->eviction_lock);
+   vm_eviction_unlock(vm);
return r;
  }
  
@@ -2537,18 +2537,18 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo)

return false;
  
  	/* Try to block ongoing updates */

-   if (!mutex_trylock(&bo_base->vm->eviction_lock))
+   if (!vm_eviction_trylock(bo_base->vm))
return false;
  
  	/* Don't evict VM page tables while they are updated */

if (!dma_fence_is_signaled(bo_base->vm->last_direct) ||
!dma_fence_is_signaled(bo_base->vm->last_delayed)) {
-   mutex_unlock(&bo_base->vm->eviction_lock);
+   vm_eviction_unlock(bo_base->vm);
return false;
}
  
  	bo_base->vm->evicting = true;

-   mutex_unlock(&bo_base->vm->eviction_lock);
+   vm_eviction_unlock(bo_base->vm);
return true;
  }
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h

index 100547f094ff..d35aa76469ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -30,6 +30,7 @@
  #include 
  #include 
  #include 
+#include 
  
  #include "amdgpu_sync.h"

  #include "amdgpu_ring.h"
@@ -242,9 +243,12 @@ struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached   va;
  
-	/* Lock to prevent eviction while we are updating page tables */

+   /* Lock to prevent eviction while we are updating page tables
+* use vm_eviction_lock/unlock(vm)
+*/
struct mutexeviction_lock;
boolevicting;
+   unsigned intsaved_flags;

[yz] Tabs should be used here instead of spaces.
  
  	/* BOs who needs a validation */

struct list_headevicted;
@@ -436,4 +440,26 @@ void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
  void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
  
+/* vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS

+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void vm_eviction_lock(struct amdgpu_vm *vm)
+{
+   mutex_lock(&vm->eviction_lock);
+   vm->saved_flags = memalloc_nofs_save();
+}
+static inline int vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+   if (mutex_trylock(&vm->eviction_lock)) {
+   vm->saved_flags = memalloc_nofs_save();
+   return 1;
+   }
+   return 0;
+}
+static inline void vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+   memalloc_nofs_restore(vm->saved_flags);
+   mutex_unlock(&vm->eviction_lock);
+}
  #endif

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Improve function get_sdma_rlc_reg_offset()

2019-12-13 Thread Yong Zhao

This prevents the NULL pointer access when there are fewer than 8 sdma
engines.

Change-Id: Iabae9bff7546b344720905d5d4a5cfc066a79d25
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 64 ---
 1 file changed, 42 insertions(+), 22 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 3c119407dc34..2ad088f10493 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -71,32 +71,52 @@ static uint32_t get_sdma_rlc_reg_offset(struct 
amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
 {
-   uint32_t sdma_engine_reg_base[8] = {
-   SOC15_REG_OFFSET(SDMA0, 0,
-mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA1, 0,
-mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA2, 0,
-mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA3, 0,
-mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA4, 0,
-mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA5, 0,
-mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA6, 0,
-mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL,
-   SOC15_REG_OFFSET(SDMA7, 0,
-mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL
-   };
-
-   uint32_t retval = sdma_engine_reg_base[engine_id]
+   uint32_t sdma_engine_reg_base;
+   uint32_t sdma_rlc_reg_offset;
+
+   switch (engine_id) {
+   case 0:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA0, 0,
+   mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL;
+   break;
+   case 1:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA1, 0,
+   mmSDMA1_RLC0_RB_CNTL) - mmSDMA1_RLC0_RB_CNTL;
+   break;
+   case 2:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA2, 0,
+   mmSDMA2_RLC0_RB_CNTL) - mmSDMA2_RLC0_RB_CNTL;
+   break;
+   case 3:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA3, 0,
+   mmSDMA3_RLC0_RB_CNTL) - mmSDMA3_RLC0_RB_CNTL;
+   break;
+   case 4:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA4, 0,
+   mmSDMA4_RLC0_RB_CNTL) - mmSDMA4_RLC0_RB_CNTL;
+   break;
+   case 5:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA5, 0,
+   mmSDMA5_RLC0_RB_CNTL) - mmSDMA5_RLC0_RB_CNTL;
+   break;
+   case 6:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA6, 0,
+   mmSDMA6_RLC0_RB_CNTL) - mmSDMA6_RLC0_RB_CNTL;
+   break;
+   case 7:
+   sdma_engine_reg_base = SOC15_REG_OFFSET(SDMA7, 0,
+   mmSDMA7_RLC0_RB_CNTL) - mmSDMA7_RLC0_RB_CNTL;
+   break;
+
+   }
+
+   sdma_rlc_reg_offset = sdma_engine_reg_base
+ queue_id * (mmSDMA0_RLC1_RB_CNTL - mmSDMA0_RLC0_RB_CNTL);
 
pr_debug("RLC register offset for SDMA%d RLC%d: 0x%x\n", engine_id,
-   queue_id, retval);
+   queue_id, sdma_rlc_reg_offset);
 
-   return retval;
+   return sdma_rlc_reg_offset;
 }
 
 static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/2] drm/amdkfd: Add Arcturus specific set_vm_context_page_table_base()

2019-12-12 Thread Yong Zhao


The first one was already fixed and pushed a week ago.

Regards,

Yong

On 2019-12-12 7:25 p.m., Felix Kuehling wrote:
I agree with Christian's comments on patch 1. With those fixed, the 
series is


Reviewed-by: Felix Kuehling 

Regards,
  Felix

On 2019-12-02 20:42, Yong Zhao wrote:

Since Arcturus has its own function pointer, we can move the
Arcturus-specific logic there rather than leaving it entangled with
other GFX9 chips.

Change-Id: I7df7c004a0c8ac0616ded0e65144670df50f92a7
Signed-off-by: Yong Zhao 
---
  .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 20 ++-
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 14 +++--
  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 --
  3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c

index b6713e0ed1b2..3c119407dc34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -46,6 +46,8 @@
  #include "soc15.h"
  #include "soc15d.h"
  #include "amdgpu_amdkfd_gfx_v9.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v9_4.h"
    #define HQD_N_REGS 56
  #define DUMP_REG(addr) do {    \
@@ -258,6 +260,22 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev 
*kgd, void *mqd,

  return 0;
  }
  +static void kgd_set_vm_context_page_table_base(struct kgd_dev 
*kgd, uint32_t vmid,

+    uint64_t page_table_base)
+{
+    struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+    if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+    pr_err("trying to set page table base for wrong VMID %u\n",
+   vmid);
+    return;
+    }
+
+    mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
+
+    gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
  const struct kfd2kgd_calls arcturus_kfd2kgd = {
  .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
  .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -277,7 +295,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
  .get_atc_vmid_pasid_mapping_info =
  kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
  .get_tile_config = kgd_gfx_v9_get_tile_config,
-    .set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
+    .set_vm_context_page_table_base = 
kgd_set_vm_context_page_table_base,

  .invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
  .invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
  .get_hive_id = amdgpu_amdkfd_get_hive_id,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c

index 6f1a4676ddde..e7861f0ef415 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -40,7 +40,6 @@
  #include "soc15d.h"
  #include "mmhub_v1_0.h"
  #include "gfxhub_v1_0.h"
-#include "mmhub_v9_4.h"
      enum hqd_dequeue_request_type {
@@ -758,8 +757,8 @@ uint32_t 
kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd,

  return 0;
  }
  -void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev 
*kgd, uint32_t vmid,

-    uint64_t page_table_base)
+static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev 
*kgd,

+    uint32_t vmid, uint64_t page_table_base)
  {
  struct amdgpu_device *adev = get_amdgpu_device(kgd);
  @@ -769,14 +768,7 @@ void 
kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, 
uint32_t vmi

  return;
  }
  -    /* TODO: take advantage of per-process address space size. For
- * now, all processes share the same address space size, like
- * on GFX8 and older.
- */
-    if (adev->asic_type == CHIP_ARCTURUS) {
-    mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
-    } else
-    mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+    mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
    gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h

index d9e9ad22b2bd..02b1426d17d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -57,8 +57,6 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct 
kgd_dev *kgd,

    bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
  uint8_t vmid, uint16_t *p_pasid);
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, 
uint32_t vmid,

-    uint64_t page_table_base);
  int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
  int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t 
vmid);

  int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,

_

[PATCH] drm/amdgpu: Add CU info print log

2019-12-11 Thread Yong Zhao

The log will be useful for easily getting the CU info on various
emulation models or ASICs.

Change-Id: Ic1c914938aa3445d8dbfdf6a237bc1d58b0d5267
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8992506541d8..c778b6db5e42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3032,6 +3032,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto failed;
}
 
+   DRM_DEBUG("SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
+   adev->gfx.config.max_shader_engines,
+   adev->gfx.config.max_sh_per_se,
+   adev->gfx.config.max_cu_per_sh,
+   adev->gfx.cu_info.number);
+
adev->accel_working = true;
 
amdgpu_vm_check_compute_bug(adev);
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Add CU info print log

2019-12-11 Thread Yong Zhao

The log will be useful for easily getting the CU info on various
emulation models or ASICs.

Change-Id: Ic1c914938aa3445d8dbfdf6a237bc1d58b0d5267
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c| 6 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 6 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c| 2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 ++
 7 files changed, 22 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8992506541d8..df9732510012 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1529,6 +1529,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct 
amdgpu_device *adev)
adev->gfx.config.max_shader_engines = 
le32_to_cpu(gpu_info_fw->gc_num_se);
adev->gfx.config.max_cu_per_sh = 
le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
adev->gfx.config.max_sh_per_se = 
le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
+
+   DRM_INFO("gpu_info: SE %d, SH per SE %d, CU per SH %d\n",
+   adev->gfx.config.max_shader_engines,
+   adev->gfx.config.max_sh_per_se,
+   adev->gfx.config.max_cu_per_sh);
+
adev->gfx.config.max_backends_per_se = 
le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
adev->gfx.config.max_texture_channel_caches =
le32_to_cpu(gpu_info_fw->gc_num_tccs);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index f95092741c38..8001a067700c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -388,6 +388,12 @@ int amdgpu_discovery_get_gfx_info(struct amdgpu_device 
*adev)
adev->gfx.config.max_cu_per_sh = 2 * 
(le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
  
le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
+
+   DRM_INFO("discovery: SE %d, SH per SE %d, CU per SH %d\n",
+   adev->gfx.config.max_shader_engines,
+   adev->gfx.config.max_sh_per_se,
+   adev->gfx.config.max_cu_per_sh);
+
adev->gfx.config.max_backends_per_se = 
le32_to_cpu(gc_info->gc_num_rb_per_se);
adev->gfx.config.max_texture_channel_caches = 
le32_to_cpu(gc_info->gc_num_gl2c);
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 8cdef79de9d4..a26892e71680 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -5432,6 +5432,8 @@ static int gfx_v10_0_get_cu_info(struct amdgpu_device 
*adev,
cu_info->ao_cu_mask = ao_cu_mask;
cu_info->simd_per_cu = NUM_SIMD_PER_CU;
 
+   DRM_INFO("active_cu_number: %d\n", cu_info->number);
+
return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 95bb2422b27c..bb05a94690d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -3620,6 +3620,8 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device 
*adev)
 
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
+
+   DRM_INFO("active_cu_number: %d\n", cu_info->number);
 }
 
 const struct amdgpu_ip_block_version gfx_v6_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 648d767d14e7..6d16216d5c7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -5159,6 +5159,8 @@ static void gfx_v7_0_get_cu_info(struct amdgpu_device 
*adev)
cu_info->max_scratch_slots_per_cu = 32;
cu_info->wave_front_size = 64;
cu_info->lds_size = 64;
+
+   DRM_INFO("active_cu_number: %d\n", cu_info->number);
 }
 
 const struct amdgpu_ip_block_version gfx_v7_0_ip_block =
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8b9f440688ed..1073eb5c3cec 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -7141,6 +7141,8 @@ static void gfx_v8_0_get_cu_info(struct amdgpu_device 
*adev)
cu_info->max_scratch_slots_per_cu = 32;
cu_info->wave_front_size = 64;
cu_info->lds_size = 64;
+
+   DRM_INFO("active_cu_number: %d\n

Re: [PATCH] drm/amdgpu/gfx: Improvement on EDC GPR workarounds

2019-12-03 Thread Yong Zhao


Not sure whether we should add the issue ticket info here.


Reviewed-by: Yong Zhao 



On 2019-12-03 3:45 p.m., James Zhu wrote:

SPI limits total CS waves in flight per SE to no more than 32 * num_cu and
we need to stuff 40 waves on a CU to completely clean the SGPR. This is
accomplished in the WR by cleaning the SE in two steps, half of the CU per
step.

Signed-off-by: James Zhu 
---
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 83 ++-
  1 file changed, 63 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6a251a3..147c08f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3938,24 +3938,37 @@ static const struct soc15_reg_entry vgpr_init_regs[] = {
 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x },
 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x },
 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x100 }, /* 
CU_GROUP_COUNT=1 */
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x000 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 4 },
 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x17f }, /* VGPRS=15 
(256 logical VGPRs, SGPRS=1 (16 SGPRs, BULKY=1 */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x3f },
 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x40 },  /* 64KB LDS */
  };
  
-static const struct soc15_reg_entry sgpr_init_regs[] = {

-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x100 }, /* 
CU_GROUP_COUNT=1 */
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 256*2 },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 1 },
+static const struct soc15_reg_entry sgpr1_init_regs[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0x00ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0x00ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0x00ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0x00ff },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x000 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
+};
+
+static const struct soc15_reg_entry sgpr2_init_regs[] = {
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xff00 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_RESOURCE_LIMITS), 0x000 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_X), 0x40 },
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Y), 8 },
 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_NUM_THREAD_Z), 1 },
-   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x340 }, /* SGPRS=13 (112 
GPRS) */
+   { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC1), 0x240 }, /* (80 GPRS) */
 { SOC15_REG_ENTRY(GC, 0, mmCOMPUTE_PGM_RSRC2), 0x0 },
  };
  
@@ -4065,7 +4078,9 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)

total_size =
((ARRAY_SIZE(vgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
total_size +=
-   ((ARRAY_SIZE(sgpr_init_regs) * 3) + 4 + 5 + 2) * 4;
+   ((ARRAY_SIZE(sgpr1_init_regs) * 3) + 4 + 5 + 2) * 4;
+   total_size +=
+   ((ARRAY_SIZE(sgpr2_init_regs) * 3) + 4 + 5 + 2) * 4;
total_size = ALIGN(total_size, 256);
vgpr_offset = total_size;
total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
@@ -4108,7 +4123,35 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct 
amdgpu_device *adev)
  
  	/* write dispatch packet */

ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
-   ib.ptr[ib.length_dw++] = 256; /* x */
+   ib.ptr[ib.length_dw++] = 0x40*2; /* x */
+   ib.ptr[ib.length_dw++] = 1; /* y */
+   ib.ptr[ib.length_dw++] = 1; /* z */
+   ib.ptr[ib.length_dw

[PATCH] drm/amdkfd: Contain MMHUB number in mmhub_v9_4_setup_vm_pt_regs()

2019-12-03 Thread Yong Zhao

Adjust the exposed function prototype so that the caller does not need
to know the MMHUB number.

Change-Id: I4420d1715984f703954f074682b075fc59e2a330
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  6 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h |  8 
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c   | 14 --
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h   |  2 ++
 4 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 47c853ef1051..6f1a4676ddde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -40,7 +40,7 @@
 #include "soc15d.h"
 #include "mmhub_v1_0.h"
 #include "gfxhub_v1_0.h"
-#include "gmc_v9_0.h"
+#include "mmhub_v9_4.h"
 
 
 enum hqd_dequeue_request_type {
@@ -774,9 +774,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct 
kgd_dev *kgd, uint32_t vmi
 * on GFX8 and older.
 */
if (adev->asic_type == CHIP_ARCTURUS) {
-   /* Two MMHUBs */
-   mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base);
-   mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base);
+   mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
} else
mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
index 971c0840358f..49e8be761214 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
@@ -36,12 +36,4 @@
 
 extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
 extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
-
-/* amdgpu_amdkfd*.c */
-void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
-   uint64_t value);
-void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
-   uint64_t value);
-void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
-   uint32_t vmid, uint64_t value);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 8599bfdb9a9e..d9301e80522a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -54,7 +54,7 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
return base;
 }
 
-void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
+static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int 
hubid,
uint32_t vmid, uint64_t value)
 {
/* two registers distance between mmVML2VC0_VM_CONTEXT0_* to
@@ -80,7 +80,7 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct 
amdgpu_device *adev,
 {
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
 
-   mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base);
+   mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base);
 
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
@@ -101,6 +101,16 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct 
amdgpu_device *adev,
(u32)(adev->gmc.gart_end >> 44));
 }
 
+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+   uint64_t page_table_base)
+{
+   int i;
+
+   for (i = 0; i < MMHUB_NUM_INSTANCES; i++)
+   mmhub_v9_4_setup_hubid_vm_pt_regs(adev, i, vmid,
+   page_table_base);
+}
+
 static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
 int hubid)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
index 354a4b7e875b..1b979773776c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
@@ -34,5 +34,7 @@ void mmhub_v9_4_init(struct amdgpu_device *adev);
 int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
   enum amd_clockgating_state state);
 void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+   uint64_t page_table_base);
 
 #endif
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdkfd: Contain MMHUB number in the implementation

2019-12-02 Thread Yong Zhao

Adjust the exposed function prototype so that the caller does not need
to know the MMHUB number.

Change-Id: I4420d1715984f703954f074682b075fc59e2a330
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c |  6 ++
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h |  8 
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c   | 13 +++--
 drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h   |  2 ++
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 47c853ef1051..6f1a4676ddde 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -40,7 +40,7 @@
 #include "soc15d.h"
 #include "mmhub_v1_0.h"
 #include "gfxhub_v1_0.h"
-#include "gmc_v9_0.h"
+#include "mmhub_v9_4.h"
 
 
 enum hqd_dequeue_request_type {
@@ -774,9 +774,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct 
kgd_dev *kgd, uint32_t vmi
 * on GFX8 and older.
 */
if (adev->asic_type == CHIP_ARCTURUS) {
-   /* Two MMHUBs */
-   mmhub_v9_4_setup_vm_pt_regs(adev, 0, vmid, page_table_base);
-   mmhub_v9_4_setup_vm_pt_regs(adev, 1, vmid, page_table_base);
+   mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
} else
mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h 
b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
index 971c0840358f..49e8be761214 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
@@ -36,12 +36,4 @@
 
 extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
 extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
-
-/* amdgpu_amdkfd*.c */
-void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
-   uint64_t value);
-void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
-   uint64_t value);
-void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
-   uint32_t vmid, uint64_t value);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index 8599bfdb9a9e..0b621bf8bbd0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -54,7 +54,7 @@ u64 mmhub_v9_4_get_fb_location(struct amdgpu_device *adev)
return base;
 }
 
-void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, int hubid,
+static void mmhub_v9_4_setup_hubid_vm_pt_regs(struct amdgpu_device *adev, int 
hubid,
uint32_t vmid, uint64_t value)
 {
/* two registers distance between mmVML2VC0_VM_CONTEXT0_* to
@@ -80,7 +80,7 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct 
amdgpu_device *adev,
 {
uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo);
 
-   mmhub_v9_4_setup_vm_pt_regs(adev, hubid, 0, pt_base);
+   mmhub_v9_4_setup_hubid_vm_pt_regs(adev, hubid, 0, pt_base);
 
WREG32_SOC15_OFFSET(MMHUB, 0,
mmVML2VC0_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32,
@@ -101,6 +101,15 @@ static void mmhub_v9_4_init_gart_aperture_regs(struct 
amdgpu_device *adev,
(u32)(adev->gmc.gart_end >> 44));
 }
 
+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+   uint64_t page_table_base)
+{
+   int i;
+   for (i = 0; i < MMHUB_NUM_INSTANCES; i++) {
+   mmhub_v9_4_setup_hubid_vm_pt_regs(adev, i, vmid, 
page_table_base);
+   }
+}
+
 static void mmhub_v9_4_init_system_aperture_regs(struct amdgpu_device *adev,
 int hubid)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
index 354a4b7e875b..1b979773776c 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.h
@@ -34,5 +34,7 @@ void mmhub_v9_4_init(struct amdgpu_device *adev);
 int mmhub_v9_4_set_clockgating(struct amdgpu_device *adev,
   enum amd_clockgating_state state);
 void mmhub_v9_4_get_clockgating(struct amdgpu_device *adev, u32 *flags);
+void mmhub_v9_4_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid,
+   uint64_t page_table_base);
 
 #endif
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdkfd: Add Arcturus specific set_vm_context_page_table_base()

2019-12-02 Thread Yong Zhao

Since Arcturus has its own function pointer, we can move the
Arcturus-specific logic there rather than leaving it entangled with
other GFX9 chips.

Change-Id: I7df7c004a0c8ac0616ded0e65144670df50f92a7
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c   | 20 ++-
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 14 +++--
 .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h |  2 --
 3 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index b6713e0ed1b2..3c119407dc34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -46,6 +46,8 @@
 #include "soc15.h"
 #include "soc15d.h"
 #include "amdgpu_amdkfd_gfx_v9.h"
+#include "gfxhub_v1_0.h"
+#include "mmhub_v9_4.h"
 
 #define HQD_N_REGS 56
 #define DUMP_REG(addr) do {\
@@ -258,6 +260,22 @@ static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void 
*mqd,
return 0;
 }
 
+static void kgd_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t 
vmid,
+   uint64_t page_table_base)
+{
+   struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+   if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
+   pr_err("trying to set page table base for wrong VMID %u\n",
+  vmid);
+   return;
+   }
+
+   mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
+
+   gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+}
+
 const struct kfd2kgd_calls arcturus_kfd2kgd = {
.program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping,
@@ -277,7 +295,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.get_atc_vmid_pasid_mapping_info =
kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
.get_tile_config = kgd_gfx_v9_get_tile_config,
-   .set_vm_context_page_table_base = 
kgd_gfx_v9_set_vm_context_page_table_base,
+   .set_vm_context_page_table_base = kgd_set_vm_context_page_table_base,
.invalidate_tlbs = kgd_gfx_v9_invalidate_tlbs,
.invalidate_tlbs_vmid = kgd_gfx_v9_invalidate_tlbs_vmid,
.get_hive_id = amdgpu_amdkfd_get_hive_id,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 6f1a4676ddde..e7861f0ef415 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -40,7 +40,6 @@
 #include "soc15d.h"
 #include "mmhub_v1_0.h"
 #include "gfxhub_v1_0.h"
-#include "mmhub_v9_4.h"
 
 
 enum hqd_dequeue_request_type {
@@ -758,8 +757,8 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev 
*kgd,
return 0;
 }
 
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t 
vmid,
-   uint64_t page_table_base)
+static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd,
+   uint32_t vmid, uint64_t page_table_base)
 {
struct amdgpu_device *adev = get_amdgpu_device(kgd);
 
@@ -769,14 +768,7 @@ void kgd_gfx_v9_set_vm_context_page_table_base(struct 
kgd_dev *kgd, uint32_t vmi
return;
}
 
-   /* TODO: take advantage of per-process address space size. For
-* now, all processes share the same address space size, like
-* on GFX8 and older.
-*/
-   if (adev->asic_type == CHIP_ARCTURUS) {
-   mmhub_v9_4_setup_vm_pt_regs(adev, vmid, page_table_base);
-   } else
-   mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
+   mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 
gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
index d9e9ad22b2bd..02b1426d17d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
@@ -57,8 +57,6 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev 
*kgd,
 
 bool kgd_gfx_v9_get_atc_vmid_pasid_mapping_info(struct kgd_dev *kgd,
uint8_t vmid, uint16_t *p_pasid);
-void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t 
vmid,
-   uint64_t page_table_base);
 int kgd_gfx_v9_invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
 int kgd_gfx_v9_invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
 int kgd_gfx_v9_get_tile_config(struct kgd_dev *kgd,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdkfd: Remove duplicate functions update_mqd_hiq()

2019-11-22 Thread Yong Zhao


Pushed. Thanks!

Yong

On 2019-11-21 11:21 p.m., Liu, Zhan wrote:

Looks good to me.

Reviewed-by: Zhan Liu 


-Original Message-
From: amd-gfx  On Behalf Of
Yong Zhao
Sent: 2019/November/21, Thursday 4:25 PM
To: amd-gfx@lists.freedesktop.org
Cc: Zhao, Yong 
Subject: [PATCH] drm/amdkfd: Remove duplicate functions
update_mqd_hiq()

The functions are the same as update_mqd().

Change-Id: Ic8d8f23cdde6b7806ab766ddf3d71fa668cca5fb
Signed-off-by: Yong Zhao 
---
  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 16 ++--
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c  | 16 ++--
drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  |  4 
  3 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 8d21325b5cbb..7832ec6e480b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -282,18 +282,6 @@ static void init_mqd_hiq(struct mqd_manager *mm,
void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
  }

-static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
-   struct queue_properties *q)
-{
-   struct v10_compute_mqd *m;
-
-   update_mqd(mm, mqd, q);
-
-   /* TODO: what's the point? update_mqd already does this. */
-   m = get_mqd(mqd);
-   m->cp_hqd_vmid = q->vmid;
-}
-
  static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -422,7 +410,7 @@ struct mqd_manager *mqd_manager_init_v10(enum
KFD_MQD_TYPE type,
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
-   mqd->update_mqd = update_mqd_hiq;
+   mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v10_compute_mqd); @@ -
436,7 +424,7 @@ struct mqd_manager *mqd_manager_init_v10(enum
KFD_MQD_TYPE type,
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
-   mqd->update_mqd = update_mqd_hiq;
+   mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v10_compute_mqd); diff --git
a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index df77d67ec9aa..aa9010995eaf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -325,18 +325,6 @@ static void init_mqd_hiq(struct mqd_manager *mm,
void **mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
  }

-static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
-   struct queue_properties *q)
-{
-   struct v9_mqd *m;
-
-   update_mqd(mm, mqd, q);
-
-   /* TODO: what's the point? update_mqd already does this. */
-   m = get_mqd(mqd);
-   m->cp_hqd_vmid = q->vmid;
-}
-
  static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -462,7 +450,7 @@ struct mqd_manager *mqd_manager_init_v9(enum
KFD_MQD_TYPE type,
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
-   mqd->update_mqd = update_mqd_hiq;
+   mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd); @@ -475,7 +463,7
@@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE
type,
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
-   mqd->update_mqd = update_mqd_hiq;
+   mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd); diff --git
a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 3b6b5671964c..a5e8ff1e5945 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -312,11 +312,7 @@ static void init_mqd_hiq(struct mqd_manager *mm,
void **mqd,  static void update_mqd_hiq(struct mqd_manager *mm, void
*mqd,
struct queue_propertie

[PATCH 1/2] drm/amdkfd: Rename pm_release_ib() to pm_destroy_runlist_ib()

2019-11-21 Thread Yong Zhao

Its counterpart is called pm_create_runlist_ib(). The new name makes
it easier to navigate in the code.

Accordingly, add rl_ to the variable names to indicate that they refer to
the runlist.

Change-Id: Id63bfebeb8a5ed6aaefbebe98858d84724fd26be
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c  |  2 +-
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c| 18 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  6 +++---
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index f7f6df40875e..510f2d1bb8bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1355,7 +1355,7 @@ static int unmap_queues_cpsch(struct device_queue_manager 
*dqm,
if (retval)
return retval;
 
-   pm_release_ib(&dqm->packets);
+   pm_destroy_runlist_ib(&dqm->packets);
dqm->active_runlist = false;
 
return retval;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 6cabed06ef5d..4a9433257428 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -98,15 +98,15 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
mutex_lock(&pm->lock);
 
retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
-   &pm->ib_buffer_obj);
+   &pm->rl_ib_obj);
 
if (retval) {
pr_err("Failed to allocate runlist IB\n");
goto out;
}
 
-   *(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
-   *rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;
+   *(void **)rl_buffer = pm->rl_ib_obj->cpu_ptr;
+   *rl_gpu_buffer = pm->rl_ib_obj->gpu_addr;
 
memset(*rl_buffer, 0, *rl_buffer_size);
pm->allocated = true;
@@ -138,7 +138,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
return retval;
 
*rl_size_bytes = alloc_size_bytes;
-   pm->ib_size_bytes = alloc_size_bytes;
+   pm->rl_ib_size_bytes = alloc_size_bytes;
 
pr_debug("Building runlist ib process count: %d queues count %d\n",
pm->dqm->processes_count, pm->dqm->queue_count);
@@ -149,7 +149,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
/* build map process packet */
if (proccesses_mapped >= pm->dqm->processes_count) {
pr_debug("Not enough space left in runlist IB\n");
-   pm_release_ib(pm);
+   pm_destroy_runlist_ib(pm);
return -ENOMEM;
}
 
@@ -337,7 +337,7 @@ int pm_send_runlist(struct packet_manager *pm, struct 
list_head *dqm_queues)
 fail_acquire_packet_buffer:
mutex_unlock(&pm->lock);
 fail_create_runlist_ib:
-   pm_release_ib(pm);
+   pm_destroy_runlist_ib(pm);
return retval;
 }
 
@@ -401,11 +401,11 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum 
kfd_queue_type type,
return retval;
 }
 
-void pm_release_ib(struct packet_manager *pm)
+void pm_destroy_runlist_ib(struct packet_manager *pm)
 {
mutex_lock(&pm->lock);
if (pm->allocated) {
-   kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
+   kfd_gtt_sa_free(pm->dqm->dev, pm->rl_ib_obj);
pm->allocated = false;
}
mutex_unlock(&pm->lock);
@@ -425,7 +425,7 @@ int pm_debugfs_runlist(struct seq_file *m, void *data)
}
 
seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
-pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);
+pm->rl_ib_obj->cpu_ptr, pm->rl_ib_size_bytes, false);
 
 out:
mutex_unlock(&pm->lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 514896bef99a..389cda7c8f1a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -937,8 +937,8 @@ struct packet_manager {
struct kernel_queue *priv_queue;
struct mutex lock;
bool allocated;
-   struct kfd_mem_obj *ib_buffer_obj;
-   unsigned int ib_size_bytes;
+   struct kfd_mem_obj *rl_ib_obj;
+   unsigned int rl_ib_size_bytes;
bool is_over_subscription;
 
const struct packet_manager_funcs *pmf;
@@ -989,7 +989,7 @@ int pm_send_unmap_queue(struct packet_manager *pm, enum 
kfd_queue_type type,
uint32_t filter_param, bool reset,
unsigned int sdma_engine);
 
-void pm_release_ib(struct packet_manager *pm);
+void pm_destroy_runlist_ib(

[PATCH 2/2] drm/amdkfd: Move pm_create_runlist_ib() out of pm_send_runlist()

2019-11-21 Thread Yong Zhao

This is consistent with the calling sequence in unmap_queues_cpsch().

Change-Id: Ieb6714422c812d4f6ebbece34e339871471e4b5e
Signed-off-by: Yong Zhao 
---
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 +++--
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 20 +--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  7 ++-
 3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 510f2d1bb8bb..fd7d90136b94 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1302,6 +1302,8 @@ static int unmap_sdma_queues(struct device_queue_manager 
*dqm)
 static int map_queues_cpsch(struct device_queue_manager *dqm)
 {
int retval;
+   uint64_t rl_ib_gpu_addr;
+   size_t rl_ib_size;
 
if (!dqm->sched_running)
return 0;
@@ -1310,15 +1312,27 @@ static int map_queues_cpsch(struct device_queue_manager 
*dqm)
if (dqm->active_runlist)
return 0;
 
-   retval = pm_send_runlist(&dqm->packets, &dqm->queues);
+   retval = pm_create_runlist_ib(&dqm->packets, &dqm->queues,
+   &rl_ib_gpu_addr, &rl_ib_size);
+   if (retval)
+   goto fail_create_runlist_ib;
+
+   pr_debug("runlist IB address: 0x%llX\n", rl_ib_gpu_addr);
+
+   retval = pm_send_runlist(&dqm->packets, &dqm->queues,
+   rl_ib_gpu_addr, rl_ib_size);
pr_debug("%s sent runlist\n", __func__);
if (retval) {
pr_err("failed to execute runlist\n");
-   return retval;
+   goto fail_create_runlist_ib;
}
dqm->active_runlist = true;
 
return retval;
+
+fail_create_runlist_ib:
+   pm_destroy_runlist_ib(&dqm->packets);
+   return retval;
 }
 
 /* dqm->lock mutex has to be locked before calling this function */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 4a9433257428..6ec54e9f9392 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -116,7 +116,7 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
return retval;
 }
 
-static int pm_create_runlist_ib(struct packet_manager *pm,
+int pm_create_runlist_ib(struct packet_manager *pm,
struct list_head *queues,
uint64_t *rl_gpu_addr,
size_t *rl_size_bytes)
@@ -149,7 +149,6 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
/* build map process packet */
if (proccesses_mapped >= pm->dqm->processes_count) {
pr_debug("Not enough space left in runlist IB\n");
-   pm_destroy_runlist_ib(pm);
return -ENOMEM;
}
 
@@ -299,20 +298,13 @@ int pm_send_set_resources(struct packet_manager *pm,
return retval;
 }
 
-int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
+int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues,
+   uint64_t rl_ib_gpu_addr, size_t rl_ib_size)
 {
-   uint64_t rl_gpu_ib_addr;
uint32_t *rl_buffer;
-   size_t rl_ib_size, packet_size_dwords;
+   size_t packet_size_dwords;
int retval;
 
-   retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
-   &rl_ib_size);
-   if (retval)
-   goto fail_create_runlist_ib;
-
-   pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);
-
packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
mutex_lock(&pm->lock);
 
@@ -321,7 +313,7 @@ int pm_send_runlist(struct packet_manager *pm, struct 
list_head *dqm_queues)
if (retval)
goto fail_acquire_packet_buffer;
 
-   retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
+   retval = pm->pmf->runlist(pm, rl_buffer, rl_ib_gpu_addr,
rl_ib_size / sizeof(uint32_t), false);
if (retval)
goto fail_create_runlist;
@@ -336,8 +328,6 @@ int pm_send_runlist(struct packet_manager *pm, struct 
list_head *dqm_queues)
kq_rollback_packet(pm->priv_queue);
 fail_acquire_packet_buffer:
mutex_unlock(&pm->lock);
-fail_create_runlist_ib:
-   pm_destroy_runlist_ib(pm);
return retval;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 389cda7c8f1a..6accb605b9f0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -980,7 +980,8 @@ int pm_init(struct packet_manager *pm, struct 
d

[PATCH] drm/amdkfd: Remove duplicate functions update_mqd_hiq()

2019-11-21 Thread Yong Zhao

The functions are the same as update_mqd().

Change-Id: Ic8d8f23cdde6b7806ab766ddf3d71fa668cca5fb
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 16 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c  | 16 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  |  4 
 3 files changed, 4 insertions(+), 32 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 8d21325b5cbb..7832ec6e480b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -282,18 +282,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void 
**mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
 }
 
-static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
-   struct queue_properties *q)
-{
-   struct v10_compute_mqd *m;
-
-   update_mqd(mm, mqd, q);
-
-   /* TODO: what's the point? update_mqd already does this. */
-   m = get_mqd(mqd);
-   m->cp_hqd_vmid = q->vmid;
-}
-
 static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -422,7 +410,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE 
type,
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
-   mqd->update_mqd = update_mqd_hiq;
+   mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
@@ -436,7 +424,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE 
type,
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
-   mqd->update_mqd = update_mqd_hiq;
+   mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v10_compute_mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index df77d67ec9aa..aa9010995eaf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -325,18 +325,6 @@ static void init_mqd_hiq(struct mqd_manager *mm, void 
**mqd,
1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
 }
 
-static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
-   struct queue_properties *q)
-{
-   struct v9_mqd *m;
-
-   update_mqd(mm, mqd, q);
-
-   /* TODO: what's the point? update_mqd already does this. */
-   m = get_mqd(mqd);
-   m->cp_hqd_vmid = q->vmid;
-}
-
 static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
struct kfd_mem_obj *mqd_mem_obj, uint64_t *gart_addr,
struct queue_properties *q)
@@ -462,7 +450,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE 
type,
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd_hiq_sdma;
mqd->load_mqd = load_mqd;
-   mqd->update_mqd = update_mqd_hiq;
+   mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd);
@@ -475,7 +463,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE 
type,
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
-   mqd->update_mqd = update_mqd_hiq;
+   mqd->update_mqd = update_mqd;
mqd->destroy_mqd = destroy_mqd;
mqd->is_occupied = is_occupied;
mqd->mqd_size = sizeof(struct v9_mqd);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 3b6b5671964c..a5e8ff1e5945 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -312,11 +312,7 @@ static void init_mqd_hiq(struct mqd_manager *mm, void 
**mqd,
 static void update_mqd_hiq(struct mqd_manager *mm, void *mqd,
struct queue_properties *q)
 {
-   struct vi_mqd *m;
__update_mqd(mm, mqd, q, MTYPE_UC, 0);
-
-   m = get_mqd(mqd);
-   m->cp_hqd_vmid = q->vmid;
 }
 
 static void init_mqd_sdma(struct mqd_manager *mm, void **mqd,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: DIQ should not use HIQ way to allocate memory

2019-11-15 Thread Yong Zhao

In the mqd_diq_sdma buffer, there should be only one HIQ mqd. All DIQs
should be allocated using the regular way.

Change-Id: Ibf3eb33604d0ec30501c244228cdb3b24615b699
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c  | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c  | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index b08694ec65d7..19f0fe547c57 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -400,7 +400,7 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE 
type,
 #endif
break;
case KFD_MQD_TYPE_DIQ:
-   mqd->allocate_mqd = allocate_hiq_mqd;
+   mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 5a0e30441be8..8d21325b5cbb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -432,7 +432,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE 
type,
pr_debug("%s@%i\n", __func__, __LINE__);
break;
case KFD_MQD_TYPE_DIQ:
-   mqd->allocate_mqd = allocate_hiq_mqd;
+   mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index bdbcea22ad12..df77d67ec9aa 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -471,7 +471,7 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE 
type,
 #endif
break;
case KFD_MQD_TYPE_DIQ:
-   mqd->allocate_mqd = allocate_hiq_mqd;
+   mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index c9e1151b5a57..3b6b5671964c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -452,7 +452,7 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE 
type,
 #endif
break;
case KFD_MQD_TYPE_DIQ:
-   mqd->allocate_mqd = allocate_hiq_mqd;
+   mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd_hiq;
mqd->free_mqd = free_mqd;
mqd->load_mqd = load_mqd;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Delete KFD_MQD_TYPE_COMPUTE

2019-11-15 Thread Yong Zhao

It is the same as KFD_MQD_TYPE_CP, so delete it. As a result, we will
have one fewer mqd manager per device.

Change-Id: Iaa98fc17be06b216de7a826c3577f44bc0536b4c
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  | 3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  | 1 -
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   | 1 -
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   | 3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 +--
 6 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index b42f34ef2b5c..f7f6df40875e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1595,7 +1595,7 @@ static int get_wave_state(struct device_queue_manager 
*dqm,
goto dqm_unlock;
}
 
-   mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];
+   mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];
 
if (!mqd_mgr->get_wave_state) {
r = -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index 28876aceb14b..b08694ec65d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -374,7 +374,6 @@ struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE 
type,
 
switch (type) {
case KFD_MQD_TYPE_CP:
-   case KFD_MQD_TYPE_COMPUTE:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
mqd->free_mqd = free_mqd;
@@ -442,7 +441,7 @@ struct mqd_manager *mqd_manager_init_cik_hawaii(enum 
KFD_MQD_TYPE type,
mqd = mqd_manager_init_cik(type, dev);
if (!mqd)
return NULL;
-   if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
+   if (type == KFD_MQD_TYPE_CP)
mqd->update_mqd = update_mqd_hawaii;
return mqd;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 579c5ffcfa79..5a0e30441be8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -401,7 +401,6 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE 
type,
 
switch (type) {
case KFD_MQD_TYPE_CP:
-   case KFD_MQD_TYPE_COMPUTE:
pr_debug("%s@%i\n", __func__, __LINE__);
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 22a819c888d8..bdbcea22ad12 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -444,7 +444,6 @@ struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE 
type,
 
switch (type) {
case KFD_MQD_TYPE_CP:
-   case KFD_MQD_TYPE_COMPUTE:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
mqd->free_mqd = free_mqd;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index 7d144f56f421..c9e1151b5a57 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -425,7 +425,6 @@ struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE 
type,
 
switch (type) {
case KFD_MQD_TYPE_CP:
-   case KFD_MQD_TYPE_COMPUTE:
mqd->allocate_mqd = allocate_mqd;
mqd->init_mqd = init_mqd;
mqd->free_mqd = free_mqd;
@@ -494,7 +493,7 @@ struct mqd_manager *mqd_manager_init_vi_tonga(enum 
KFD_MQD_TYPE type,
mqd = mqd_manager_init_vi(type, dev);
if (!mqd)
return NULL;
-   if ((type == KFD_MQD_TYPE_CP) || (type == KFD_MQD_TYPE_COMPUTE))
+   if (type == KFD_MQD_TYPE_CP)
mqd->update_mqd = update_mqd_tonga;
return mqd;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 1049759dc6bb..514896bef99a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -508,8 +508,7 @@ struct queue {
  * Please read the kfd_mqd_manager.h description.
  */
 enum KFD_MQD_TYPE {
-   KFD_MQD_TYPE_COMPUTE = 0,   /* for no cp scheduling */
-   KFD_MQD_TYPE_HIQ,   /* for hiq */
+   KFD_MQD_TYPE_HIQ = 0,   /* for hiq */
KFD_MQD_TYPE_CP,/* for cp queues and diq */
KFD_MQD_TYPE_SDMA,  /* for sdma queues */

[PATCH] drm/amdkfd: Rename kfd_kernel_queue_.c to kfd_packet_manager_.c

2019-11-13 Thread Yong Zhao

After the recent cleanup, the functionalities provided by the previous
kfd_kernel_queue_*.c are actually all packet manager related. So rename
them to reflect that.

Change-Id: I6544ccb38da827c747544c0787aa949df20edbb0
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/Makefile   | 4 ++--
 .../amdkfd/{kfd_kernel_queue_v9.c => kfd_packet_manager_v9.c} | 0
 .../amdkfd/{kfd_kernel_queue_vi.c => kfd_packet_manager_vi.c} | 0
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_v9.c => 
kfd_packet_manager_v9.c} (100%)
 rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_vi.c => 
kfd_packet_manager_vi.c} (100%)

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index f93a16372325..61474627a32c 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -38,9 +38,9 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
-   $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
-   $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
+   $(AMDKFD_PATH)/kfd_packet_manager_vi.o \
+   $(AMDKFD_PATH)/kfd_packet_manager_v9.o \
$(AMDKFD_PATH)/kfd_process_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
similarity index 100%
rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
similarity index 100%
rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Rename kfd_kernel_queue_.c to kfd_packet_manager_.c

2019-11-13 Thread Yong Zhao

After the recent cleanup, the functionalities provided by the previous
kfd_kernel_queue_*.c are actually all packet manager related. So rename
them to reflect that.

Change-Id: I6544ccb38da827c747544c0787aa949df20edbb0
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/Makefile   |  4 +--
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   |  4 +--
 ...nel_queue_v9.c => kfd_packet_manager_ai.c} | 26 +--
 ...nel_queue_vi.c => kfd_packet_manager_vi.c} |  2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h |  4 +--
 5 files changed, 20 insertions(+), 20 deletions(-)
 rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_v9.c => 
kfd_packet_manager_ai.c} (94%)
 rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_vi.c => 
kfd_packet_manager_vi.c} (99%)

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index f93a16372325..55bfecf04239 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -38,9 +38,9 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
-   $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
-   $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
+   $(AMDKFD_PATH)/kfd_packet_manager_vi.o \
+   $(AMDKFD_PATH)/kfd_packet_manager_ai.o \
$(AMDKFD_PATH)/kfd_process_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index 6cabed06ef5d..cc945a2acd66 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -233,7 +233,7 @@ int pm_init(struct packet_manager *pm, struct 
device_queue_manager *dqm)
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
-   pm->pmf = &kfd_vi_pm_funcs;
+   pm->pmf = &kfd_pm_funcs_vi;
break;
case CHIP_VEGA10:
case CHIP_VEGA12:
@@ -244,7 +244,7 @@ int pm_init(struct packet_manager *pm, struct 
device_queue_manager *dqm)
case CHIP_NAVI10:
case CHIP_NAVI12:
case CHIP_NAVI14:
-   pm->pmf = &kfd_v9_pm_funcs;
+   pm->pmf = &kfd_pm_funcs_ai;
break;
default:
WARN(1, "Unexpected ASIC family %u",
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c
similarity index 94%
rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c
index 2de01009f1b6..713530cd9760 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c
@@ -27,7 +27,7 @@
 #include "kfd_pm4_opcodes.h"
 #include "gc/gc_10_1_0_sh_mask.h"
 
-static int pm_map_process_v9(struct packet_manager *pm,
+static int pm_map_process_ai(struct packet_manager *pm,
uint32_t *buffer, struct qcm_process_device *qpd)
 {
struct pm4_mes_map_process *packet;
@@ -73,7 +73,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
return 0;
 }
 
-static int pm_runlist_v9(struct packet_manager *pm, uint32_t *buffer,
+static int pm_runlist_ai(struct packet_manager *pm, uint32_t *buffer,
uint64_t ib, size_t ib_size_in_dwords, bool chain)
 {
struct pm4_mes_runlist *packet;
@@ -111,7 +111,7 @@ static int pm_runlist_v9(struct packet_manager *pm, 
uint32_t *buffer,
return 0;
 }
 
-static int pm_set_resources_v9(struct packet_manager *pm, uint32_t *buffer,
+static int pm_set_resources_ai(struct packet_manager *pm, uint32_t *buffer,
struct scheduling_resources *res)
 {
struct pm4_mes_set_resources *packet;
@@ -139,7 +139,7 @@ static int pm_set_resources_v9(struct packet_manager *pm, 
uint32_t *buffer,
return 0;
 }
 
-static int pm_map_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+static int pm_map_queues_ai(struct packet_manager *pm, uint32_t *buffer,
struct queue *q, bool is_static)
 {
struct pm4_mes_map_queues *packet;
@@ -206,7 +206,7 @@ static int pm_map_queues_v9(struct packet_manager *pm, 
uint32_t *buffer,
return 0;
 }
 
-static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
+static int pm_unmap_queues_ai(struct packet_manager *pm, uint32_t *buffer,
enum kfd_queue_type type,
enum kfd_unmap_queues_filter filter,
uint32_t filter_param, bool reset,
@@ -282,7 +282,7 @@ static int pm_unmap_queues_v9(struct

Re: [PATCH] drm/amdkfd: Rename kfd_kernel_queue_.c to kfd_packet_manager_.c

2019-11-13 Thread Yong Zhao

Oh, I did not realize the part inside of the file. I think v9->ai is 
better, because the packet format header uses ai. Also v9 will give 
people an impression of gfx9.


Yong

On 2019-11-13 5:19 p.m., Felix Kuehling wrote:

On 2019-11-13 5:09 p.m., Yong Zhao wrote:

After the recent cleanup, the functionalities provided by the previous
kfd_kernel_queue_*.c are actually all packet manager related. So rename
them to reflect that.

Change-Id: I6544ccb38da827c747544c0787aa949df20edbb0
Signed-off-by: Yong Zhao 
---
  drivers/gpu/drm/amd/amdkfd/Makefile | 4 ++--
  .../amdkfd/{kfd_kernel_queue_v9.c => kfd_packet_manager_ai.c} | 0
  .../amdkfd/{kfd_kernel_queue_vi.c => kfd_packet_manager_vi.c} | 0
  3 files changed, 2 insertions(+), 2 deletions(-)
  rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_v9.c => 
kfd_packet_manager_ai.c} (100%)
  rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_vi.c => 
kfd_packet_manager_vi.c} (100%)


diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile

index f93a16372325..55bfecf04239 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -38,9 +38,9 @@ AMDKFD_FILES    := $(AMDKFD_PATH)/kfd_module.o \
  $(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
  $(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
  $(AMDKFD_PATH)/kfd_kernel_queue.o \
-    $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
-    $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
  $(AMDKFD_PATH)/kfd_packet_manager.o \
+    $(AMDKFD_PATH)/kfd_packet_manager_vi.o \
+    $(AMDKFD_PATH)/kfd_packet_manager_ai.o \


This naming convention is inconsistent with the rest of KFD. We use 
_v9, not _ai. Also the functions inside this file are named _v9. If 
we decide to change that naming convention, it should not be 
accidental and piece-meal. It should be deliberate and comprehensive.


Regards,
  Felix



$(AMDKFD_PATH)/kfd_process_queue_manager.o \
  $(AMDKFD_PATH)/kfd_device_queue_manager.o \
  $(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c

similarity index 100%
rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c

similarity index 100%
rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Rename kfd_kernel_queue_.c to kfd_packet_manager_.c

2019-11-13 Thread Yong Zhao

After the recent cleanup, the functionalities provided by the previous
kfd_kernel_queue_*.c are actually all packet manager related. So rename
them to reflect that.

Change-Id: I6544ccb38da827c747544c0787aa949df20edbb0
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/Makefile   | 4 ++--
 .../amdkfd/{kfd_kernel_queue_v9.c => kfd_packet_manager_ai.c} | 0
 .../amdkfd/{kfd_kernel_queue_vi.c => kfd_packet_manager_vi.c} | 0
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_v9.c => 
kfd_packet_manager_ai.c} (100%)
 rename drivers/gpu/drm/amd/amdkfd/{kfd_kernel_queue_vi.c => 
kfd_packet_manager_vi.c} (100%)

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index f93a16372325..55bfecf04239 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -38,9 +38,9 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
-   $(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
-   $(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
+   $(AMDKFD_PATH)/kfd_packet_manager_vi.o \
+   $(AMDKFD_PATH)/kfd_packet_manager_ai.o \
$(AMDKFD_PATH)/kfd_process_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager.o \
$(AMDKFD_PATH)/kfd_device_queue_manager_cik.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c
similarity index 100%
rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c
rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_ai.c
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
similarity index 100%
rename from drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c
rename to drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Eliminate unnecessary kernel queue function pointers

2019-11-13 Thread Yong Zhao

Up to this point, those functions are all the same for all ASICs, so
there is no need to call them through function pointers. Removing the
function pointers will greatly increase the code readability. If there is
ever a need for those function pointers, we can add them back then.

Change-Id: I9515fdece70110067cda66e2d24d6768b4846c2f
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c   |  8 ++---
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 30 
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h | 34 +--
 .../gpu/drm/amd/amdkfd/kfd_packet_manager.c   | 28 +++
 4 files changed, 41 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index 1d33c4f25263..27bcc5b472f6 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -72,11 +72,11 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 * The receive packet buff will be sitting on the Indirect Buffer
 * and in the PQ we put the IB packet + sync packet(s).
 */
-   status = kq->ops.acquire_packet_buffer(kq,
+   status = kq_acquire_packet_buffer(kq,
pq_packets_size_in_bytes / sizeof(uint32_t),
_packet_buff);
if (status) {
-   pr_err("acquire_packet_buffer failed\n");
+   pr_err("kq_acquire_packet_buffer failed\n");
return status;
}
 
@@ -115,7 +115,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 
if (status) {
pr_err("Failed to allocate GART memory\n");
-   kq->ops.rollback_packet(kq);
+   kq_rollback_packet(kq);
return status;
}
 
@@ -151,7 +151,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 
rm_packet->data_lo = QUEUESTATE__ACTIVE;
 
-   kq->ops.submit_packet(kq);
+   kq_submit_packet(kq);
 
/* Wait till CP writes sync code: */
status = amdkfd_fence_wait_timeout(
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 59ee9053498c..2d56dc534459 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -34,7 +34,10 @@
 
 #define PM4_COUNT_ZERO (((1 << 15) - 1) << 16)
 
-static bool initialize(struct kernel_queue *kq, struct kfd_dev *dev,
+/* Initialize a kernel queue, including allocations of GART memory
+ * needed for the queue.
+ */
+static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
enum kfd_queue_type type, unsigned int queue_size)
 {
struct queue_properties prop;
@@ -88,7 +91,7 @@ static bool initialize(struct kernel_queue *kq, struct 
kfd_dev *dev,
kq->pq_gpu_addr = kq->pq->gpu_addr;
 
/* For CIK family asics, kq->eop_mem is not needed */
-   if (dev->device_info->asic_family > CHIP_HAWAII) {
+   if (dev->device_info->asic_family > CHIP_MULLINS) {
retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, >eop_mem);
if (retval != 0)
goto err_eop_allocate_vidmem;
@@ -191,7 +194,8 @@ static bool initialize(struct kernel_queue *kq, struct 
kfd_dev *dev,
 
 }
 
-static void uninitialize(struct kernel_queue *kq)
+/* Uninitialize a kernel queue and free all its memory usages. */
+static void kq_uninitialize(struct kernel_queue *kq)
 {
if (kq->queue->properties.type == KFD_QUEUE_TYPE_HIQ)
kq->mqd_mgr->destroy_mqd(kq->mqd_mgr,
@@ -220,7 +224,7 @@ static void uninitialize(struct kernel_queue *kq)
uninit_queue(kq->queue);
 }
 
-static int acquire_packet_buffer(struct kernel_queue *kq,
+int kq_acquire_packet_buffer(struct kernel_queue *kq,
size_t packet_size_in_dwords, unsigned int **buffer_ptr)
 {
size_t available_size;
@@ -281,7 +285,7 @@ static int acquire_packet_buffer(struct kernel_queue *kq,
return -ENOMEM;
 }
 
-static void submit_packet(struct kernel_queue *kq)
+void kq_submit_packet(struct kernel_queue *kq)
 {
 #ifdef DEBUG
int i;
@@ -304,7 +308,7 @@ static void submit_packet(struct kernel_queue *kq)
}
 }
 
-static void rollback_packet(struct kernel_queue *kq)
+void kq_rollback_packet(struct kernel_queue *kq)
 {
if (kq->dev->device_info->doorbell_size == 8) {
kq->pending_wptr64 = *kq->wptr64_kernel;
@@ -324,13 +328,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
if (!kq)
return NULL;
 
-   kq->ops.initialize = initialize;
-   kq->ops.uninitialize = uninitialize;
-   kq->ops.acquire_packet_buffer = acquire_packet_buffer;
-   kq->ops.submit_packet = submit_packet;
-   kq->ops.rollb

Re: [PATCH 2/2] drm/amdkfd: Eliminate ops_asic_specific in kernel queue

2019-11-13 Thread Yong Zhao


I will change it to CHIP_MULLINS.

Yes, I also spotted the kq->ops cleanup, will send it out shortly.

Regards,

Yong

On 2019-11-13 2:31 p.m., Felix Kuehling wrote:

See one comment inline. With that fixed, the series is

Reviewed-by: Felix Kuehling 

I could think of more follow-up cleanup while you're at it:

1. Can you see any reason why the kq->ops need to be function pointers?
   Looks to me like they are the same for all kernel queues, so those
   functions could be called without the pointer indirection.
2. The only thing left in the ASIC-specific kfd_kernel_queue_*.c files
   is the PM4 packet writer functions that are called by the
   kfd_packet_manager. It may make sense to rename them to reflect
   that. Maybe kfd_packet_manager_*.c

Regards,
  Felix

On 2019-11-12 5:18 p.m., Yong Zhao wrote:

The ops_asic_specific function pointers are actually quite generic after
using a simple if condition. Eliminate it by code refactoring.

Change-Id: Icb891289cca31acdbe2d2eea76a426f1738b9c08
Signed-off-by: Yong Zhao 
---
  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 63 ---
  drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |  4 --
  .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 36 ---
  .../gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c  | 48 --
  4 files changed, 26 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c

index a750b1d110eb..59ee9053498c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -87,9 +87,17 @@ static bool initialize(struct kernel_queue *kq, 
struct kfd_dev *dev,

  kq->pq_kernel_addr = kq->pq->cpu_ptr;
  kq->pq_gpu_addr = kq->pq->gpu_addr;
  -    retval = kq->ops_asic_specific.initialize(kq, dev, type, 
queue_size);

-    if (!retval)
-    goto err_eop_allocate_vidmem;
+    /* For CIK family asics, kq->eop_mem is not needed */
+    if (dev->device_info->asic_family > CHIP_HAWAII) {


This is not the correct condition to distinguish GFXv7 (CIK) vs v8 
(VI). CHIP_MULLINS comes after Hawaii, but it is also GFXv7 (CIK), 
even though KFD currently doesn't support it.




+    retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, >eop_mem);
+    if (retval != 0)
+    goto err_eop_allocate_vidmem;
+
+    kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+    kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+    memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+    }
    retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
  >rptr_mem);
@@ -200,7 +208,12 @@ static void uninitialize(struct kernel_queue *kq)
    kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
  kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
-    kq->ops_asic_specific.uninitialize(kq);
+
+    /* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free()
+ * is able to handle NULL properly.
+ */
+    kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+
  kfd_gtt_sa_free(kq->dev, kq->pq);
  kfd_release_kernel_doorbell(kq->dev,
  kq->queue->properties.doorbell_ptr);
@@ -280,8 +293,15 @@ static void submit_packet(struct kernel_queue *kq)
  }
  pr_debug("\n");
  #endif
-
-    kq->ops_asic_specific.submit_packet(kq);
+    if (kq->dev->device_info->doorbell_size == 8) {
+    *kq->wptr64_kernel = kq->pending_wptr64;
+ write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
+    kq->pending_wptr64);
+    } else {
+    *kq->wptr_kernel = kq->pending_wptr;
+ write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+    kq->pending_wptr);
+    }
  }
    static void rollback_packet(struct kernel_queue *kq)
@@ -310,42 +330,11 @@ struct kernel_queue *kernel_queue_init(struct 
kfd_dev *dev,

  kq->ops.submit_packet = submit_packet;
  kq->ops.rollback_packet = rollback_packet;
  -    switch (dev->device_info->asic_family) {
-    case CHIP_KAVERI:
-    case CHIP_HAWAII:
-    case CHIP_CARRIZO:
-    case CHIP_TONGA:
-    case CHIP_FIJI:
-    case CHIP_POLARIS10:
-    case CHIP_POLARIS11:
-    case CHIP_POLARIS12:
-    case CHIP_VEGAM:
-    kernel_queue_init_vi(>ops_asic_specific);
-    break;
-
-    case CHIP_VEGA10:
-    case CHIP_VEGA12:
-    case CHIP_VEGA20:
-    case CHIP_RAVEN:
-    case CHIP_RENOIR:
-    case CHIP_ARCTURUS:
-    case CHIP_NAVI10:
-    case CHIP_NAVI12:
-    case CHIP_NAVI14:
-    kernel_queue_init_v9(>ops_asic_specific);
-    break;
-    default:
-    WARN(1, "Unexpected ASIC family %u",
- dev->device_info->asic_family);
-    goto out_free;
-    }
-
  if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
  return kq;
    pr_err("Failed to init kernel que

[PATCH 1/2] drm/amdkfd: Merge CIK kernel queue functions into VI

2019-11-12 Thread Yong Zhao

The only difference between the CIK kernel queue functions and the VI
ones is that CIK avoids allocating eop_mem. We can achieve that by using
an if condition.

Change-Id: Iea9cbc82f603ff008a906c5ee32325ddcd02d963
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/Makefile   |  1 -
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c |  7 +--
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |  1 -
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c | 53 ---
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c  |  7 +++
 5 files changed, 9 insertions(+), 60 deletions(-)
 delete mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c

diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile 
b/drivers/gpu/drm/amd/amdkfd/Makefile
index 017a8b7156da..f93a16372325 100644
--- a/drivers/gpu/drm/amd/amdkfd/Makefile
+++ b/drivers/gpu/drm/amd/amdkfd/Makefile
@@ -38,7 +38,6 @@ AMDKFD_FILES  := $(AMDKFD_PATH)/kfd_module.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v9.o \
$(AMDKFD_PATH)/kfd_mqd_manager_v10.o \
$(AMDKFD_PATH)/kfd_kernel_queue.o \
-   $(AMDKFD_PATH)/kfd_kernel_queue_cik.o \
$(AMDKFD_PATH)/kfd_kernel_queue_vi.o \
$(AMDKFD_PATH)/kfd_kernel_queue_v9.o \
$(AMDKFD_PATH)/kfd_packet_manager.o \
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index 0d966408ea87..a750b1d110eb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -311,6 +311,8 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
kq->ops.rollback_packet = rollback_packet;
 
switch (dev->device_info->asic_family) {
+   case CHIP_KAVERI:
+   case CHIP_HAWAII:
case CHIP_CARRIZO:
case CHIP_TONGA:
case CHIP_FIJI:
@@ -321,11 +323,6 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
kernel_queue_init_vi(>ops_asic_specific);
break;
 
-   case CHIP_KAVERI:
-   case CHIP_HAWAII:
-   kernel_queue_init_cik(>ops_asic_specific);
-   break;
-
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index a7116a939029..a9a35897d8b7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -99,7 +99,6 @@ struct kernel_queue {
struct list_headlist;
 };
 
-void kernel_queue_init_cik(struct kernel_queue_ops *ops);
 void kernel_queue_init_vi(struct kernel_queue_ops *ops);
 void kernel_queue_init_v9(struct kernel_queue_ops *ops);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
deleted file mode 100644
index 19e54acb4125..
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue_cik.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Copyright 2014 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#include "kfd_kernel_queue.h"
-
-static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
-   enum kfd_queue_type type, unsigned int queue_size);
-static void uninitialize_cik(struct kernel_queue *kq);
-static void submit_packet_cik(struct kernel_queue *kq);
-
-void kernel_queue_init_cik(struct kernel_queue_ops *ops)
-{
-   ops->initialize = initialize_cik;
-   ops->uninitialize = uninitialize_cik;
-   ops->submit_packet = submit_packet_cik;
-}
-
-static bool initialize_cik(struct kernel_queue *kq, struct kfd_dev *dev,
-   enum kfd_queue_type type, unsigned int queue_size)
-{
-   return true;
-}
-
-static void uninitialize_cik(struct kernel_queue *kq)
-{
-}
-
-static void sub

[PATCH 2/2] drm/amdkfd: Eliminate ops_asic_specific in kernel queue

2019-11-12 Thread Yong Zhao

The ops_asic_specific function pointers are actually quite generic after
using a simple if condition. Eliminate it by code refactoring.

Change-Id: Icb891289cca31acdbe2d2eea76a426f1738b9c08
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 63 ---
 drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h |  4 --
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_v9.c  | 36 ---
 .../gpu/drm/amd/amdkfd/kfd_kernel_queue_vi.c  | 48 --
 4 files changed, 26 insertions(+), 125 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index a750b1d110eb..59ee9053498c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -87,9 +87,17 @@ static bool initialize(struct kernel_queue *kq, struct 
kfd_dev *dev,
kq->pq_kernel_addr = kq->pq->cpu_ptr;
kq->pq_gpu_addr = kq->pq->gpu_addr;
 
-   retval = kq->ops_asic_specific.initialize(kq, dev, type, queue_size);
-   if (!retval)
-   goto err_eop_allocate_vidmem;
+   /* For CIK family asics, kq->eop_mem is not needed */
+   if (dev->device_info->asic_family > CHIP_HAWAII) {
+   retval = kfd_gtt_sa_allocate(dev, PAGE_SIZE, >eop_mem);
+   if (retval != 0)
+   goto err_eop_allocate_vidmem;
+
+   kq->eop_gpu_addr = kq->eop_mem->gpu_addr;
+   kq->eop_kernel_addr = kq->eop_mem->cpu_ptr;
+
+   memset(kq->eop_kernel_addr, 0, PAGE_SIZE);
+   }
 
retval = kfd_gtt_sa_allocate(dev, sizeof(*kq->rptr_kernel),
>rptr_mem);
@@ -200,7 +208,12 @@ static void uninitialize(struct kernel_queue *kq)
 
kfd_gtt_sa_free(kq->dev, kq->rptr_mem);
kfd_gtt_sa_free(kq->dev, kq->wptr_mem);
-   kq->ops_asic_specific.uninitialize(kq);
+
+   /* For CIK family asics, kq->eop_mem is Null, kfd_gtt_sa_free()
+* is able to handle NULL properly.
+*/
+   kfd_gtt_sa_free(kq->dev, kq->eop_mem);
+
kfd_gtt_sa_free(kq->dev, kq->pq);
kfd_release_kernel_doorbell(kq->dev,
kq->queue->properties.doorbell_ptr);
@@ -280,8 +293,15 @@ static void submit_packet(struct kernel_queue *kq)
}
pr_debug("\n");
 #endif
-
-   kq->ops_asic_specific.submit_packet(kq);
+   if (kq->dev->device_info->doorbell_size == 8) {
+   *kq->wptr64_kernel = kq->pending_wptr64;
+   write_kernel_doorbell64(kq->queue->properties.doorbell_ptr,
+   kq->pending_wptr64);
+   } else {
+   *kq->wptr_kernel = kq->pending_wptr;
+   write_kernel_doorbell(kq->queue->properties.doorbell_ptr,
+   kq->pending_wptr);
+   }
 }
 
 static void rollback_packet(struct kernel_queue *kq)
@@ -310,42 +330,11 @@ struct kernel_queue *kernel_queue_init(struct kfd_dev 
*dev,
kq->ops.submit_packet = submit_packet;
kq->ops.rollback_packet = rollback_packet;
 
-   switch (dev->device_info->asic_family) {
-   case CHIP_KAVERI:
-   case CHIP_HAWAII:
-   case CHIP_CARRIZO:
-   case CHIP_TONGA:
-   case CHIP_FIJI:
-   case CHIP_POLARIS10:
-   case CHIP_POLARIS11:
-   case CHIP_POLARIS12:
-   case CHIP_VEGAM:
-   kernel_queue_init_vi(>ops_asic_specific);
-   break;
-
-   case CHIP_VEGA10:
-   case CHIP_VEGA12:
-   case CHIP_VEGA20:
-   case CHIP_RAVEN:
-   case CHIP_RENOIR:
-   case CHIP_ARCTURUS:
-   case CHIP_NAVI10:
-   case CHIP_NAVI12:
-   case CHIP_NAVI14:
-   kernel_queue_init_v9(>ops_asic_specific);
-   break;
-   default:
-   WARN(1, "Unexpected ASIC family %u",
-dev->device_info->asic_family);
-   goto out_free;
-   }
-
if (kq->ops.initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
return kq;
 
pr_err("Failed to init kernel queue\n");
 
-out_free:
kfree(kq);
return NULL;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
index a9a35897d8b7..475e9499c0af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.h
@@ -66,7 +66,6 @@ struct kernel_queue_ops {
 
 struct kernel_queue {
struct kernel_queue_ops ops;
-   struct kernel_queue_ops ops_asic_specific;
 
/* data */
struct kfd_dev  *dev;
@@ -99,7 +98,4 @@ struct kernel_queue {
struct list_headlist;
 };
 
-void kernel_queue_init_vi(struct kernel_q

Re: [PATCH 2/2] drm/amdkfd: Stop using GFP_NOIO explicitly for GFX10

2019-11-12 Thread Yong Zhao


Hi Felix,

See one thing inline that I am not too sure about.

Yong

On 2019-11-12 4:30 p.m., Felix Kuehling wrote:

On 2019-11-12 4:26 p.m., Yong Zhao wrote:

Adapt the change from 1cd106ecfc1f04

The change is:

 drm/amdkfd: Stop using GFP_NOIO explicitly

 This is no longer needed with the memalloc_nofs_save/restore in
 dqm_lock/unlock

Change-Id: I42450b2c149d2b1842be99a8f355c829a0079e7c
Signed-off-by: Yong Zhao 


The series is

Reviewed-by: Felix Kuehling 



---
  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c  | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c

index 46ddb33b624a..579c5ffcfa79 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -393,7 +393,7 @@ struct mqd_manager *mqd_manager_init_v10(enum 
KFD_MQD_TYPE type,

  if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
  return NULL;
  -    mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
+    mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
  if (!mqd)
  return NULL;
  diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c

index be27ff01cdb8..22a819c888d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -92,7 +92,7 @@ static struct kfd_mem_obj *allocate_mqd(struct 
kfd_dev *kfd,

   * instead of sub-allocation function.
   */
  if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
-    mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
+    mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
[yz] This should be kept probably. With the latest code, allocate_mqd() 
is called outside of the dqm. So now the situation is different from the 
original one.

  if (!mqd_mem_obj)
  return NULL;
  retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdkfd: Use QUEUE_IS_ACTIVE macro in mqd v10

2019-11-12 Thread Yong Zhao

This is done for other GFX in commit bb2d2128a54c4. Port it to GFX10.

Change-Id: I9e04872be3af0e90f5f6930226896b1ea545f3d9
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 11 ++-
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 954dc8ac4ff1..46ddb33b624a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -213,10 +213,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
update_cu_mask(mm, mqd, q);
set_priority(m, q);
 
-   q->is_active = (q->queue_size > 0 &&
-   q->queue_address != 0 &&
-   q->queue_percent > 0 &&
-   !q->is_evicted);
+   q->is_active = QUEUE_IS_ACTIVE(*q);
 }
 
 static int destroy_mqd(struct mqd_manager *mm, void *mqd,
@@ -348,11 +345,7 @@ static void update_mqd_sdma(struct mqd_manager *mm, void 
*mqd,
m->sdma_queue_id = q->sdma_queue_id;
m->sdmax_rlcx_dummy_reg = SDMA_RLC_DUMMY_DEFAULT;
 
-
-   q->is_active = (q->queue_size > 0 &&
-   q->queue_address != 0 &&
-   q->queue_percent > 0 &&
-   !q->is_evicted);
+   q->is_active = QUEUE_IS_ACTIVE(*q);
 }
 
 /*
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdkfd: Stop using GFP_NOIO explicitly for GFX10

2019-11-12 Thread Yong Zhao

Adapt the change from 1cd106ecfc1f04

The change is:

drm/amdkfd: Stop using GFP_NOIO explicitly

This is no longer needed with the memalloc_nofs_save/restore in
dqm_lock/unlock

Change-Id: I42450b2c149d2b1842be99a8f355c829a0079e7c
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 46ddb33b624a..579c5ffcfa79 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -393,7 +393,7 @@ struct mqd_manager *mqd_manager_init_v10(enum KFD_MQD_TYPE 
type,
if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
return NULL;
 
-   mqd = kzalloc(sizeof(*mqd), GFP_NOIO);
+   mqd = kzalloc(sizeof(*mqd), GFP_KERNEL);
if (!mqd)
return NULL;
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index be27ff01cdb8..22a819c888d8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -92,7 +92,7 @@ static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
 * instead of sub-allocation function.
 */
if (kfd->cwsr_enabled && (q->type == KFD_QUEUE_TYPE_COMPUTE)) {
-   mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_NOIO);
+   mqd_mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
if (!mqd_mem_obj)
return NULL;
retval = amdgpu_amdkfd_alloc_gtt_mem(kfd->kgd,
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 3/3] drm/amdkfd: Fix a bug when calculating save_area_used_size

2019-11-11 Thread Yong Zhao

Workgroup context data is written starting from m->cp_hqd_cntl_stack_size,
so we should deduct it when calculating the used size.

Change-Id: I5252e25662c3b8221f451c39115bf084d1911eae
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index d3380c5bdbde..3a2ee1f01aae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -302,7 +302,8 @@ static int get_wave_state(struct mqd_manager *mm, void *mqd,
 
*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
m->cp_hqd_cntl_stack_offset;
-   *save_area_used_size = m->cp_hqd_wg_state_offset;
+   *save_area_used_size = m->cp_hqd_wg_state_offset -
+   m->cp_hqd_cntl_stack_size;;
 
if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
return -EFAULT;
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/3] drm/amdkfd: Implement queue priority controls for gfx10

2019-11-11 Thread Yong Zhao

Ported from gfx9.

Change-Id: I388dc7c609ed724a6d600840f8e7317d9c2c877d
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 4a236b2c2354..4884cd6c65ce 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -66,6 +66,12 @@ static void update_cu_mask(struct mqd_manager *mm, void *mqd,
m->compute_static_thread_mgmt_se3);
 }
 
+static void set_priority(struct v10_compute_mqd *m, struct queue_properties *q)
+{
+   m->cp_hqd_pipe_priority = pipe_priority_map[q->priority];
+   m->cp_hqd_queue_priority = q->priority;
+}
+
 static struct kfd_mem_obj *allocate_mqd(struct kfd_dev *kfd,
struct queue_properties *q)
 {
@@ -109,9 +115,6 @@ static void init_mqd(struct mqd_manager *mm, void **mqd,
1 << CP_HQD_QUANTUM__QUANTUM_SCALE__SHIFT |
10 << CP_HQD_QUANTUM__QUANTUM_DURATION__SHIFT;
 
-   m->cp_hqd_pipe_priority = 1;
-   m->cp_hqd_queue_priority = 15;
-
if (q->format == KFD_QUEUE_FORMAT_AQL) {
m->cp_hqd_aql_control =
1 << CP_HQD_AQL_CONTROL__CONTROL0__SHIFT;
@@ -208,6 +211,7 @@ static void update_mqd(struct mqd_manager *mm, void *mqd,
m->cp_hqd_ctx_save_control = 0;
 
update_cu_mask(mm, mqd, q);
+   set_priority(m, q);
 
q->is_active = (q->queue_size > 0 &&
q->queue_address != 0 &&
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/3] drm/amdkfd: Update get_wave_state() for GFX10

2019-11-11 Thread Yong Zhao

Given that the control stack is now in the userspace context save restore
area on GFX10, the same as GFX8, there is no need to copy it back to userspace.

Change-Id: I063ddc3026eefa57713ec47b466a90f9bf9d49b8
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c | 14 +-
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
index 4884cd6c65ce..954dc8ac4ff1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
@@ -251,18 +251,22 @@ static int get_wave_state(struct mqd_manager *mm, void 
*mqd,
 {
struct v10_compute_mqd *m;
 
-   /* Control stack is located one page after MQD. */
-   void *mqd_ctl_stack = (void *)((uintptr_t)mqd + PAGE_SIZE);
-
m = get_mqd(mqd);
 
+   /* Control stack is written backwards, while workgroup context data
+* is written forwards. Both starts from m->cp_hqd_cntl_stack_size.
+* Current position is at m->cp_hqd_cntl_stack_offset and
+* m->cp_hqd_wg_state_offset, respectively.
+*/
*ctl_stack_used_size = m->cp_hqd_cntl_stack_size -
m->cp_hqd_cntl_stack_offset;
*save_area_used_size = m->cp_hqd_wg_state_offset -
m->cp_hqd_cntl_stack_size;
 
-   if (copy_to_user(ctl_stack, mqd_ctl_stack, m->cp_hqd_cntl_stack_size))
-   return -EFAULT;
+   /* Control stack is not copied to user mode for GFXv10 because
+* it's part of the context save area that is already
+* accessible to user mode
+*/
 
return 0;
 }
-- 
2.17.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 2/2] drm/amdkfd: Avoid using doorbell_off as offset in process doorbell pages

2019-11-11 Thread Yong Zhao

The NULL pointer is not an issue, because for DIQ, the if (q) condition, 
which guards the section but is not shown, will never be satisfied. 
Anyway, I still added the NULL pointer check.


With that, I have pushed the change.


Yong

On 2019-11-11 3:51 p.m., Felix Kuehling wrote:

On 2019-11-11 15:43, Felix Kuehling wrote:

On 2019-11-01 16:10, Zhao, Yong wrote:

doorbell_off in the queue properties is mainly used for the doorbell dw
offset in pci bar. We should not set it to the doorbell byte offset in
process doorbell pages. This makes the code much easier to read.


I kind of agree. I think what's confusing is that the 
queue_properties structure is used for two different purposes.


 1. For storing queue properties provided by user mode through KFD ioctls
 2. A subset of struct queue passed to mqd_manager and elsewhere
(that's why some driver state is creeping into it)

Maybe a follow-up could cleanly separate the queue properties from 
the queue driver state. That would probably change some internal 
interfaces to use struct queue instead of queue_properties.


Anyway, this patch is

Reviewed-by: Felix Kuehling 

I pointed out a missing NULL pointer check inline near the end of the 
patch. I should have mentioned it here. Please fix that before you submit.


Thanks,
  Felix



Change-Id: I553045ff9fcb3676900c92d10426f2ceb3660005
Signed-off-by: Yong Zhao
---
  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 12 ++--
  drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c  |  2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h|  3 ++-
  .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c   |  8 ++--
  4 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index d9e36dbf13d5..b91993753b82 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -258,6 +258,7 @@ static int kfd_ioctl_create_queue(struct file *filep, 
struct kfd_process *p,
unsigned int queue_id;
struct kfd_process_device *pdd;
struct queue_properties q_properties;
+   uint32_t doorbell_offset_in_process = 0;
  
  	memset(_properties, 0, sizeof(struct queue_properties));
  
@@ -286,7 +287,8 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,

p->pasid,
dev->id);
  
-	err = pqm_create_queue(>pqm, dev, filep, _properties, _id);

+   err = pqm_create_queue(>pqm, dev, filep, _properties, _id,
+   _offset_in_process);
if (err != 0)
goto err_create_queue;
  
@@ -298,12 +300,10 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,

args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
args->doorbell_offset <<= PAGE_SHIFT;
if (KFD_IS_SOC15(dev->device_info->asic_family))
-   /* On SOC15 ASICs, doorbell allocation must be
-* per-device, and independent from the per-process
-* queue_id. Return the doorbell offset within the
-* doorbell aperture to user mode.
+   /* On SOC15 ASICs, include the doorbell offset within the
+* process doorbell frame, which could be 1 page or 2 pages.
 */
-   args->doorbell_offset |= q_properties.doorbell_off;
+   args->doorbell_offset |= doorbell_offset_in_process;
  
  	mutex_unlock(>mutex);
  
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c

index d59f2cd056c6..1d33c4f25263 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -185,7 +185,7 @@ static int dbgdev_register_diq(struct kfd_dbgdev *dbgdev)
properties.type = KFD_QUEUE_TYPE_DIQ;
  
  	status = pqm_create_queue(dbgdev->pqm, dbgdev->dev, NULL,

-   , );
+   , , NULL);
  
  	if (status) {

pr_err("Failed to create DIQ\n");
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 7c561c98f2e2..66bae8f2dad1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -907,7 +907,8 @@ int pqm_create_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev,
struct file *f,
struct queue_properties *properties,
-   unsigned int *qid);
+   unsigned int *qid,
+   uint32_t *p_doorbell_offset_in_process);
  int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
  int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
struct queue_properties *p);
diff --gi

Re: [PATCH 1/2] drm/amdkfd: Use better name to indicate the offset is in dwords

2019-11-11 Thread Yong Zhao


ping

On 2019-11-01 4:10 p.m., Zhao, Yong wrote:

Change-Id: I75da23bba90231762cf58da3170f5bb77ece45ed
Signed-off-by: Yong Zhao 
---
  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  |  2 +-
  drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c  | 14 +++---
  drivers/gpu/drm/amd/amdkfd/kfd_priv.h  |  8 
  3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 984c2f2b24b6..4503fb26fe5b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -170,7 +170,7 @@ static int allocate_doorbell(struct qcm_process_device 
*qpd, struct queue *q)
}
  
  	q->properties.doorbell_off =

-   kfd_doorbell_id_to_offset(dev, q->process,
+   kfd_get_doorbell_dw_offset_from_bar(dev, q->process,
  q->doorbell_id);
  
  	return 0;

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
index ebe79bf00145..f904355c44a1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c
@@ -91,7 +91,7 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
doorbell_start_offset;
  
-	kfd->doorbell_id_offset = doorbell_start_offset / sizeof(u32);

+   kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32);
  
  	kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base,

   kfd_doorbell_process_slice(kfd));
@@ -103,8 +103,8 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
pr_debug("doorbell base   == 0x%08lX\n",
(uintptr_t)kfd->doorbell_base);
  
-	pr_debug("doorbell_id_offset  == 0x%08lX\n",

-   kfd->doorbell_id_offset);
+   pr_debug("doorbell_base_dw_offset  == 0x%08lX\n",
+   kfd->doorbell_base_dw_offset);
  
  	pr_debug("doorbell_process_limit  == 0x%08lX\n",

doorbell_process_limit);
@@ -185,7 +185,7 @@ void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
 * Calculating the kernel doorbell offset using the first
 * doorbell page.
 */
-   *doorbell_off = kfd->doorbell_id_offset + inx;
+   *doorbell_off = kfd->doorbell_base_dw_offset + inx;
  
  	pr_debug("Get kernel queue doorbell\n"

" doorbell offset   == 0x%08X\n"
@@ -225,17 +225,17 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}
  }
  
-unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,

+unsigned int kfd_get_doorbell_dw_offset_from_bar(struct kfd_dev *kfd,
struct kfd_process *process,
unsigned int doorbell_id)
  {
/*
-* doorbell_id_offset accounts for doorbells taken by KGD.
+* doorbell_base_dw_offset accounts for doorbells taken by KGD.
 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to
 * the process's doorbells. The offset returned is in dword
 * units regardless of the ASIC-dependent doorbell size.
 */
-   return kfd->doorbell_id_offset +
+   return kfd->doorbell_base_dw_offset +
process->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 62db4d20ed32..7c561c98f2e2 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -238,9 +238,9 @@ struct kfd_dev {
 * KFD. It is aligned for mapping
 * into user mode
 */
-   size_t doorbell_id_offset;  /* Doorbell offset (from KFD doorbell
-* to HW doorbell, GFX reserved some
-* at the start)
+   size_t doorbell_base_dw_offset; /* Doorbell dword offset (from KFD
+* doorbell to PCI doorbell bar,
+* GFX reserved some at the start)
 */
u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
   * page used by kernel queue
@@ -821,7 +821,7 @@ void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 
__iomem *db_addr);
  u32 read_kernel_doorbell(u32 __iomem *db);
  void write_

Re: [PATCH] drm/amdkfd: Rename create_cp_queue() to init_user_queue()

2019-11-11 Thread Yong Zhao


ping

On 2019-11-01 4:12 p.m., Zhao, Yong wrote:

create_cp_queue() could also work with SDMA queues, so we should rename
it.

Change-Id: I76cbaed8fa95dd9062d786cbc1dd037ff041da9d
Signed-off-by: Yong Zhao 
---
  drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 48185d2957e9..ebb2f69b438c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -162,7 +162,7 @@ void pqm_uninit(struct process_queue_manager *pqm)
pqm->queue_slot_bitmap = NULL;
  }
  
-static int create_cp_queue(struct process_queue_manager *pqm,

+static int init_user_queue(struct process_queue_manager *pqm,
struct kfd_dev *dev, struct queue **q,
struct queue_properties *q_properties,
struct file *f, unsigned int qid)
@@ -251,7 +251,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
  
-		retval = create_cp_queue(pqm, dev, , properties, f, *qid);

+   retval = init_user_queue(pqm, dev, , properties, f, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;
@@ -272,7 +272,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
}
  
-		retval = create_cp_queue(pqm, dev, , properties, f, *qid);

+   retval = init_user_queue(pqm, dev, , properties, f, *qid);
if (retval != 0)
goto err_create_queue;
pqn->q = q;

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: Update MMHUB power gating register settings

2018-01-11 Thread Yong Zhao


Okay, pushed. Thanks.


On 2018-01-11 01:20 PM, Eric Huang wrote:

The fix makes sense to me.

Acked-by: Eric Huang <jinhuieric.hu...@amd.com>


On 2018-01-11 01:00 PM, Felix Kuehling wrote:

[+Eric]

Acked-by: Felix Kuehling <felix.kuehl...@amd.com>

I'm not familiar with the details of what this does. I'm hoping Eric can
also review this with more power-management experience.

Regards,
   Felix


On 2018-01-10 03:10 PM, Yong Zhao wrote:

The new register settings are needed to fix a tlb invalidation issue
when MMHUB power gating is turned on for Raven.

Change-Id: I846befbb2fcbddf40ca4ecbdc06da1cd442e3554
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 61 
++---

  1 file changed, 33 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c

index ffd5b7e..bdf94c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -272,21 +272,21 @@ static const struct pctl_data pctl0_data[] = {
  {0x11, 0x6a684},
  {0x19, 0xea68e},
  {0x29, 0xa69e},
-    {0x2b, 0x34a6c0},
-    {0x61, 0x83a707},
-    {0xe6, 0x8a7a4},
-    {0xf0, 0x1a7b8},
-    {0xf3, 0xfa7cc},
-    {0x104, 0x17a7dd},
-    {0x11d, 0xa7dc},
-    {0x11f, 0x12a7f5},
-    {0x133, 0xa808},
-    {0x135, 0x12a810},
-    {0x149, 0x7a82c}
+    {0x2b, 0x0010a6c0},
+    {0x3d, 0x83a707},
+    {0xc2, 0x8a7a4},
+    {0xcc, 0x1a7b8},
+    {0xcf, 0xfa7cc},
+    {0xe0, 0x17a7dd},
+    {0xf9, 0xa7dc},
+    {0xfb, 0x12a7f5},
+    {0x10f, 0xa808},
+    {0x111, 0x12a810},
+    {0x125, 0x7a82c}
  };
  #define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data))
  -#define PCTL0_RENG_EXEC_END_PTR 0x151
+#define PCTL0_RENG_EXEC_END_PTR 0x12d
  #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE  0xa640
  #define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833
  @@ -385,10 +385,9 @@ void 
mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev)

  if (amdgpu_sriov_vf(adev))
  return;
  +    /** pctl0 **/
  pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC);
  pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, 
mmPCTL0_RENG_EXECUTE);

-    pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
-    pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
    /* Light sleep must be disabled before writing to pctl0 
registers */

  pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
@@ -402,12 +401,13 @@ void mmhub_v1_0_initialize_power_gating(struct 
amdgpu_device *adev)

  pctl0_data[i].data);
  }
  -    /* Set the reng execute end ptr for pctl0 */
-    pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
-    PCTL0_RENG_EXECUTE,
-    RENG_EXECUTE_END_PTR,
-    PCTL0_RENG_EXEC_END_PTR);
-    WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+    /* Re-enable light sleep */
+    pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
+    WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
+
+    /** pctl1 **/
+    pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
+    pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
    /* Light sleep must be disabled before writing to pctl1 
registers */

  pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
@@ -421,20 +421,25 @@ void mmhub_v1_0_initialize_power_gating(struct 
amdgpu_device *adev)

  pctl1_data[i].data);
  }
  +    /* Re-enable light sleep */
+    pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
+    WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
+
+    mmhub_v1_0_power_gating_write_save_ranges(adev);
+
+    /* Set the reng execute end ptr for pctl0 */
+    pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
+    PCTL0_RENG_EXECUTE,
+    RENG_EXECUTE_END_PTR,
+    PCTL0_RENG_EXEC_END_PTR);
+    WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+
  /* Set the reng execute end ptr for pctl1 */
  pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
  PCTL1_RENG_EXECUTE,
  RENG_EXECUTE_END_PTR,
  PCTL1_RENG_EXEC_END_PTR);
  WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
-
-    mmhub_v1_0_power_gating_write_save_ranges(adev);
-
-    /* Re-enable light sleep */
-    pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
-    WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
-    pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
-    WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
  }
    void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Update MMHUB power gating register settings

2018-01-10 Thread Yong Zhao

The new register settings are needed to fix a tlb invalidation issue
when MMHUB power gating is turned on for Raven.

Change-Id: I846befbb2fcbddf40ca4ecbdc06da1cd442e3554
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 61 ++---
 1 file changed, 33 insertions(+), 28 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c 
b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index ffd5b7e..bdf94c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -272,21 +272,21 @@ static const struct pctl_data pctl0_data[] = {
{0x11, 0x6a684},
{0x19, 0xea68e},
{0x29, 0xa69e},
-   {0x2b, 0x34a6c0},
-   {0x61, 0x83a707},
-   {0xe6, 0x8a7a4},
-   {0xf0, 0x1a7b8},
-   {0xf3, 0xfa7cc},
-   {0x104, 0x17a7dd},
-   {0x11d, 0xa7dc},
-   {0x11f, 0x12a7f5},
-   {0x133, 0xa808},
-   {0x135, 0x12a810},
-   {0x149, 0x7a82c}
+   {0x2b, 0x0010a6c0},
+   {0x3d, 0x83a707},
+   {0xc2, 0x8a7a4},
+   {0xcc, 0x1a7b8},
+   {0xcf, 0xfa7cc},
+   {0xe0, 0x17a7dd},
+   {0xf9, 0xa7dc},
+   {0xfb, 0x12a7f5},
+   {0x10f, 0xa808},
+   {0x111, 0x12a810},
+   {0x125, 0x7a82c}
 };
 #define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data))
 
-#define PCTL0_RENG_EXEC_END_PTR 0x151
+#define PCTL0_RENG_EXEC_END_PTR 0x12d
 #define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE  0xa640
 #define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833
 
@@ -385,10 +385,9 @@ void mmhub_v1_0_initialize_power_gating(struct 
amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
return;
 
+   /** pctl0 **/
pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC);
pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE);
-   pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
-   pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
 
/* Light sleep must be disabled before writing to pctl0 registers */
pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
@@ -402,12 +401,13 @@ void mmhub_v1_0_initialize_power_gating(struct 
amdgpu_device *adev)
pctl0_data[i].data);
 }
 
-   /* Set the reng execute end ptr for pctl0 */
-   pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
-   PCTL0_RENG_EXECUTE,
-   RENG_EXECUTE_END_PTR,
-   PCTL0_RENG_EXEC_END_PTR);
-   WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+   /* Re-enable light sleep */
+   pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
+   WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
+
+   /** pctl1 **/
+   pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC);
+   pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE);
 
/* Light sleep must be disabled before writing to pctl1 registers */
pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
@@ -421,20 +421,25 @@ void mmhub_v1_0_initialize_power_gating(struct 
amdgpu_device *adev)
pctl1_data[i].data);
 }
 
+   /* Re-enable light sleep */
+   pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
+   WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
+
+   mmhub_v1_0_power_gating_write_save_ranges(adev);
+
+   /* Set the reng execute end ptr for pctl0 */
+   pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute,
+   PCTL0_RENG_EXECUTE,
+   RENG_EXECUTE_END_PTR,
+   PCTL0_RENG_EXEC_END_PTR);
+   WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute);
+
/* Set the reng execute end ptr for pctl1 */
pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute,
PCTL1_RENG_EXECUTE,
RENG_EXECUTE_END_PTR,
PCTL1_RENG_EXEC_END_PTR);
WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute);
-
-   mmhub_v1_0_power_gating_write_save_ranges(adev);
-
-   /* Re-enable light sleep */
-   pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK;
-   WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc);
-   pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK;
-   WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc);
 }
 
 void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdkfd: avoid calling execute_queues_cpsch() when destroying an unactive queue

2018-01-02 Thread Yong Zhao

Signed-off-by: Yong Zhao <yong.z...@amd.com>
Reviewed-by: Oak Zeng <oak.z...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index acfb121..b21285a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1013,13 +1013,13 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
 
list_del(>list);
qpd->queue_count--;
-   if (q->properties.is_active)
+   if (q->properties.is_active) {
dqm->queue_count--;
-
-   retval = execute_queues_cpsch(dqm,
+   retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-   if (retval == -ETIME)
-   qpd->reset_wavefronts = true;
+   if (retval == -ETIME)
+   qpd->reset_wavefronts = true;
+   }
 
mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdkfd: Fix return value 0 when execute_queues_cpsch actually fails

2018-01-02 Thread Yong Zhao

Signed-off-by: Yong Zhao <yong.z...@amd.com>
Reviewed-by: Oak Zeng <oak.z...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index d0693fd..acfb121 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -1033,7 +1033,7 @@ static int destroy_queue_cpsch(struct 
device_queue_manager *dqm,
 
mutex_unlock(>lock);
 
-   return 0;
+   return retval;
 
 failed:
 failed_try_destroy_debugged_queue:
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Fix a bug that vm size is wrong on Raven

2017-12-14 Thread Yong Zhao

Change-Id: Id522c1cbadb8c069720f4e64a31cff42cd014733
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 709587d..93500e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2534,7 +2534,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, 
uint32_t vm_size,
uint64_t tmp;
 
/* adjust vm size first */
-   if (amdgpu_vm_size != -1) {
+   if (amdgpu_vm_size != -1 && adev->asic_type != CHIP_RAVEN) {
unsigned max_size = 1 << (max_bits - 30);
 
vm_size = amdgpu_vm_size;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: Fix a bug that vm size is wrong on Raven

2017-12-14 Thread Yong Zhao


Hi Christian,

I don't know much about the background. But according to my experiments, 
as long as we change the vm size to 64G, ATC memory access on Raven will 
fall apart. How should we deal with that, or can you come up with a fix?


Regards,

Yong


On 2017-12-14 03:47 AM, Christian König wrote:
NAK, that really circumvents the intention of the patch to adjust the 
number of levels based on the vm_size.


Christian.

Am 14.12.2017 um 03:25 schrieb Yong Zhao:

Change-Id: Id522c1cbadb8c069720f4e64a31cff42cd014733
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c

index 709587d..3b9eb1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2534,7 +2534,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device 
*adev, uint32_t vm_size,

  uint64_t tmp;
    /* adjust vm size first */
-    if (amdgpu_vm_size != -1) {
+    if (amdgpu_vm_size != -1 && max_level == 1) {
  unsigned max_size = 1 << (max_bits - 30);
    vm_size = amdgpu_vm_size;




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Fix a bug that vm size is wrong on Raven

2017-12-13 Thread Yong Zhao

Change-Id: Id522c1cbadb8c069720f4e64a31cff42cd014733
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 709587d..3b9eb1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2534,7 +2534,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, 
uint32_t vm_size,
uint64_t tmp;
 
/* adjust vm size first */
-   if (amdgpu_vm_size != -1) {
+   if (amdgpu_vm_size != -1 && max_level == 1) {
unsigned max_size = 1 << (max_bits - 30);
 
vm_size = amdgpu_vm_size;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Delete a useless parameter from create_queue function pointer

2017-11-24 Thread Yong Zhao

Signed-off-by: Yong Zhao <yong.z...@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 13 +++--
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  6 ++
 3 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8447810..81ec7bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -149,8 +149,7 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
 
 static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
-   struct qcm_process_device *qpd,
-   int *allocated_vmid)
+   struct qcm_process_device *qpd)
 {
int retval;
 
@@ -170,7 +169,6 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
if (retval)
goto out_unlock;
}
-   *allocated_vmid = qpd->vmid;
q->properties.vmid = qpd->vmid;
 
q->properties.tba_addr = qpd->tba_addr;
@@ -184,10 +182,8 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
retval = -EINVAL;
 
if (retval) {
-   if (list_empty(>queues_list)) {
+   if (list_empty(>queues_list))
deallocate_vmid(dqm, qpd, q);
-   *allocated_vmid = 0;
-   }
goto out_unlock;
}
 
@@ -812,16 +808,13 @@ static void destroy_kernel_queue_cpsch(struct 
device_queue_manager *dqm,
 }
 
 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue 
*q,
-   struct qcm_process_device *qpd, int *allocate_vmid)
+   struct qcm_process_device *qpd)
 {
int retval;
struct mqd_manager *mqd;
 
retval = 0;
 
-   if (allocate_vmid)
-   *allocate_vmid = 0;
-
mutex_lock(>lock);
 
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 8752edf..c61b693 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -84,8 +84,7 @@ struct device_process_node {
 struct device_queue_manager_ops {
int (*create_queue)(struct device_queue_manager *dqm,
struct queue *q,
-   struct qcm_process_device *qpd,
-   int *allocate_vmid);
+   struct qcm_process_device *qpd);
 
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index eeb7726..fbfa274 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -201,8 +201,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
-   retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd,
-   >properties.vmid);
+   retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd);
pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
@@ -222,8 +221,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
-   retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd,
-   >properties.vmid);
+   retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd);
pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Delete a useless parameter from create_queue function pointer

2017-11-24 Thread Yong Zhao

Change-Id: Ia5c74ad567c30e206ed804b204fdf8a0f8a75a19
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 14 --
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  3 +--
 3 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 53a66e8..1df1123 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -144,8 +144,7 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
 
 static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
-   struct qcm_process_device *qpd,
-   int *allocated_vmid)
+   struct qcm_process_device *qpd)
 {
int retval;
 
@@ -165,7 +164,6 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
if (retval)
goto out_unlock;
}
-   *allocated_vmid = qpd->vmid;
q->properties.vmid = qpd->vmid;
 
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
@@ -176,10 +174,9 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
retval = -EINVAL;
 
if (retval) {
-   if (list_empty(>queues_list)) {
+   if (list_empty(>queues_list))
deallocate_vmid(dqm, qpd, q);
-   *allocated_vmid = 0;
-   }
+
goto out_unlock;
}
 
@@ -788,16 +785,13 @@ static void select_sdma_engine_id(struct queue *q)
 }
 
 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue 
*q,
-   struct qcm_process_device *qpd, int *allocate_vmid)
+   struct qcm_process_device *qpd)
 {
int retval;
struct mqd_manager *mqd;
 
retval = 0;
 
-   if (allocate_vmid)
-   *allocate_vmid = 0;
-
mutex_lock(>lock);
 
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index faf820a..449407a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -84,8 +84,7 @@ struct device_process_node {
 struct device_queue_manager_ops {
int (*create_queue)(struct device_queue_manager *dqm,
struct queue *q,
-   struct qcm_process_device *qpd,
-   int *allocate_vmid);
+   struct qcm_process_device *qpd);
 
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 03bec76..1e7bcae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -199,8 +199,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
-   retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd,
-   >properties.vmid);
+   retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd);
pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdkfd: Delete a useless parameter from create_queue function pointer

2017-11-24 Thread Yong Zhao

Change-Id: Ia5c74ad567c30e206ed804b204fdf8a0f8a75a19
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 14 --
 drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  3 +--
 drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  3 +--
 3 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 53a66e8..1df1123 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -144,8 +144,7 @@ static void deallocate_vmid(struct device_queue_manager 
*dqm,
 
 static int create_queue_nocpsch(struct device_queue_manager *dqm,
struct queue *q,
-   struct qcm_process_device *qpd,
-   int *allocated_vmid)
+   struct qcm_process_device *qpd)
 {
int retval;
 
@@ -165,7 +164,6 @@ static int create_queue_nocpsch(struct device_queue_manager 
*dqm,
if (retval)
goto out_unlock;
}
-   *allocated_vmid = qpd->vmid;
q->properties.vmid = qpd->vmid;
 
if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
@@ -176,10 +174,9 @@ static int create_queue_nocpsch(struct 
device_queue_manager *dqm,
retval = -EINVAL;
 
if (retval) {
-   if (list_empty(>queues_list)) {
+   if (list_empty(>queues_list))
deallocate_vmid(dqm, qpd, q);
-   *allocated_vmid = 0;
-   }
+
goto out_unlock;
}
 
@@ -788,16 +785,13 @@ static void select_sdma_engine_id(struct queue *q)
 }
 
 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue 
*q,
-   struct qcm_process_device *qpd, int *allocate_vmid)
+   struct qcm_process_device *qpd)
 {
int retval;
struct mqd_manager *mqd;
 
retval = 0;
 
-   if (allocate_vmid)
-   *allocate_vmid = 0;
-
mutex_lock(>lock);
 
if (dqm->total_queue_count >= max_num_of_queues_per_device) {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h 
b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index faf820a..449407a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -84,8 +84,7 @@ struct device_process_node {
 struct device_queue_manager_ops {
int (*create_queue)(struct device_queue_manager *dqm,
struct queue *q,
-   struct qcm_process_device *qpd,
-   int *allocate_vmid);
+   struct qcm_process_device *qpd);
 
int (*destroy_queue)(struct device_queue_manager *dqm,
struct qcm_process_device *qpd,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c 
b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 03bec76..1e7bcae 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -199,8 +199,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
goto err_create_queue;
pqn->q = q;
pqn->kq = NULL;
-   retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd,
-   >properties.vmid);
+   retval = dev->dqm->ops.create_queue(dev->dqm, q, >qpd);
pr_debug("DQM returned %d for create_queue\n", retval);
print_queue(q);
break;
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Set the correct value for PDEs/PTEs of ATC memory on Raven

2017-10-06 Thread Yong Zhao

From: Yong Zhao <yong.z...@amd.com>

Without the additional bits set in the PDEs/PTEs, ATC memory accesses
fail on Raven.

Change-Id: I28429ef6d39cdb01dc6f17fea4264ee22d7121d4
Signed-off-by: Yong Zhao <yong.z...@amd.com>
Acked-by: Alex Deucher <alexander.deuc...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  9 ++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 10 ++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index bca9eeb..d98d58a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -328,9 +328,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device 
*adev,
AMDGPU_GEM_CREATE_SHADOW);
 
if (vm->pte_support_ats) {
-   init_value = AMDGPU_PTE_SYSTEM;
+   init_value = AMDGPU_PTE_DEFAULT_ATC;
if (level != adev->vm_manager.num_level - 1)
init_value |= AMDGPU_PDE_PTE;
+
}
 
/* walk over the address space and allocate the page tables */
@@ -2017,7 +2018,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
list_del(>list);
 
if (vm->pte_support_ats)
-   init_pte_value = AMDGPU_PTE_SYSTEM;
+   init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
mapping->start, mapping->last,
@@ -2629,7 +2630,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
 
if (adev->asic_type == CHIP_RAVEN) {
vm->pte_support_ats = true;
-   init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE;
+   init_pde_value = AMDGPU_PTE_DEFAULT_ATC
+   | AMDGPU_PDE_PTE;
+
}
} else
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 66efbc2..5d0cfc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -73,6 +73,16 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_PTE_MTYPE(a)((uint64_t)a << 57)
 #define AMDGPU_PTE_MTYPE_MASK  AMDGPU_PTE_MTYPE(3ULL)
 
+/* For Raven */
+#define AMDGPU_MTYPE_CC 2
+
+#define AMDGPU_PTE_DEFAULT_ATC  (AMDGPU_PTE_SYSTEM  \
+| AMDGPU_PTE_SNOOPED\
+| AMDGPU_PTE_EXECUTABLE \
+| AMDGPU_PTE_READABLE   \
+| AMDGPU_PTE_WRITEABLE  \
+| AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC))
+
 /* How to programm VM fault handling */
 #define AMDGPU_VM_FAULT_STOP_NEVER 0
 #define AMDGPU_VM_FAULT_STOP_FIRST 1
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH] drm/amdgpu: Set the correct value for PDEs/PTEs of ATC memory

2017-10-05 Thread Yong Zhao


Hi Christian,

Do you have a minute to review this change?

Yong


On 2017-09-27 02:46 PM, Alex Deucher wrote:

On Tue, Sep 26, 2017 at 7:30 PM, Yong Zhao <yong.z...@amd.com> wrote:

From: Yong Zhao <yong.z...@amd.com>

Without the additional bits set in the PDEs/PTEs, ATC memory accesses
fail.

Change-Id: I28429ef6d39cdb01dc6f17fea4264ee22d7121d4
Signed-off-by: Yong Zhao <yong.z...@amd.com>

Glad we finally got this sorted.
Acked-by: Alex Deucher <alexander.deuc...@amd.com>


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +++---
  1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8fcc743..c848b7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -33,6 +33,8 @@
  #include "amdgpu.h"
  #include "amdgpu_trace.h"

+#include "vega10/vega10_enum.h"
+
  /*
   * PASID manager
   *
@@ -108,6 +110,13 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, 
uint64_t, __subtree_last,
  #undef START
  #undef LAST

+#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM  \
+   | AMDGPU_PTE_SNOOPED\
+   | AMDGPU_PTE_EXECUTABLE \
+   | AMDGPU_PTE_READABLE   \
+   | AMDGPU_PTE_WRITEABLE  \
+   | AMDGPU_PTE_MTYPE(MTYPE_CC))
+
  /* Local structure. Encapsulate some VM table update parameters to reduce
   * the number of function parameters
   */
@@ -328,9 +337,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device 
*adev,
 AMDGPU_GEM_CREATE_SHADOW);

 if (vm->pte_support_ats) {
-   init_value = AMDGPU_PTE_SYSTEM;
+   init_value = AMDGPU_PTE_DEFAULT_ATC;
 if (level != adev->vm_manager.num_level - 1)
 init_value |= AMDGPU_PDE_PTE;
+
 }

 /* walk over the address space and allocate the page tables */
@@ -2017,7 +2027,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 list_del(>list);

 if (vm->pte_support_ats)
-   init_pte_value = AMDGPU_PTE_SYSTEM;
+   init_pte_value = AMDGPU_PTE_DEFAULT_ATC;

 r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
 mapping->start, mapping->last,
@@ -2627,7 +2637,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,

 if (adev->asic_type == CHIP_RAVEN) {
 vm->pte_support_ats = true;
-   init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE;
+   init_pde_value = AMDGPU_PTE_DEFAULT_ATC
+   | AMDGPU_PDE_PTE;
+
 }
 } else
 vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
--
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH] drm/amdgpu: Set the correct value for PDEs/PTEs of ATC memory

2017-09-26 Thread Yong Zhao

From: Yong Zhao <yong.z...@amd.com>

Without the additional bits set in the PDEs/PTEs, ATC memory accesses
fail.

Change-Id: I28429ef6d39cdb01dc6f17fea4264ee22d7121d4
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 18 +++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8fcc743..c848b7c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -33,6 +33,8 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
+#include "vega10/vega10_enum.h"
+
 /*
  * PASID manager
  *
@@ -108,6 +110,13 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, 
uint64_t, __subtree_last,
 #undef START
 #undef LAST
 
+#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM  \
+   | AMDGPU_PTE_SNOOPED\
+   | AMDGPU_PTE_EXECUTABLE \
+   | AMDGPU_PTE_READABLE   \
+   | AMDGPU_PTE_WRITEABLE  \
+   | AMDGPU_PTE_MTYPE(MTYPE_CC))
+
 /* Local structure. Encapsulate some VM table update parameters to reduce
  * the number of function parameters
  */
@@ -328,9 +337,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device 
*adev,
AMDGPU_GEM_CREATE_SHADOW);
 
if (vm->pte_support_ats) {
-   init_value = AMDGPU_PTE_SYSTEM;
+   init_value = AMDGPU_PTE_DEFAULT_ATC;
if (level != adev->vm_manager.num_level - 1)
init_value |= AMDGPU_PDE_PTE;
+
}
 
/* walk over the address space and allocate the page tables */
@@ -2017,7 +2027,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
list_del(>list);
 
if (vm->pte_support_ats)
-   init_pte_value = AMDGPU_PTE_SYSTEM;
+   init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
 
r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
mapping->start, mapping->last,
@@ -2627,7 +2637,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct 
amdgpu_vm *vm,
 
if (adev->asic_type == CHIP_RAVEN) {
vm->pte_support_ats = true;
-   init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE;
+   init_pde_value = AMDGPU_PTE_DEFAULT_ATC
+   | AMDGPU_PDE_PTE;
+
}
} else
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 2/2] drm/amdgpu: Add copy_pte_num_dw member in amdgpu_vm_pte_funcs

2017-09-19 Thread Yong Zhao

Use it to replace the hard-coded value "7" in amdgpu_vm_bo_update_mapping().

Change-Id: I85d89d401b8dbcf01ca9c55c281e552db874fde5
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h| 4 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 2 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/si_dma.c| 2 ++
 7 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8708476..e7de600 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -296,10 +296,14 @@ struct amdgpu_buffer_funcs {
 
 /* provided by hw blocks that can write ptes, e.g., sdma */
 struct amdgpu_vm_pte_funcs {
+   /* number of dw to reserve per operation */
+   unsignedcopy_pte_num_dw;
+
/* copy pte entries from GART */
void (*copy_pte)(struct amdgpu_ib *ib,
 uint64_t pe, uint64_t src,
 unsigned count);
+
/* write pte one entry at a time with addr mapping */
void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
  uint64_t value, unsigned count,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 28d16781..8fcc743 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1597,7 +1597,7 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
 
if (pages_addr) {
/* copy commands needed */
-   ndw += ncmds * 7;
+   ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
 
/* and also PTEs */
ndw += nptes * 2;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c64dcd1..60cecd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1387,7 +1387,9 @@ static void cik_sdma_set_buffer_funcs(struct 
amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
+   .copy_pte_num_dw = 7,
.copy_pte = cik_sdma_vm_copy_pte,
+
.write_pte = cik_sdma_vm_write_pte,
 
.set_max_nums_pte_pde = 0x1f >> 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index c05eb74..acdee3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -1324,7 +1324,9 @@ static void sdma_v2_4_set_buffer_funcs(struct 
amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
+   .copy_pte_num_dw = 7,
.copy_pte = sdma_v2_4_vm_copy_pte,
+
.write_pte = sdma_v2_4_vm_write_pte,
 
.set_max_nums_pte_pde = 0x1f >> 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 2079340..72f31cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -1748,7 +1748,9 @@ static void sdma_v3_0_set_buffer_funcs(struct 
amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
+   .copy_pte_num_dw = 7,
.copy_pte = sdma_v3_0_vm_copy_pte,
+
.write_pte = sdma_v3_0_vm_write_pte,
 
/* not 0x3f due to HW limitation */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 798fc2d23..7bf25271 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1714,7 +1714,9 @@ static void sdma_v4_0_set_buffer_funcs(struct 
amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
+   .copy_pte_num_dw = 7,
.copy_pte = sdma_v4_0_vm_copy_pte,
+
.write_pte = sdma_v4_0_vm_write_pte,
 
.set_max_nums_pte_pde = 0x40 >> 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c 
b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index adb6ae7..3fa2fbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -887,7 +887,9 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device 
*adev)
 }
 
 static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
+   .copy_pte_num_dw = 5,
.copy_pte = si_dma_vm_copy_pte,
+
.write_pte = si_dma_vm_write_pte,
 
.set_max_nums_pte_pde = 0x8 >> 3,
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

[PATCH 1/2] drm/amdgpu: Fix a bug in amdgpu_fill_buffer()

2017-09-19 Thread Yong Zhao

When max_bytes is not 8-byte aligned and the bo size is larger than
max_bytes, the last 8 bytes in a ttm node may be left unchanged.
For example, on pre-SDMA 4.0 hardware, where max_bytes = 0x1fffff, the
problem occurs when the bo size is 0x200000.

To fix the problem, we separately store the maximum number of
PTEs/PDEs that a single operation can set in the amdgpu_vm_pte_funcs
structure, rather than inferring it from the byte limit of the SDMA
constant fill, i.e. fill_max_bytes.

Together with the fix, we replace the hard-coded value "10" in
amdgpu_vm_bo_update_mapping() with the corresponding value from the
amdgpu_vm_pte_funcs structure.

Change-Id: I37c588a57cb63f1a8251fb5ead2eff4b39e047c9
Signed-off-by: Yong Zhao <yong.z...@amd.com>
Reviewed-by: Christian König <christian.koe...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 5 +++--
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c   | 3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c  | 3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c  | 4 
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  | 3 +++
 drivers/gpu/drm/amd/amdgpu/si_dma.c | 3 +++
 8 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e45e6e9..8708476 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -304,6 +304,13 @@ struct amdgpu_vm_pte_funcs {
void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
  uint64_t value, unsigned count,
  uint32_t incr);
+
+   /* maximum nums of PTEs/PDEs in a single operation */
+   uint32_tset_max_nums_pte_pde;
+
+   /* number of dw to reserve per operation */
+   unsignedset_pte_pde_num_dw;
+
/* for linear pte/pde updates without addr mapping */
void (*set_pte_pde)(struct amdgpu_ib *ib,
uint64_t pe,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 0e5f78f..bd43268 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1527,8 +1527,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
   struct dma_fence **fence)
 {
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-   /* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/
-   uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+   uint32_t max_bytes = 8 *
+   adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 
struct drm_mm_node *mm_node;
@@ -1560,8 +1560,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
++mm_node;
}
 
-   /* 10 double words for each SDMA_OP_PTEPDE cmd */
-   num_dw = num_loops * 10;
+   /* num of dwords for each SDMA_OP_PTEPDE cmd */
+   num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
 
/* for IB padding */
num_dw += 64;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6c11332..28d16781 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1606,10 +1606,11 @@ static int amdgpu_vm_bo_update_mapping(struct 
amdgpu_device *adev,
 
} else {
/* set page commands needed */
-   ndw += ncmds * 10;
+   ndw += ncmds * 
adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
 
/* extra commands for begin/end fragments */
-   ndw += 2 * 10 * adev->vm_manager.fragment_size;
+   ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
+   * adev->vm_manager.fragment_size;
 
params.func = amdgpu_vm_do_set_ptes;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index f508f4d..c64dcd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1389,6 +1389,9 @@ static void cik_sdma_set_buffer_funcs(struct 
amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
.copy_pte = cik_sdma_vm_copy_pte,
.write_pte = cik_sdma_vm_write_pte,
+
+   .set_max_nums_pte_pde = 0x1f >> 3,
+   .set_pte_pde_num_dw = 10,
.set_pte_pde = cik_sdma_vm_set_pte_pde,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index f2d0710..c05eb74 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -1326,6 +1326,9 @@ static void sdma_v2_4_set_buffer_funcs(struct 
amdgpu_device *adev)
 static const struct amdgpu_vm_pte_func

[PATCH 1/2] drm/amdgpu: Correct bytes limit for SDMA 3.0 copy and fill

2017-09-18 Thread Yong Zhao

Change-Id: I10fc5efbc303056c5c5c4dc4f4dd2c3186595a91
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c 
b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 728c0d8..4e7fe07 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -1730,11 +1730,11 @@ static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib 
*ib,
 }
 
 static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = {
-   .copy_max_bytes = 0x1f,
+   .copy_max_bytes = 0x3fffe0, /* not 0x3f due to HW limitation*/
.copy_num_dw = 7,
.emit_copy_buffer = sdma_v3_0_emit_copy_buffer,
 
-   .fill_max_bytes = 0x1f,
+   .fill_max_bytes = 0x3fffe0, /* not 0x3f due to HW limitation*/
.fill_num_dw = 5,
.emit_fill_buffer = sdma_v3_0_emit_fill_buffer,
 };
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 1/2] drm/amdgpu: Add a parameter to amdgpu_bo_create()

2017-07-28 Thread Yong Zhao


Got it.


On 2017-07-28 05:01 AM, Christian König wrote:

Am 27.07.2017 um 21:48 schrieb Yong Zhao:

The parameter init_value contains the value to which the VRAM bo is
initialized when the AMDGPU_GEM_CREATE_VRAM_CLEARED flag is set.

Change-Id: I9ef3b9dd3ca9b98cc25dd2eaff68fbe1129c3e3c
Signed-off-by: Yong Zhao <yong.z...@amd.com>


I'm about to push a cleanup which removes a bunch of references to 
amdgpu_bo_create(), so don't be surprised when you need to rebase your 
patch once more before pushing.


The patch is Reviewed-by: Christian König <christian.koe...@amd.com> 
and that rebase should only require you to remove changes, so feel 
free to push it after the rebase.


Christian.


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  3 ++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c |  4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c  |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 12 +---
  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|  2 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c|  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_test.c  |  4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c   |  4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  4 ++--
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c|  4 ++--
  drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c |  4 ++--
  drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 10 +-
  drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  6 +++---
  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  4 ++--
  20 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c

index 2292c77..6d2bd80 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -185,7 +185,8 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
  return -ENOMEM;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, 
AMDGPU_GEM_DOMAIN_GTT,
- AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 
&(*mem)->bo);

+ AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 0,
+ &(*mem)->bo);
  if (r) {
  dev_err(adev->dev,
  "failed to allocate BO for amdkfd (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c

index 2fb299a..63ec1e1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -81,7 +81,7 @@ static void amdgpu_benchmark_move(struct 
amdgpu_device *adev, unsigned size,

n = AMDGPU_BENCHMARK_ITERATIONS;
  r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, 
NULL,

- NULL, );
+ NULL, 0, );
  if (r) {
  goto out_cleanup;
  }
@@ -94,7 +94,7 @@ static void amdgpu_benchmark_move(struct 
amdgpu_device *adev, unsigned size,

  goto out_cleanup;
  }
  r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, 
NULL,

- NULL, );
+ NULL, 0, );
  if (r) {
  goto out_cleanup;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c

index 3d41cd4..343cdf9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -124,7 +124,7 @@ static int amdgpu_cgs_alloc_gpu_mem(struct 
cgs_device *cgs_device,

  ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE,
true, domain, flags,
NULL, , NULL,
-  );
+  0, );
  if (ret) {
  DRM_ERROR("(%d) bo create failed\n", ret);
  return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index fe6783e..cf81f9d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -344,7 +344,7 @@ static int amdgpu_vram_scratch_init(struct 
amdgpu_device *adev)

   PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
   AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
   AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
- NULL, NULL, >vram_scratch.robj);
+ NULL, NULL, 0, >vram_scratch.robj);
  if (r) {
  return r;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c

index 124b237..8cd79dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/g

[PATCH 2/2] drm/amdgpu: Support IOMMU on Raven

2017-07-26 Thread Yong Zhao

IOMMU support is achieved by setting the PTEs to 2 (the SYSTEM bit is
set) when the corresponding addresses are not occupied by buffers
allocated by the GPU driver.

Change-Id: I995c11c7a25bdaf7a16700d9e08a8fe287d49417
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c  |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c| 18 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h|  6 --
 drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c|  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_test.c  |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c   |  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c| 27 ++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h|  3 +++
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c |  6 +++---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c |  4 ++--
 21 files changed, 70 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 2292c77..fce2fa5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -185,7 +185,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
return -ENOMEM;
 
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT,
-AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 
&(*mem)->bo);
+AMDGPU_GEM_CREATE_CPU_GTT_USWC, NULL, NULL, 
&(*mem)->bo, 0);
if (r) {
dev_err(adev->dev,
"failed to allocate BO for amdkfd (%d)\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
index 2fb299a..56445ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c
@@ -81,7 +81,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, 
unsigned size,
 
n = AMDGPU_BENCHMARK_ITERATIONS;
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, sdomain, 0, NULL,
-NULL, );
+NULL, , 0);
if (r) {
goto out_cleanup;
}
@@ -94,7 +94,7 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, 
unsigned size,
goto out_cleanup;
}
r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, ddomain, 0, NULL,
-NULL, );
+NULL, , 0);
if (r) {
goto out_cleanup;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index 3d41cd4..ce19419 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -124,7 +124,7 @@ static int amdgpu_cgs_alloc_gpu_mem(struct cgs_device 
*cgs_device,
ret = amdgpu_bo_create_restricted(adev, size, PAGE_SIZE,
  true, domain, flags,
  NULL, , NULL,
- );
+ , 0);
if (ret) {
DRM_ERROR("(%d) bo create failed\n", ret);
return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index fe6783e..d8cd14f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -344,7 +344,7 @@ static int amdgpu_vram_scratch_init(struct amdgpu_device 
*adev)
 PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_VRAM,
 AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
 AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
-NULL, NULL, >vram_scratch.robj);
+NULL, NULL, >vram_scratch.robj, 0);
if (r) {
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index 124b237..a6d7f55 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -141,7 +141,7 @@ int amdgpu

[PATCH 1/2] drm/amdgpu: Add support for filling a buffer with 64 bit value

2017-07-26 Thread Yong Zhao

This function will be used later to support setting a page table
block with a 64-bit value.

Change-Id: Ib142ebd4163d6e23670a3f0ceed536d59133b942
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 19 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h |  2 +-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4d2a454..6ab30da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1509,11 +1509,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, 
uint64_t src_offset,
 }
 
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-  uint32_t src_data,
-  struct reservation_object *resv,
-  struct dma_fence **fence)
+   uint64_t src_data,
+   struct reservation_object *resv,
+   struct dma_fence **fence)
 {
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+   /* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/
uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 
@@ -1545,7 +1546,9 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
num_pages -= mm_node->size;
++mm_node;
}
-   num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
+
+   /* 10 double words for each SDMA_OP_PTEPDE cmd */
+   num_dw = num_loops * 10;
 
/* for IB padding */
num_dw += 64;
@@ -1570,12 +1573,16 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
uint32_t byte_count = mm_node->size << PAGE_SHIFT;
uint64_t dst_addr;
 
+   WARN_ONCE(byte_count & 0x7, "size should be a multiple of 8");
+
dst_addr = amdgpu_mm_node_addr(>tbo, mm_node, >tbo.mem);
while (byte_count) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 
-   amdgpu_emit_fill_buffer(adev, >ibs[0], src_data,
-   dst_addr, cur_size_in_bytes);
+   amdgpu_vm_set_pte_pde(adev, >ibs[0],
+   dst_addr, 0,
+   cur_size_in_bytes >> 3, 0,
+   src_data);
 
dst_addr += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index f137c24..0e2399f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -73,7 +73,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t 
src_offset,
   struct dma_fence **fence, bool direct_submit,
   bool vm_needs_flush);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-   uint32_t src_data,
+   uint64_t src_data,
struct reservation_object *resv,
struct dma_fence **fence);
 
-- 
2.7.4

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Changes for enabling ATS support from PTE

2017-07-25 Thread Yong Zhao


Hi there,

Attached are two patches made to amdgpu in order to support ATS on 
Raven. Please review them.


Regards,

Yong

From 0657ddb14a16d1b809c419b51e805287fb6a9989 Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.z...@amd.com>
Date: Thu, 20 Jul 2017 18:44:10 -0400
Subject: [PATCH 1/2] drm/amdgpu: Add support for filling a buffer with 64 bit
 value

This function will be used later to support filling a page table
block with a 64-bit value.

Change-Id: Ib142ebd4163d6e23670a3f0ceed536d59133b942
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c| 38 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h|  2 +-
 3 files changed, 34 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 21e0814..4dfec57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -416,7 +416,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	bo->tbo.mem.placement & TTM_PL_FLAG_VRAM) {
 		struct dma_fence *fence;
 
-		r = amdgpu_fill_buffer(bo, 0, bo->tbo.resv, );
+		r = amdgpu_fill_buffer(bo, 0, 4, bo->tbo.resv, );
 		if (unlikely(r))
 			goto fail_unreserve;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7820e81..99db4aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1611,11 +1611,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 }
 
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-		   uint32_t src_data,
-		   struct reservation_object *resv,
-		   struct dma_fence **fence)
+			uint64_t src_data, unsigned int word_size,
+			struct reservation_object *resv,
+			struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	/* max_bytes applies to both SDMA_OP_CONST_FILL and SDMA_OP_PTEPDE */
 	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 
@@ -1647,7 +1648,17 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		num_pages -= mm_node->size;
 		++mm_node;
 	}
-	num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
+
+	switch (word_size) {
+	case 4:
+		num_dw = num_loops * adev->mman.buffer_funcs->fill_num_dw;
+		break;
+	case 8: /* 10 double words for each SDMA_OP_PTEPDE cmd */
+		num_dw = num_loops * 10;
+		break;
+	default:
+		return -EINVAL;
+	}
 
 	/* for IB padding */
 	num_dw += 64;
@@ -1676,8 +1687,23 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		while (byte_count) {
 			uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
 
-			amdgpu_emit_fill_buffer(adev, >ibs[0], src_data,
-		dst_addr, cur_size_in_bytes);
+			switch (word_size) {
+			case 4: /* only take the lower 32 bits of src_data */
+amdgpu_emit_fill_buffer(adev, >ibs[0],
+		(uint32_t)src_data, dst_addr,
+		cur_size_in_bytes);
+break;
+			case 8:
+WARN_ONCE(cur_size_in_bytes & 0x7,
+	"size should be a multiple of 8");
+amdgpu_vm_set_pte_pde(adev, >ibs[0],
+		dst_addr, 0,
+		cur_size_in_bytes >> 3, 0,
+		src_data);
+break;
+			}
+
+
 
 			dst_addr += cur_size_in_bytes;
 			byte_count -= cur_size_in_bytes;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index a22e430..067e5e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -73,7 +73,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		   struct dma_fence **fence, bool direct_submit,
 		   bool vm_needs_flush);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
-			uint32_t src_data,
+			uint64_t src_data, unsigned int word_size,
 			struct reservation_object *resv,
 			struct dma_fence **fence);
 
-- 
2.7.4

From e0bb154b8ae014989e88a40f19379eec9a8b Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.z...@amd.com>
Date: Thu, 20 Jul 2017 18:49:09 -0400
Subject: [PATCH 2/2] drm/amdgpu: Support IOMMU on Raven

We achieve this by setting the PTEs to 2 (i.e. with the SYSTEM bit set) when
the corresponding addresses are not occupied by buffers allocated by the
GPU driver.

Change-Id: I995c11c7a25bdaf7a16700d9e08a8fe287d49417
Signed-off-by: Yong Zhao <yong.z...@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c|  4 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 10 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 30 ++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  8 
 include/uapi/drm/amdgpu_drm.h  |  2 ++
 5 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 9182def..433a90e

Re: [PATCH 06/12] drm/amdgpu: Correctly establish the suspend/resume hook for amdkfd

2017-07-06 Thread Yong Zhao


Hi Alex,

As far as I know, we never tested suspend/resume on the setting you 
mentioned. Theoretically it should work.


When I read the code now, I was wondering whether we should stop kfd 
before amdgpu_bo_evict_vram() and amdgpu_fence_driver_suspend(). If 
that's not needed, it may make more sense to stick to the previous 
design which kept the kfd suspend/resume inside your IP block 
suspend/resume.


Regards,

Yong


On 2017-07-06 05:06 PM, Alex Deucher wrote:

On Mon, Jul 3, 2017 at 5:11 PM, Felix Kuehling <felix.kuehl...@amd.com> wrote:

From: Yong Zhao <yong.z...@amd.com>

Signed-off-by: Yong Zhao <yong.z...@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>

Does this work properly for multiple GPUs?  E.g., if one is suspended
and another is not?  E.g., PX laptops where we runtime suspend the
dGPU while the APU is still running.

Alex


---
  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++
  drivers/gpu/drm/amd/amdgpu/cik.c   | 9 +
  2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5b1220f..bc69b9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -56,6 +56,8 @@
  #include 
  #include "amdgpu_vf_error.h"

+#include "amdgpu_amdkfd.h"
+
  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");

@@ -2397,6 +2399,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool 
suspend, bool fbcon)
 drm_modeset_unlock_all(dev);
 }

+   amdgpu_amdkfd_suspend(adev);
+
 /* unpin the front buffers and cursors */
 list_for_each_entry(crtc, >mode_config.crtc_list, head) {
 struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
@@ -2537,6 +2541,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool 
resume, bool fbcon)
 }
 }
 }
+   r = amdgpu_amdkfd_resume(adev);
+   if (r)
+   return r;

 /* blat the mode back in */
 if (fbcon) {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 6ce9f80..00639bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1825,21 +1825,14 @@ static int cik_common_suspend(void *handle)
  {
 struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-   amdgpu_amdkfd_suspend(adev);
-
 return cik_common_hw_fini(adev);
  }

  static int cik_common_resume(void *handle)
  {
-   int r;
 struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-   r = cik_common_hw_init(adev);
-   if (r)
-   return r;
-
-   return amdgpu_amdkfd_resume(adev);
+   return cik_common_hw_init(adev);
  }

  static bool cik_common_is_idle(void *handle)
--
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Re: [PATCH 06/12] drm/amdgpu: Correctly establish the suspend/resume hook for amdkfd

2017-07-06 Thread Yong Zhao




On 2017-07-06 05:44 PM, Alex Deucher wrote:

On Thu, Jul 6, 2017 at 5:33 PM, Yong Zhao <yong.z...@amd.com> wrote:

Hi Alex,

As far as I know, we never tested suspend/resume on the setting you
mentioned. Theoretically it should work.

Are the kfd s/r entry points global or per GPU?  If you have two GPUs
and you suspend one, will it suspend the entire kfd?  I'm fine with
the change, it's no worse than the current situation.  Mostly just
curious.
kfd s/r is per GPU. If we suspend only one out of two GPUs, the other 
one will keep working.



When I read the code now, I was wondering whether we should stop kfd before
amdgpu_bo_evict_vram() and amdgpu_fence_driver_suspend(). If that's not
needed, it may make more sense to stick to the previous design which kept
the kfd suspend/resume inside your IP block suspend/resume.

I think it makes more sense to put the kfd calls in the common device
s/r code rather than in the soc specific ip functions.  Change is:
Reviewed-by: Alex Deucher <alexander.deuc...@amd.com>



Regards,

Yong



On 2017-07-06 05:06 PM, Alex Deucher wrote:

On Mon, Jul 3, 2017 at 5:11 PM, Felix Kuehling <felix.kuehl...@amd.com>
wrote:

From: Yong Zhao <yong.z...@amd.com>

Signed-off-by: Yong Zhao <yong.z...@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehl...@amd.com>

Does this work properly for multiple GPUs?  E.g., if one is suspended
and another is not?  E.g., PX laptops where we runtime suspend the
dGPU while the APU is still running.

Alex


---
   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +++
   drivers/gpu/drm/amd/amdgpu/cik.c   | 9 +
   2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 5b1220f..bc69b9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -56,6 +56,8 @@
   #include 
   #include "amdgpu_vf_error.h"

+#include "amdgpu_amdkfd.h"
+
   MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
   MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");

@@ -2397,6 +2399,8 @@ int amdgpu_device_suspend(struct drm_device *dev,
bool suspend, bool fbcon)
  drm_modeset_unlock_all(dev);
  }

+   amdgpu_amdkfd_suspend(adev);
+
  /* unpin the front buffers and cursors */
  list_for_each_entry(crtc, >mode_config.crtc_list, head) {
  struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
@@ -2537,6 +2541,9 @@ int amdgpu_device_resume(struct drm_device *dev,
bool resume, bool fbcon)
  }
  }
  }
+   r = amdgpu_amdkfd_resume(adev);
+   if (r)
+   return r;

  /* blat the mode back in */
  if (fbcon) {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c
b/drivers/gpu/drm/amd/amdgpu/cik.c
index 6ce9f80..00639bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1825,21 +1825,14 @@ static int cik_common_suspend(void *handle)
   {
  struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-   amdgpu_amdkfd_suspend(adev);
-
  return cik_common_hw_fini(adev);
   }

   static int cik_common_resume(void *handle)
   {
-   int r;
  struct amdgpu_device *adev = (struct amdgpu_device *)handle;

-   r = cik_common_hw_init(adev);
-   if (r)
-   return r;
-
-   return amdgpu_amdkfd_resume(adev);
+   return cik_common_hw_init(adev);
   }

   static bool cik_common_is_idle(void *handle)
--
1.9.1

___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx




___
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

97 matches

Mail list logo