what:
saving and restoring the MQDs of kernel compute queues costs many clock
cycles during world switch, which significantly degrades multi-VF
performance

how:
introduce a parameter to control the number of kernel compute queues, to
avoid the performance drop when no kernel compute queue is needed

notes:
this parameter only affects gfx 8/9/10

TODO:
in the future we will let the hypervisor driver set this parameter
automatically, so the user no longer needs to configure it through
modprobe in the virtual machine

Signed-off-by: Monk Liu <monk....@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  5 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    | 27 +++++++++++++-------------
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c     | 30 +++++++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c      | 29 ++++++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      | 31 +++++++++++++++---------------
 7 files changed, 71 insertions(+), 56 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e97c088..71a3d6a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -201,6 +201,7 @@ extern int amdgpu_si_support;
 #ifdef CONFIG_DRM_AMDGPU_CIK
 extern int amdgpu_cik_support;
 #endif
+extern int amdgpu_num_kcq_user_set;
 
 #define AMDGPU_VM_MAX_NUM_CTX                  4096
 #define AMDGPU_SG_THRESHOLD                    (256*1024*1024)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 62ecac9..18b93ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1199,6 +1199,11 @@ static int amdgpu_device_check_arguments(struct 
amdgpu_device *adev)
 
        amdgpu_gmc_tmz_set(adev);
 
+       if (amdgpu_num_kcq_user_set > 8 || amdgpu_num_kcq_user_set < 0) {
+               amdgpu_num_kcq_user_set = 8;
+               dev_warn(adev-dev, "set KCQ number to 8 due to invalid paramter 
provided by user\n");
+       }
+
        return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6291f5f..03a94e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -150,6 +150,7 @@ int amdgpu_noretry;
 int amdgpu_force_asic_type = -1;
 int amdgpu_tmz = 0;
 int amdgpu_reset_method = -1; /* auto */
+int amdgpu_num_kcq_user_set = 8;
 
 struct amdgpu_mgpu_info mgpu_info = {
        .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@@ -765,6 +766,9 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);
 MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = 
legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");
 module_param_named(reset_method, amdgpu_reset_method, int, 0444);
 
+MODULE_PARM_DESC(num_kcq, "number of KCQ user want to setup (8 if set to 
greater than 8 or less than 0, only affect gfx 8+)");
+module_param_named(num_kcq, amdgpu_num_kcq_user_set, int, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef  CONFIG_DRM_AMDGPU_SI
        {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 8eff017..0b59049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -202,7 +202,7 @@ bool amdgpu_gfx_is_high_priority_compute_queue(struct 
amdgpu_device *adev,
 
 void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
 {
-       int i, queue, pipe, mec;
+       int i, queue, pipe, mec, j = 0;
        bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
 
        /* policy for amdgpu compute queue ownership */
@@ -219,23 +219,24 @@ void amdgpu_gfx_compute_queue_acquire(struct 
amdgpu_device *adev)
 
                if (multipipe_policy) {
                        /* policy: amdgpu owns the first two queues of the 
first MEC */
-                       if (mec == 0 && queue < 2)
-                               set_bit(i, adev->gfx.mec.queue_bitmap);
+                       if (mec == 0 && queue < 2) {
+                               if (j++ < adev->gfx.num_compute_rings)
+                                       set_bit(i, adev->gfx.mec.queue_bitmap);
+                               else
+                                       break;
+                       }
                } else {
                        /* policy: amdgpu owns all queues in the first pipe */
-                       if (mec == 0 && pipe == 0)
-                               set_bit(i, adev->gfx.mec.queue_bitmap);
+                       if (mec == 0 && pipe == 0) {
+                               if (j++ < adev->gfx.num_compute_rings)
+                                       set_bit(i, adev->gfx.mec.queue_bitmap);
+                               else
+                                       break;
+                       }
                }
        }
 
-       /* update the number of active compute rings */
-       adev->gfx.num_compute_rings =
-               bitmap_weight(adev->gfx.mec.queue_bitmap, 
AMDGPU_MAX_COMPUTE_QUEUES);
-
-       /* If you hit this case and edited the policy, you probably just
-        * need to increase AMDGPU_MAX_COMPUTE_RINGS */
-       if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
-               adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+       dev_info(adev->dev, "mec queue bitmap weight=%d\n", 
bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
 }
 
 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index db9f1e8..2ad8393 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4022,21 +4022,23 @@ static int gfx_v10_0_mec_init(struct amdgpu_device 
*adev)
        amdgpu_gfx_compute_queue_acquire(adev);
        mec_hpd_size = adev->gfx.num_compute_rings * GFX10_MEC_HPD_SIZE;
 
-       r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-                                     AMDGPU_GEM_DOMAIN_GTT,
-                                     &adev->gfx.mec.hpd_eop_obj,
-                                     &adev->gfx.mec.hpd_eop_gpu_addr,
-                                     (void **)&hpd);
-       if (r) {
-               dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-               gfx_v10_0_mec_fini(adev);
-               return r;
-       }
+       if (mec_hpd_size) {
+               r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+                                                                         
AMDGPU_GEM_DOMAIN_GTT,
+                                                                         
&adev->gfx.mec.hpd_eop_obj,
+                                                                         
&adev->gfx.mec.hpd_eop_gpu_addr,
+                                                                         (void 
**)&hpd);
+               if (r) {
+                       dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", 
r);
+                       gfx_v10_0_mec_fini(adev);
+                       return r;
+               }
 
-       memset(hpd, 0, mec_hpd_size);
+               memset(hpd, 0, mec_hpd_size);
 
-       amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-       amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+               amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+               amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+       }
 
        if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
                mec_hdr = (const struct gfx_firmware_header_v1_0 
*)adev->gfx.mec_fw->data;
@@ -7159,7 +7161,7 @@ static int gfx_v10_0_early_init(void *handle)
                break;
        }
 
-       adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+       adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
 
        gfx_v10_0_set_kiq_pm4_funcs(adev);
        gfx_v10_0_set_ring_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 8d72089..6d95b4b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1343,21 +1343,22 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
        amdgpu_gfx_compute_queue_acquire(adev);
 
        mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE;
+       if (mec_hpd_size) {
+               r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+                                                                         
AMDGPU_GEM_DOMAIN_VRAM,
+                                                                         
&adev->gfx.mec.hpd_eop_obj,
+                                                                         
&adev->gfx.mec.hpd_eop_gpu_addr,
+                                                                         (void 
**)&hpd);
+               if (r) {
+                       dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", 
r);
+                       return r;
+               }
 
-       r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-                                     AMDGPU_GEM_DOMAIN_VRAM,
-                                     &adev->gfx.mec.hpd_eop_obj,
-                                     &adev->gfx.mec.hpd_eop_gpu_addr,
-                                     (void **)&hpd);
-       if (r) {
-               dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-               return r;
-       }
-
-       memset(hpd, 0, mec_hpd_size);
+               memset(hpd, 0, mec_hpd_size);
 
-       amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-       amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+               amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+               amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+       }
 
        return 0;
 }
@@ -5294,7 +5295,7 @@ static int gfx_v8_0_early_init(void *handle)
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
        adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
-       adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+       adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
        adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
        gfx_v8_0_set_ring_funcs(adev);
        gfx_v8_0_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index e4e751f..43bcfe3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1938,22 +1938,23 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev)
        /* take ownership of the relevant compute queues */
        amdgpu_gfx_compute_queue_acquire(adev);
        mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE;
+       if (mec_hpd_size) {
+               r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
+                                                                         
AMDGPU_GEM_DOMAIN_VRAM,
+                                                                         
&adev->gfx.mec.hpd_eop_obj,
+                                                                         
&adev->gfx.mec.hpd_eop_gpu_addr,
+                                                                         (void 
**)&hpd);
+               if (r) {
+                       dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", 
r);
+                       gfx_v9_0_mec_fini(adev);
+                       return r;
+               }
 
-       r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-                                     AMDGPU_GEM_DOMAIN_VRAM,
-                                     &adev->gfx.mec.hpd_eop_obj,
-                                     &adev->gfx.mec.hpd_eop_gpu_addr,
-                                     (void **)&hpd);
-       if (r) {
-               dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
-               gfx_v9_0_mec_fini(adev);
-               return r;
-       }
-
-       memset(hpd, 0, mec_hpd_size);
+               memset(hpd, 0, mec_hpd_size);
 
-       amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
-       amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+               amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
+               amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
+       }
 
        mec_hdr = (const struct gfx_firmware_header_v1_0 
*)adev->gfx.mec_fw->data;
 
@@ -4625,7 +4626,7 @@ static int gfx_v9_0_early_init(void *handle)
                adev->gfx.num_gfx_rings = 0;
        else
                adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS;
-       adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+       adev->gfx.num_compute_rings = amdgpu_num_kcq_user_set;
        gfx_v9_0_set_kiq_pm4_funcs(adev);
        gfx_v9_0_set_ring_funcs(adev);
        gfx_v9_0_set_irq_funcs(adev);
-- 
2.7.4

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to