The code is fine. Some comments about your comment changes inline to 
help clarify things a bit.

On 2019-02-05 3:31 p.m., Zhao, Yong wrote:
> Reserved doorbells for SDMA IH and VCN were not properly masked out
> when allocating doorbells for CP user queues. This patch fixed that.
>
> Change-Id: I670adfc3fd7725d2ed0bd9665cb7f69f8b9023c2
> Signed-off-by: Yong Zhao <yong.z...@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 17 +++++++++++----
>   drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h  |  4 ++++
>   drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c  |  3 +++
>   drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c  |  3 +++
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  9 ++++++++
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 21 ++++++++++++++-----
>   .../gpu/drm/amd/include/kgd_kfd_interface.h   | 19 ++++++-----------
>   7 files changed, 54 insertions(+), 22 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index e957e42c539a..ee8527701731 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -196,11 +196,20 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device 
> *adev)
>                       gpu_resources.sdma_doorbell[1][i+1] =
>                               adev->doorbell_index.sdma_engine[1] + 0x200 + 
> (i >> 1);
>               }
> -             /* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
> -              * SDMA, IH and VCN. So don't use them for the CP.
> +
> +             /* Because of the setting in registers like
> +              * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
> +              * lower 12 bits of doorbell address for routing. In
> +              * order to route the CP queue doorbells to CP engine,
> +              * the doorbells allocated to CP queues have to be
> +              * outside the range set for SDMA, VCN, and IH blocks
> +              * Prior to SOC15, all queues use queue ID to
> +              * determine doorbells.

This comment mixes up cause and effect:

 1. Doorbell routing by the BIF (cause)
 2. Changes to doorbell assignment for user mode queues (effect)

The first is a HW change (cause). The second is a consequence from that 
(effect). Amdgpu doesn't know anything about KFD queue IDs. I'd just 
drop that sentence.

In this context, what distinguishes SOC15 from older chips is, that on 
older chips the BIF does not route doorbells. All doorbells can be used 
by all engines. Only on SOC15 and newer, KFD needs to know about how 
doorbells are routed by the BIF in order to correctly assign doorbells 
to queues. Therefore the reserved_doorbell_... and sdma_doorbell fields 
are only used on SOC15.


>                */
> -             gpu_resources.reserved_doorbell_mask = 0x1e0;
> -             gpu_resources.reserved_doorbell_val  = 0x0e0;
> +             gpu_resources.reserved_doorbells_start =
> +                             adev->doorbell_index.sdma_engine[0];
> +             gpu_resources.reserved_doorbells_end =
> +                             adev->doorbell_index.last_non_cp;
>   
>               kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
>       }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> index 59c41841cbce..74b8e2bfabd3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> @@ -70,6 +70,7 @@ struct amdgpu_doorbell_index {
>                       uint32_t vce_ring6_7;
>               } uvd_vce;
>       };
> +     uint32_t last_non_cp;
>       uint32_t max_assignment;
>       uint32_t last_idx;
>       /* Per engine SDMA doorbell size in dword */
> @@ -141,6 +142,7 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
>       AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3             = 0x18D,
>       AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5             = 0x18E,
>       AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7             = 0x18F,
> +     AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP             = 
> AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
>       AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT            = 0x18F,
>       AMDGPU_VEGA20_DOORBELL_INVALID                   = 0xFFFF
>   } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
> @@ -216,6 +218,8 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
>       AMDGPU_DOORBELL64_VCE_RING4_5             = 0xFE,
>       AMDGPU_DOORBELL64_VCE_RING6_7             = 0xFF,
>   
> +     AMDGPU_DOORBELL64_LAST_NON_CP             = 
> AMDGPU_DOORBELL64_VCE_RING6_7,
> +
>       AMDGPU_DOORBELL64_MAX_ASSIGNMENT          = 0xFF,
>       AMDGPU_DOORBELL64_INVALID                 = 0xFFFF
>   } AMDGPU_DOORBELL64_ASSIGNMENT;
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c 
> b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
> index 65214c7b0b20..76166c0ec509 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
> @@ -80,6 +80,9 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)
>       adev->doorbell_index.uvd_vce.vce_ring2_3 = 
> AMDGPU_DOORBELL64_VCE_RING2_3;
>       adev->doorbell_index.uvd_vce.vce_ring4_5 = 
> AMDGPU_DOORBELL64_VCE_RING4_5;
>       adev->doorbell_index.uvd_vce.vce_ring6_7 = 
> AMDGPU_DOORBELL64_VCE_RING6_7;
> +
> +     adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP;
> +
>       /* In unit of dword doorbell */
>       adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT 
> << 1;
>       adev->doorbell_index.dw_range_per_sdma_eng =
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c 
> b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
> index a388d306391a..10df2fed5a99 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
> @@ -84,6 +84,9 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)
>       adev->doorbell_index.uvd_vce.vce_ring2_3 = 
> AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3;
>       adev->doorbell_index.uvd_vce.vce_ring4_5 = 
> AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5;
>       adev->doorbell_index.uvd_vce.vce_ring6_7 = 
> AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7;
> +
> +     adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP;
> +
>       adev->doorbell_index.max_assignment = 
> AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1;
>       adev->doorbell_index.dw_range_per_sdma_eng =
>                       (adev->doorbell_index.sdma_engine[1]
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index e5ebcca7f031..6b8459f852cc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -103,6 +103,15 @@
>   
>   #define KFD_KERNEL_QUEUE_SIZE 2048
>   
> +/* 512 = 0x200
> + * On SOC15, the doorbell index distance for SDMA RLC i and (i + 1) in the
> + * same SDMA engine, where i is a even number.
> + * For 8-bytes doorbells, it ensures that the mirror doorbell range (in terms
> + * of low 12 bit address for each HW engine) on the second doorbell page is
> + * the same as the range of the first doorbell page.*/
> +#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
> +
> +
>   /*
>    * Kernel module parameter to specify maximum number of supported queues per
>    * device
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c 
> b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 80b36e860a0a..e904d6036b3d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -607,13 +607,24 @@ static int init_doorbell_bitmap(struct 
> qcm_process_device *qpd,
>       if (!qpd->doorbell_bitmap)
>               return -ENOMEM;
>   
> -     /* Mask out any reserved doorbells */
> -     for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
> -             if ((dev->shared_resources.reserved_doorbell_mask & i) ==
> -                 dev->shared_resources.reserved_doorbell_val) {
> +     /* Mask out all reserved doorbells for SDMA, IH, and VCN on SOC15.
> +      * Because of the setting in registers like SDMA0_DOORBELL_RANGE etc.,
> +      * BIF statically uses the lower 12 bits of doorbell address for
> +      * routing. In order to route the CP queue doorbells to CP engine,
> +      * the doorbells allocated to CP queues have to be outside the range
> +      * set for SDMA, VCN, and IH blocks.
> +      * Prior to SOC15, all queues use queue ID to
> +      * determine doorbells. */
> +     for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS / 2; i++) {
> +             if (i >= dev->shared_resources.reserved_doorbells_start
> +                     && i <= dev->shared_resources.reserved_doorbells_end) {
>                       set_bit(i, qpd->doorbell_bitmap);
> -                     pr_debug("reserved doorbell 0x%03x\n", i);
> +                     set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
> +                             qpd->doorbell_bitmap);
> +                     pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i,
> +                             i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
>               }
> +     }
>   
>       return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h 
> b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 83d960110d23..b1bf45419d93 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -137,20 +137,13 @@ struct kgd2kfd_shared_resources {
>       /* Bit n == 1 means Queue n is available for KFD */
>       DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
>   
> -     /* Doorbell assignments (SOC15 and later chips only). Only
> -      * specific doorbells are routed to each SDMA engine. Others
> -      * are routed to IH and VCN. They are not usable by the CP.

This first paragraph is still applicable and explains why we need the 
sdma_doorbell array on SOC15 to assign doorbells for SDMA queues.


> -      *
> -      * Any doorbell number D that satisfies the following condition
> -      * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val
> -      *
> -      * KFD currently uses 1024 (= 0x3ff) doorbells per process. If
> -      * doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved, that means
> -      * mask would be set to 0x1e0 and val set to 0x0e0.
> -      */
>       unsigned int sdma_doorbell[2][8];
> -     unsigned int reserved_doorbell_mask;
> -     unsigned int reserved_doorbell_val;
> +
> +     /* From SOC15 onwards, the doorbell indexes reserved for SDMA, IH,
> +      * and VCN

The important bit that you should not forget to mention, is that these 
doorbells are not routed to the CP, so they're reserved (unusable) for 
CP queues.

Regards,
   Felix


> +      */
> +     unsigned int reserved_doorbells_start;
> +     unsigned int reserved_doorbells_end;
>   
>       /* Base address of doorbell aperture. */
>       phys_addr_t doorbell_physical_address;
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to