Re: [PATCH 5/5] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources

2019-02-08 Thread Kuehling, Felix
Some nit-picks inline. Looks good otherwise.

On 2019-02-05 3:31 p.m., Zhao, Yong wrote:
> We can directly calculate the sdma doorbell index in the process doorbell
> pages through the doorbell_index structure in amdgpu_device, so no need
> to cache them in kgd2kfd_shared_resources any more, resulting in more
> portable code.

What do you mean by "portable" here? Portable to what? Other 
architectures? Kernels? GPUs?


>
> Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49
> Signed-off-by: Yong Zhao 
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 55 ++-
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 --
>   .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 +-
>   3 files changed, 31 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index ee8527701731..e62c3703169a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct 
> amdgpu_device *adev,
>   
>   void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>   {
> - int i, n;
> + int i;
>   int last_valid_bit;
>   
>   if (adev->kfd.dev) {
> @@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>   .gpuvm_size = min(adev->vm_manager.max_pfn
> << AMDGPU_GPU_PAGE_SHIFT,
> AMDGPU_GMC_HOLE_START),
> - .drm_render_minor = adev->ddev->render->index
> + .drm_render_minor = adev->ddev->render->index,
> + .sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
> +
>   };
>   
>   /* this is going to have a few of the MSBs set that we need to
> @@ -172,45 +174,22 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device 
> *adev)
>   &gpu_resources.doorbell_aperture_size,
>   &gpu_resources.doorbell_start_offset);
>   
> - if (adev->asic_type < CHIP_VEGA10) {
> - kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
> - return;
> - }
> -
> - n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
> -
> - for (i = 0; i < n; i += 2) {
> - /* On SOC15 the BIF is involved in routing
> -  * doorbells using the low 12 bits of the
> -  * address. Communicate the assignments to
> -  * KFD. KFD uses two doorbell pages per
> -  * process in case of 64-bit doorbells so we
> -  * can use each doorbell assignment twice.
> + if (adev->asic_type >= CHIP_VEGA10) {
> + /* Because of the setting in registers like
> +  * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
> +  * lower 12 bits of doorbell address for routing. In
> +  * order to route the CP queue doorbells to CP engine,
> +  * the doorbells allocated to CP queues have to be
> +  * outside the range set for SDMA, VCN, and IH blocks
> +  * Prior to SOC15, all queues use queue ID to
> +  * determine doorbells.
>*/
> - gpu_resources.sdma_doorbell[0][i] =
> - adev->doorbell_index.sdma_engine[0] + (i >> 1);
> - gpu_resources.sdma_doorbell[0][i+1] =
> - adev->doorbell_index.sdma_engine[0] + 0x200 + 
> (i >> 1);
> - gpu_resources.sdma_doorbell[1][i] =
> - adev->doorbell_index.sdma_engine[1] + (i >> 1);
> - gpu_resources.sdma_doorbell[1][i+1] =
> - adev->doorbell_index.sdma_engine[1] + 0x200 + 
> (i >> 1);
> + gpu_resources.reserved_doorbells_start =
> + adev->doorbell_index.sdma_engine[0];
> + gpu_resources.reserved_doorbells_end =
> + adev->doorbell_index.last_non_cp;
>   }
>   
> - /* Because of the setting in registers like
> -  * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
> -  * lower 12 bits of doorbell address for routing. In
> -  * order to route the CP queue doorbells to CP engine,
> -  * the doorbells allocated to CP queues have to be
> -  * outside the range set for SDMA, VCN, and IH blocks
> -  * Prior to SOC15, all queues use queue ID to
> -  * determine doorbells.
> -  */
> - gpu_resources.reserved_doorbells_start =
> - adev->doorbell_index.sdm

[PATCH 5/5] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources

2019-02-05 Thread Zhao, Yong
We can directly calculate the sdma doorbell index in the process doorbell
pages through the doorbell_index structure in amdgpu_device, so no need
to cache them in kgd2kfd_shared_resources any more, resulting in more
portable code.

Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49
Signed-off-by: Yong Zhao 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c| 55 ++-
 .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 --
 .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 +-
 3 files changed, 31 insertions(+), 44 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index ee8527701731..e62c3703169a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct 
amdgpu_device *adev,
 
 void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
-   int i, n;
+   int i;
int last_valid_bit;
 
if (adev->kfd.dev) {
@@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.gpuvm_size = min(adev->vm_manager.max_pfn
  << AMDGPU_GPU_PAGE_SHIFT,
  AMDGPU_GMC_HOLE_START),
-   .drm_render_minor = adev->ddev->render->index
+   .drm_render_minor = adev->ddev->render->index,
+   .sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
+
};
 
/* this is going to have a few of the MSBs set that we need to
@@ -172,45 +174,22 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
 
-   if (adev->asic_type < CHIP_VEGA10) {
-   kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
-   return;
-   }
-
-   n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
-
-   for (i = 0; i < n; i += 2) {
-   /* On SOC15 the BIF is involved in routing
-* doorbells using the low 12 bits of the
-* address. Communicate the assignments to
-* KFD. KFD uses two doorbell pages per
-* process in case of 64-bit doorbells so we
-* can use each doorbell assignment twice.
+   if (adev->asic_type >= CHIP_VEGA10) {
+   /* Because of the setting in registers like
+* SDMA0_DOORBELL_RANGE etc., BIF statically uses the
+* lower 12 bits of doorbell address for routing. In
+* order to route the CP queue doorbells to CP engine,
+* the doorbells allocated to CP queues have to be
+* outside the range set for SDMA, VCN, and IH blocks
+* Prior to SOC15, all queues use queue ID to
+* determine doorbells.
 */
-   gpu_resources.sdma_doorbell[0][i] =
-   adev->doorbell_index.sdma_engine[0] + (i >> 1);
-   gpu_resources.sdma_doorbell[0][i+1] =
-   adev->doorbell_index.sdma_engine[0] + 0x200 + 
(i >> 1);
-   gpu_resources.sdma_doorbell[1][i] =
-   adev->doorbell_index.sdma_engine[1] + (i >> 1);
-   gpu_resources.sdma_doorbell[1][i+1] =
-   adev->doorbell_index.sdma_engine[1] + 0x200 + 
(i >> 1);
+   gpu_resources.reserved_doorbells_start =
+   adev->doorbell_index.sdma_engine[0];
+   gpu_resources.reserved_doorbells_end =
+   adev->doorbell_index.last_non_cp;
}
 
-   /* Because of the setting in registers like
-* SDMA0_DOORBELL_RANGE etc., BIF statically uses the
-* lower 12 bits of doorbell address for routing. In
-* order to route the CP queue doorbells to CP engine,
-* the doorbells allocated to CP queues have to be
-* outside the range set for SDMA, VCN, and IH blocks
-* Prior to SOC15, all queues use queue ID to
-* determine doorbells.
-*/
-   gpu_resources.reserved_doorbells_start =
-   adev->doorbell_index.sdma_engine[0];
-   gpu_resources.reserved_doorbells_end =
-   adev->doorbell_index.last_non_cp;
-
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
}
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dev