On Fri, Jan 9, 2026 at 5:09 AM Lang Yu <[email protected]> wrote:
>
> "adev->mes.compute_hqd_mask[i] = adev->gfx.disable_kq ? 0xF" is
> incorrect for MEC with 8 queues per pipe. Let's calculate hqd
> mask with number of queues per pipe and number of gfx/compute
> queues kernel used, and get rid of version check and hardcode.
>
> Currently, only MEC1 is used for both kernel and user compute queues.
> Enabling more MECs requires redistributing queues per pipe and
> adjusting the queue resources shared with KFD, which needs a separate
> patch. Skip the other MECs for now.
>
> v2: Force reserved queues to 0 if kernel queue is explicitly disabled.
>
> Signed-off-by: Lang Yu <[email protected]>

Reviewed-by: Alex Deucher <[email protected]>
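
For anyone who wants to sanity-check the new mask math, here is a quick
userspace sketch of the helper (the pipe/queue counts are assumed
examples for illustration, not values read from any real ASIC):

#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

/* standalone copy of the amdgpu_mes_get_hqd_mask() math from this patch */
static unsigned int get_hqd_mask(unsigned int num_pipe,
                                 unsigned int num_hqd_per_pipe,
                                 unsigned int num_reserved_hqd)
{
        unsigned int total = (unsigned int)((1ULL << num_hqd_per_pipe) - 1);
        unsigned int reserved = (unsigned int)((1ULL << DIV_ROUND_UP(num_reserved_hqd, num_pipe)) - 1);

        return total & ~reserved;
}

int main(void)
{
        /* assumed: 1 gfx pipe, 8 HQDs per pipe, 1 kernel gfx ring */
        printf("gfx:     0x%02x\n", get_hqd_mask(1, 8, 1));
        /* assumed: 4 compute pipes, 8 HQDs per pipe, 8 kernel compute rings */
        printf("compute: 0x%02x\n", get_hqd_mask(4, 8, 8));
        /* disable_kq: nothing reserved for kernel rings */
        printf("no kq:   0x%02x\n", get_hqd_mask(1, 8, 0));
        return 0;
}

That should print 0xfe, 0xfc and 0xff respectively.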

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 59 +++++++++++++++----------
>  1 file changed, 35 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index dffa0f7276b7..bed37e50d45b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -31,7 +31,6 @@
>
>  #define AMDGPU_MES_MAX_NUM_OF_QUEUES_PER_PROCESS 1024
>  #define AMDGPU_ONE_DOORBELL_SIZE 8
> -#define AMDGPU_MES_RESERVED_QUEUES     2
>
>  int amdgpu_mes_doorbell_process_slice(struct amdgpu_device *adev)
>  {
> @@ -89,12 +88,25 @@ static void amdgpu_mes_doorbell_free(struct amdgpu_device *adev)
>         bitmap_free(adev->mes.doorbell_bitmap);
>  }
>
> +static inline u32 amdgpu_mes_get_hqd_mask(u32 num_pipe, u32 num_hqd_per_pipe, u32 num_reserved_hqd)
> +{
> +       u32 total_hqd_mask = (u32)((1ULL << num_hqd_per_pipe) - 1);
> +       u32 reserved_hqd_mask = (u32)((1ULL << DIV_ROUND_UP(num_reserved_hqd, num_pipe)) - 1);
> +
> +       return (total_hqd_mask & ~reserved_hqd_mask);
> +}
> +
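
Spelling out the arithmetic here for the archive (queue counts are only
assumed for illustration): with 8 HQDs per pipe and one kernel gfx ring
on a single pipe this gives 0xff & ~0x01 = 0xfe; with 8 kernel compute
rings spread over 4 pipes it gives 0xff & ~0x03 = 0xfc; and with only
2 HQDs per gfx pipe it reduces to 0x3 & ~0x1 = 0x2, i.e. the same values
the removed version check and AMDGPU_MES_RESERVED_QUEUES logic used to
produce.
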
>  int amdgpu_mes_init(struct amdgpu_device *adev)
>  {
>         int i, r, num_pipes;
>         u32 total_vmid_mask, reserved_vmid_mask;
> -       u32 queue_mask, reserved_queue_mask;
>         int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1;
> +       u32 gfx_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.me.num_pipe_per_me,
> +                               adev->gfx.me.num_queue_per_pipe,
> +                               adev->gfx.disable_kq ? 0 : adev->gfx.num_gfx_rings);
> +       u32 compute_hqd_mask = amdgpu_mes_get_hqd_mask(adev->gfx.mec.num_pipe_per_mec,
> +                               adev->gfx.mec.num_queue_per_pipe,
> +                               adev->gfx.disable_kq ? 0 : adev->gfx.num_compute_rings);
>
>         adev->mes.adev = adev;
>
> @@ -115,9 +127,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>         adev->mes.vmid_mask_mmhub = 0xFF00;
>         adev->mes.vmid_mask_gfxhub = total_vmid_mask & ~reserved_vmid_mask;
>
> -       queue_mask = (u32)(1UL << adev->gfx.mec.num_queue_per_pipe) - 1;
> -       reserved_queue_mask = (u32)(1UL << AMDGPU_MES_RESERVED_QUEUES) - 1;
> -
>         num_pipes = adev->gfx.me.num_pipe_per_me * adev->gfx.me.num_me;
>         if (num_pipes > AMDGPU_MES_MAX_GFX_PIPES)
>                 dev_warn(adev->dev, "more gfx pipes than supported by MES! (%d vs %d)\n",
> @@ -126,22 +135,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>         for (i = 0; i < AMDGPU_MES_MAX_GFX_PIPES; i++) {
>                 if (i >= num_pipes)
>                         break;
> -               if (amdgpu_ip_version(adev, GC_HWIP, 0) >=
> -                   IP_VERSION(12, 0, 0))
> -                       /*
> -                        * GFX V12 has only one GFX pipe, but 8 queues in it.
> -                        * GFX pipe 0 queue 0 is being used by Kernel queue.
> -                        * Set GFX pipe 0 queue 1-7 for MES scheduling
> -                        * mask = 1111 1110b
> -                        */
> -                       adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0xFF : 0xFE;
> -               else
> -                       /*
> -                        * GFX pipe 0 queue 0 is being used by Kernel queue.
> -                        * Set GFX pipe 0 queue 1 for MES scheduling
> -                        * mask = 10b
> -                        */
> -                       adev->mes.gfx_hqd_mask[i] = adev->gfx.disable_kq ? 0x3 : 0x2;
> +
> +               adev->mes.gfx_hqd_mask[i] = gfx_hqd_mask;
>         }
>
>         num_pipes = adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec;
> @@ -150,10 +145,15 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>                          num_pipes, AMDGPU_MES_MAX_COMPUTE_PIPES);
>
>         for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) {
> -               if (i >= num_pipes)
> +               /*
> +                * Currently, only MEC1 is used for both kernel and user compute queue.
> +                * To enable more MECs, we need to redistribute queue per pipe and
> +                * adjust queue resource shared with kfd. Skip other MECs for now.
> +                */
> +               if (i >= adev->gfx.mec.num_pipe_per_mec)
>                         break;
> -               adev->mes.compute_hqd_mask[i] =
> -                       adev->gfx.disable_kq ? 0xF : (queue_mask & ~reserved_queue_mask);
> +
> +               adev->mes.compute_hqd_mask[i] = compute_hqd_mask;
>         }
>
>         num_pipes = adev->sdma.num_instances;
> @@ -167,6 +167,17 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
>                 adev->mes.sdma_hqd_mask[i] = 0xfc;
>         }
>
> +       dev_info(adev->dev,
> +                        "MES: vmid_mask_mmhub 0x%08x, vmid_mask_gfxhub 0x%08x\n",
> +                        adev->mes.vmid_mask_mmhub,
> +                        adev->mes.vmid_mask_gfxhub);
> +
> +       dev_info(adev->dev,
> +                        "MES: gfx_hqd_mask 0x%08x, compute_hqd_mask 0x%08x, sdma_hqd_mask 0x%08x\n",
> +                        adev->mes.gfx_hqd_mask[0],
> +                        adev->mes.compute_hqd_mask[0],
> +                        adev->mes.sdma_hqd_mask[0]);
> +
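
With the example masks worked out above, the second log line would read
something like (illustrative values only):

  MES: gfx_hqd_mask 0x000000fe, compute_hqd_mask 0x000000fc, sdma_hqd_mask 0x000000fc
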
>         for (i = 0; i < AMDGPU_MAX_MES_PIPES * num_xcc; i++) {
>                 r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]);
>                 if (r) {
> --
> 2.34.1
>
