Re: [PATCH v2 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl
On 9/26/2022 5:19 PM, Christian König wrote: Am 26.09.22 um 17:14 schrieb Sharma, Shashank: Hello Christian, On 9/26/2022 5:10 PM, Christian König wrote: Am 26.09.22 um 17:02 schrieb Shashank Sharma: Allow the user to specify a workload hint to the kernel. We can use these to tweak the dpm heuristics to better match the workload for improved performance. Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- include/uapi/drm/amdgpu_drm.h | 18 ++ 1 file changed, 18 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c2c9c674a223..da5019a6e233 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -212,6 +212,8 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_QUERY_STATE2 4 #define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 #define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 +#define AMDGPU_CTX_OP_GET_WORKLOAD_PROFILE 7 Do we really have an use case for getting the profile or is that just added for completeness? To be honest, there is no direct use case for a get(). We have self-reset in kernel to clear the profile, so this is just for the sake of completeness. The problem is we usually need an userspace use case to justify upstreaming of an UAPI. We already had a couple of cases where we only upstreamed UAPI for the sake of completeness (set/get pair for example) and then years later found out that the getter is completely broken for years because nobody used it. So if we don't really need it I would try to avoid it. Christian. Makes sense, I can remove get API as UAPI and just keep it kernel internal. - Shashank At base minimum we need a test-case for both to exercise the UAPI. Agree, I have already added some test cases in libdrm/tests/amdgpu for my local testing, I will start publishing them once we have an agreement on the UAPI and kernel design. - Shashank Regards, Christian. +#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE 8 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -252,6 +254,17 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 #define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 +/* GPU workload hints, flag bits 8-15 */ +#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8 +#define AMDGPU_CTX_WORKLOAD_HINT_MASK (0xff << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_NONE (0 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_3D (1 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_VR (3 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE (4 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_MAX AMDGPU_CTX_WORKLOAD_HINT_COMPUTE +#define AMDGPU_CTX_WORKLOAD_INDEX(n) (n >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT) + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; @@ -281,6 +294,11 @@ union drm_amdgpu_ctx_out { __u32 flags; __u32 _pad; } pstate; + + struct { + __u32 flags; + __u32 _pad; + } workload; }; union drm_amdgpu_ctx {
Re: [PATCH v2 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl
Am 26.09.22 um 17:14 schrieb Sharma, Shashank: Hello Christian, On 9/26/2022 5:10 PM, Christian König wrote: Am 26.09.22 um 17:02 schrieb Shashank Sharma: Allow the user to specify a workload hint to the kernel. We can use these to tweak the dpm heuristics to better match the workload for improved performance. Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- include/uapi/drm/amdgpu_drm.h | 18 ++ 1 file changed, 18 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c2c9c674a223..da5019a6e233 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -212,6 +212,8 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_QUERY_STATE2 4 #define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 #define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 +#define AMDGPU_CTX_OP_GET_WORKLOAD_PROFILE 7 Do we really have an use case for getting the profile or is that just added for completeness? To be honest, there is no direct use case for a get(). We have self-reset in kernel to clear the profile, so this is just for the sake of completeness. The problem is we usually need an userspace use case to justify upstreaming of an UAPI. We already had a couple of cases where we only upstreamed UAPI for the sake of completeness (set/get pair for example) and then years later found out that the getter is completely broken for years because nobody used it. So if we don't really need it I would try to avoid it. Christian. At base minimum we need a test-case for both to exercise the UAPI. Agree, I have already added some test cases in libdrm/tests/amdgpu for my local testing, I will start publishing them once we have an agreement on the UAPI and kernel design. - Shashank Regards, Christian. +#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE 8 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -252,6 +254,17 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 #define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 +/* GPU workload hints, flag bits 8-15 */ +#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8 +#define AMDGPU_CTX_WORKLOAD_HINT_MASK (0xff << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_NONE (0 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_3D (1 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_VR (3 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE (4 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_MAX AMDGPU_CTX_WORKLOAD_HINT_COMPUTE +#define AMDGPU_CTX_WORKLOAD_INDEX(n) (n >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT) + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; @@ -281,6 +294,11 @@ union drm_amdgpu_ctx_out { __u32 flags; __u32 _pad; } pstate; + + struct { + __u32 flags; + __u32 _pad; + } workload; }; union drm_amdgpu_ctx {
Re: [PATCH v2 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl
Hello Christian, On 9/26/2022 5:10 PM, Christian König wrote: Am 26.09.22 um 17:02 schrieb Shashank Sharma: Allow the user to specify a workload hint to the kernel. We can use these to tweak the dpm heuristics to better match the workload for improved performance. Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- include/uapi/drm/amdgpu_drm.h | 18 ++ 1 file changed, 18 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c2c9c674a223..da5019a6e233 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -212,6 +212,8 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_QUERY_STATE2 4 #define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 #define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 +#define AMDGPU_CTX_OP_GET_WORKLOAD_PROFILE 7 Do we really have an use case for getting the profile or is that just added for completeness? To be honest, there is no direct use case for a get(). We have self-reset in kernel to clear the profile, so this is just for the sake of completeness. At base minimum we need a test-case for both to exercise the UAPI. Agree, I have already added some test cases in libdrm/tests/amdgpu for my local testing, I will start publishing them once we have an agreement on the UAPI and kernel design. - Shashank Regards, Christian. +#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE 8 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -252,6 +254,17 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 #define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 +/* GPU workload hints, flag bits 8-15 */ +#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8 +#define AMDGPU_CTX_WORKLOAD_HINT_MASK (0xff << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_NONE (0 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_3D (1 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_VR (3 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE (4 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_MAX AMDGPU_CTX_WORKLOAD_HINT_COMPUTE +#define AMDGPU_CTX_WORKLOAD_INDEX(n) (n >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT) + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; @@ -281,6 +294,11 @@ union drm_amdgpu_ctx_out { __u32 flags; __u32 _pad; } pstate; + + struct { + __u32 flags; + __u32 _pad; + } workload; }; union drm_amdgpu_ctx {
Re: [PATCH v2 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl
Am 26.09.22 um 17:02 schrieb Shashank Sharma: Allow the user to specify a workload hint to the kernel. We can use these to tweak the dpm heuristics to better match the workload for improved performance. Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- include/uapi/drm/amdgpu_drm.h | 18 ++ 1 file changed, 18 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c2c9c674a223..da5019a6e233 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -212,6 +212,8 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_QUERY_STATE24 #define AMDGPU_CTX_OP_GET_STABLE_PSTATE 5 #define AMDGPU_CTX_OP_SET_STABLE_PSTATE 6 +#define AMDGPU_CTX_OP_GET_WORKLOAD_PROFILE 7 Do we really have an use case for getting the profile or is that just added for completeness? At base minimum we need a test-case for both to exercise the UAPI. Regards, Christian. +#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE 8 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET 0 @@ -252,6 +254,17 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 #define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 +/* GPU workload hints, flag bits 8-15 */ +#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8 +#define AMDGPU_CTX_WORKLOAD_HINT_MASK (0xff << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_NONE (0 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_3D(1 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_VR(3 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE (4 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_MAX AMDGPU_CTX_WORKLOAD_HINT_COMPUTE +#define AMDGPU_CTX_WORKLOAD_INDEX(n) (n >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT) + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; @@ -281,6 +294,11 @@ union drm_amdgpu_ctx_out { __u32 flags; __u32 _pad; } pstate; + + struct { + __u32 flags; + __u32 _pad; + } workload; }; union drm_amdgpu_ctx {
[PATCH v2 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl
Allow the user to specify a workload hint to the kernel. We can use these to tweak the dpm heuristics to better match the workload for improved performance. Signed-off-by: Alex Deucher Signed-off-by: Shashank Sharma --- include/uapi/drm/amdgpu_drm.h | 18 ++ 1 file changed, 18 insertions(+) diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index c2c9c674a223..da5019a6e233 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -212,6 +212,8 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_OP_QUERY_STATE2 4 #define AMDGPU_CTX_OP_GET_STABLE_PSTATE5 #define AMDGPU_CTX_OP_SET_STABLE_PSTATE6 +#define AMDGPU_CTX_OP_GET_WORKLOAD_PROFILE 7 +#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE 8 /* GPU reset status */ #define AMDGPU_CTX_NO_RESET0 @@ -252,6 +254,17 @@ union drm_amdgpu_bo_list { #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK 3 #define AMDGPU_CTX_STABLE_PSTATE_PEAK 4 +/* GPU workload hints, flag bits 8-15 */ +#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8 +#define AMDGPU_CTX_WORKLOAD_HINT_MASK (0xff << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_NONE (0 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_3D(1 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_VR(3 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE (4 << AMDGPU_CTX_WORKLOAD_HINT_SHIFT) +#define AMDGPU_CTX_WORKLOAD_HINT_MAX AMDGPU_CTX_WORKLOAD_HINT_COMPUTE +#define AMDGPU_CTX_WORKLOAD_INDEX(n) (n >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT) + struct drm_amdgpu_ctx_in { /** AMDGPU_CTX_OP_* */ __u32 op; @@ -281,6 +294,11 @@ union drm_amdgpu_ctx_out { __u32 flags; __u32 _pad; } pstate; + + struct { + __u32 flags; + __u32 _pad; + } workload; }; union drm_amdgpu_ctx { -- 2.34.1