Re: [PATCH v2 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl

2022-09-26 Thread Sharma, Shashank




On 9/26/2022 5:19 PM, Christian König wrote:

Am 26.09.22 um 17:14 schrieb Sharma, Shashank:


Hello Christian,

On 9/26/2022 5:10 PM, Christian König wrote:

Am 26.09.22 um 17:02 schrieb Shashank Sharma:

Allow the user to specify a workload hint to the kernel.
We can use these to tweak the dpm heuristics to better match
the workload for improved performance.

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  include/uapi/drm/amdgpu_drm.h | 18 ++
  1 file changed, 18 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h 
b/include/uapi/drm/amdgpu_drm.h

index c2c9c674a223..da5019a6e233 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -212,6 +212,8 @@ union drm_amdgpu_bo_list {
  #define AMDGPU_CTX_OP_QUERY_STATE2    4
  #define AMDGPU_CTX_OP_GET_STABLE_PSTATE    5
  #define AMDGPU_CTX_OP_SET_STABLE_PSTATE    6
+#define AMDGPU_CTX_OP_GET_WORKLOAD_PROFILE    7


Do we really have an use case for getting the profile or is that just 
added for completeness?
To be honest, there is no direct use case for a get(). We have 
self-reset in kernel to clear the profile, so this is just for the 
sake of completeness.


The problem is we usually need an userspace use case to justify 
upstreaming of an UAPI.


We already had a couple of cases where we only upstreamed UAPI for the 
sake of completeness (set/get pair for example) and then years later 
found out that the getter is completely broken for years because nobody 
used it.


So if we don't really need it I would try to avoid it.

Christian.


Makes sense, I can remove get API as UAPI and just keep it kernel internal.

- Shashank






At base minimum we need a test-case for both to exercise the UAPI.



Agree, I have already added some test cases in libdrm/tests/amdgpu for 
my local testing, I will start publishing them once we have an 
agreement on the UAPI and kernel design.


- Shashank


Regards,
Christian.


+#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE    8
  /* GPU reset status */
  #define AMDGPU_CTX_NO_RESET    0
@@ -252,6 +254,17 @@ union drm_amdgpu_bo_list {
  #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
  #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
+/* GPU workload hints, flag bits 8-15 */
+#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8
+#define AMDGPU_CTX_WORKLOAD_HINT_MASK  (0xff << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_NONE  (0 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_3D    (1 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VR    (3 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE   (4 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)

+#define AMDGPU_CTX_WORKLOAD_HINT_MAX AMDGPU_CTX_WORKLOAD_HINT_COMPUTE
+#define AMDGPU_CTX_WORKLOAD_INDEX(n)   (n >> 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)

+
  struct drm_amdgpu_ctx_in {
  /** AMDGPU_CTX_OP_* */
  __u32    op;
@@ -281,6 +294,11 @@ union drm_amdgpu_ctx_out {
  __u32    flags;
  __u32    _pad;
  } pstate;
+
+    struct {
+    __u32    flags;
+    __u32    _pad;
+    } workload;
  };
  union drm_amdgpu_ctx {






Re: [PATCH v2 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl

2022-09-26 Thread Christian König

Am 26.09.22 um 17:14 schrieb Sharma, Shashank:


Hello Christian,

On 9/26/2022 5:10 PM, Christian König wrote:

Am 26.09.22 um 17:02 schrieb Shashank Sharma:

Allow the user to specify a workload hint to the kernel.
We can use these to tweak the dpm heuristics to better match
the workload for improved performance.

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  include/uapi/drm/amdgpu_drm.h | 18 ++
  1 file changed, 18 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h 
b/include/uapi/drm/amdgpu_drm.h

index c2c9c674a223..da5019a6e233 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -212,6 +212,8 @@ union drm_amdgpu_bo_list {
  #define AMDGPU_CTX_OP_QUERY_STATE2    4
  #define AMDGPU_CTX_OP_GET_STABLE_PSTATE    5
  #define AMDGPU_CTX_OP_SET_STABLE_PSTATE    6
+#define AMDGPU_CTX_OP_GET_WORKLOAD_PROFILE    7


Do we really have an use case for getting the profile or is that just 
added for completeness?
To be honest, there is no direct use case for a get(). We have 
self-reset in kernel to clear the profile, so this is just for the 
sake of completeness.


The problem is we usually need an userspace use case to justify 
upstreaming of an UAPI.


We already had a couple of cases where we only upstreamed UAPI for the 
sake of completeness (set/get pair for example) and then years later 
found out that the getter is completely broken for years because nobody 
used it.


So if we don't really need it I would try to avoid it.

Christian.




At base minimum we need a test-case for both to exercise the UAPI.



Agree, I have already added some test cases in libdrm/tests/amdgpu for 
my local testing, I will start publishing them once we have an 
agreement on the UAPI and kernel design.


- Shashank


Regards,
Christian.


+#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE    8
  /* GPU reset status */
  #define AMDGPU_CTX_NO_RESET    0
@@ -252,6 +254,17 @@ union drm_amdgpu_bo_list {
  #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
  #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
+/* GPU workload hints, flag bits 8-15 */
+#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8
+#define AMDGPU_CTX_WORKLOAD_HINT_MASK  (0xff << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_NONE  (0 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_3D    (1 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VR    (3 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE   (4 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)

+#define AMDGPU_CTX_WORKLOAD_HINT_MAX AMDGPU_CTX_WORKLOAD_HINT_COMPUTE
+#define AMDGPU_CTX_WORKLOAD_INDEX(n)   (n >> 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)

+
  struct drm_amdgpu_ctx_in {
  /** AMDGPU_CTX_OP_* */
  __u32    op;
@@ -281,6 +294,11 @@ union drm_amdgpu_ctx_out {
  __u32    flags;
  __u32    _pad;
  } pstate;
+
+    struct {
+    __u32    flags;
+    __u32    _pad;
+    } workload;
  };
  union drm_amdgpu_ctx {






Re: [PATCH v2 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl

2022-09-26 Thread Sharma, Shashank



Hello Christian,

On 9/26/2022 5:10 PM, Christian König wrote:

Am 26.09.22 um 17:02 schrieb Shashank Sharma:

Allow the user to specify a workload hint to the kernel.
We can use these to tweak the dpm heuristics to better match
the workload for improved performance.

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  include/uapi/drm/amdgpu_drm.h | 18 ++
  1 file changed, 18 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h 
b/include/uapi/drm/amdgpu_drm.h

index c2c9c674a223..da5019a6e233 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -212,6 +212,8 @@ union drm_amdgpu_bo_list {
  #define AMDGPU_CTX_OP_QUERY_STATE2    4
  #define AMDGPU_CTX_OP_GET_STABLE_PSTATE    5
  #define AMDGPU_CTX_OP_SET_STABLE_PSTATE    6
+#define AMDGPU_CTX_OP_GET_WORKLOAD_PROFILE    7


Do we really have an use case for getting the profile or is that just 
added for completeness?
To be honest, there is no direct use case for a get(). We have 
self-reset in kernel to clear the profile, so this is just for the sake 
of completeness.



At base minimum we need a test-case for both to exercise the UAPI.



Agree, I have already added some test cases in libdrm/tests/amdgpu for 
my local testing, I will start publishing them once we have an agreement 
on the UAPI and kernel design.


- Shashank


Regards,
Christian.


+#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE    8
  /* GPU reset status */
  #define AMDGPU_CTX_NO_RESET    0
@@ -252,6 +254,17 @@ union drm_amdgpu_bo_list {
  #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
  #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
+/* GPU workload hints, flag bits 8-15 */
+#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8
+#define AMDGPU_CTX_WORKLOAD_HINT_MASK  (0xff << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_NONE  (0 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_3D    (1 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VR    (3 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE   (4 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_MAX   
AMDGPU_CTX_WORKLOAD_HINT_COMPUTE
+#define AMDGPU_CTX_WORKLOAD_INDEX(n)   (n >> 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)

+
  struct drm_amdgpu_ctx_in {
  /** AMDGPU_CTX_OP_* */
  __u32    op;
@@ -281,6 +294,11 @@ union drm_amdgpu_ctx_out {
  __u32    flags;
  __u32    _pad;
  } pstate;
+
+    struct {
+    __u32    flags;
+    __u32    _pad;
+    } workload;
  };
  union drm_amdgpu_ctx {




Re: [PATCH v2 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl

2022-09-26 Thread Christian König

Am 26.09.22 um 17:02 schrieb Shashank Sharma:

Allow the user to specify a workload hint to the kernel.
We can use these to tweak the dpm heuristics to better match
the workload for improved performance.

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
  include/uapi/drm/amdgpu_drm.h | 18 ++
  1 file changed, 18 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index c2c9c674a223..da5019a6e233 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -212,6 +212,8 @@ union drm_amdgpu_bo_list {
  #define AMDGPU_CTX_OP_QUERY_STATE24
  #define AMDGPU_CTX_OP_GET_STABLE_PSTATE   5
  #define AMDGPU_CTX_OP_SET_STABLE_PSTATE   6
+#define AMDGPU_CTX_OP_GET_WORKLOAD_PROFILE 7


Do we really have an use case for getting the profile or is that just 
added for completeness?


At base minimum we need a test-case for both to exercise the UAPI.

Regards,
Christian.


+#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE 8
  
  /* GPU reset status */

  #define AMDGPU_CTX_NO_RESET   0
@@ -252,6 +254,17 @@ union drm_amdgpu_bo_list {
  #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
  #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
  
+/* GPU workload hints, flag bits 8-15 */

+#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8
+#define AMDGPU_CTX_WORKLOAD_HINT_MASK  (0xff << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_NONE  (0 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_3D(1 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VR(3 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE   (4 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_MAX  AMDGPU_CTX_WORKLOAD_HINT_COMPUTE
+#define AMDGPU_CTX_WORKLOAD_INDEX(n)  (n >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+
  struct drm_amdgpu_ctx_in {
/** AMDGPU_CTX_OP_* */
__u32   op;
@@ -281,6 +294,11 @@ union drm_amdgpu_ctx_out {
__u32   flags;
__u32   _pad;
} pstate;
+
+   struct {
+   __u32   flags;
+   __u32   _pad;
+   } workload;
  };
  
  union drm_amdgpu_ctx {




[PATCH v2 1/5] drm/amdgpu: add UAPI for workload hints to ctx ioctl

2022-09-26 Thread Shashank Sharma
Allow the user to specify a workload hint to the kernel.
We can use these to tweak the dpm heuristics to better match
the workload for improved performance.

Signed-off-by: Alex Deucher 
Signed-off-by: Shashank Sharma 
---
 include/uapi/drm/amdgpu_drm.h | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index c2c9c674a223..da5019a6e233 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -212,6 +212,8 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_OP_QUERY_STATE2 4
 #define AMDGPU_CTX_OP_GET_STABLE_PSTATE5
 #define AMDGPU_CTX_OP_SET_STABLE_PSTATE6
+#define AMDGPU_CTX_OP_GET_WORKLOAD_PROFILE 7
+#define AMDGPU_CTX_OP_SET_WORKLOAD_PROFILE 8
 
 /* GPU reset status */
 #define AMDGPU_CTX_NO_RESET0
@@ -252,6 +254,17 @@ union drm_amdgpu_bo_list {
 #define AMDGPU_CTX_STABLE_PSTATE_MIN_MCLK  3
 #define AMDGPU_CTX_STABLE_PSTATE_PEAK  4
 
+/* GPU workload hints, flag bits 8-15 */
+#define AMDGPU_CTX_WORKLOAD_HINT_SHIFT 8
+#define AMDGPU_CTX_WORKLOAD_HINT_MASK  (0xff << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_NONE  (0 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_3D(1 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VIDEO (2 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_VR(3 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_COMPUTE   (4 << 
AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+#define AMDGPU_CTX_WORKLOAD_HINT_MAX  AMDGPU_CTX_WORKLOAD_HINT_COMPUTE
+#define AMDGPU_CTX_WORKLOAD_INDEX(n)  (n >> AMDGPU_CTX_WORKLOAD_HINT_SHIFT)
+
 struct drm_amdgpu_ctx_in {
/** AMDGPU_CTX_OP_* */
__u32   op;
@@ -281,6 +294,11 @@ union drm_amdgpu_ctx_out {
__u32   flags;
__u32   _pad;
} pstate;
+
+   struct {
+   __u32   flags;
+   __u32   _pad;
+   } workload;
 };
 
 union drm_amdgpu_ctx {
-- 
2.34.1