[PATCH v3] drm/xe/guc: Fix h2g_write usage of GUC_CTB_MSG_MAX_LEN

2023-06-29 Thread Alan Previn
In the ABI header, GUC_CTB_MSG_MIN_LEN is '1' because
GUC_CTB_HDR_LEN is 1. This aligns with the H2G/G2H CTB specification,
where all command formats are defined in units of dwords, so '1'
means one dword. Accordingly, GUC_CTB_MSG_MAX_LEN is 256-1 (i.e. 255
dwords). However, h2g_write was incorrectly assuming that
GUC_CTB_MSG_MAX_LEN was in bytes. Fix this.

v3: Fix nit on #define location. (Matt)
v2: Correctly treating GUC_CTB_MSG_MAX_LEN as dwords causes
a local array to consume 4x the stack size. Rework the function
to avoid consuming stack even if the action size is large. (Matt)

Signed-off-by: Alan Previn 
Reviewed-by: Matthew Brost 

---
 drivers/gpu/drm/xe/xe_guc_ct.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 22bc9ce846db..aa04b5c4822f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -401,19 +401,21 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 
*action, u32 len,
 {
struct xe_device *xe = ct_to_xe(ct);
struct guc_ctb *h2g = &ct->ctbs.h2g;
-   u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)];
-   u32 cmd_len = len + GUC_CTB_HDR_LEN;
-   u32 cmd_idx = 0, i;
+#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW 
HxG header */
+   u32 cmd[H2G_CT_HEADERS];
u32 tail = h2g->info.tail;
+   u32 full_len;
struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
 tail * sizeof(u32));
 
+   full_len = len + GUC_CTB_HDR_LEN;
+
lockdep_assert_held(&ct->lock);
-   XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN);
+   XE_BUG_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN));
XE_BUG_ON(tail > h2g->info.size);
 
/* Command will wrap, zero fill (NOPs), return and check credits again 
*/
-   if (tail + cmd_len > h2g->info.size) {
+   if (tail + full_len > h2g->info.size) {
xe_map_memset(xe, &map, 0, 0,
  (h2g->info.size - tail) * sizeof(u32));
h2g_reserve_space(ct, (h2g->info.size - tail));
@@ -428,30 +430,33 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 
*action, u32 len,
 * dw1: HXG header (including action code)
 * dw2+: action data
 */
-   cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+   cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
if (want_response) {
-   cmd[cmd_idx++] =
+   cmd[1] =
FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
} else {
-   cmd[cmd_idx++] =
+   cmd[1] =
FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
}
-   for (i = 1; i < len; ++i)
-   cmd[cmd_idx++] = action[i];
+
+   /* H2G header in cmd[1] replaces action[0] so: */
+   --len;
+   ++action;
 
/* Write H2G ensuring visable before descriptor update */
-   xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32));
+   xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
+   xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * 
sizeof(u32));
xe_device_wmb(ct_to_xe(ct));
 
/* Update local copies */
-   h2g->info.tail = (tail + cmd_len) % h2g->info.size;
-   h2g_reserve_space(ct, cmd_len);
+   h2g->info.tail = (tail + full_len) % h2g->info.size;
+   h2g_reserve_space(ct, full_len);
 
/* Update descriptor */
desc_write(xe, h2g, tail, h2g->info.tail);

base-commit: 2ec46ad7578ebba3048d6031c1a75c21920f0e19
-- 
2.39.0



Re: [PATCH v3] drm/xe/guc: Fix h2g_write usage of GUC_CTB_MSG_MAX_LEN

2023-06-29 Thread Teres Alexis, Alan Previn
On Wed, 2023-06-28 at 21:44 +0000, Brost, Matthew wrote:
> On Wed, Jun 28, 2023 at 11:17:18AM -0700, Alan Previn wrote:
> > In the ABI header, GUC_CTB_MSG_MIN_LEN is '1' because
> > GUC_CTB_HDR_LEN is 1. This aligns with H2G/G2H CTB specification
> > where all command formats are defined in units of dwords so that '1'
> > is a dword. Accordingly, GUC_CTB_MSG_MAX_LEN is 256-1 (i.e. 255
> > dwords). However, h2g_write was incorrectly assuming that
> > GUC_CTB_MSG_MAX_LEN was in bytes. Fix this.
> 
alan:snip
> > diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> > index 22bc9ce846db..aa04b5c4822f 100644
> > --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> > +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> > @@ -401,19 +401,21 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 
> > *action, u32 len,
> >  {
> > struct xe_device *xe = ct_to_xe(ct);
> > struct guc_ctb *h2g = &ct->ctbs.h2g;
> > -   u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)];
> > -   u32 cmd_len = len + GUC_CTB_HDR_LEN;
> > -   u32 cmd_idx = 0, i;
> > +#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one 
> > DW HxG header */
> 
> Hate to nit pick but again this should be above the h2g_write per
> feedback from Oden on Xe in general.
> 
> Otherwise LGTM.
> 
> With the nit addressed:
> 
> Reviewed-by: Matthew Brost 

Thanks for reviewing. My bad on the #define - you mentioned that before.
Will fix that now.
...alan


Re: [PATCH v3] drm/xe/guc: Fix h2g_write usage of GUC_CTB_MSG_MAX_LEN

2023-06-28 Thread Matthew Brost
On Wed, Jun 28, 2023 at 11:17:18AM -0700, Alan Previn wrote:
> In the ABI header, GUC_CTB_MSG_MIN_LEN is '1' because
> GUC_CTB_HDR_LEN is 1. This aligns with H2G/G2H CTB specification
> where all command formats are defined in units of dwords so that '1'
> is a dword. Accordingly, GUC_CTB_MSG_MAX_LEN is 256-1 (i.e. 255
> dwords). However, h2g_write was incorrectly assuming that
> GUC_CTB_MSG_MAX_LEN was in bytes. Fix this.
> 
> v2: By correctly treating GUC_CTB_MSG_MAX_LEN as dwords, it causes
> a local array to consume 4x the stack size. Rework the function
> to avoid consuming stack even if the action size is large.
> 
> Signed-off-by: Alan Previn 
> ---
>  drivers/gpu/drm/xe/xe_guc_ct.c | 31 ++++++++++++++++++-------------
>  1 file changed, 18 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 22bc9ce846db..aa04b5c4822f 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -401,19 +401,21 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 
> *action, u32 len,
>  {
>   struct xe_device *xe = ct_to_xe(ct);
>   struct guc_ctb *h2g = &ct->ctbs.h2g;
> - u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)];
> - u32 cmd_len = len + GUC_CTB_HDR_LEN;
> - u32 cmd_idx = 0, i;
> +#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW 
> HxG header */

Hate to nit pick but again this should be above the h2g_write per
feedback from Oden on Xe in general.

Otherwise LGTM.

With the nit addressed:

Reviewed-by: Matthew Brost 

> + u32 cmd[H2G_CT_HEADERS];
>   u32 tail = h2g->info.tail;
> + u32 full_len;
>   struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
>tail * sizeof(u32));
>  
> + full_len = len + GUC_CTB_HDR_LEN;
> +
>   lockdep_assert_held(&ct->lock);
> - XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN);
> + XE_BUG_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN));
>   XE_BUG_ON(tail > h2g->info.size);
>  
>   /* Command will wrap, zero fill (NOPs), return and check credits again 
> */
> - if (tail + cmd_len > h2g->info.size) {
> + if (tail + full_len > h2g->info.size) {
>   xe_map_memset(xe, &map, 0, 0,
> (h2g->info.size - tail) * sizeof(u32));
>   h2g_reserve_space(ct, (h2g->info.size - tail));
> @@ -428,30 +430,33 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 
> *action, u32 len,
>* dw1: HXG header (including action code)
>* dw2+: action data
>*/
> - cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
> + cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
>   FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
>   FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
>   if (want_response) {
> - cmd[cmd_idx++] =
> + cmd[1] =
>   FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
>   FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
>  GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
>   } else {
> - cmd[cmd_idx++] =
> + cmd[1] =
>   FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
>   FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
>  GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
>   }
> - for (i = 1; i < len; ++i)
> - cmd[cmd_idx++] = action[i];
> +
> + /* H2G header in cmd[1] replaces action[0] so: */
> + --len;
> + ++action;
>  
>   /* Write H2G ensuring visable before descriptor update */
> - xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32));
> + xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
> + xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * 
> sizeof(u32));
>   xe_device_wmb(ct_to_xe(ct));
>  
>   /* Update local copies */
> - h2g->info.tail = (tail + cmd_len) % h2g->info.size;
> - h2g_reserve_space(ct, cmd_len);
> + h2g->info.tail = (tail + full_len) % h2g->info.size;
> + h2g_reserve_space(ct, full_len);
>  
>   /* Update descriptor */
>   desc_write(xe, h2g, tail, h2g->info.tail);
> 
> base-commit: abdb420db479bae28a2abd7ba2c66229b7e8cb77
> -- 
> 2.39.0
> 


[PATCH v3] drm/xe/guc: Fix h2g_write usage of GUC_CTB_MSG_MAX_LEN

2023-06-28 Thread Alan Previn
In the ABI header, GUC_CTB_MSG_MIN_LEN is '1' because
GUC_CTB_HDR_LEN is 1. This aligns with H2G/G2H CTB specification
where all command formats are defined in units of dwords so that '1'
is a dword. Accordingly, GUC_CTB_MSG_MAX_LEN is 256-1 (i.e. 255
dwords). However, h2g_write was incorrectly assuming that
GUC_CTB_MSG_MAX_LEN was in bytes. Fix this.

v2: By correctly treating GUC_CTB_MSG_MAX_LEN as dwords, it causes
a local array to consume 4x the stack size. Rework the function
to avoid consuming stack even if the action size is large.

Signed-off-by: Alan Previn 
---
 drivers/gpu/drm/xe/xe_guc_ct.c | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 22bc9ce846db..aa04b5c4822f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -401,19 +401,21 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 
*action, u32 len,
 {
struct xe_device *xe = ct_to_xe(ct);
struct guc_ctb *h2g = &ct->ctbs.h2g;
-   u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)];
-   u32 cmd_len = len + GUC_CTB_HDR_LEN;
-   u32 cmd_idx = 0, i;
+#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW 
HxG header */
+   u32 cmd[H2G_CT_HEADERS];
u32 tail = h2g->info.tail;
+   u32 full_len;
struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds,
 tail * sizeof(u32));
 
+   full_len = len + GUC_CTB_HDR_LEN;
+
lockdep_assert_held(&ct->lock);
-   XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN);
+   XE_BUG_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN));
XE_BUG_ON(tail > h2g->info.size);
 
/* Command will wrap, zero fill (NOPs), return and check credits again 
*/
-   if (tail + cmd_len > h2g->info.size) {
+   if (tail + full_len > h2g->info.size) {
xe_map_memset(xe, &map, 0, 0,
  (h2g->info.size - tail) * sizeof(u32));
h2g_reserve_space(ct, (h2g->info.size - tail));
@@ -428,30 +430,33 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 
*action, u32 len,
 * dw1: HXG header (including action code)
 * dw2+: action data
 */
-   cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
+   cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) |
FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
if (want_response) {
-   cmd[cmd_idx++] =
+   cmd[1] =
FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
} else {
-   cmd[cmd_idx++] =
+   cmd[1] =
FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) |
FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
   GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
}
-   for (i = 1; i < len; ++i)
-   cmd[cmd_idx++] = action[i];
+
+   /* H2G header in cmd[1] replaces action[0] so: */
+   --len;
+   ++action;
 
/* Write H2G ensuring visable before descriptor update */
-   xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32));
+   xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32));
+   xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * 
sizeof(u32));
xe_device_wmb(ct_to_xe(ct));
 
/* Update local copies */
-   h2g->info.tail = (tail + cmd_len) % h2g->info.size;
-   h2g_reserve_space(ct, cmd_len);
+   h2g->info.tail = (tail + full_len) % h2g->info.size;
+   h2g_reserve_space(ct, full_len);
 
/* Update descriptor */
desc_write(xe, h2g, tail, h2g->info.tail);

base-commit: abdb420db479bae28a2abd7ba2c66229b7e8cb77
-- 
2.39.0