[PATCH v3] drm/xe/guc: Fix h2g_write usage of GUC_CTB_MSG_MAX_LEN
In the ABI header, GUC_CTB_MSG_MIN_LEN is '1' because GUC_CTB_HDR_LEN is 1. This aligns with H2G/G2H CTB specification where all command formats are defined in units of dwords so that '1' is a dword. Accordingly, GUC_CTB_MSG_MAX_LEN is 256-1 (i.e. 255 dwords). However, h2g_write was incorrectly assuming that GUC_CTB_MSG_MAX_LEN was in bytes. Fix this. v3: Fix nit on #define location. (Matt) v2: By correctly treating GUC_CTB_MSG_MAX_LEN as dwords, it causes a local array to consume 4x the stack size. Rework the function to avoid consuming stack even if the action size is large. (Matt) Signed-off-by: Alan Previn Reviewed-by: Matthew Brost --- drivers/gpu/drm/xe/xe_guc_ct.c | 31 ++- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 22bc9ce846db..aa04b5c4822f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -401,19 +401,21 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, { struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *h2g = &ct->ctbs.h2g; - u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)]; - u32 cmd_len = len + GUC_CTB_HDR_LEN; - u32 cmd_idx = 0, i; +#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */ + u32 cmd[H2G_CT_HEADERS]; u32 tail = h2g->info.tail; + u32 full_len; struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, tail * sizeof(u32)); + full_len = len + GUC_CTB_HDR_LEN; + lockdep_assert_held(&ct->lock); - XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN); + XE_BUG_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN)); XE_BUG_ON(tail > h2g->info.size); /* Command will wrap, zero fill (NOPs), return and check credits again */ - if (tail + cmd_len > h2g->info.size) { + if (tail + full_len > h2g->info.size) { xe_map_memset(xe, &map, 0, 0, (h2g->info.size - tail) * sizeof(u32)); h2g_reserve_space(ct, (h2g->info.size - tail)); @@ -428,30 +430,33 @@ static int h2g_write(struct xe_guc_ct *ct, const
u32 *action, u32 len, * dw1: HXG header (including action code) * dw2+: action data */ - cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | + cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value); if (want_response) { - cmd[cmd_idx++] = + cmd[1] = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } else { - cmd[cmd_idx++] = + cmd[1] = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } - for (i = 1; i < len; ++i) - cmd[cmd_idx++] = action[i]; + + /* H2G header in cmd[1] replaces action[0] so: */ + --len; + ++action; /* Write H2G ensuring visable before descriptor update */ - xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32)); + xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32)); + xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32)); xe_device_wmb(ct_to_xe(ct)); /* Update local copies */ - h2g->info.tail = (tail + cmd_len) % h2g->info.size; - h2g_reserve_space(ct, cmd_len); + h2g->info.tail = (tail + full_len) % h2g->info.size; + h2g_reserve_space(ct, full_len); /* Update descriptor */ desc_write(xe, h2g, tail, h2g->info.tail); base-commit: 2ec46ad7578ebba3048d6031c1a75c21920f0e19 -- 2.39.0
Re: [PATCH v3] drm/xe/guc: Fix h2g_write usage of GUC_CTB_MSG_MAX_LEN
On Wed, 2023-06-28 at 21:44 +0000, Brost, Matthew wrote: > On Wed, Jun 28, 2023 at 11:17:18AM -0700, Alan Previn wrote: > > In the ABI header, GUC_CTB_MSG_MIN_LEN is '1' because > > GUC_CTB_HDR_LEN is 1. This aligns with H2G/G2H CTB specification > > where all command formats are defined in units of dwords so that '1' > > is a dword. Accordingly, GUC_CTB_MSG_MAX_LEN is 256-1 (i.e. 255 > > dwords). However, h2g_write was incorrectly assuming that > > GUC_CTB_MSG_MAX_LEN was in bytes. Fix this. > alan:snip > > diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c > > index 22bc9ce846db..aa04b5c4822f 100644 > > --- a/drivers/gpu/drm/xe/xe_guc_ct.c > > +++ b/drivers/gpu/drm/xe/xe_guc_ct.c > > @@ -401,19 +401,21 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 > > *action, u32 len, > > { > > struct xe_device *xe = ct_to_xe(ct); > > struct guc_ctb *h2g = &ct->ctbs.h2g; > > - u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)]; > > - u32 cmd_len = len + GUC_CTB_HDR_LEN; > > - u32 cmd_idx = 0, i; > > +#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one > > DW HxG header */ > > Hate to nit pick but again this should be above the h2g_write per > feedback from Oden on Xe in general. > > Otherwise LGTM. > > With the nit addressed: > > Reviewed-by: Matthew Brost Thanks for reviewing. My bad on the #define - you mentioned that before. Will fix that now. ...alan
Re: [PATCH v3] drm/xe/guc: Fix h2g_write usage of GUC_CTB_MSG_MAX_LEN
On Wed, Jun 28, 2023 at 11:17:18AM -0700, Alan Previn wrote: > In the ABI header, GUC_CTB_MSG_MIN_LEN is '1' because > GUC_CTB_HDR_LEN is 1. This aligns with H2G/G2H CTB specification > where all command formats are defined in units of dwords so that '1' > is a dword. Accordingly, GUC_CTB_MSG_MAX_LEN is 256-1 (i.e. 255 > dwords). However, h2g_write was incorrectly assuming that > GUC_CTB_MSG_MAX_LEN was in bytes. Fix this. > > v2: By correctly treating GUC_CTB_MSG_MAX_LEN as dwords, it causes > a local array to consume 4x the stack size. Rework the function > to avoid consuming stack even if the action size is large. > > Signed-off-by: Alan Previn > --- > drivers/gpu/drm/xe/xe_guc_ct.c | 31 ++- > 1 file changed, 18 insertions(+), 13 deletions(-) > > diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c > index 22bc9ce846db..aa04b5c4822f 100644 > --- a/drivers/gpu/drm/xe/xe_guc_ct.c > +++ b/drivers/gpu/drm/xe/xe_guc_ct.c > @@ -401,19 +401,21 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 > *action, u32 len, > { > struct xe_device *xe = ct_to_xe(ct); > struct guc_ctb *h2g = &ct->ctbs.h2g; > - u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)]; > - u32 cmd_len = len + GUC_CTB_HDR_LEN; > - u32 cmd_idx = 0, i; > +#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW > HxG header */ Hate to nit pick but again this should be above the h2g_write per feedback from Oden on Xe in general. Otherwise LGTM. 
With the nit addressed: Reviewed-by: Matthew Brost > + u32 cmd[H2G_CT_HEADERS]; > u32 tail = h2g->info.tail; > + u32 full_len; > struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, >tail * sizeof(u32)); > > + full_len = len + GUC_CTB_HDR_LEN; > + > lockdep_assert_held(&ct->lock); > - XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN); > + XE_BUG_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN)); > XE_BUG_ON(tail > h2g->info.size); > > /* Command will wrap, zero fill (NOPs), return and check credits again > */ > - if (tail + cmd_len > h2g->info.size) { > + if (tail + full_len > h2g->info.size) { > xe_map_memset(xe, &map, 0, 0, > (h2g->info.size - tail) * sizeof(u32)); > h2g_reserve_space(ct, (h2g->info.size - tail)); > @@ -428,30 +430,33 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 > *action, u32 len, >* dw1: HXG header (including action code) >* dw2+: action data >*/ > - cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | > + cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | > FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | > FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value); > if (want_response) { > - cmd[cmd_idx++] = > + cmd[1] = > FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | > FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | > GUC_HXG_EVENT_MSG_0_DATA0, action[0]); > } else { > - cmd[cmd_idx++] = > + cmd[1] = > FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | > FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | > GUC_HXG_EVENT_MSG_0_DATA0, action[0]); > } > - for (i = 1; i < len; ++i) > - cmd[cmd_idx++] = action[i]; > + > + /* H2G header in cmd[1] replaces action[0] so: */ > + --len; > + ++action; > > /* Write H2G ensuring visable before descriptor update */ > - xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32)); > + xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32)); > + xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * > sizeof(u32)); > xe_device_wmb(ct_to_xe(ct)); > > /* Update local copies */ > - 
h2g->info.tail = (tail + cmd_len) % h2g->info.size; > - h2g_reserve_space(ct, cmd_len); > + h2g->info.tail = (tail + full_len) % h2g->info.size; > + h2g_reserve_space(ct, full_len); > > /* Update descriptor */ > desc_write(xe, h2g, tail, h2g->info.tail); > > base-commit: abdb420db479bae28a2abd7ba2c66229b7e8cb77 > -- > 2.39.0 >
[PATCH v3] drm/xe/guc: Fix h2g_write usage of GUC_CTB_MSG_MAX_LEN
In the ABI header, GUC_CTB_MSG_MIN_LEN is '1' because GUC_CTB_HDR_LEN is 1. This aligns with H2G/G2H CTB specification where all command formats are defined in units of dwords so that '1' is a dword. Accordingly, GUC_CTB_MSG_MAX_LEN is 256-1 (i.e. 255 dwords). However, h2g_write was incorrectly assuming that GUC_CTB_MSG_MAX_LEN was in bytes. Fix this. v2: By correctly treating GUC_CTB_MSG_MAX_LEN as dwords, it causes a local array to consume 4x the stack size. Rework the function to avoid consuming stack even if the action size is large. Signed-off-by: Alan Previn --- drivers/gpu/drm/xe/xe_guc_ct.c | 31 ++- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 22bc9ce846db..aa04b5c4822f 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -401,19 +401,21 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, { struct xe_device *xe = ct_to_xe(ct); struct guc_ctb *h2g = &ct->ctbs.h2g; - u32 cmd[GUC_CTB_MSG_MAX_LEN / sizeof(u32)]; - u32 cmd_len = len + GUC_CTB_HDR_LEN; - u32 cmd_idx = 0, i; +#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */ + u32 cmd[H2G_CT_HEADERS]; u32 tail = h2g->info.tail; + u32 full_len; struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&h2g->cmds, tail * sizeof(u32)); + full_len = len + GUC_CTB_HDR_LEN; + lockdep_assert_held(&ct->lock); - XE_BUG_ON(len * sizeof(u32) > GUC_CTB_MSG_MAX_LEN); + XE_BUG_ON(full_len > (GUC_CTB_MSG_MAX_LEN - GUC_CTB_HDR_LEN)); XE_BUG_ON(tail > h2g->info.size); /* Command will wrap, zero fill (NOPs), return and check credits again */ - if (tail + cmd_len > h2g->info.size) { + if (tail + full_len > h2g->info.size) { xe_map_memset(xe, &map, 0, 0, (h2g->info.size - tail) * sizeof(u32)); h2g_reserve_space(ct, (h2g->info.size - tail)); @@ -428,30 +430,33 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len, * dw1: HXG header (including action code) * dw2+: 
action data */ - cmd[cmd_idx++] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | + cmd[0] = FIELD_PREP(GUC_CTB_MSG_0_FORMAT, GUC_CTB_FORMAT_HXG) | FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) | FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value); if (want_response) { - cmd[cmd_idx++] = + cmd[1] = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) | FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } else { - cmd[cmd_idx++] = + cmd[1] = FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_EVENT) | FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | GUC_HXG_EVENT_MSG_0_DATA0, action[0]); } - for (i = 1; i < len; ++i) - cmd[cmd_idx++] = action[i]; + + /* H2G header in cmd[1] replaces action[0] so: */ + --len; + ++action; /* Write H2G ensuring visable before descriptor update */ - xe_map_memcpy_to(xe, &map, 0, cmd, cmd_len * sizeof(u32)); + xe_map_memcpy_to(xe, &map, 0, cmd, H2G_CT_HEADERS * sizeof(u32)); + xe_map_memcpy_to(xe, &map, H2G_CT_HEADERS * sizeof(u32), action, len * sizeof(u32)); xe_device_wmb(ct_to_xe(ct)); /* Update local copies */ - h2g->info.tail = (tail + cmd_len) % h2g->info.size; - h2g_reserve_space(ct, cmd_len); + h2g->info.tail = (tail + full_len) % h2g->info.size; + h2g_reserve_space(ct, full_len); /* Update descriptor */ desc_write(xe, h2g, tail, h2g->info.tail); base-commit: abdb420db479bae28a2abd7ba2c66229b7e8cb77 -- 2.39.0