Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Christian König

Am 20.03.23 um 17:01 schrieb Alex Deucher:

On Mon, Mar 20, 2023 at 11:55 AM Christian König
 wrote:

Am 20.03.23 um 16:49 schrieb Alex Deucher:

On Mon, Mar 20, 2023 at 11:46 AM Christian König
 wrote:

Am 17.03.23 um 18:17 schrieb Alex Deucher:

From: Christian König 

Add support for submitting the shadow update packet
when submitting an IB.  Needed for MCBP on GFX11.

v2: update API for CSA (Alex)
v3: fix ordering; SET_Q_PREEMPTION_MODE most come before COND_EXEC
   Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
   amdgpu_cs_pass1()
   Only initialize shadow on first use
   (Alex)

Signed-off-by: Christian König 
Signed-off-by: Alex Deucher 
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
5 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f6144b378617..9bdda246b09c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break;

default:
@@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
amdgpu_cs_parser *p,
return 0;
}

+static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+ bool shadow_initialized = false;
+ int i;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ p->jobs[i]->shadow_va = shadow->shadow_va;
+ p->jobs[i]->csa_va = shadow->csa_va;
+ p->jobs[i]->gds_va = shadow->gds_va;

Do we really need all three VAs separately?


+ if (!p->ctx->shadow_initialized) {
+ p->jobs[i]->init_shadow = true;
+ shadow_initialized = true;
+ }
+ }
+ if (shadow_initialized)
+ p->ctx->shadow_initialized = true;

This is a really bad idea since the IOCTL can be interrupted later on.

Why do we need that?

Yes.  We have to only initial the buffer the first time it's used.
Doing it again will overwrite whatever userspace has done with it
since then.

I don't get what you mean with that? This here doesn't make any sense at
all.

The shadow buffer addresses must be given with every CS and updated over
and over again. Otherwise this solution won't work correctly.

The shadow replaces the old SET/LOAD model.  When the UMD uses the
shadow buffer, they no longer have to send all of their state anymore
in the IB.  The CP FW will automatically load whatever was saved when
it processes this packet.  However, the shadow needs to be initialized
by the CP FW the first time it is used.  All subsequent times, it will
just be a save/restore by the FW.  I guess the alternative would be
for the UMD to specify if it wants initialization or not, but tracking
it in the kernel aligns better with the user mode queue model where
this is handled by the MQD and is initialized the first time the queue
is loaded.


This approach is just utterly nonsense.

The problem is that neither the kernel nor the fw can know if that's the 
first submission. Only the UMD can know that.


That's the same issue we previously had with PAL and the old model which 
didn't worked at all.


Christian.



Alex


Christian.


Alex


Regards,
Christian.



+}
+
static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
unsigned int ce_preempt = 0, de_preempt = 0;
@@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
if (r)
return r;
break;
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ amdgpu_cs_p2_shadow(p, chunk);
+ break;
}
}

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..909d188c41f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
unsigned long   ras_counter_ce;
unsigned long   ras_counter_ue;
uint32_tstable_pstate;
+ boolshadow_initialized;
};

struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 

Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Alex Deucher
On Mon, Mar 20, 2023 at 11:55 AM Christian König
 wrote:
>
> Am 20.03.23 um 16:49 schrieb Alex Deucher:
> > On Mon, Mar 20, 2023 at 11:46 AM Christian König
> >  wrote:
> >> Am 17.03.23 um 18:17 schrieb Alex Deucher:
> >>> From: Christian König 
> >>>
> >>> Add support for submitting the shadow update packet
> >>> when submitting an IB.  Needed for MCBP on GFX11.
> >>>
> >>> v2: update API for CSA (Alex)
> >>> v3: fix ordering; SET_Q_PREEMPTION_MODE most come before COND_EXEC
> >>>   Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
> >>>   amdgpu_cs_pass1()
> >>>   Only initialize shadow on first use
> >>>   (Alex)
> >>>
> >>> Signed-off-by: Christian König 
> >>> Signed-off-by: Alex Deucher 
> >>> ---
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
> >>>drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
> >>>5 files changed, 37 insertions(+)
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >>> index f6144b378617..9bdda246b09c 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> >>> @@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
> >>>case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
> >>>case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
> >>>case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
> >>> + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> >>>break;
> >>>
> >>>default:
> >>> @@ -587,6 +588,26 @@ static int 
> >>> amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
> >>>return 0;
> >>>}
> >>>
> >>> +static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
> >>> + struct amdgpu_cs_chunk *chunk)
> >>> +{
> >>> + struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
> >>> + bool shadow_initialized = false;
> >>> + int i;
> >>> +
> >>> + for (i = 0; i < p->gang_size; ++i) {
> >>> + p->jobs[i]->shadow_va = shadow->shadow_va;
> >>> + p->jobs[i]->csa_va = shadow->csa_va;
> >>> + p->jobs[i]->gds_va = shadow->gds_va;
> >> Do we really need all three VAs separately?
> >>
> >>> + if (!p->ctx->shadow_initialized) {
> >>> + p->jobs[i]->init_shadow = true;
> >>> + shadow_initialized = true;
> >>> + }
> >>> + }
> >>> + if (shadow_initialized)
> >>> + p->ctx->shadow_initialized = true;
> >> This is a really bad idea since the IOCTL can be interrupted later on.
> >>
> >> Why do we need that?
> > Yes.  We have to only initial the buffer the first time it's used.
> > Doing it again will overwrite whatever userspace has done with it
> > since then.
>
> I don't get what you mean with that? This here doesn't make any sense at
> all.
>
> The shadow buffer addresses must be given with every CS and updated over
> and over again. Otherwise this solution won't work correctly.

The shadow replaces the old SET/LOAD model.  When the UMD uses the
shadow buffer, they no longer have to send all of their state anymore
in the IB.  The CP FW will automatically load whatever was saved when
it processes this packet.  However, the shadow needs to be initialized
by the CP FW the first time it is used.  All subsequent times, it will
just be a save/restore by the FW.  I guess the alternative would be
for the UMD to specify if it wants initialization or not, but tracking
it in the kernel aligns better with the user mode queue model where
this is handled by the MQD and is initialized the first time the queue
is loaded.

Alex

>
> Christian.
>
> >
> > Alex
> >
> >> Regards,
> >> Christian.
> >>
> >>
> >>> +}
> >>> +
> >>>static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >>>{
> >>>unsigned int ce_preempt = 0, de_preempt = 0;
> >>> @@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >>>if (r)
> >>>return r;
> >>>break;
> >>> + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> >>> + amdgpu_cs_p2_shadow(p, chunk);
> >>> + break;
> >>>}
> >>>}
> >>>
> >>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
> >>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>> index 0fa0e56daf67..909d188c41f2 100644
> >>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> >>> @@ -57,6 +57,7 @@ struct amdgpu_ctx {
> >>>unsigned long   ras_counter_ce;
> >>>unsigned long   ras_counter_ue;
> >>>uint32_t 

Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Christian König

Am 20.03.23 um 16:49 schrieb Alex Deucher:

On Mon, Mar 20, 2023 at 11:46 AM Christian König
 wrote:

Am 17.03.23 um 18:17 schrieb Alex Deucher:

From: Christian König 

Add support for submitting the shadow update packet
when submitting an IB.  Needed for MCBP on GFX11.

v2: update API for CSA (Alex)
v3: fix ordering; SET_Q_PREEMPTION_MODE most come before COND_EXEC
  Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
  amdgpu_cs_pass1()
  Only initialize shadow on first use
  (Alex)

Signed-off-by: Christian König 
Signed-off-by: Alex Deucher 
---
   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
   5 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f6144b378617..9bdda246b09c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
   case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
   break;

   default:
@@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
amdgpu_cs_parser *p,
   return 0;
   }

+static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+ bool shadow_initialized = false;
+ int i;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ p->jobs[i]->shadow_va = shadow->shadow_va;
+ p->jobs[i]->csa_va = shadow->csa_va;
+ p->jobs[i]->gds_va = shadow->gds_va;

Do we really need all three VAs separately?


+ if (!p->ctx->shadow_initialized) {
+ p->jobs[i]->init_shadow = true;
+ shadow_initialized = true;
+ }
+ }
+ if (shadow_initialized)
+ p->ctx->shadow_initialized = true;

This is a really bad idea since the IOCTL can be interrupted later on.

Why do we need that?

Yes.  We have to only initial the buffer the first time it's used.
Doing it again will overwrite whatever userspace has done with it
since then.


I don't get what you mean with that? This here doesn't make any sense at 
all.


The shadow buffer addresses must be given with every CS and updated over 
and over again. Otherwise this solution won't work correctly.


Christian.



Alex


Regards,
Christian.



+}
+
   static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
   {
   unsigned int ce_preempt = 0, de_preempt = 0;
@@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
   if (r)
   return r;
   break;
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ amdgpu_cs_p2_shadow(p, chunk);
+ break;
   }
   }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..909d188c41f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
   unsigned long   ras_counter_ce;
   unsigned long   ras_counter_ue;
   uint32_tstable_pstate;
+ boolshadow_initialized;
   };

   struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b348dbe2..d88964b9407f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -212,6 +212,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
   }

   amdgpu_ring_ib_begin(ring);
+
+ if (job && ring->funcs->emit_gfx_shadow)
+ amdgpu_ring_emit_gfx_shadow(ring, job);
+
   if (job && ring->funcs->init_cond_exec)
   patch_offset = amdgpu_ring_init_cond_exec(ring);

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 9790def34815..b470808fa40e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -68,6 +68,12 @@ struct amdgpu_job {
   uint64_tuf_addr;
   uint64_tuf_sequence;

+ /* virtual addresses for shadow/GDS/CSA */
+ uint64_tshadow_va;
+ uint64_tcsa_va;
+ uint64_tgds_va;
+ boolinit_shadow;
+
   /* 

Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Alex Deucher
On Mon, Mar 20, 2023 at 11:46 AM Christian König
 wrote:
>
> Am 17.03.23 um 18:17 schrieb Alex Deucher:
> > From: Christian König 
> >
> > Add support for submitting the shadow update packet
> > when submitting an IB.  Needed for MCBP on GFX11.
> >
> > v2: update API for CSA (Alex)
> > v3: fix ordering; SET_Q_PREEMPTION_MODE most come before COND_EXEC
> >  Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
> >  amdgpu_cs_pass1()
> >  Only initialize shadow on first use
> >  (Alex)
> >
> > Signed-off-by: Christian König 
> > Signed-off-by: Alex Deucher 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
> >   5 files changed, 37 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index f6144b378617..9bdda246b09c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
> >   case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
> >   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
> >   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
> > + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> >   break;
> >
> >   default:
> > @@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
> > amdgpu_cs_parser *p,
> >   return 0;
> >   }
> >
> > +static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
> > + struct amdgpu_cs_chunk *chunk)
> > +{
> > + struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
> > + bool shadow_initialized = false;
> > + int i;
> > +
> > + for (i = 0; i < p->gang_size; ++i) {
> > + p->jobs[i]->shadow_va = shadow->shadow_va;
> > + p->jobs[i]->csa_va = shadow->csa_va;
> > + p->jobs[i]->gds_va = shadow->gds_va;
>
> Do we really need all three VAs separately?

That is what the firmware uses.  Normally they are stored individually
as part of the MQD so this just matches that only on the fly for IBs.

Alex

>
> > + if (!p->ctx->shadow_initialized) {
> > + p->jobs[i]->init_shadow = true;
> > + shadow_initialized = true;
>
> > + }
> > + }
> > + if (shadow_initialized)
> > + p->ctx->shadow_initialized = true;
>
> This is a really bad idea since the IOCTL can be interrupted later on.
>
> Why do we need that?
>
> Regards,
> Christian.
>
>
> > +}
> > +
> >   static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >   {
> >   unsigned int ce_preempt = 0, de_preempt = 0;
> > @@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >   if (r)
> >   return r;
> >   break;
> > + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> > + amdgpu_cs_p2_shadow(p, chunk);
> > + break;
> >   }
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > index 0fa0e56daf67..909d188c41f2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > @@ -57,6 +57,7 @@ struct amdgpu_ctx {
> >   unsigned long   ras_counter_ce;
> >   unsigned long   ras_counter_ue;
> >   uint32_tstable_pstate;
> > + boolshadow_initialized;
> >   };
> >
> >   struct amdgpu_ctx_mgr {
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > index b348dbe2..d88964b9407f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > @@ -212,6 +212,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> > unsigned num_ibs,
> >   }
> >
> >   amdgpu_ring_ib_begin(ring);
> > +
> > + if (job && ring->funcs->emit_gfx_shadow)
> > + amdgpu_ring_emit_gfx_shadow(ring, job);
> > +
> >   if (job && ring->funcs->init_cond_exec)
> >   patch_offset = amdgpu_ring_init_cond_exec(ring);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > index 9790def34815..b470808fa40e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > @@ -68,6 +68,12 @@ struct amdgpu_job {
> >   uint64_tuf_addr;
> >   uint64_tuf_sequence;
> >
> > + /* virtual addresses for shadow/GDS/CSA */
> > + uint64_t

Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Alex Deucher
On Mon, Mar 20, 2023 at 11:46 AM Christian König
 wrote:
>
> Am 17.03.23 um 18:17 schrieb Alex Deucher:
> > From: Christian König 
> >
> > Add support for submitting the shadow update packet
> > when submitting an IB.  Needed for MCBP on GFX11.
> >
> > v2: update API for CSA (Alex)
> > v3: fix ordering; SET_Q_PREEMPTION_MODE most come before COND_EXEC
> >  Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
> >  amdgpu_cs_pass1()
> >  Only initialize shadow on first use
> >  (Alex)
> >
> > Signed-off-by: Christian König 
> > Signed-off-by: Alex Deucher 
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
> >   5 files changed, 37 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index f6144b378617..9bdda246b09c 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
> >   case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
> >   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
> >   case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
> > + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> >   break;
> >
> >   default:
> > @@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
> > amdgpu_cs_parser *p,
> >   return 0;
> >   }
> >
> > +static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
> > + struct amdgpu_cs_chunk *chunk)
> > +{
> > + struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
> > + bool shadow_initialized = false;
> > + int i;
> > +
> > + for (i = 0; i < p->gang_size; ++i) {
> > + p->jobs[i]->shadow_va = shadow->shadow_va;
> > + p->jobs[i]->csa_va = shadow->csa_va;
> > + p->jobs[i]->gds_va = shadow->gds_va;
>
> Do we really need all three VAs separately?
>
> > + if (!p->ctx->shadow_initialized) {
> > + p->jobs[i]->init_shadow = true;
> > + shadow_initialized = true;
>
> > + }
> > + }
> > + if (shadow_initialized)
> > + p->ctx->shadow_initialized = true;
>
> This is a really bad idea since the IOCTL can be interrupted later on.
>
> Why do we need that?

Yes.  We have to only initial the buffer the first time it's used.
Doing it again will overwrite whatever userspace has done with it
since then.

Alex

>
> Regards,
> Christian.
>
>
> > +}
> > +
> >   static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >   {
> >   unsigned int ce_preempt = 0, de_preempt = 0;
> > @@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
> >   if (r)
> >   return r;
> >   break;
> > + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
> > + amdgpu_cs_p2_shadow(p, chunk);
> > + break;
> >   }
> >   }
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > index 0fa0e56daf67..909d188c41f2 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
> > @@ -57,6 +57,7 @@ struct amdgpu_ctx {
> >   unsigned long   ras_counter_ce;
> >   unsigned long   ras_counter_ue;
> >   uint32_tstable_pstate;
> > + boolshadow_initialized;
> >   };
> >
> >   struct amdgpu_ctx_mgr {
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > index b348dbe2..d88964b9407f 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> > @@ -212,6 +212,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, 
> > unsigned num_ibs,
> >   }
> >
> >   amdgpu_ring_ib_begin(ring);
> > +
> > + if (job && ring->funcs->emit_gfx_shadow)
> > + amdgpu_ring_emit_gfx_shadow(ring, job);
> > +
> >   if (job && ring->funcs->init_cond_exec)
> >   patch_offset = amdgpu_ring_init_cond_exec(ring);
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h 
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > index 9790def34815..b470808fa40e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
> > @@ -68,6 +68,12 @@ struct amdgpu_job {
> >   uint64_tuf_addr;
> >   uint64_tuf_sequence;
> >
> > + /* virtual addresses for shadow/GDS/CSA */
> > + uint64_t

Re: [PATCH 03/10] drm/amdgpu: add gfx shadow CS IOCTL support

2023-03-20 Thread Christian König

Am 17.03.23 um 18:17 schrieb Alex Deucher:

From: Christian König 

Add support for submitting the shadow update packet
when submitting an IB.  Needed for MCBP on GFX11.

v2: update API for CSA (Alex)
v3: fix ordering; SET_Q_PREEMPTION_MODE most come before COND_EXEC
 Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
 amdgpu_cs_pass1()
 Only initialize shadow on first use
 (Alex)

Signed-off-by: Christian König 
Signed-off-by: Alex Deucher 
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   | 24 
  drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h  |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c   |  4 
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.h  |  6 ++
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  2 ++
  5 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f6144b378617..9bdda246b09c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+   case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break;
  
  		default:

@@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct 
amdgpu_cs_parser *p,
return 0;
  }
  
+static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,

+   struct amdgpu_cs_chunk *chunk)
+{
+   struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+   bool shadow_initialized = false;
+   int i;
+
+   for (i = 0; i < p->gang_size; ++i) {
+   p->jobs[i]->shadow_va = shadow->shadow_va;
+   p->jobs[i]->csa_va = shadow->csa_va;
+   p->jobs[i]->gds_va = shadow->gds_va;


Do we really need all three VAs separately?


+   if (!p->ctx->shadow_initialized) {
+   p->jobs[i]->init_shadow = true;
+   shadow_initialized = true;



+   }
+   }
+   if (shadow_initialized)
+   p->ctx->shadow_initialized = true;


This is a really bad idea since the IOCTL can be interrupted later on.

Why do we need that?

Regards,
Christian.



+}
+
  static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
  {
unsigned int ce_preempt = 0, de_preempt = 0;
@@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
if (r)
return r;
break;
+   case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+   amdgpu_cs_p2_shadow(p, chunk);
+   break;
}
}
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h

index 0fa0e56daf67..909d188c41f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
unsigned long   ras_counter_ce;
unsigned long   ras_counter_ue;
uint32_tstable_pstate;
+   boolshadow_initialized;
  };
  
  struct amdgpu_ctx_mgr {

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b348dbe2..d88964b9407f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -212,6 +212,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned 
num_ibs,
}
  
  	amdgpu_ring_ib_begin(ring);

+
+   if (job && ring->funcs->emit_gfx_shadow)
+   amdgpu_ring_emit_gfx_shadow(ring, job);
+
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h

index 9790def34815..b470808fa40e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -68,6 +68,12 @@ struct amdgpu_job {
uint64_tuf_addr;
uint64_tuf_sequence;
  
+	/* virtual addresses for shadow/GDS/CSA */

+   uint64_tshadow_va;
+   uint64_tcsa_va;
+   uint64_tgds_va;
+   boolinit_shadow;
+
/* job_run_counter >= 1 means a resubmit job */
uint32_tjob_run_counter;
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h

index 3989e755a5b4..8643d4a92c27 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -212,6 +212,7 @@ struct amdgpu_ring_funcs {
void (*end_use)(struct amdgpu_ring *ring);