El jue, 05-03-2026 a las 09:34 +0000, Tvrtko Ursulin escribió:
>
> On 05/03/2026 09:15, Iago Toral wrote:
> > El mar, 17-02-2026 a las 09:18 -0300, Maíra Canal escribió:
> > > From: Tvrtko Ursulin <[email protected]>
> > >
> > > To remove the file_priv NULL-ing dance needed to check if the
> > > file
> > > descriptor is open, move the per-fd reset counter into v3d_stats,
> > > which
> > > is heap-allocated and refcounted, outliving the fd as long as
> > > jobs
> > > reference it.
> > >
> > > This change allows the removal of the last `queue_lock` usage to
> > > protect
> > > `job->file_priv` and avoids possible NULL ptr dereference issues
> > > due
> > > to
> > > lifetime mismatches.
> > >
> > > Also, to simplify locking, replace both the global and per-fd
> > > locked
> > > reset counters with atomics.
> > >
> > > Signed-off-by: Tvrtko Ursulin <[email protected]>
> > > Co-developed-by: Maíra Canal <[email protected]>
> > > Signed-off-by: Maíra Canal <[email protected]>
> > > ---
> > > drivers/gpu/drm/v3d/v3d_drv.c | 20 ++++----------------
> > > drivers/gpu/drm/v3d/v3d_drv.h | 14 ++++----------
> > > drivers/gpu/drm/v3d/v3d_sched.c | 9 ++-------
> > > 3 files changed, 10 insertions(+), 33 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.c
> > > b/drivers/gpu/drm/v3d/v3d_drv.c
> > > index
> > > aafb402c6ac3118a57df9fc0a0d21d35d48e3b2c..4e77f4808145df21746ff4b
> > > 7058
> > > 089d0d161e3fc 100644
> > > --- a/drivers/gpu/drm/v3d/v3d_drv.c
> > > +++ b/drivers/gpu/drm/v3d/v3d_drv.c
> > > @@ -110,13 +110,13 @@ static int v3d_get_param_ioctl(struct
> > > drm_device *dev, void *data,
> > > args->value = !!drm_gem_get_huge_mnt(dev);
> > > return 0;
> > > case DRM_V3D_PARAM_GLOBAL_RESET_COUNTER:
> > > - mutex_lock(&v3d->reset_lock);
> > > - args->value = v3d->reset_counter;
> > > - mutex_unlock(&v3d->reset_lock);
> > > + args->value = atomic_read(&v3d->reset_counter);
> >
> > Don't we still need to take the reset lock here? Otherwise there
> > would
> > be a chance that we read the counter while a reset is in flight,
> > no?
>
> I don't see that it would make a difference but maybe I am not seeing
> your concern. It uses atomic_t so the increment versus read is fine.
> Are
> you maybe saying the v3d ABI guarantees reset is 100% done (so not in
> progress, for some definition of progress, because hardware reset is
> done by then, only re-submit and re-start of the software state is
> poending) if userspace observes an increased global reset counter?
> That
> would be surprising and I don't see how it could make a practical
> difference, but perhaps could be mitigated by moving the atomic_inc
> to
> the end of v3d_gpu_reset_for_timeout(). Or still taking the lock as
> you say.
My concern is just that it is possible for the query and the reset to
race and that I think it would make sense for the counter query to
include in-flight resets (since what apps really care about is whether
a GPU reset happened not if it completed the reset process).
Iago
>
> Regards,
>
> Tvrtko
>
> > > return 0;
> > > case DRM_V3D_PARAM_CONTEXT_RESET_COUNTER:
> > > mutex_lock(&v3d->reset_lock);
> > > - args->value = v3d_priv->reset_counter;
> > > + args->value = 0;
> > > + for (enum v3d_queue q = 0; q < V3D_MAX_QUEUES;
> > > q++)
> > > + args->value += atomic_read(&v3d_priv-
> > > > stats[q]->reset_counter);
> > > mutex_unlock(&v3d->reset_lock);
> > > return 0;
> > > default:
> > > @@ -173,23 +173,11 @@ v3d_open(struct drm_device *dev, struct
> > > drm_file *file)
> > > static void
> > > v3d_postclose(struct drm_device *dev, struct drm_file *file)
> > > {
> > > - struct v3d_dev *v3d = to_v3d_dev(dev);
> > > struct v3d_file_priv *v3d_priv = file->driver_priv;
> > > - unsigned long irqflags;
> > > enum v3d_queue q;
> > >
> > > for (q = 0; q < V3D_MAX_QUEUES; q++) {
> > > - struct v3d_queue_state *queue = &v3d->queue[q];
> > > - struct v3d_job *job = queue->active_job;
> > > -
> > > drm_sched_entity_destroy(&v3d_priv-
> > > > sched_entity[q]);
> > > -
> > > - if (job && job->base.entity == &v3d_priv-
> > > > sched_entity[q]) {
> > > - spin_lock_irqsave(&queue->queue_lock,
> > > irqflags);
> > > - job->file_priv = NULL;
> > > - spin_unlock_irqrestore(&queue-
> > > >queue_lock,
> > > irqflags);
> > > - }
> > > -
> > > v3d_stats_put(v3d_priv->stats[q]);
> > > }
> > >
> > > diff --git a/drivers/gpu/drm/v3d/v3d_drv.h
> > > b/drivers/gpu/drm/v3d/v3d_drv.h
> > > index
> > > 72c3f40715dae6e86e0c8356cb997cdf1cf03fae..3de485abd8fc274b361cd17
> > > a00c
> > > ab189d8e69643 100644
> > > --- a/drivers/gpu/drm/v3d/v3d_drv.h
> > > +++ b/drivers/gpu/drm/v3d/v3d_drv.h
> > > @@ -55,6 +55,8 @@ struct v3d_stats {
> > > * job queues, even the write side never is.
> > > */
> > > seqcount_t lock;
> > > +
> > > + atomic_t reset_counter;
> > > };
> > >
> > > struct v3d_queue_state {
> > > @@ -203,10 +205,8 @@ struct v3d_dev {
> > > */
> > > struct v3d_perfmon *global_perfmon;
> > >
> > > - /* Global reset counter. The counter must be incremented
> > > when
> > > - * a GPU reset happens. It must be protected by
> > > @reset_lock.
> > > - */
> > > - unsigned int reset_counter;
> > > + /* Global reset counter incremented on each GPU reset.
> > > */
> > > + atomic_t reset_counter;
> > > };
> > >
> > > static inline struct v3d_dev *
> > > @@ -233,12 +233,6 @@ struct v3d_file_priv {
> > >
> > > /* Stores the GPU stats for a specific queue for this
> > > fd. */
> > > struct v3d_stats *stats[V3D_MAX_QUEUES];
> > > -
> > > - /* Per-fd reset counter, must be incremented when a job
> > > submitted
> > > - * by this fd causes a GPU reset. It must be protected
> > > by
> > > - * &struct v3d_dev->reset_lock.
> > > - */
> > > - unsigned int reset_counter;
> > > };
> > >
> > > struct v3d_bo {
> > > diff --git a/drivers/gpu/drm/v3d/v3d_sched.c
> > > b/drivers/gpu/drm/v3d/v3d_sched.c
> > > index
> > > 4adbf5175eb005b37d1feac1514150630ce6aab2..de6497741ff789b5de9212a
> > > e3e9
> > > 941a13cd0475d 100644
> > > --- a/drivers/gpu/drm/v3d/v3d_sched.c
> > > +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> > > @@ -701,8 +701,6 @@ v3d_gpu_reset_for_timeout(struct v3d_dev
> > > *v3d,
> > > struct drm_sched_job *sched_job,
> > > enum v3d_queue q)
> > > {
> > > struct v3d_job *job = to_v3d_job(sched_job);
> > > - struct v3d_file_priv *v3d_priv = job->file_priv;
> > > - unsigned long irqflags;
> > > enum v3d_queue i;
> > >
> > > mutex_lock(&v3d->reset_lock);
> > > @@ -717,11 +715,8 @@ v3d_gpu_reset_for_timeout(struct v3d_dev
> > > *v3d,
> > > struct drm_sched_job *sched_job,
> > > /* get the GPU back into the init state */
> > > v3d_reset(v3d);
> > >
> > > - v3d->reset_counter++;
> > > - spin_lock_irqsave(&v3d->queue[q].queue_lock, irqflags);
> > > - if (v3d_priv)
> > > - v3d_priv->reset_counter++;
> > > - spin_unlock_irqrestore(&v3d->queue[q].queue_lock,
> > > irqflags);
> > > + atomic_inc(&v3d->reset_counter);
> > > + atomic_inc(&job->client_stats->reset_counter);
> > >
> > > for (i = 0; i < V3D_MAX_QUEUES; i++)
> > > drm_sched_resubmit_jobs(&v3d->queue[i].sched);
> > >
> >
>
>