On Sun, Jul 20, 2025 at 05:46:08PM +0530, Akhil P Oommen wrote: > There are some special registers which are accessible even when GX power > domain is collapsed during an IFPC sleep. Accessing these registers > wakes up GPU from power collapse and allow programming these registers > without additional handshake with GMU. This patch adds support for this > special register write sequence. > > Signed-off-by: Akhil P Oommen <akhi...@oss.qualcomm.com> > --- > drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 63 > ++++++++++++++++++++++++++++++- > drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 + > drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 20 +++++----- > 3 files changed, 73 insertions(+), 11 deletions(-) > > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > index > 491fde0083a202bec7c6b3bca88d0e5a717a6560..8c004fc3abd2896d467a9728b34e99e4ed944dc4 > 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c > @@ -16,6 +16,67 @@ > > #define GPU_PAS_ID 13 > > +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, > u32 status, u32 mask) > +{ > + /* Success if !writedropped0/1 */ > + if (!(status & mask)) > + return true; > + > + udelay(10);
Why do we need udelay() here? Why can't we use interval setting inside gmu_poll_timeout()? > + > + /* Try to update fenced register again */ > + gpu_write(gpu, offset, value); > + return false; > +} > + > +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, > u32 mask) > +{ > + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base; > + struct msm_gpu *gpu = &adreno_gpu->base; > + struct a6xx_gmu *gmu = &a6xx_gpu->gmu; > + u32 status; > + > + gpu_write(gpu, offset, value); > + > + /* Nothing else to be done in the case of no-GMU */ > + if (adreno_has_gmu_wrapper(adreno_gpu)) > + return 0; > + > + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, > + fence_status_check(gpu, offset, value, status, mask), > 0, 1000)) > + return 0; > + > + dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n", > + offset); > + > + /* Try again for another 1ms before failing */ > + gpu_write(gpu, offset, value); > + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status, > + fence_status_check(gpu, offset, value, status, mask), > 0, 1000)) > + return 0; > + > + dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n", > + offset); > + > + return -ETIMEDOUT; > +} > + > +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 > mask, bool is_64b) > +{ > + int ret; > + > + ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask); > + if (ret) > + return ret; > + > + if (!is_64b) > + return 0; > + > + ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask); no need for a separate ret assignment. > + > + return ret; > +} > + > static inline bool _a6xx_check_idle(struct msm_gpu *gpu) > { > struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > @@ -86,7 +147,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct > msm_ringbuffer *ring) > /* Update HW if this is the current ring and we are not in preempt*/ > if (!a6xx_in_preempt(a6xx_gpu)) { > if (a6xx_gpu->cur_ring == ring) > - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); > + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, > BIT(0), false); I can't stop but notice that we don't handle a6xx_fenced_write() errors. Is it fine? Or will it result in some sort of crash / reset? > else > ring->restore_wptr = true; > } else { > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > index > 9201a53dd341bf432923ffb44947e015208a3d02..2be036a3faca58b4b559c30881e4b31d5929592a > 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h > @@ -291,5 +291,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state); > > void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool > gx_off); > void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert); > +int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, > bool is_64b); > > #endif /* __A6XX_GPU_H__ */ > diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c > b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c > index > 3b17fd2dba89115a8e48ba9469e52e4305b0cdbb..5b0fd510ff58d989ab285f1a2497f6f522a6b187 > 100644 > --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c > +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c > @@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu, > } > > /* Write the most recent wptr for the given ring into the hardware */ > -static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer > *ring) > +static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct > msm_ringbuffer *ring) > { > unsigned long flags; > uint32_t wptr; > @@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct > msm_ringbuffer *ring) > if (ring->restore_wptr) { > wptr = get_wptr(ring); > > - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr); > + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), > false); > > ring->restore_wptr = false; > } > @@ -172,7 +172,7 @@ void a6xx_preempt_irq(struct msm_gpu *gpu) > > set_preempt_state(a6xx_gpu, PREEMPT_FINISH); > > - update_wptr(gpu, a6xx_gpu->cur_ring); > + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring); > > set_preempt_state(a6xx_gpu, PREEMPT_NONE); > > @@ -268,7 +268,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) > */ > if (!ring || (a6xx_gpu->cur_ring == ring)) { > set_preempt_state(a6xx_gpu, PREEMPT_FINISH); > - update_wptr(gpu, a6xx_gpu->cur_ring); > + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring); > set_preempt_state(a6xx_gpu, PREEMPT_NONE); > spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags); > return; > @@ -302,13 +302,13 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) > > spin_unlock_irqrestore(&ring->preempt_lock, flags); > > - gpu_write64(gpu, > - REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, > - a6xx_gpu->preempt_smmu_iova[ring->id]); > + a6xx_fenced_write(a6xx_gpu, > + REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, > a6xx_gpu->preempt_smmu_iova[ring->id], > + BIT(1), true); > > - gpu_write64(gpu, > + a6xx_fenced_write(a6xx_gpu, > REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR, > - a6xx_gpu->preempt_iova[ring->id]); > + a6xx_gpu->preempt_iova[ring->id], BIT(1), true); > > a6xx_gpu->next_ring = ring; > > @@ -328,7 +328,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu) > set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED); > > /* Trigger the preemption */ > - gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl); > + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, > BIT(1), false); > } > > static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu, > > -- > 2.50.1 > -- With best wishes Dmitry