On Thu, Sep 25, 2025 at 9:19 AM Christian König <[email protected]> wrote: > > There have been multiple complaints that 10 seconds is usually too long. > > The original requirement for a longer timeout came from compute tests on > AMDVLK; since that is no longer a topic, reduce the timeout back to 2 > seconds for all queues. > > While at it, also remove any special handling for compute queues under > SRIOV or passthrough. > > Signed-off-by: Christian König <[email protected]>
Reviewed-by: Alex Deucher <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 85 ++++++++++------------ > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 21 ++---- > 2 files changed, 48 insertions(+), 58 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index a77000c2e0bb..ceb3c616292c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -4278,58 +4278,53 @@ static int > amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) > long timeout; > int ret = 0; > > - /* > - * By default timeout for jobs is 10 sec > - */ > - adev->compute_timeout = adev->gfx_timeout = msecs_to_jiffies(10000); > - adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; > + /* By default timeout for all queues is 2 sec */ > + adev->gfx_timeout = adev->compute_timeout = adev->sdma_timeout = > + adev->video_timeout = msecs_to_jiffies(2000); > > - if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { > - while ((timeout_setting = strsep(&input, ",")) && > - strnlen(timeout_setting, > AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { > - ret = kstrtol(timeout_setting, 0, &timeout); > - if (ret) > - return ret; > + if (!strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) > + return 0; > > - if (timeout == 0) { > - index++; > - continue; > - } else if (timeout < 0) { > - timeout = MAX_SCHEDULE_TIMEOUT; > - dev_warn(adev->dev, "lockup timeout > disabled"); > - add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); > - } else { > - timeout = msecs_to_jiffies(timeout); > - } > + while ((timeout_setting = strsep(&input, ",")) && > + strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { > + ret = kstrtol(timeout_setting, 0, &timeout); > + if (ret) > + return ret; > > - switch (index++) { > - case 0: > - adev->gfx_timeout = timeout; > - break; > - case 1: > - adev->compute_timeout = timeout; > - break; > - case 2: > - adev->sdma_timeout = timeout; > - 
break; > - case 3: > - adev->video_timeout = timeout; > - break; > - default: > - break; > - } > + if (timeout == 0) { > + index++; > + continue; > + } else if (timeout < 0) { > + timeout = MAX_SCHEDULE_TIMEOUT; > + dev_warn(adev->dev, "lockup timeout disabled"); > + add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); > + } else { > + timeout = msecs_to_jiffies(timeout); > } > - /* > - * There is only one value specified and > - * it should apply to all non-compute jobs. > - */ > - if (index == 1) { > - adev->sdma_timeout = adev->video_timeout = > adev->gfx_timeout; > - if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) > - adev->compute_timeout = adev->gfx_timeout; > + > + switch (index++) { > + case 0: > + adev->gfx_timeout = timeout; > + break; > + case 1: > + adev->compute_timeout = timeout; > + break; > + case 2: > + adev->sdma_timeout = timeout; > + break; > + case 3: > + adev->video_timeout = timeout; > + break; > + default: > + break; > } > } > > + /* When only one value specified apply it to all queues. */ > + if (index == 1) > + adev->gfx_timeout = adev->compute_timeout = > adev->sdma_timeout = > + adev->video_timeout = timeout; > + > return ret; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index ece251cbe8c3..fe45dd1d979e 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -353,22 +353,17 @@ module_param_named(svm_default_granularity, > amdgpu_svm_default_granularity, uint > * DOC: lockup_timeout (string) > * Set GPU scheduler timeout value in ms. > * > - * The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is > there can be one or > - * multiple values specified. 0 and negative values are invalidated. They > will be adjusted > - * to the default timeout. > + * The format can be [single value] for setting all timeouts at once or > + * [GFX,Compute,SDMA,Video] to set individual timeouts. > + * Negative values mean infinity. 
> * > - * - With one value specified, the setting will apply to all non-compute > jobs. > - * - With multiple values specified, the first one will be for GFX. > - * The second one is for Compute. The third and fourth ones are > - * for SDMA and Video. > - * > - * By default(with no lockup_timeout settings), the timeout for all jobs is > 10000. > + * By default(with no lockup_timeout settings), the timeout for all queues > is 2000. > */ > MODULE_PARM_DESC(lockup_timeout, > - "GPU lockup timeout in ms (default: 10000 for all jobs. " > - "0: keep default value. negative: infinity timeout), format: > for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " > - "for passthrough or sriov [all jobs] or > [GFX,Compute,SDMA,Video]."); > -module_param_string(lockup_timeout, amdgpu_lockup_timeout, > sizeof(amdgpu_lockup_timeout), 0444); > + "GPU lockup timeout in ms (default: 2000 for all queues. " > + "0: keep default value. negative: infinity timeout), format: > [single value for all] or [GFX,Compute,SDMA,Video]."); > +module_param_string(lockup_timeout, amdgpu_lockup_timeout, > + sizeof(amdgpu_lockup_timeout), 0444); > > /** > * DOC: dpm (int) > -- > 2.43.0 >
