The pm_runtime_put() we were using immediately released power on the device, which meant that we were generally turning the device off and on once per frame. In many profiles I've looked at, that added up to about 1% of CPU time, but this could get worse in the case of frequent rendering and readback (as may happen in X rendering). By keeping the device on until we've been idle for a couple of frames, we drop the overhead of runtime PM down to sub-.1%.
Signed-off-by: Eric Anholt <e...@anholt.net> --- drivers/gpu/drm/vc4/vc4_drv.c | 9 ++++++--- drivers/gpu/drm/vc4/vc4_gem.c | 6 ++++-- drivers/gpu/drm/vc4/vc4_v3d.c | 2 ++ 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index b087404c2784..7abfe088f2d1 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -61,21 +61,24 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data, if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT0); - pm_runtime_put(&vc4->v3d->pdev->dev); + pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); + pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_V3D_IDENT1: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT1); - pm_runtime_put(&vc4->v3d->pdev->dev); + pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); + pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_V3D_IDENT2: ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev); if (ret < 0) return ret; args->value = V3D_READ(V3D_IDENT2); - pm_runtime_put(&vc4->v3d->pdev->dev); + pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); + pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); break; case DRM_VC4_PARAM_SUPPORTS_BRANCHES: case DRM_VC4_PARAM_SUPPORTS_ETC1: diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 303f23c96220..db920771bfb5 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -709,8 +709,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec) } mutex_lock(&vc4->power_lock); - if (--vc4->power_refcount == 0) - pm_runtime_put(&vc4->v3d->pdev->dev); + if (--vc4->power_refcount == 0) { + pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev); + pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev); + } mutex_unlock(&vc4->power_lock); kfree(exec); diff --git a/drivers/gpu/drm/vc4/vc4_v3d.c b/drivers/gpu/drm/vc4/vc4_v3d.c index e6d3c6028341..7cc346ad9b0b 100644 --- a/drivers/gpu/drm/vc4/vc4_v3d.c +++ b/drivers/gpu/drm/vc4/vc4_v3d.c @@ -222,6 +222,8 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data) return ret; } + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */ pm_runtime_enable(dev); return 0; -- 2.10.2