Re: [PATCH] drm/nouveau: Add support for clockgating on Fermi+
On Wed, 2017-04-26 at 00:49 +0200, Karol Herbst wrote: > Hi Lyude, > > thanks for the great work. Just a view comments inline. > > 2017-04-25 20:38 GMT+02:00 Lyude : > > This adds support for enabling automatic clockgating on nvidia GPUs > > for > > Fermi and later generations. This saves a little bit of power, > > bringing > > my fermi GPU's power consumption from ~28.3W on idle to ~27W, and > > my > > kepler's idle power consumption from ~23.6W to ~21.65W. > > > > Similar to how the nvidia driver seems to handle this, we enable > > clockgating for each engine that supports it after it's > > initialization. > > > > Signed-off-by: Lyude > > --- > > .../gpu/drm/nouveau/include/nvkm/subdev/therm.h| 4 ++ > > drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +- > > drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 2 + > > .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c| 49 > > ++ > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 77 > > ++ > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 2 + > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 2 + > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 10 +++ > > 11 files changed, 175 insertions(+), 9 deletions(-) > > create mode 100644 > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > > create mode 100644 > > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > > > > diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > > b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > > index b268b96..904aa56 100644 > > --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > > +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > > @@ -84,6 +84,9 @@ struct nvkm_therm { > > > > int (*attr_get)(struct nvkm_therm *, enum > > nvkm_therm_attr_type); > > int (*attr_set)(struct nvkm_therm *, enum > > nvkm_therm_attr_type, int); > > + > > + int (*clkgate_engine)(struct nvkm_therm *, enum > > nvkm_devidx); > > + void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool > > enable); > > remove those and have a simple "nvkm_therm_clkgate_engine" function > > This way you know that every user calls this function and don't have > to check for silly function pointers like you currently do in > engine.c > > > }; > > > > int nvkm_therm_temp_get(struct nvkm_therm *); > > @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, > > struct nvkm_therm **); > > int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm > > **); > > #endif > > diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c > > b/drivers/gpu/drm/nouveau/nvkm/core/engine.c > > index b6c9169..473ad3e 100644 > > --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c > > +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c > > @@ -26,6 +26,7 @@ > > #include > > > > #include > > +#include > > > > bool > > nvkm_engine_chsw_load(struct nvkm_engine *engine) > > @@ -86,6 +87,13 @@ static int > > nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) > > { > > struct nvkm_engine *engine = nvkm_engine(subdev); > > + struct nvkm_therm *therm = subdev->device->therm; > > + int gate_idx; > > + > > + gate_idx = therm->clkgate_engine(therm, subdev->index); > > + if (gate_idx != -1) > > + therm->clkgate_set(therm, gate_idx, false); > > + > > move this code inside "nvkm_therm_clkgate_engine". Nobody outside > nvkm_therm should even care about the index. > > > if (engine->func->fini) > > return engine->func->fini(engine, suspend); > > return 0; > > @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) > > { > > struct nvkm_engine *engine = nvkm_engine(subdev); > > struct nvkm_fb *fb = subdev->device->fb; > > + struct nvkm_therm *therm = subdev->device->therm; > > int ret = 0, i; > > s64 time; > > > > if (!engine->usecount) { > > nvkm_trace(subdev, "init skipped, engine has no > > users\n"); > > - return ret; > > + goto finish; > > } > > > > if (engine->func->oneinit && !engine->subdev.oneinit) { > > @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev) > > > > for (i = 0; fb && i < fb->tile.regions; i++) > > nvkm_engine_tile(engine, i); > > + > > +
Re: [PATCH] drm/nouveau: Add support for clockgating on Fermi+
Hi Lyude, thanks for the great work. Just a view comments inline. 2017-04-25 20:38 GMT+02:00 Lyude : > This adds support for enabling automatic clockgating on nvidia GPUs for > Fermi and later generations. This saves a little bit of power, bringing > my fermi GPU's power consumption from ~28.3W on idle to ~27W, and my > kepler's idle power consumption from ~23.6W to ~21.65W. > > Similar to how the nvidia driver seems to handle this, we enable > clockgating for each engine that supports it after it's initialization. > > Signed-off-by: Lyude > --- > .../gpu/drm/nouveau/include/nvkm/subdev/therm.h| 4 ++ > drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +- > drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- > drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + > drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 2 + > .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c| 49 ++ > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 77 > ++ > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 2 + > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 2 + > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- > drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 10 +++ > 11 files changed, 175 insertions(+), 9 deletions(-) > create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c > create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c > > diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > index b268b96..904aa56 100644 > --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h > @@ -84,6 +84,9 @@ struct nvkm_therm { > > int (*attr_get)(struct nvkm_therm *, enum nvkm_therm_attr_type); > int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int); > + > + int (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx); > + void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool enable); remove those and have a simple "nvkm_therm_clkgate_engine" function This way you know that every user calls this function and don't have to check for silly function pointers like you currently do in engine.c > }; > > int nvkm_therm_temp_get(struct nvkm_therm *); > @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, struct > nvkm_therm **); > int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); > #endif > diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c > b/drivers/gpu/drm/nouveau/nvkm/core/engine.c > index b6c9169..473ad3e 100644 > --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c > +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c > @@ -26,6 +26,7 @@ > #include > > #include > +#include > > bool > nvkm_engine_chsw_load(struct nvkm_engine *engine) > @@ -86,6 +87,13 @@ static int > nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) > { > struct nvkm_engine *engine = nvkm_engine(subdev); > + struct nvkm_therm *therm = subdev->device->therm; > + int gate_idx; > + > + gate_idx = therm->clkgate_engine(therm, subdev->index); > + if (gate_idx != -1) > + therm->clkgate_set(therm, gate_idx, false); > + move this code inside "nvkm_therm_clkgate_engine". Nobody outside nvkm_therm should even care about the index. > if (engine->func->fini) > return engine->func->fini(engine, suspend); > return 0; > @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) > { > struct nvkm_engine *engine = nvkm_engine(subdev); > struct nvkm_fb *fb = subdev->device->fb; > + struct nvkm_therm *therm = subdev->device->therm; > int ret = 0, i; > s64 time; > > if (!engine->usecount) { > nvkm_trace(subdev, "init skipped, engine has no users\n"); > - return ret; > + goto finish; > } > > if (engine->func->oneinit && !engine->subdev.oneinit) { > @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev) > > for (i = 0; fb && i < fb->tile.regions; i++) > nvkm_engine_tile(engine, i); > + > +finish: > + if (!ret) { > + int gate_idx = therm->clkgate_engine(therm, subdev->index); > + > + if (gate_idx != -1) > + therm->clkgate_set(therm, gate_idx, true); > + } > + same code as above. More code sharing! > return ret; > } > > diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/devic
[PATCH] drm/nouveau: Add support for clockgating on Fermi+
This adds support for enabling automatic clockgating on nvidia GPUs for Fermi and later generations. This saves a little bit of power, bringing my fermi GPU's power consumption from ~28.3W on idle to ~27W, and my kepler's idle power consumption from ~23.6W to ~21.65W. Similar to how the nvidia driver seems to handle this, we enable clockgating for each engine that supports it after it's initialization. Signed-off-by: Lyude --- .../gpu/drm/nouveau/include/nvkm/subdev/therm.h| 4 ++ drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +- drivers/gpu/drm/nouveau/nvkm/engine/device/base.c | 14 ++-- drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c | 2 + .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c| 49 ++ drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c | 77 ++ drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c | 2 + drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c | 2 +- drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h | 10 +++ 11 files changed, 175 insertions(+), 9 deletions(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h index b268b96..904aa56 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h @@ -84,6 +84,9 @@ struct nvkm_therm { int (*attr_get)(struct nvkm_therm *, enum nvkm_therm_attr_type); int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int); + + int (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx); + void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool enable); }; int nvkm_therm_temp_get(struct nvkm_therm *); @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **); +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **); int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **); #endif diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c index b6c9169..473ad3e 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c @@ -26,6 +26,7 @@ #include #include +#include bool nvkm_engine_chsw_load(struct nvkm_engine *engine) @@ -86,6 +87,13 @@ static int nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend) { struct nvkm_engine *engine = nvkm_engine(subdev); + struct nvkm_therm *therm = subdev->device->therm; + int gate_idx; + + gate_idx = therm->clkgate_engine(therm, subdev->index); + if (gate_idx != -1) + therm->clkgate_set(therm, gate_idx, false); + if (engine->func->fini) return engine->func->fini(engine, suspend); return 0; @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev) { struct nvkm_engine *engine = nvkm_engine(subdev); struct nvkm_fb *fb = subdev->device->fb; + struct nvkm_therm *therm = subdev->device->therm; int ret = 0, i; s64 time; if (!engine->usecount) { nvkm_trace(subdev, "init skipped, engine has no users\n"); - return ret; + goto finish; } if (engine->func->oneinit && !engine->subdev.oneinit) { @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev) for (i = 0; fb && i < fb->tile.regions; i++) nvkm_engine_tile(engine, i); + +finish: + if (!ret) { + int gate_idx = therm->clkgate_engine(therm, subdev->index); + + if (gate_idx != -1) + therm->clkgate_set(therm, gate_idx, true); + } + return ret; } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index b690bc1..d133016 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1355,7 +1355,7 @@ nvc0_chipset = { .mxm = nv50_mxm_new, .pci = gf100_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .therm = gf100_therm_new, .timer = nv41_timer_new, .volt = gf100_volt_new, .ce[0] = gf100_ce_new, @@ -1392,7 +1392,7 @@ nvc1_chipset = { .mxm = nv50_mxm_new, .pci = gf106_pci_new, .pmu = gf100_pmu_new, - .therm = gt215_therm_new, + .t