Re: [PATCH] drm/nouveau: Add support for clockgating on Fermi+

2017-04-26 Thread Lyude Paul
On Wed, 2017-04-26 at 00:49 +0200, Karol Herbst wrote:
> Hi Lyude,
> 
> thanks for the great work. Just a view comments inline.
> 
> 2017-04-25 20:38 GMT+02:00 Lyude :
> > This adds support for enabling automatic clockgating on nvidia GPUs
> > for
> > Fermi and later generations. This saves a little bit of power,
> > bringing
> > my fermi GPU's power consumption from ~28.3W on idle to ~27W, and
> > my
> > kepler's idle power consumption from ~23.6W to ~21.65W.
> > 
> > Similar to how the nvidia driver seems to handle this, we enable
> > clockgating for each engine that supports it after it's
> > initialization.
> > 
> > Signed-off-by: Lyude 
> > ---
> >  .../gpu/drm/nouveau/include/nvkm/subdev/therm.h|  4 ++
> >  drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +-
> >  drivers/gpu/drm/nouveau/nvkm/engine/device/base.c  | 14 ++--
> >  drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild   |  2 +
> >  drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c   |  2 +
> >  .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c| 49
> > ++
> >  drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c  | 77
> > ++
> >  drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c  |  2 +
> >  drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c  |  2 +
> >  drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c  |  2 +-
> >  drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h   | 10 +++
> >  11 files changed, 175 insertions(+), 9 deletions(-)
> >  create mode 100644
> > drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c
> >  create mode 100644
> > drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c
> > 
> > diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h
> > b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h
> > index b268b96..904aa56 100644
> > --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h
> > +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h
> > @@ -84,6 +84,9 @@ struct nvkm_therm {
> > 
> > int (*attr_get)(struct nvkm_therm *, enum
> > nvkm_therm_attr_type);
> > int (*attr_set)(struct nvkm_therm *, enum
> > nvkm_therm_attr_type, int);
> > +
> > +   int  (*clkgate_engine)(struct nvkm_therm *, enum
> > nvkm_devidx);
> > +   void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool
> > enable);
> 
> remove those and have a simple "nvkm_therm_clkgate_engine" function
> 
> This way you know that every user calls this function and don't have
> to check for silly function pointers like you currently do in
> engine.c
> 
> >  };
> > 
> >  int nvkm_therm_temp_get(struct nvkm_therm *);
> > @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int,
> > struct nvkm_therm **);
> >  int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm
> > **);
> >  int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm
> > **);
> >  int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm
> > **);
> > +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm
> > **);
> >  int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm
> > **);
> >  int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm
> > **);
> >  #endif
> > diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c
> > b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
> > index b6c9169..473ad3e 100644
> > --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c
> > +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
> > @@ -26,6 +26,7 @@
> >  #include 
> > 
> >  #include 
> > +#include 
> > 
> >  bool
> >  nvkm_engine_chsw_load(struct nvkm_engine *engine)
> > @@ -86,6 +87,13 @@ static int
> >  nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend)
> >  {
> > struct nvkm_engine *engine = nvkm_engine(subdev);
> > +   struct nvkm_therm *therm = subdev->device->therm;
> > +   int gate_idx;
> > +
> > +   gate_idx = therm->clkgate_engine(therm, subdev->index);
> > +   if (gate_idx != -1)
> > +   therm->clkgate_set(therm, gate_idx, false);
> > +
> 
> move this code inside "nvkm_therm_clkgate_engine". Nobody outside
> nvkm_therm should even care about the index.
> 
> > if (engine->func->fini)
> > return engine->func->fini(engine, suspend);
> > return 0;
> > @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev)
> >  {
> > struct nvkm_engine *engine = nvkm_engine(subdev);
> > struct nvkm_fb *fb = subdev->device->fb;
> > +   struct nvkm_therm *therm = subdev->device->therm;
> > int ret = 0, i;
> > s64 time;
> > 
> > if (!engine->usecount) {
> > nvkm_trace(subdev, "init skipped, engine has no
> > users\n");
> > -   return ret;
> > +   goto finish;
> > }
> > 
> > if (engine->func->oneinit && !engine->subdev.oneinit) {
> > @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev)
> > 
> > for (i = 0; fb && i < fb->tile.regions; i++)
> > nvkm_engine_tile(engine, i);
> > +
> > +

Re: [PATCH] drm/nouveau: Add support for clockgating on Fermi+

2017-04-25 Thread Karol Herbst
Hi Lyude,

thanks for the great work. Just a view comments inline.

2017-04-25 20:38 GMT+02:00 Lyude :
> This adds support for enabling automatic clockgating on nvidia GPUs for
> Fermi and later generations. This saves a little bit of power, bringing
> my fermi GPU's power consumption from ~28.3W on idle to ~27W, and my
> kepler's idle power consumption from ~23.6W to ~21.65W.
>
> Similar to how the nvidia driver seems to handle this, we enable
> clockgating for each engine that supports it after it's initialization.
>
> Signed-off-by: Lyude 
> ---
>  .../gpu/drm/nouveau/include/nvkm/subdev/therm.h|  4 ++
>  drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +-
>  drivers/gpu/drm/nouveau/nvkm/engine/device/base.c  | 14 ++--
>  drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild   |  2 +
>  drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c   |  2 +
>  .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c| 49 ++
>  drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c  | 77 
> ++
>  drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c  |  2 +
>  drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c  |  2 +
>  drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c  |  2 +-
>  drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h   | 10 +++
>  11 files changed, 175 insertions(+), 9 deletions(-)
>  create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c
>  create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c
>
> diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h 
> b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h
> index b268b96..904aa56 100644
> --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h
> +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h
> @@ -84,6 +84,9 @@ struct nvkm_therm {
>
> int (*attr_get)(struct nvkm_therm *, enum nvkm_therm_attr_type);
> int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int);
> +
> +   int  (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx);
> +   void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool enable);

remove those and have a simple "nvkm_therm_clkgate_engine" function

This way you know that every user calls this function and don't have
to check for silly function pointers like you currently do in engine.c

>  };
>
>  int nvkm_therm_temp_get(struct nvkm_therm *);
> @@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, struct 
> nvkm_therm **);
>  int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
>  int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
>  int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
> +int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
>  int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
>  int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
>  #endif
> diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c 
> b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
> index b6c9169..473ad3e 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
> @@ -26,6 +26,7 @@
>  #include 
>
>  #include 
> +#include 
>
>  bool
>  nvkm_engine_chsw_load(struct nvkm_engine *engine)
> @@ -86,6 +87,13 @@ static int
>  nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend)
>  {
> struct nvkm_engine *engine = nvkm_engine(subdev);
> +   struct nvkm_therm *therm = subdev->device->therm;
> +   int gate_idx;
> +
> +   gate_idx = therm->clkgate_engine(therm, subdev->index);
> +   if (gate_idx != -1)
> +   therm->clkgate_set(therm, gate_idx, false);
> +

move this code inside "nvkm_therm_clkgate_engine". Nobody outside
nvkm_therm should even care about the index.

> if (engine->func->fini)
> return engine->func->fini(engine, suspend);
> return 0;
> @@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev)
>  {
> struct nvkm_engine *engine = nvkm_engine(subdev);
> struct nvkm_fb *fb = subdev->device->fb;
> +   struct nvkm_therm *therm = subdev->device->therm;
> int ret = 0, i;
> s64 time;
>
> if (!engine->usecount) {
> nvkm_trace(subdev, "init skipped, engine has no users\n");
> -   return ret;
> +   goto finish;
> }
>
> if (engine->func->oneinit && !engine->subdev.oneinit) {
> @@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev)
>
> for (i = 0; fb && i < fb->tile.regions; i++)
> nvkm_engine_tile(engine, i);
> +
> +finish:
> +   if (!ret) {
> +   int gate_idx = therm->clkgate_engine(therm, subdev->index);
> +
> +   if (gate_idx != -1)
> +   therm->clkgate_set(therm, gate_idx, true);
> +   }
> +

same code as above. More code sharing!

> return ret;
>  }
>
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/devic

[PATCH] drm/nouveau: Add support for clockgating on Fermi+

2017-04-25 Thread Lyude
This adds support for enabling automatic clockgating on nvidia GPUs for
Fermi and later generations. This saves a little bit of power, bringing
my fermi GPU's power consumption from ~28.3W on idle to ~27W, and my
kepler's idle power consumption from ~23.6W to ~21.65W.

Similar to how the nvidia driver seems to handle this, we enable
clockgating for each engine that supports it after it's initialization.

Signed-off-by: Lyude 
---
 .../gpu/drm/nouveau/include/nvkm/subdev/therm.h|  4 ++
 drivers/gpu/drm/nouveau/nvkm/core/engine.c | 20 +-
 drivers/gpu/drm/nouveau/nvkm/engine/device/base.c  | 14 ++--
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/Kbuild   |  2 +
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c   |  2 +
 .../gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c| 49 ++
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c  | 77 ++
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf119.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gm107.c  |  2 +
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gt215.c  |  2 +-
 drivers/gpu/drm/nouveau/nvkm/subdev/therm/priv.h   | 10 +++
 11 files changed, 175 insertions(+), 9 deletions(-)
 create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/clkgate.c
 create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/therm/gf100.c

diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h 
b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h
index b268b96..904aa56 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/therm.h
@@ -84,6 +84,9 @@ struct nvkm_therm {
 
int (*attr_get)(struct nvkm_therm *, enum nvkm_therm_attr_type);
int (*attr_set)(struct nvkm_therm *, enum nvkm_therm_attr_type, int);
+
+   int  (*clkgate_engine)(struct nvkm_therm *, enum nvkm_devidx);
+   void (*clkgate_set)(struct nvkm_therm *, int gate_idx, bool enable);
 };
 
 int nvkm_therm_temp_get(struct nvkm_therm *);
@@ -94,6 +97,7 @@ int nv40_therm_new(struct nvkm_device *, int, struct 
nvkm_therm **);
 int nv50_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
 int g84_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
 int gt215_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
+int gf100_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
 int gf119_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
 int gm107_therm_new(struct nvkm_device *, int, struct nvkm_therm **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c 
b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
index b6c9169..473ad3e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
@@ -26,6 +26,7 @@
 #include 
 
 #include 
+#include 
 
 bool
 nvkm_engine_chsw_load(struct nvkm_engine *engine)
@@ -86,6 +87,13 @@ static int
 nvkm_engine_fini(struct nvkm_subdev *subdev, bool suspend)
 {
struct nvkm_engine *engine = nvkm_engine(subdev);
+   struct nvkm_therm *therm = subdev->device->therm;
+   int gate_idx;
+
+   gate_idx = therm->clkgate_engine(therm, subdev->index);
+   if (gate_idx != -1)
+   therm->clkgate_set(therm, gate_idx, false);
+
if (engine->func->fini)
return engine->func->fini(engine, suspend);
return 0;
@@ -96,12 +104,13 @@ nvkm_engine_init(struct nvkm_subdev *subdev)
 {
struct nvkm_engine *engine = nvkm_engine(subdev);
struct nvkm_fb *fb = subdev->device->fb;
+   struct nvkm_therm *therm = subdev->device->therm;
int ret = 0, i;
s64 time;
 
if (!engine->usecount) {
nvkm_trace(subdev, "init skipped, engine has no users\n");
-   return ret;
+   goto finish;
}
 
if (engine->func->oneinit && !engine->subdev.oneinit) {
@@ -123,6 +132,15 @@ nvkm_engine_init(struct nvkm_subdev *subdev)
 
for (i = 0; fb && i < fb->tile.regions; i++)
nvkm_engine_tile(engine, i);
+
+finish:
+   if (!ret) {
+   int gate_idx = therm->clkgate_engine(therm, subdev->index);
+
+   if (gate_idx != -1)
+   therm->clkgate_set(therm, gate_idx, true);
+   }
+
return ret;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c 
b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index b690bc1..d133016 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1355,7 +1355,7 @@ nvc0_chipset = {
.mxm = nv50_mxm_new,
.pci = gf100_pci_new,
.pmu = gf100_pmu_new,
-   .therm = gt215_therm_new,
+   .therm = gf100_therm_new,
.timer = nv41_timer_new,
.volt = gf100_volt_new,
.ce[0] = gf100_ce_new,
@@ -1392,7 +1392,7 @@ nvc1_chipset = {
.mxm = nv50_mxm_new,
.pci = gf106_pci_new,
.pmu = gf100_pmu_new,
-   .therm = gt215_therm_new,
+   .t