See my tiny nit-pick below.
Op 26-10-15 om 18:13 schreef Karol Herbst:
From: Karol Herbst <g...@karolherbst.de>
---
drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h | 788 +++++++++++++++------------
drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h | 740 ++++++++++++++-----------
drm/nouveau/nvkm/subdev/pmu/fuc/gk104.fuc4.h | 740 ++++++++++++++-----------
drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h | 710 ++++++++++++++----------
drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h | 755 ++++++++++++++-----------
drm/nouveau/nvkm/subdev/pmu/fuc/os.h | 4 +
drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc | 148 +++++
7 files changed, 2267 insertions(+), 1618 deletions(-)
[snip]
diff --git a/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
b/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
index c8b06cb..53508d9 100644
--- a/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
+++ b/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
@@ -49,4 +49,8 @@
#define I2C__MSG_DATA0_WR08_REG 0:7
#define I2C__MSG_DATA1_WR08_VAL 0:7
+
+/* PERF: message identifiers */
+#define PERF_MSG_LOAD 1
+
#endif
diff --git a/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
b/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
index 38eadf7..69a8f8d 100644
--- a/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
+++ b/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
@@ -30,6 +30,18 @@ process(PROC_PERF, #perf_init, #perf_recv)
* PERF data segment
*****************************************************************************/
#ifdef INCLUDE_DATA
+perf_attr_start:
+// parameters
+perf_polling_period_us: .b32 100000 // 100000 us (100 ms); converted with ticks_from_us before each call(timer)
+
+// engine usage, one byte per engine, written by perf_counter_readout (scaled to 0xff there, not to 100)
+perf_eng_gr: .b8 0
+perf_eng_vdec: .b8 0
+perf_eng_mc: .b8 0
+#if NVKM_PPWR_CHIPSET >= GF100
+perf_eng_pcie: .b8 0
+#endif
+.align 4 // realign after the 3 (or 4 with pcie) single-byte fields above
#endif
/******************************************************************************
@@ -46,6 +58,78 @@ process(PROC_PERF, #perf_init, #perf_recv)
// $r11 - data1
// $r0 - zero
perf_recv: // message handler: answers PERF_MSG_LOAD from the host, treats any other sender as the polling tick
+ push $r1 // NOTE(review): $r1 is saved/restored here but not referenced anywhere else in this routine
+
+ imm32($r10, PROC_HOST)
+ cmp b32 $r14 $r10 // $r14 is presumably the sender process id (register header is above the visible hunk) - confirm
+ bra ne #perf_recv_not_host // not from the host: do a readout and re-arm the timer
+ cmp b32 $r13 PERF_MSG_LOAD // $r13 is presumably the message id - confirm
+ bra e #perf_load
+ bra #perf_recv_exit // any other host message is ignored
+
+perf_load:
+ clear b32 $r11 // data1 of the reply is zero
+ clear b32 $r12 // reply word is assembled in $r12, one usage byte per engine
+#if NVKM_PPWR_CHIPSET >= GF100
+ ld(b8, $r12, #perf_eng_pcie) // NOTE(review): the packing below relies on ld(b8, ...) leaving bits 8..31 of $r12 intact - confirm
+ shl b32 $r12 8
+#endif
+ ld(b8, $r12, #perf_eng_mc)
+ shl b32 $r12 8
+ ld(b8, $r12, #perf_eng_vdec)
+ shl b32 $r12 8
+ ld(b8, $r12, #perf_eng_gr) // gr ends up in the lowest byte, then vdec, mc and (GF100+) pcie
+ call(send) // $r14/$r13 are not written on this path, so send sees PROC_HOST / PERF_MSG_LOAD - assumes the macros above do not clobber them
+ bra #perf_recv_exit
+
+perf_recv_not_host:
+ call(perf_counter_readout) // refresh the perf_eng_* bytes and reset the counters
+
+ ld(b32, $r14, #perf_polling_period_us)
+ call #ticks_from_us // NOTE(review): assumes ticks_from_us takes and returns its value in $r14 - confirm
+ call(timer) // schedule the next readout one polling period from now
+
+perf_recv_exit:
+ pop $r1
+ ret
+
+
+// perf_counter_readout: store each engine counter relative to counter 0 into perf_eng_*, then reset all counters
+// overwrites $r13 and $r14
+// $r15 - current (perf)
+// $r0 - zero
+perf_counter_readout:
+ nv_iord($r14, NV_PPWR_COUNTER_COUNT(0))
+ div $r14 $r14 0xff // $r14 = count0 / 0xff (assuming div dst, src1, src2); NOTE(review): this is 0 when count0 < 0xff, so the divides below would divide by zero - confirm falcon div behaviour
+
+ nv_iord($r13, NV_PPWR_COUNTER_COUNT(1))
+ div $r13 $r13 $r14 // roughly count1 * 0xff / count0, i.e. scaled to 0..0xff rather than 0..100
+ st(b8, #perf_eng_gr, $r13) // byte store: only 8 bits of the quotient are kept
+
+ nv_iord($r13, NV_PPWR_COUNTER_COUNT(2))
+ div $r13 $r13 $r14
+ st(b8, #perf_eng_vdec, $r13)
+
+ nv_iord($r13, NV_PPWR_COUNTER_COUNT(3))
+ div $r13 $r13 $r14
+ st(b8, #perf_eng_mc, $r13)
+
+#if NVKM_PPWR_CHIPSET >= GF100
+ nv_iord($r13, NV_PPWR_COUNTER_COUNT(4))
+ div $r13 $r13 $r14
+ st(b8, #perf_eng_pcie, $r13)
+#endif
+
+ // reset the counters
+ imm32($r14, NV_PPWR_COUNTER_COUNT_RESET) // the same reset value is written to every counter, including counter 0
+ nv_iowr(NV_PPWR_COUNTER_COUNT(0), $r14)
+ nv_iowr(NV_PPWR_COUNTER_COUNT(1), $r14)
+ nv_iowr(NV_PPWR_COUNTER_COUNT(2), $r14)
+ nv_iowr(NV_PPWR_COUNTER_COUNT(3), $r14)
+#if NVKM_PPWR_CHIPSET >= GF100
+ nv_iowr(NV_PPWR_COUNTER_COUNT(4), $r14)
+#endif
+
ret
// description
@@ -53,5 +137,69 @@ perf_recv:
// $r15 - current (perf)
// $r0 - zero
perf_init: // one-time setup: program counter modes and signal masks, do a first readout, arm the polling timer
+ // set up the total ticks counter first
+ imm32($r14, NV_PPWR_COUNTER_MODE_ALWAYS)
+ nv_iowr(NV_PPWR_COUNTER_MODE(0), $r14) // counter 0 is the divisor used by perf_counter_readout
+
+ // set up the other counters, with fermi there are more
+ imm32($r14, NV_PPWR_COUNTER_MODE_IF_NOT_ALL) // NOTE(review): presumably counts while not all masked (idle) signals are asserted - confirm against the register docs
+ nv_iowr(NV_PPWR_COUNTER_MODE(1), $r14)
+ nv_iowr(NV_PPWR_COUNTER_MODE(2), $r14)
+ nv_iowr(NV_PPWR_COUNTER_MODE(3), $r14)
+#if NVKM_PPWR_CHIPSET >= GF100
+ nv_iowr(NV_PPWR_COUNTER_MODE(4), $r14)
+#endif
+
+ // core load counter
+ imm32($r14,
+ NV_PPWR_COUNTER_SIG_GR_IDLE
+ | NV_PPWR_COUNTER_SIG_GR_GPC_IDLE
+ | NV_PPWR_COUNTER_SIG_GR_ROP_IDLE
+#if NVKM_PPWR_CHIPSET >= GF100
+ | NV_PPWR_COUNTER_SIG_GR_HUB_IDLE
+ | NV_PPWR_COUNTER_SIG_PCOPY0_IDLE
+ | NV_PPWR_COUNTER_SIG_PCOPY1_IDLE
+#if NVKM_PPWR_CHIPSET >= GK104
+ | NV_PPWR_COUNTER_SIG_PCOPY2_IDLE
+#endif
+#endif
I'm not a big fan of nesting myself if it's not necessary, find it
clearer to keep them separate "if" preprocessor hints.
+ )
+ nv_iowr(NV_PPWR_COUNTER_MASK(1), $r14) // counter 1 feeds perf_eng_gr
+
+ // video load counter
+ imm32($r14,
+ NV_PPWR_COUNTER_SIG_PVLD_IDLE
+ | NV_PPWR_COUNTER_SIG_PPDEC_IDLE
+ | NV_PPWR_COUNTER_SIG_PPPP_IDLE
+#if NVKM_PPWR_CHIPSET >= GK104
+ | NV_PPWR_COUNTER_SIG_PVENC
+#endif
+ )
+ nv_iowr(NV_PPWR_COUNTER_MASK(2), $r14) // counter 2 feeds perf_eng_vdec; NOTE(review): NV_PPWR_COUNTER_SIG_PVENC above has no _IDLE suffix unlike the other signals - confirm the name
+
+ // memory load counter
+ imm32($r14,
+#if NVKM_PPWR_CHIPSET >= GF100
+ NV_PPWR_COUNTER_SIG_BFB_PART0_REQ
+#else
+ NV_PPWR_COUNTER_SIG_FB_PART0_REQ
+#endif
+ )
+ nv_iowr(NV_PPWR_COUNTER_MASK(3), $r14) // counter 3 feeds perf_eng_mc
+
+ // pcie load counter
+#if NVKM_PPWR_CHIPSET >= GF100
+ imm32($r14, NV_PPWR_COUNTER_SIG_PCIE)
+ nv_iowr(NV_PPWR_COUNTER_MASK(4), $r14) // counter 4 feeds perf_eng_pcie
+#endif
+
+ // initial read out
+ call(perf_counter_readout) // also writes the reset value to every counter
+
+ // schedule the next read out
+ ld(b32, $r14, #perf_polling_period_us)
+ call #ticks_from_us // NOTE(review): assumes ticks_from_us takes and returns its value in $r14 - confirm
+ call(timer)
+
ret
#endif
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau