Because compute support is not enabled by default for these chipsets,
NVF0_COMPUTE=1 needs to be used, along with GALLIUM_HUD to enable
performance counters.
Signed-off-by: Samuel Pitoiset
---
.../drivers/nouveau/nvc0/nvc0_query_hw_sm.c| 755 ++---
.../drivers/nouveau/nvc0/nvc0_query_hw_sm.h| 2 +
.../drivers/nouveau/nvc0/nve4_compute.xml.h| 4 +
3 files changed, 667 insertions(+), 94 deletions(-)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 68c8ff5..b584532 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -65,6 +65,7 @@ static const char *nve4_hw_sm_query_names[] =
"local_load_transactions",
"local_store",
"local_store_transactions",
+ "not_predicated_off_thread_inst_executed",
"prof_trigger_00",
"prof_trigger_01",
"prof_trigger_02",
@@ -78,6 +79,7 @@ static const char *nve4_hw_sm_query_names[] =
"shared_store",
"shared_store_replay",
"sm_cta_launched",
+ "thread_inst_executed",
"threads_launched",
"uncached_global_load_transaction",
"warps_launched",
@@ -169,6 +171,49 @@ static const uint64_t nve4_read_hw_sm_counters_code[] =
0x80001de7ULL
};
+static const uint64_t nvf0_read_hw_sm_counters_code[] =
+{
+ /* Same kernel as GK104:GK110, stored as 64-bit words. NOTE(review): some words below have fewer than 16 hex digits (e.g. 0x1820003c) -- confirm against the original patch that no digits were lost in transit. */
+ 0x0880808080808080ULL,
+ 0x8640109c0022ULL,
+ 0x8640019c0032ULL,
+ 0x8640021c0002ULL,
+ 0x8640029c0006ULL,
+ 0x8640031c000aULL,
+ 0x8640039c000eULL,
+ 0x8640041c0012ULL,
+ 0x08ac1080108c8080ULL,
+ 0x8640049c0016ULL,
+ 0x8640051c001aULL,
+ 0x8640059c001eULL,
+ 0xdb201c007f9c201eULL,
+ 0x64c03c1c002aULL,
+ 0xc0020a1c3021ULL,
+ 0x64c03c9c002eULL,
+ 0x0810a0808010b810ULL,
+ 0xc001041c3025ULL,
+ 0x1820003cULL,
+ 0xdb201c007f9c243eULL,
+ 0xc1c0301c2021ULL,
+ 0xc1c0081c2431ULL,
+ 0xc1c0021c2435ULL,
+ 0xe080069c2026ULL,
+ 0x08b010b010b010a0ULL,
+ 0xe080061c2022ULL,
+ 0xe4c03c00051c0032ULL,
+ 0xe084041c282aULL,
+ 0xe4c03c00059c0036ULL,
+ 0xe08040007f9c2c2eULL,
+ 0xe084049c3032ULL,
+ 0xfe80001c2800ULL,
+ 0x08b81080b010ULL,
+ 0x64c03c00011c0002ULL,
+ 0xe08040007f9c3436ULL,
+ 0xfe8020043010ULL,
+ 0xfc80281c3000ULL,
+ 0x181c003cULL,
+};
+
/* For simplicity, we will allocate as many group slots as we allocate counter
* slots. This means that a single counter which wants to source from 2 groups
* will have to be declared as using 2 counter slots. This shouldn't really be
@@ -192,64 +237,539 @@ struct nvc0_hw_sm_query_cfg
uint8_t norm[2]; /* normalization num,denom */
};
-#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, 0, s }, {}, {}, {} }, 1, { nu, dn } }
-#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_HW_SM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, 0, s }, {}, {}, {} }, 1, { nu, dn } }
+#define _CA(f, m, g, s) { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, 0, s }
+#define _CB(f, m, g, s) { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, 0, s }
+#define _Q(n, c) [NVE4_HW_SM_QUERY_##n] = c
+
+/* Compute capability 3.0 (GK104:GK110) */
+static const struct nvc0_hw_sm_query_cfg
+sm30_active_cycles = /* single-counter query config (num_counters = 1) */
+{
+ .ctr[0] = _CB(0x0001, B6, WARP, 0x00000000), /* _CB expands to FUNC_MODE_B6 / B_SIGSEL_WARP */
+ .num_counters = 1,
+ .norm = { 1, 1 }, /* normalization num, denom */
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm30_active_warps = /* single-counter query config (num_counters = 1) */
+{
+ .ctr[0] = _CB(0x003f, B6, WARP, 0x31483104), /* _CB expands to FUNC_MODE_B6 / B_SIGSEL_WARP */
+ .num_counters = 1,
+ .norm = { 2, 1 }, /* normalization num, denom */
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm30_atom_cas_count = /* single-counter query config (num_counters = 1) */
+{
+ .ctr[0] = _CA(0x0001, B6, BRANCH, 0x4), /* _CA expands to FUNC_MODE_B6 / A_SIGSEL_BRANCH */
+ .num_counters = 1,
+ .norm = { 1, 1 }, /* normalization num, denom */
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm30_atom_count = /* single-counter query config (num_counters = 1) */
+{
+ .ctr[0] = _CA(0x0001, B6, BRANCH, 0x00000000), /* _CA expands to FUNC_MODE_B6 / A_SIGSEL_BRANCH */
+ .num_counters = 1,
+ .norm = { 1, 1 }, /* normalization num, denom */
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm30_branch = /* single-counter query config (num_counters = 1) */
+{
+ .ctr[0] = _CA(0x0001, B6, BRANCH, 0x000c), /* _CA expands to FUNC_MODE_B6 / A_SIGSEL_BRANCH */
+ .num_counters = 1,
+ .norm = { 1, 1 }, /* normalization num, denom */
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm30_divergent_branch = /* single-counter query config (num_counters = 1) */
+{
+ .ctr[0] = _CA(0x0001, B6, BRANCH, 0x0010), /* _CA expands to FUNC_MODE_B6 / A_SIGSEL_BRANCH */
+ .num_counters = 1,
+ .norm = { 1, 1 }, /* normalization num, denom */
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm30_gld_request = /* single-counter query config (num_counters = 1) */
+{
+ .ctr[0] = _CA(0x0001, B6, LDST, 0x0010), /* _CA expands to FUNC_MODE_B6 / A_SIGSEL_LDST */
+ .num_counters = 1,
+ .norm = { 1, 1 }, /* normalization num, denom */
+};
+
+static const struct nvc0_hw_sm_query_cfg
+sm30_gld_mem_div_replay =
+{
+ .ctr[0] = _CB(0x0001, B6, REPLAY, 0x0010),
+ .num_counters = 1