[Intel-gfx] [PATCH v2 2/3] drm/i915/tgl: Add perf support on TGL

2019-10-17 Thread Umesh Nerlige Ramappa
From: Lionel Landwerlin 

The design of the OA unit has been split into several units. We now
have a global unit (OAG) and a render specific unit (OAR). This leads
to some changes in how we program things. Some details:

OAR:
  - has its own set of counter registers, which are per-context
    saved/restored
  - counters are not written to the circular OA buffer
  - a snapshot of the counters can be acquired with
    MI_RECORD_PERF_COUNT, or a single counter can be read with
    MI_STORE_REGISTER_MEM.

OAG:
  - has global counters that increment across context switches
  - counters are written into the circular OA buffer (if requested)

v2: Fix checkpatch warnings on code style (Lucas)
v3: (Umesh)
  - Update register from which tail, status and head are read
  - Update logic to sample context reports
  - Update whitelist mux and b counter regs

BSpec: 28727, 30021

Signed-off-by: Lionel Landwerlin 
Signed-off-by: Umesh Nerlige Ramappa 
Signed-off-by: Lucas De Marchi 
---
 drivers/gpu/drm/i915/Makefile |   3 +-
 drivers/gpu/drm/i915/i915_perf.c  | 280 +++---
 drivers/gpu/drm/i915/i915_reg.h   | 103 ++
 drivers/gpu/drm/i915/oa/i915_oa_tgl.c | 121 +++
 drivers/gpu/drm/i915/oa/i915_oa_tgl.h |  16 ++
 5 files changed, 492 insertions(+), 31 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.c
 create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e791d9323b51..0ec9fee58baa 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -242,7 +242,8 @@ i915-y += \
oa/i915_oa_cflgt2.o \
oa/i915_oa_cflgt3.o \
oa/i915_oa_cnl.o \
-   oa/i915_oa_icl.o
+   oa/i915_oa_icl.o \
+   oa/i915_oa_tgl.o
 i915-y += i915_perf.o
 
 # Post-mortem debug and GPU hang state capture
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 91707558a0f5..abc2b7a6dc92 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -217,6 +217,7 @@
 #include "oa/i915_oa_cflgt3.h"
 #include "oa/i915_oa_cnl.h"
 #include "oa/i915_oa_icl.h"
+#include "oa/i915_oa_tgl.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
  * is currently generally designed assuming the largest 16M size is used such
@@ -292,7 +293,8 @@ static u32 i915_perf_stream_paranoid = true;
 #define INVALID_CTX_ID 0xffffffff
 
 /* On Gen8+ automatically triggered OA reports include a 'reason' field... */
-#define OAREPORT_REASON_MASK   0x3f
+#define OAREPORT_REASON_MASK   (IS_GEN(stream->perf->i915, 12) ? \
+   0x7f : 0x3f)
 #define OAREPORT_REASON_SHIFT  19
 #define OAREPORT_REASON_TIMER  (1<<0)
 #define OAREPORT_REASON_CTX_SWITCH (1<<3)
@@ -338,6 +340,10 @@ static const struct i915_oa_format 
gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
[I915_OA_FORMAT_C4_B8]  = { 7, 64 },
 };
 
+static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
+   [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+};
+
 #define SAMPLE_OA_REPORT  (1<<0)
 
 /**
@@ -418,6 +424,14 @@ static void free_oa_config_bo(struct i915_oa_config_bo 
*oa_bo)
kfree(oa_bo);
 }
 
+static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
+{
+   struct intel_uncore *uncore = stream->uncore;
+
+   return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+  GEN12_OAG_OATAILPTR_MASK;
+}
+
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
struct intel_uncore *uncore = stream->uncore;
@@ -538,7 +552,7 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
aging_tail = hw_tail;
stream->oa_buffer.aging_timestamp = now;
} else {
-   DRM_ERROR("Ignoring spurious out of range OA buffer 
tail pointer = %u\n",
+   DRM_ERROR("Ignoring spurious out of range OA buffer 
tail pointer = %x\n",
  hw_tail);
}
}
@@ -757,7 +771,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
 * Note: that we don't clear the valid_ctx_bit so userspace can
 * understand that the ID has been squashed by the kernel.
 */
-   if (!(report32[0] & stream->perf->gen8_valid_ctx_bit))
+   if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
+   INTEL_GEN(stream->perf->i915) <= 11)
ctx_id = report32[2] = INVALID_CTX_ID;
 
/*
@@ -824,6 +839,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
}
 
if (start_offset != *offset) {
+   i915_reg_t oaheadptr;
+
+   oaheadptr = IS_GEN(stream->perf->i915, 12) ?
+

[Intel-gfx] [PATCH v2 2/3] drm/i915/tgl: Add perf support on TGL

2019-10-16 Thread Umesh Nerlige Ramappa
From: Lionel Landwerlin 

The design of the OA unit has been split into several units. We now
have a global unit (OAG) and a render specific unit (OAR). This leads
to some changes in how we program things. Some details:

OAR:
  - has its own set of counter registers, which are per-context
    saved/restored
  - counters are not written to the circular OA buffer
  - a snapshot of the counters can be acquired with
    MI_RECORD_PERF_COUNT, or a single counter can be read with
    MI_STORE_REGISTER_MEM.

OAG:
  - has global counters that increment across context switches
  - counters are written into the circular OA buffer (if requested)

v2: Fix checkpatch warnings on code style (Lucas)
v3: (Umesh)
  - Update register from which tail, status and head are read
  - Update logic to sample context reports
  - Update whitelist mux and b counter regs

BSpec: 28727, 30021

Signed-off-by: Lionel Landwerlin 
Signed-off-by: Umesh Nerlige Ramappa 
Signed-off-by: Lucas De Marchi 
---
 drivers/gpu/drm/i915/Makefile |   3 +-
 drivers/gpu/drm/i915/i915_perf.c  | 280 +++---
 drivers/gpu/drm/i915/i915_reg.h   | 103 ++
 drivers/gpu/drm/i915/oa/i915_oa_tgl.c | 121 +++
 drivers/gpu/drm/i915/oa/i915_oa_tgl.h |  16 ++
 5 files changed, 492 insertions(+), 31 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.c
 create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.h

diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index e791d9323b51..0ec9fee58baa 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -242,7 +242,8 @@ i915-y += \
oa/i915_oa_cflgt2.o \
oa/i915_oa_cflgt3.o \
oa/i915_oa_cnl.o \
-   oa/i915_oa_icl.o
+   oa/i915_oa_icl.o \
+   oa/i915_oa_tgl.o
 i915-y += i915_perf.o
 
 # Post-mortem debug and GPU hang state capture
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 91707558a0f5..abc2b7a6dc92 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -217,6 +217,7 @@
 #include "oa/i915_oa_cflgt3.h"
 #include "oa/i915_oa_cnl.h"
 #include "oa/i915_oa_icl.h"
+#include "oa/i915_oa_tgl.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
  * is currently generally designed assuming the largest 16M size is used such
@@ -292,7 +293,8 @@ static u32 i915_perf_stream_paranoid = true;
 #define INVALID_CTX_ID 0xffffffff
 
 /* On Gen8+ automatically triggered OA reports include a 'reason' field... */
-#define OAREPORT_REASON_MASK   0x3f
+#define OAREPORT_REASON_MASK   (IS_GEN(stream->perf->i915, 12) ? \
+   0x7f : 0x3f)
 #define OAREPORT_REASON_SHIFT  19
 #define OAREPORT_REASON_TIMER  (1<<0)
 #define OAREPORT_REASON_CTX_SWITCH (1<<3)
@@ -338,6 +340,10 @@ static const struct i915_oa_format 
gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = {
[I915_OA_FORMAT_C4_B8]  = { 7, 64 },
 };
 
+static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = {
+   [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 },
+};
+
 #define SAMPLE_OA_REPORT  (1<<0)
 
 /**
@@ -418,6 +424,14 @@ static void free_oa_config_bo(struct i915_oa_config_bo 
*oa_bo)
kfree(oa_bo);
 }
 
+static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream)
+{
+   struct intel_uncore *uncore = stream->uncore;
+
+   return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) &
+  GEN12_OAG_OATAILPTR_MASK;
+}
+
 static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream)
 {
struct intel_uncore *uncore = stream->uncore;
@@ -538,7 +552,7 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
aging_tail = hw_tail;
stream->oa_buffer.aging_timestamp = now;
} else {
-   DRM_ERROR("Ignoring spurious out of range OA buffer 
tail pointer = %u\n",
+   DRM_ERROR("Ignoring spurious out of range OA buffer 
tail pointer = %x\n",
  hw_tail);
}
}
@@ -757,7 +771,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
 * Note: that we don't clear the valid_ctx_bit so userspace can
 * understand that the ID has been squashed by the kernel.
 */
-   if (!(report32[0] & stream->perf->gen8_valid_ctx_bit))
+   if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) &&
+   INTEL_GEN(stream->perf->i915) <= 11)
ctx_id = report32[2] = INVALID_CTX_ID;
 
/*
@@ -824,6 +839,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream 
*stream,
}
 
if (start_offset != *offset) {
+   i915_reg_t oaheadptr;
+
+   oaheadptr = IS_GEN(stream->perf->i915, 12) ?
+