[Intel-gfx] [PATCH v2 2/3] drm/i915/tgl: Add perf support on TGL
From: Lionel Landwerlin The design of the OA unit has been split into several units. We now have a global unit (OAG) and a render specific unit (OAR). This leads to some changes on how we program things. Some details : OAR: - has its own set of counter registers, they are per-context saved/restored - counters are not written to the circular OA buffer - a snapshot of the counters can be acquired with MI_RECORD_PERF_COUNT, or a single counter can be read with MI_STORE_REGISTER_MEM. OAG: - has global counters that increment across context switches - counters are written into the circular OA buffer (if requested) v2: Fix checkpatch warnings on code style (Lucas) v3: (Umesh) - Update register from which tail, status and head are read - Update logic to sample context reports - Update whitelist mux and b counter regs BSpec: 28727, 30021 Signed-off-by: Lionel Landwerlin Signed-off-by: Umesh Nerlige Ramappa Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_perf.c | 280 +++--- drivers/gpu/drm/i915/i915_reg.h | 103 ++ drivers/gpu/drm/i915/oa/i915_oa_tgl.c | 121 +++ drivers/gpu/drm/i915/oa/i915_oa_tgl.h | 16 ++ 5 files changed, 492 insertions(+), 31 deletions(-) create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.c create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e791d9323b51..0ec9fee58baa 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -242,7 +242,8 @@ i915-y += \ oa/i915_oa_cflgt2.o \ oa/i915_oa_cflgt3.o \ oa/i915_oa_cnl.o \ - oa/i915_oa_icl.o + oa/i915_oa_icl.o \ + oa/i915_oa_tgl.o i915-y += i915_perf.o # Post-mortem debug and GPU hang state capture diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 91707558a0f5..abc2b7a6dc92 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -217,6 +217,7 @@ #include "oa/i915_oa_cflgt3.h" #include "oa/i915_oa_cnl.h" #include "oa/i915_oa_icl.h" +#include "oa/i915_oa_tgl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -292,7 +293,8 @@ static u32 i915_perf_stream_paranoid = true; #define INVALID_CTX_ID 0x /* On Gen8+ automatically triggered OA reports include a 'reason' field... */ -#define OAREPORT_REASON_MASK 0x3f +#define OAREPORT_REASON_MASK (IS_GEN(stream->perf->i915, 12) ? \ + 0x7f : 0x3f) #define OAREPORT_REASON_SHIFT 19 #define OAREPORT_REASON_TIMER (1<<0) #define OAREPORT_REASON_CTX_SWITCH (1<<3) @@ -338,6 +340,10 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_C4_B8] = { 7, 64 }, }; +static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = { + [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, +}; + #define SAMPLE_OA_REPORT (1<<0) /** @@ -418,6 +424,14 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) kfree(oa_bo); } +static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) & + GEN12_OAG_OATAILPTR_MASK; +} + static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -538,7 +552,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) aging_tail = hw_tail; stream->oa_buffer.aging_timestamp = now; } else { - DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", + DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %x\n", hw_tail); } } @@ -757,7 +771,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & stream->perf->gen8_valid_ctx_bit)) + if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) && + INTEL_GEN(stream->perf->i915) <= 11) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -824,6 +839,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { + i915_reg_t oaheadptr; + + oaheadptr = IS_GEN(stream->perf->i915, 12) ? +
[Intel-gfx] [PATCH v2 2/3] drm/i915/tgl: Add perf support on TGL
From: Lionel Landwerlin The design of the OA unit has been split into several units. We now have a global unit (OAG) and a render specific unit (OAR). This leads to some changes on how we program things. Some details : OAR: - has its own set of counter registers, they are per-context saved/restored - counters are not written to the circular OA buffer - a snapshot of the counters can be acquired with MI_RECORD_PERF_COUNT, or a single counter can be read with MI_STORE_REGISTER_MEM. OAG: - has global counters that increment across context switches - counters are written into the circular OA buffer (if requested) v2: Fix checkpatch warnings on code style (Lucas) v3: (Umesh) - Update register from which tail, status and head are read - Update logic to sample context reports - Update whitelist mux and b counter regs BSpec: 28727, 30021 Signed-off-by: Lionel Landwerlin Signed-off-by: Umesh Nerlige Ramappa Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/i915/Makefile | 3 +- drivers/gpu/drm/i915/i915_perf.c | 280 +++--- drivers/gpu/drm/i915/i915_reg.h | 103 ++ drivers/gpu/drm/i915/oa/i915_oa_tgl.c | 121 +++ drivers/gpu/drm/i915/oa/i915_oa_tgl.h | 16 ++ 5 files changed, 492 insertions(+), 31 deletions(-) create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.c create mode 100644 drivers/gpu/drm/i915/oa/i915_oa_tgl.h diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index e791d9323b51..0ec9fee58baa 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -242,7 +242,8 @@ i915-y += \ oa/i915_oa_cflgt2.o \ oa/i915_oa_cflgt3.o \ oa/i915_oa_cnl.o \ - oa/i915_oa_icl.o + oa/i915_oa_icl.o \ + oa/i915_oa_tgl.o i915-y += i915_perf.o # Post-mortem debug and GPU hang state capture diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 91707558a0f5..abc2b7a6dc92 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -217,6 +217,7 @@ #include "oa/i915_oa_cflgt3.h" #include "oa/i915_oa_cnl.h" #include "oa/i915_oa_icl.h" +#include "oa/i915_oa_tgl.h" /* HW requires this to be a power of two, between 128k and 16M, though driver * is currently generally designed assuming the largest 16M size is used such @@ -292,7 +293,8 @@ static u32 i915_perf_stream_paranoid = true; #define INVALID_CTX_ID 0x /* On Gen8+ automatically triggered OA reports include a 'reason' field... */ -#define OAREPORT_REASON_MASK 0x3f +#define OAREPORT_REASON_MASK (IS_GEN(stream->perf->i915, 12) ? \ + 0x7f : 0x3f) #define OAREPORT_REASON_SHIFT 19 #define OAREPORT_REASON_TIMER (1<<0) #define OAREPORT_REASON_CTX_SWITCH (1<<3) @@ -338,6 +340,10 @@ static const struct i915_oa_format gen8_plus_oa_formats[I915_OA_FORMAT_MAX] = { [I915_OA_FORMAT_C4_B8] = { 7, 64 }, }; +static const struct i915_oa_format gen12_oa_formats[I915_OA_FORMAT_MAX] = { + [I915_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256 }, +}; + #define SAMPLE_OA_REPORT (1<<0) /** @@ -418,6 +424,14 @@ static void free_oa_config_bo(struct i915_oa_config_bo *oa_bo) kfree(oa_bo); } +static u32 gen12_oa_hw_tail_read(struct i915_perf_stream *stream) +{ + struct intel_uncore *uncore = stream->uncore; + + return intel_uncore_read(uncore, GEN12_OAG_OATAILPTR) & + GEN12_OAG_OATAILPTR_MASK; +} + static u32 gen8_oa_hw_tail_read(struct i915_perf_stream *stream) { struct intel_uncore *uncore = stream->uncore; @@ -538,7 +552,7 @@ static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream) aging_tail = hw_tail; stream->oa_buffer.aging_timestamp = now; } else { - DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %u\n", + DRM_ERROR("Ignoring spurious out of range OA buffer tail pointer = %x\n", hw_tail); } } @@ -757,7 +771,8 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, * Note: that we don't clear the valid_ctx_bit so userspace can * understand that the ID has been squashed by the kernel. */ - if (!(report32[0] & stream->perf->gen8_valid_ctx_bit)) + if (!(report32[0] & stream->perf->gen8_valid_ctx_bit) && + INTEL_GEN(stream->perf->i915) <= 11) ctx_id = report32[2] = INVALID_CTX_ID; /* @@ -824,6 +839,11 @@ static int gen8_append_oa_reports(struct i915_perf_stream *stream, } if (start_offset != *offset) { + i915_reg_t oaheadptr; + + oaheadptr = IS_GEN(stream->perf->i915, 12) ? +