[Intel-gfx] [PATCH 1/7] drm/i915/perf: rework aging tail workaround

2020-03-03 Thread Umesh Nerlige Ramappa
From: Lionel Landwerlin 

We're about to introduce an options to open the perf stream, giving
the user ability to configure how often it wants the kernel to poll
the OA registers for available data.

Right now the workaround against the OA tail pointer race condition
requires at least twice the internal kernel polling timer to make any
data available.

This changes introduce checks on the OA data written into the circular
buffer to make as much data as possible available on the first
iteration of the polling timer.

v2: Use OA_TAKEN macro without the gtt_offset (Lionel)
v3: (Umesh)
- Rebase
- Change report to report32 from below review
  https://patchwork.freedesktop.org/patch/330704/?series=66697=1

Signed-off-by: Lionel Landwerlin 
Signed-off-by: Umesh Nerlige Ramappa 
---
 drivers/gpu/drm/i915/i915_perf.c   | 207 +++--
 drivers/gpu/drm/i915/i915_perf_types.h |  29 ++--
 2 files changed, 106 insertions(+), 130 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 1b074bb4a7fe..92c5c75e2a2b 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -241,23 +241,14 @@
  * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
  * read() attempts.
  *
- * In effect we define a tail pointer for reading that lags the real tail
- * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
- * time for the corresponding reports to become visible to the CPU.
- *
- * To manage this we actually track two tail pointers:
- *  1) An 'aging' tail with an associated timestamp that is tracked until we
- * can trust the corresponding data is visible to the CPU; at which point
- * it is considered 'aged'.
- *  2) An 'aged' tail that can be used for read()ing.
- *
- * The two separate pointers let us decouple read()s from tail pointer aging.
- *
- * The tail pointers are checked and updated at a limited rate within a hrtimer
- * callback (the same callback that is used for delivering EPOLLIN events)
- *
- * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
- * indicates that an updated tail pointer is needed.
+ * We workaround this issue in oa_buffer_check() by reading the reports in the
+ * OA buffer, starting from the tail reported by the HW until we find 2
+ * consecutive reports with their first 2 dwords of not at 0. Those dwords are
+ * also set to 0 once read and the whole buffer is cleared upon OA buffer
+ * initialization. The first dword is the reason for this report while the
+ * second is the timestamp, making the chances of having those 2 fields at 0
+ * fairly unlikely. A more detailed explanation is available in
+ * oa_buffer_check().
  *
  * Most of the implementation details for this workaround are in
  * oa_buffer_check_unlocked() and _append_oa_reports()
@@ -270,7 +261,6 @@
  * enabled without any periodic sampling.
  */
 #define OA_TAIL_MARGIN_NSEC10ULL
-#define INVALID_TAIL_PTR   0x
 
 /* frequency for checking whether the OA unit has written new reports to the
  * circular OA buffer...
@@ -476,10 +466,10 @@ static u32 gen7_oa_hw_tail_read(struct i915_perf_stream 
*stream)
  */
 static bool oa_buffer_check_unlocked(struct i915_perf_stream *stream)
 {
+   u32 gtt_offset = i915_ggtt_offset(stream->oa_buffer.vma);
int report_size = stream->oa_buffer.format_size;
unsigned long flags;
-   unsigned int aged_idx;
-   u32 head, hw_tail, aged_tail, aging_tail;
+   u32 hw_tail;
u64 now;
 
/* We have to consider the (unlikely) possibility that read() errors
@@ -488,16 +478,6 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
 */
spin_lock_irqsave(>oa_buffer.ptr_lock, flags);
 
-   /* NB: The head we observe here might effectively be a little out of
-* date (between head and tails[aged_idx].offset if there is currently
-* a read() in progress.
-*/
-   head = stream->oa_buffer.head;
-
-   aged_idx = stream->oa_buffer.aged_tail_idx;
-   aged_tail = stream->oa_buffer.tails[aged_idx].offset;
-   aging_tail = stream->oa_buffer.tails[!aged_idx].offset;
-
hw_tail = stream->perf->ops.oa_hw_tail_read(stream);
 
/* The tail pointer increases in 64 byte increments,
@@ -507,64 +487,76 @@ static bool oa_buffer_check_unlocked(struct 
i915_perf_stream *stream)
 
now = ktime_get_mono_fast_ns();
 
-   /* Update the aged tail
-*
-* Flip the tail pointer available for read()s once the aging tail is
-* old enough to trust that the corresponding data will be visible to
-* the CPU...
-*
-* Do this before updating the aging pointer in case we may be able to
-* immediately start aging a new pointer too (if new data has become
-* available) without needing to wait for a later hrtimer callback.
-*/
-   if (aging_tail != 

[Intel-gfx] [PATCH 1/7] drm/i915/perf: rework aging tail workaround

2019-01-16 Thread Lionel Landwerlin
We're about to introduce an options to open the perf stream, giving
the user ability to configure how often it wants the kernel to poll
the OA registers for available data.

Right now the workaround against the OA tail pointer race condition
requires at least twice the internal kernel polling timer to make any
data available.

This changes introduce checks on the OA data written into the circular
buffer to make as much data as possible available on the first
iteration of the polling timer.

v2: Use OA_TAKEN macro without the gtt_offset (Lionel)

Signed-off-by: Lionel Landwerlin 
---
 drivers/gpu/drm/i915/i915_drv.h  |  32 ++---
 drivers/gpu/drm/i915/i915_perf.c | 200 ++-
 2 files changed, 103 insertions(+), 129 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index da055a86db4d..8cb93718224f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1877,6 +1877,12 @@ struct drm_i915_private {
 */
struct ratelimit_state spurious_report_rs;
 
+   /**
+* For rate limiting any notifications of tail pointer
+* race.
+*/
+   struct ratelimit_state tail_pointer_race;
+
bool periodic;
int period_exponent;
 
@@ -1917,23 +1923,11 @@ struct drm_i915_private {
spinlock_t ptr_lock;
 
/**
-* One 'aging' tail pointer and one 'aged'
-* tail pointer ready to used for reading.
-*
-* Initial values of 0x are invalid
-* and imply that an update is required
-* (and should be ignored by an attempted
-* read)
-*/
-   struct {
-   u32 offset;
-   } tails[2];
-
-   /**
-* Index for the aged tail ready to read()
-* data up to.
+* The last HW tail reported by HW. The data
+* might not have made it to memory yet
+* though.
 */
-   unsigned int aged_tail_idx;
+   u32 aging_tail;
 
/**
 * A monotonic timestamp for when the current
@@ -1952,6 +1946,12 @@ struct drm_i915_private {
 * data to userspace.
 */
u32 head;
+
+   /**
+* The last tail verified tail that can be
+* read by userspace.
+*/
+   u32 tail;
} oa_buffer;
 
u32 gen7_latched_oastatus1;
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index faff6cf1aaa1..575701e2aabc 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -233,23 +233,14 @@
  * for this earlier, as part of the oa_buffer_check to avoid lots of redundant
  * read() attempts.
  *
- * In effect we define a tail pointer for reading that lags the real tail
- * pointer by at least %OA_TAIL_MARGIN_NSEC nanoseconds, which gives enough
- * time for the corresponding reports to become visible to the CPU.
- *
- * To manage this we actually track two tail pointers:
- *  1) An 'aging' tail with an associated timestamp that is tracked until we
- * can trust the corresponding data is visible to the CPU; at which point
- * it is considered 'aged'.
- *  2) An 'aged' tail that can be used for read()ing.
- *
- * The two separate pointers let us decouple read()s from tail pointer aging.
- *
- * The tail pointers are checked and updated at a limited rate within a hrtimer
- * callback (the same callback that is used for delivering EPOLLIN events)
- *
- * Initially the tails are marked invalid with %INVALID_TAIL_PTR which
- * indicates that an updated tail pointer is needed.
+ * We workaround this issue in oa_buffer_check() by reading the reports in the
+ * OA buffer, starting from the tail reported by the HW until we find 2
+ * consecutive reports with their first 2 dwords of not at 0. Those dwords are
+ * also set to 0 once read and the whole buffer is cleared upon OA buffer
+ * initialization. The first dword is the reason for this report while the
+ * second is the timestamp, making the chances of having those 2