On Wednesday, June 14, 2017 5:38:31 PM PDT Lionel Landwerlin wrote:
> Due to an underlying hardware race condition, we have no guarantee
> that all the reports coming from the OA buffer related to the workload
> we're trying to measure have landed to memory by the time all the work
> submitted has completed. That means we need to keep on reading the OA
> stream until we read a report with a timestamp older than the

timestamp newer / larger / more recent, right?  i.e. keep going until
you find a report in the stream beyond the expected end?  (Unless we're
reading backwards...)

> timestamp recored by the MI_REPORT_PERF_COUNT at the end of the
> performance query.
> 
> v2: fix uninitialized offset variable to 0 (Lionel)
> 
> v3: rework the reading to avoid blocking the user of the API unless
>     requested (Rob)
> 
> v4: fix a bug that makes the i965 driver reading the perf stream when
>     not necessary, leading to very long counter accumulation times
>     (Lionel)
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
> Cc: Robert Bragg <rob...@sixbynine.org>
> ---
>  src/mesa/drivers/dri/i965/brw_performance_query.c | 133 
> ++++++++++++++++++----
>  1 file changed, 113 insertions(+), 20 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c 
> b/src/mesa/drivers/dri/i965/brw_performance_query.c
> index d10141bf07a..d11784c0352 100644
> --- a/src/mesa/drivers/dri/i965/brw_performance_query.c
> +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
> @@ -219,6 +219,7 @@ struct brw_oa_sample_buf {
>     int refcount;
>     int len;
>     uint8_t buf[I915_PERF_OA_SAMPLE_SIZE * 10];
> +   uint32_t last_timestamp;
>  };
>  
>  /**
> @@ -244,6 +245,11 @@ struct brw_perf_query_object
>           struct brw_bo *bo;
>  
>           /**
> +          * Address of mapped of @bo
> +          */
> +         void *map;
> +
> +         /**
>            * The MI_REPORT_PERF_COUNT command lets us specify a unique
>            * ID that will be reflected in the resulting OA report
>            * that's written by the GPU. This is the ID we're expecting
> @@ -712,11 +718,26 @@ discard_all_queries(struct brw_context *brw)
>     }
>  }
>  
> -static bool
> -read_oa_samples(struct brw_context *brw)
> +enum OaReadStatus {
> +   OA_READ_STATUS_ERROR,
> +   OA_READ_STATUS_UNFINISHED,
> +   OA_READ_STATUS_FINISHED,
> +};
> +
> +static enum OaReadStatus
> +read_oa_samples_until(struct brw_context *brw,
> +                      uint32_t start_timestamp,
> +                      uint32_t end_timestamp)
>  {
> +   struct exec_node *tail_node =
> +      exec_list_get_tail(&brw->perfquery.sample_buffers);
> +   struct brw_oa_sample_buf *tail_buf =
> +      exec_node_data(struct brw_oa_sample_buf, tail_node, link);
> +   uint32_t last_timestamp = tail_buf->last_timestamp;
> +
>     while (1) {
>        struct brw_oa_sample_buf *buf = get_free_sample_buf(brw);
> +      uint32_t offset;
>        int len;
>  
>        while ((len = read(brw->perfquery.oa_stream_fd, buf->buf,
> @@ -728,28 +749,94 @@ read_oa_samples(struct brw_context *brw)
>  
>           if (len < 0) {
>              if (errno == EAGAIN)
> -               return true;
> +               return ((last_timestamp - start_timestamp) >=
> +                       (end_timestamp - start_timestamp)) ?
> +                      OA_READ_STATUS_FINISHED :
> +                      OA_READ_STATUS_UNFINISHED;
>              else {
>                 DBG("Error reading i915 perf samples: %m\n");
> -               return false;
>              }
> -         } else {
> +         } else
>              DBG("Spurious EOF reading i915 perf samples\n");
> -            return false;
> -         }
> +
> +         return OA_READ_STATUS_ERROR;
>        }
>  
>        buf->len = len;
>        exec_list_push_tail(&brw->perfquery.sample_buffers, &buf->link);
> +
> +      /* Go through the reports and update the last timestamp. */
> +      offset = 0;
> +      while (offset < buf->len) {
> +         const struct drm_i915_perf_record_header *header =
> +            (const struct drm_i915_perf_record_header *) &buf->buf[offset];
> +         uint32_t *report = (uint32_t *) (header + 1);
> +
> +         if (header->type == DRM_I915_PERF_RECORD_SAMPLE)
> +            last_timestamp = report[1];
> +
> +         offset += header->size;
> +      }
> +
> +      buf->last_timestamp = last_timestamp;
>     }
>  
>     unreachable("not reached");
> +   return OA_READ_STATUS_ERROR;
> +}
> +
> +/**
> + * Try to read all the reports until either the delimiting timestamp
> + * or an error arises.
> + */
> +static bool
> +read_oa_samples_for_query(struct brw_context *brw,
> +                          struct brw_perf_query_object *obj)
> +{
> +   uint32_t *start;
> +   uint32_t *last;
> +   uint32_t *end;
> +
> +   /* We need the MI_REPORT_PERF_COUNT to land before we can start
> +    * accumulate. */

   */ goes on its own line (here and elsewhere).

This all looks right to me.

Reviewed-by: Kenneth Graunke <kenn...@whitecape.org>

Attachment: signature.asc
Description: This is a digitally signed message part.

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to