On 08/31, Lionel Landwerlin wrote:
> New issues that were discovered while making the tests work on Gen8+ :
> 
>  - we need to measure timings between periodic reports and discard all
>    other kinds of reports
> 
>  - it seems the periodicity of the reports can be affected outside of RC6
>    (frequency changes); we can detect this by looking at the number of
>    clock cycles per timestamp delta
> 
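Out of curiosity, is the cycles-per-timestamp check roughly along these
lines? Just a sketch of my understanding, assuming the gen8+ OA report
layout with the timestamp in dword 1 and the GPU clock ticks in dword 3
(and a made-up helper name), so correct me if I've got the offsets wrong:

#include <stdint.h>

/* Clock cycles elapsed per timestamp tick between two periodic reports,
 * scaled by 1000 to keep some precision with integer math. Pairs where
 * this ratio jumps would indicate a frequency change (RC6 exit, RPS
 * transition, ...) and could be discarded.
 */
static uint64_t
oa_cycles_per_timestamp_delta(const uint32_t *report0, const uint32_t *report1)
{
        uint32_t ts_delta = report1[1] - report0[1];
        uint32_t clk_delta = report1[3] - report0[3];

        if (ts_delta == 0)
                return 0;

        return ((uint64_t)clk_delta * 1000) / ts_delta;
}
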
> v2: Drop some unused variables (Matthew)
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwer...@intel.com>
> ---

<SNIP>

> +
> +static uint32_t
> +i915_get_one_gpu_timestamp(uint32_t *context_id)
> +{
> +     drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
> +     drm_intel_context *mi_rpc_ctx = drm_intel_gem_context_create(bufmgr);
> +     drm_intel_bo *mi_rpc_bo = drm_intel_bo_alloc(bufmgr, "mi_rpc dest bo", 4096, 64);
alignment=64 ?

> +     struct intel_batchbuffer *mi_rpc_batch = intel_batchbuffer_alloc(bufmgr, devid);
> +     int ret;
> +     uint32_t timestamp;
> +
> +     drm_intel_bufmgr_gem_enable_reuse(bufmgr);
> +
> +     if (context_id) {
> +             ret = drm_intel_gem_context_get_id(mi_rpc_ctx, context_id);
> +             igt_assert_eq(ret, 0);
> +     }
> +
> +     igt_assert(mi_rpc_ctx);
> +     igt_assert(mi_rpc_bo);
> +     igt_assert(mi_rpc_batch);
> +
> +     ret = drm_intel_bo_map(mi_rpc_bo, true);
> +     igt_assert_eq(ret, 0);
> +     memset(mi_rpc_bo->virtual, 0x80, 4096);
> +     drm_intel_bo_unmap(mi_rpc_bo);
> +
> +     emit_report_perf_count(mi_rpc_batch,
> +                            mi_rpc_bo, /* dst */
> +                            0, /* dst offset in bytes */
> +                            0xdeadbeef); /* report ID */
> +
> +     intel_batchbuffer_flush_with_context(mi_rpc_batch, mi_rpc_ctx);
> +
> +     ret = drm_intel_bo_map(mi_rpc_bo, false /* write enable */);
> +     igt_assert_eq(ret, 0);
> +     timestamp = ((uint32_t *)mi_rpc_bo->virtual)[1];
> +     drm_intel_bo_unmap(mi_rpc_bo);
> +
> +     drm_intel_bo_unreference(mi_rpc_bo);
> +     intel_batchbuffer_free(mi_rpc_batch);
> +     drm_intel_gem_context_destroy(mi_rpc_ctx);
> +     drm_intel_bufmgr_destroy(bufmgr);
> +
> +     return timestamp;
> +}
>  
>  static void
>  hsw_sanity_check_render_basic_reports(uint32_t *oa_report0, uint32_t *oa_report1,
> @@ -1050,7 +1206,6 @@ i915_read_reports_until_timestamp(enum drm_i915_oa_format oa_format,
>       return total_len;
>  }
>  
> -
>  /* CAP_SYS_ADMIN is required to open system wide metrics, unless the system
>   * control parameter dev.i915.perf_stream_paranoid == 0 */
>  static void
> @@ -1365,20 +1520,6 @@ open_and_read_2_oa_reports(int format_id,
>       __perf_close(stream_fd);
>  }
>  
> -static void
> -gen8_read_report_clock_ratios(uint32_t *report,
> -                           uint32_t *slice_freq_mhz,
> -                           uint32_t *unslice_freq_mhz)
> -{
> -     uint32_t unslice_freq = report[0] & 0x1ff;
> -     uint32_t slice_freq_low = (report[0] >> 25) & 0x7f;
> -     uint32_t slice_freq_high = (report[0] >> 9) & 0x3;
> -     uint32_t slice_freq = slice_freq_low | (slice_freq_high << 7);
> -
> -     *slice_freq_mhz = (slice_freq * 16666) / 1000;
> -     *unslice_freq_mhz = (unslice_freq * 16666) / 1000;
> -}
> -
>  static void
>  print_reports(uint32_t *oa_report0, uint32_t *oa_report1, int fmt)
>  {
> @@ -1570,74 +1711,456 @@ test_oa_formats(void)
>       }
>  }
>  
> +
> +enum load {
> +     LOW,
> +     HIGH
> +};
> +
> +#define LOAD_HELPER_PAUSE_USEC 500
> +
> +static struct load_helper {
> +     int devid;
> +     int has_ppgtt;
> +     drm_intel_bufmgr *bufmgr;
> +     drm_intel_context *context;
> +     uint32_t context_id;
> +     struct intel_batchbuffer *batch;
> +     enum load load;
> +     bool exit;
> +     struct igt_helper_process igt_proc;
> +     struct igt_buf src, dst;
> +} lh = { 0, };
We can drop the initialisation since this is static.

> +
> +static void load_helper_signal_handler(int sig)
> +{
> +     if (sig == SIGUSR2)
> +             lh.load = lh.load == LOW ? HIGH : LOW;
> +     else
> +             lh.exit = true;
> +}
> +
> +static void load_helper_set_load(enum load load)
> +{
> +     igt_assert(lh.igt_proc.running);
> +
> +     if (lh.load == load)
> +             return;
> +
> +     lh.load = load;
> +     kill(lh.igt_proc.pid, SIGUSR2);
> +}
> +
> +static void load_helper_run(enum load load)
> +{
> +     /*
> +      * FIXME fork helpers won't get cleaned up when started from within a
> +      * subtest, so handle the case where it sticks around a bit too long.
> +      */
> +     if (lh.igt_proc.running) {
> +             load_helper_set_load(load);
> +             return;
> +     }
> +
> +     lh.load = load;
> +
> +     igt_fork_helper(&lh.igt_proc) {
> +             signal(SIGUSR1, load_helper_signal_handler);
> +             signal(SIGUSR2, load_helper_signal_handler);
> +
> +             while (!lh.exit) {
> +                     int ret;
> +
> +                     render_copy(lh.batch,
> +                                 lh.context,
> +                                 &lh.src, 0, 0, 1920, 1080,
> +                                 &lh.dst, 0, 0);
> +
> +                     intel_batchbuffer_flush_with_context(lh.batch,
> +                                                          lh.context);
> +
> +                     ret = drm_intel_gem_context_get_id(lh.context,
> +                                                        &lh.context_id);
> +                     igt_assert_eq(ret, 0);
> +
> +                     drm_intel_bo_wait_rendering(lh.dst.bo);
> +
> +                     /* Lower the load by pausing after every submitted
> +                      * write. */
> +                     if (lh.load == LOW)
> +                             usleep(LOAD_HELPER_PAUSE_USEC);
> +             }
> +     }
> +}
> +
> +static void load_helper_stop(void)
> +{
> +     kill(lh.igt_proc.pid, SIGUSR1);
> +     igt_assert(igt_wait_helper(&lh.igt_proc) == 0);
> +}
> +
> +static void load_helper_init(void)
> +{
> +     int ret;
> +
> +     lh.devid = intel_get_drm_devid(drm_fd);
> +     lh.has_ppgtt = gem_uses_ppgtt(drm_fd);
> +
> +     /* MI_STORE_DATA can only use GTT address on gen4+/g33 and needs
> +      * snoopable mem on pre-gen6. Hence load-helper only works on gen6+, but
> +      * that's also all we care about for the rps testcase*/
> +     igt_assert(intel_gen(lh.devid) >= 6);
> +     lh.bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
> +     igt_assert(lh.bufmgr);
> +
> +     drm_intel_bufmgr_gem_enable_reuse(lh.bufmgr);
> +
> +     lh.context = drm_intel_gem_context_create(lh.bufmgr);
> +     igt_assert(lh.context);
> +
> +     lh.context_id = 0xffffffff;
> +     ret = drm_intel_gem_context_get_id(lh.context, &lh.context_id);
> +     igt_assert_eq(ret, 0);
> +     igt_assert_neq(lh.context_id, 0xffffffff);
> +
> +     lh.batch = intel_batchbuffer_alloc(lh.bufmgr, lh.devid);
> +     igt_assert(lh.batch);
> +
> +     scratch_buf_init(lh.bufmgr, &lh.dst, 1920, 1080, 0);
> +     scratch_buf_init(lh.bufmgr, &lh.src, 1920, 1080, 0);
> +}
> +
> +static void load_helper_fini(void)
> +{
> +     if (lh.igt_proc.running)
> +             load_helper_stop();
> +
> +     if (lh.src.bo)
> +             drm_intel_bo_unreference(lh.src.bo);
> +     if (lh.dst.bo)
> +             drm_intel_bo_unreference(lh.dst.bo);
> +
> +     if (lh.batch)
> +             intel_batchbuffer_free(lh.batch);
> +
> +     if (lh.context)
> +             drm_intel_gem_context_destroy(lh.context);
> +
> +     if (lh.bufmgr)
> +             drm_intel_bufmgr_destroy(lh.bufmgr);
> +}
> +
>  static void
>  test_oa_exponents(void)
>  {
> -     igt_debug("Testing OA timer exponents\n");
> +     load_helper_init();
> +     load_helper_run(HIGH);
>  
>       /* It's asking a lot to sample with a 160 nanosecond period and the
>        * test can fail due to buffer overflows if it wasn't possible to
>        * keep up, so we don't start from an exponent of zero...
>        */
> -     for (int i = 5; i < 20; i++) {
> -             uint32_t expected_timestamp_delta;
> -             uint32_t timestamp_delta;
> -             uint32_t oa_report0[64];
> -             uint32_t oa_report1[64];
> +     for (int exponent = 5; exponent < 18; exponent++) {
Why the change from 20 -> 18?
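
For reference, my back-of-the-envelope numbers (assuming the ~80ns /
12.5MHz HSW timestamp tick; gen9 parts use ~83.3ns, so treat these as
ballpark): the period is 80ns * 2^(exponent + 1), i.e. exponent 0 is the
160ns mentioned in the comment above, exponent 17 is ~21ms and exponent
19 is ~84ms. Something like:

/* Rough OA sampling period for a given exponent; the 80ns timestamp
 * tick (HSW's 12.5MHz) is an assumption here.
 */
static uint64_t approx_oa_period_ns(int exponent)
{
        return 80ULL * (2ULL << exponent); /* 2^(exponent + 1) ticks */
}

So dropping the two largest exponents mostly saves the ~42ms and ~84ms
sample periods, if test runtime was the motivation.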

Otherwise looks okay:
Reviewed-by: Matthew Auld <matthew.a...@intel.com>
