Re: [Intel-gfx] [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples
On Fri, Nov 04, 2016 at 03:00:43PM +0530, sourab.gu...@intel.com wrote: > diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c > index 06c7b55..0dc2384 100644 > --- a/drivers/gpu/drm/i915/i915_drv.c > +++ b/drivers/gpu/drm/i915/i915_drv.c > @@ -1088,6 +1088,8 @@ static int i915_driver_init_hw(struct drm_i915_private > *dev_priv) > DRM_DEBUG_DRIVER("can't enable MSI"); > } > > + i915_perf_init_late(dev_priv); > + > return 0; Just a quick one: Create i915_driver_init_late() to capture the new init phase you want to add. -Chris -- Chris Wilson, Intel Open Source Technology Centre ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
[Intel-gfx] [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples
From: Sourab Gupta Currently, we only have the ability to forward the GPU timestamps in the samples (which are generated via OA reports or PIPE_CONTROL commands inserted in the ring). This limits the ability to correlate these samples with the system events. If we scale the GPU timestamps according to the timestamp base/frequency info present in bspec, it is observed that the timestamps drift really quickly from the system time. An ability is therefore needed to report timestamps in different clock domains, such as CLOCK_MONOTONIC (or _MONO_RAW), in the perf samples to be of more practical use to the userspace. This ability becomes important when we want to correlate/plot GPU events/samples with other system events on the same timeline (e.g. vblank events, or timestamps when work was submitted to kernel, etc.). The patch here proposes a mechanism to achieve this. The correlation between gpu time and system time is established using the cross timestamp framework. For this purpose, the timestamp clock associated with the command stream is abstracted as timecounter/cyclecounter, before utilizing the cross timestamp framework to retrieve gpu/system time correlated values. Different such gpu/system time values are then used to detect and correct the error in published gpu timestamp clock frequency. The userspace can request CLOCK_MONOTONIC_RAW timestamps in samples by requesting the corresponding property while opening the stream. 
Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_drv.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 24 +++- drivers/gpu/drm/i915/i915_perf.c | 273 +++ include/uapi/drm/i915_drm.h | 9 +- 4 files changed, 284 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 06c7b55..0dc2384 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -1088,6 +1088,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("can't enable MSI"); } + i915_perf_init_late(dev_priv); + return 0; out_ggtt: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index e912679..557a124 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -42,6 +42,9 @@ #include #include #include +#include +#include +#include #include #include @@ -1843,6 +1846,9 @@ struct i915_perf_stream { /* Whether the OA unit is in use */ bool using_oa; + /* monotonic_raw clk timestamp (in ns) for last sample */ + u64 last_sample_ts; + const struct i915_perf_stream_ops *ops; }; @@ -1889,6 +1895,20 @@ struct i915_perf_cs_data_node { u32 tag; }; +/** + * struct i915_clock_info - decribes i915 timestamp clock + * + */ +struct i915_clock_info { + struct cyclecounter cc; + struct timecounter tc; + struct system_device_crosststamp xtstamp; + ktime_t clk_offset; /* Offset (in ns) between monoraw clk and gpu time */ + u32 timestamp_frequency; + u32 resync_period; /* in msecs */ + struct delayed_work clk_sync_work; +}; + struct drm_i915_private { struct drm_device drm; @@ -2189,6 +2209,8 @@ struct drm_i915_private { struct i915_runtime_pm pm; + struct i915_clock_info ts_clk_info; + struct { bool initialized; @@ -2213,7 +2235,6 @@ struct drm_i915_private { bool periodic; int period_exponent; - int timestamp_frequency; int tail_margin; @@ -3796,6 +3817,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, /* i915_perf.c */ extern void 
i915_perf_init(struct drm_i915_private *dev_priv); +extern void i915_perf_init_late(struct drm_i915_private *dev_priv); extern void i915_perf_fini(struct drm_i915_private *dev_priv); extern void i915_perf_register(struct drm_i915_private *dev_priv); extern void i915_perf_unregister(struct drm_i915_private *dev_priv); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 8eb80e8..b11e953 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -189,6 +189,7 @@ #include #include +#include #include "i915_drv.h" #include "intel_ringbuffer.h" @@ -228,6 +229,9 @@ #define POLL_FREQUENCY 200 #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY) +#define MAX_CLK_SYNC_PERIOD (60*MSEC_PER_SEC) +#define INIT_CLK_SYNC_PERIOD (20) /* in msecs */ + static u32 i915_perf_stream_paranoid = true; /* The maximum exponent the hardware accepts is 63 (essentially it selects one @@ -254,13 +258,24 @@ static u32 i915_perf_stream_paranoid = true; #define TS_ADDR_ALIGN 8 #define I915_PERF_TS_SAMPLE_SIZE 8 +/* Published frequency of GT command stream timestamp clock */ +#define FREQUENCY_12_5_MHZ (125
[Intel-gfx] [PATCH 14/15] drm/i915: Mechanism to forward clock monotonic raw time in perf samples
From: Sourab Gupta Currently, we only have the ability to forward the GPU timestamps in the samples (which are generated via OA reports or PIPE_CONTROL commands inserted in the ring). This limits the ability to correlate these samples with the system events. If we scale the GPU timestamps according to the timestamp base/frequency info present in bspec, it is observed that the timestamps drift really quickly from the system time. An ability is therefore needed to report timestamps in different clock domains, such as CLOCK_MONOTONIC (or _MONO_RAW), in the perf samples to be of more practical use to the userspace. This ability becomes important when we want to correlate/plot GPU events/samples with other system events on the same timeline (e.g. vblank events, or timestamps when work was submitted to kernel, etc.). The patch here proposes a mechanism to achieve this. The correlation between gpu time and system time is established using the cross timestamp framework. For this purpose, the timestamp clock associated with the command stream is abstracted as timecounter/cyclecounter, before utilizing the cross timestamp framework to retrieve gpu/system time correlated values. Different such gpu/system time values are then used to detect and correct the error in published gpu timestamp clock frequency. The userspace can request CLOCK_MONOTONIC_RAW timestamps in samples by requesting the corresponding property while opening the stream. 
Signed-off-by: Sourab Gupta --- drivers/gpu/drm/i915/i915_dma.c | 2 + drivers/gpu/drm/i915/i915_drv.h | 24 +++- drivers/gpu/drm/i915/i915_perf.c | 273 +++ include/uapi/drm/i915_drm.h | 9 +- 4 files changed, 284 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index ab1f6c4..01f3559 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -1327,6 +1327,8 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) DRM_DEBUG_DRIVER("can't enable MSI"); } + i915_perf_init_late(dev_priv); + return 0; out_ggtt: diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9ccac83..d99ea73 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -42,6 +42,9 @@ #include #include #include +#include +#include +#include #include #include @@ -1825,6 +1828,9 @@ struct i915_perf_stream { /* Whether the OA unit is in use */ bool using_oa; + /* monotonic_raw clk timestamp (in ns) for last sample */ + u64 last_sample_ts; + const struct i915_perf_stream_ops *ops; }; @@ -1869,6 +1875,20 @@ struct i915_perf_cs_data_node { u32 tag; }; +/** + * struct i915_clock_info - decribes i915 timestamp clock + * + */ +struct i915_clock_info { + struct cyclecounter cc; + struct timecounter tc; + struct system_device_crosststamp xtstamp; + ktime_t clk_offset; /* Offset (in ns) between monoraw clk and gpu time */ + u32 timestamp_frequency; + u32 resync_period; /* in msecs */ + struct delayed_work clk_sync_work; +}; + struct drm_i915_private { struct drm_device *dev; struct kmem_cache *objects; @@ -2147,6 +2167,8 @@ struct drm_i915_private { struct i915_runtime_pm pm; + struct i915_clock_info ts_clk_info; + struct { bool initialized; @@ -2169,7 +2191,6 @@ struct drm_i915_private { bool periodic; int period_exponent; - int timestamp_frequency; int tail_margin; @@ -3699,6 +3720,7 @@ int i915_parse_cmds(struct intel_engine_cs *engine, /* i915_perf.c */ 
extern void i915_perf_init(struct drm_device *dev); +extern void i915_perf_init_late(struct drm_i915_private *dev_priv); extern void i915_perf_fini(struct drm_device *dev); /* i915_suspend.c */ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index aa3589e..e340cf9f 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -23,6 +23,7 @@ #include #include +#include #include "i915_drv.h" #include "intel_ringbuffer.h" @@ -62,6 +63,9 @@ #define POLL_FREQUENCY 200 #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY) +#define MAX_CLK_SYNC_PERIOD (60*MSEC_PER_SEC) +#define INIT_CLK_SYNC_PERIOD (20) /* in msecs */ + static u32 i915_perf_stream_paranoid = true; /* The maximum exponent the hardware accepts is 63 (essentially it selects one @@ -88,13 +92,24 @@ static u32 i915_perf_stream_paranoid = true; #define TS_ADDR_ALIGN 8 #define I915_PERF_TS_SAMPLE_SIZE 8 +/* Published frequency of GT command stream timestamp clock */ +#define FREQUENCY_12_5_MHZ (1250) +#define FREQUENCY_12_0_MHZ (1200) +#define FREQUENCY_19_2_MHZ (1920) +#define GT_CS_TIMESTAMP_F