Re: [Intel-gfx] [PATCH 11/20] drm/i915: Optimization to reduce the sampling time of GuC log buffer
On 12/08/16 15:48, Goel, Akash wrote: On 8/12/2016 8:12 PM, Tvrtko Ursulin wrote: On 12/08/16 07:25, akash.g...@intel.com wrote: From: Akash Goel GuC firmware sends an interrupt to flush the log buffer when it becomes half full, so Driver doesn't really need to sample the complete buffer and can just copy only the newly written data by GuC into the local buffer, i.e. as per the read & write pointer values. Moreover the flush interrupt would generally come for one type of log buffer, when it becomes half full, so at that time the other 2 types of log buffer would comparatively have much lesser unread data in them. In case of overflow reported by GuC, Driver do need to copy the entire buffer as the whole buffer would contain the unread data. v2: Rebase. Signed-off-by: Akash Goel --- drivers/gpu/drm/i915/i915_guc_submission.c | 40 +- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1ca1866..8e0f360 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -889,7 +889,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) struct guc_log_buffer_state *log_buffer_state, *log_buffer_snapshot_state; struct guc_log_buffer_state log_buffer_state_local; void *src_data_ptr, *dst_data_ptr; -u32 i, buffer_size; +bool new_overflow; +u32 i, buffer_size, read_offset, write_offset, bytes_to_copy; if (!guc->log.buf_addr) return; @@ -912,10 +913,13 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) memcpy(&log_buffer_state_local, log_buffer_state, sizeof(struct guc_log_buffer_state)); buffer_size = log_buffer_state_local.size; +read_offset = log_buffer_state_local.read_ptr; +write_offset = log_buffer_state_local.sampled_write_ptr; guc->log.flush_count[i] += log_buffer_state_local.flush_to_file; if (log_buffer_state_local.buffer_full_cnt != guc->log.prev_overflow_count[i]) { Wrong alignment. You can try checkpatch.pl for all of those. Sorry for all the alignment & indentation issues. Should the above condition be written like this ? if (log_buffer_state_local.buffer_full_cnt != guc->log.prev_overflow_count[i]) { Yes, but checkpatch.pl is your friend. :) +new_overflow = 1; true/false since it is a bool fine will do that. guc->log.total_overflow_count[i] += (log_buffer_state_local.buffer_full_cnt - guc->log.prev_overflow_count[i]); @@ -929,7 +933,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) guc->log.prev_overflow_count[i] = log_buffer_state_local.buffer_full_cnt; DRM_ERROR_RATELIMITED("GuC log buffer overflow\n"); -} +} else +new_overflow = 0; if (log_buffer_snapshot_state) { /* First copy the state structure in local buffer */ @@ -941,13 +946,37 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) * for consistency set the write pointer value to same * value of sampled_write_ptr in the snapshot buffer. */ -log_buffer_snapshot_state->write_ptr = -log_buffer_snapshot_state->sampled_write_ptr; +log_buffer_snapshot_state->write_ptr = write_offset; log_buffer_snapshot_state++; /* Now copy the actual logs */ memcpy(dst_data_ptr, src_data_ptr, buffer_size); The confusing bit - the memcpy above still copies the whole buffer, no? Really very sorry for this blooper. No worries, it happens to everyone! Regards, Tvrtko ___ Intel-gfx mailing list Intel-gfx@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/intel-gfx
Re: [Intel-gfx] [PATCH 11/20] drm/i915: Optimization to reduce the sampling time of GuC log buffer
On 8/12/2016 8:12 PM, Tvrtko Ursulin wrote: On 12/08/16 07:25, akash.g...@intel.com wrote: From: Akash Goel GuC firmware sends an interrupt to flush the log buffer when it becomes half full, so Driver doesn't really need to sample the complete buffer and can just copy only the newly written data by GuC into the local buffer, i.e. as per the read & write pointer values. Moreover the flush interrupt would generally come for one type of log buffer, when it becomes half full, so at that time the other 2 types of log buffer would comparatively have much lesser unread data in them. In case of overflow reported by GuC, Driver do need to copy the entire buffer as the whole buffer would contain the unread data. v2: Rebase. Signed-off-by: Akash Goel --- drivers/gpu/drm/i915/i915_guc_submission.c | 40 +- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1ca1866..8e0f360 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -889,7 +889,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) struct guc_log_buffer_state *log_buffer_state, *log_buffer_snapshot_state; struct guc_log_buffer_state log_buffer_state_local; void *src_data_ptr, *dst_data_ptr; -u32 i, buffer_size; +bool new_overflow; +u32 i, buffer_size, read_offset, write_offset, bytes_to_copy; if (!guc->log.buf_addr) return; @@ -912,10 +913,13 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) memcpy(&log_buffer_state_local, log_buffer_state, sizeof(struct guc_log_buffer_state)); buffer_size = log_buffer_state_local.size; +read_offset = log_buffer_state_local.read_ptr; +write_offset = log_buffer_state_local.sampled_write_ptr; guc->log.flush_count[i] += log_buffer_state_local.flush_to_file; if (log_buffer_state_local.buffer_full_cnt != guc->log.prev_overflow_count[i]) { Wrong alignment. You can try checkpatch.pl for all of those. Sorry for all the alignment & indentation issues. Should the above condition be written like this ? if (log_buffer_state_local.buffer_full_cnt != guc->log.prev_overflow_count[i]) { +new_overflow = 1; true/false since it is a bool fine will do that. guc->log.total_overflow_count[i] += (log_buffer_state_local.buffer_full_cnt - guc->log.prev_overflow_count[i]); @@ -929,7 +933,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) guc->log.prev_overflow_count[i] = log_buffer_state_local.buffer_full_cnt; DRM_ERROR_RATELIMITED("GuC log buffer overflow\n"); -} +} else +new_overflow = 0; if (log_buffer_snapshot_state) { /* First copy the state structure in local buffer */ @@ -941,13 +946,37 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) * for consistency set the write pointer value to same * value of sampled_write_ptr in the snapshot buffer. */ -log_buffer_snapshot_state->write_ptr = -log_buffer_snapshot_state->sampled_write_ptr; +log_buffer_snapshot_state->write_ptr = write_offset; log_buffer_snapshot_state++; /* Now copy the actual logs */ memcpy(dst_data_ptr, src_data_ptr, buffer_size); The confusing bit - the memcpy above still copies the whole buffer, no? Really very sorry for this blooper. Best regards Akash +if (unlikely(new_overflow)) { +/* copy the whole buffer in case of overflow */ +read_offset = 0; +write_offset = buffer_size; +} else if (unlikely((read_offset > buffer_size) || +(write_offset > buffer_size))) { +DRM_ERROR("invalid log buffer state\n"); +/* copy whole buffer as offsets are unreliable */ +read_offset = 0; +write_offset = buffer_size; +} + +/* Just copy the newly written data */ +if (read_offset <= write_offset) { +bytes_to_copy = write_offset - read_offset; +memcpy(dst_data_ptr + read_offset, + src_data_ptr + read_offset, bytes_to_copy); +} else { +bytes_to_copy = buffer_size - read_offset; +memcpy(dst_data_ptr + read_offset, + src_data_ptr + read_offset, bytes_to_copy); + +bytes_to_copy = write_offset; +memcpy(dst_data_ptr, src_data_ptr, bytes_to_copy); +} src_data_ptr += buffer_size; dst_data_ptr += buffer_size;
Re: [Intel-gfx] [PATCH 11/20] drm/i915: Optimization to reduce the sampling time of GuC log buffer
On 12/08/16 07:25, akash.g...@intel.com wrote: From: Akash Goel GuC firmware sends an interrupt to flush the log buffer when it becomes half full, so Driver doesn't really need to sample the complete buffer and can just copy only the newly written data by GuC into the local buffer, i.e. as per the read & write pointer values. Moreover the flush interrupt would generally come for one type of log buffer, when it becomes half full, so at that time the other 2 types of log buffer would comparatively have much lesser unread data in them. In case of overflow reported by GuC, Driver do need to copy the entire buffer as the whole buffer would contain the unread data. v2: Rebase. Signed-off-by: Akash Goel --- drivers/gpu/drm/i915/i915_guc_submission.c | 40 +- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1ca1866..8e0f360 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -889,7 +889,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) struct guc_log_buffer_state *log_buffer_state, *log_buffer_snapshot_state; struct guc_log_buffer_state log_buffer_state_local; void *src_data_ptr, *dst_data_ptr; - u32 i, buffer_size; + bool new_overflow; + u32 i, buffer_size, read_offset, write_offset, bytes_to_copy; if (!guc->log.buf_addr) return; @@ -912,10 +913,13 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) memcpy(&log_buffer_state_local, log_buffer_state, sizeof(struct guc_log_buffer_state)); buffer_size = log_buffer_state_local.size; + read_offset = log_buffer_state_local.read_ptr; + write_offset = log_buffer_state_local.sampled_write_ptr; guc->log.flush_count[i] += log_buffer_state_local.flush_to_file; if (log_buffer_state_local.buffer_full_cnt != guc->log.prev_overflow_count[i]) { Wrong alignment. You can try checkpatch.pl for all of those. + new_overflow = 1; true/false since it is a bool guc->log.total_overflow_count[i] += (log_buffer_state_local.buffer_full_cnt - guc->log.prev_overflow_count[i]); @@ -929,7 +933,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) guc->log.prev_overflow_count[i] = log_buffer_state_local.buffer_full_cnt; DRM_ERROR_RATELIMITED("GuC log buffer overflow\n"); - } + } else + new_overflow = 0; if (log_buffer_snapshot_state) { /* First copy the state structure in local buffer */ @@ -941,13 +946,37 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) * for consistency set the write pointer value to same * value of sampled_write_ptr in the snapshot buffer. */ - log_buffer_snapshot_state->write_ptr = - log_buffer_snapshot_state->sampled_write_ptr; + log_buffer_snapshot_state->write_ptr = write_offset; log_buffer_snapshot_state++; /* Now copy the actual logs */ memcpy(dst_data_ptr, src_data_ptr, buffer_size); The confusing bit - the memcpy above still copies the whole buffer, no? + if (unlikely(new_overflow)) { + /* copy the whole buffer in case of overflow */ + read_offset = 0; + write_offset = buffer_size; + } else if (unlikely((read_offset > buffer_size) || + (write_offset > buffer_size))) { + DRM_ERROR("invalid log buffer state\n"); + /* copy whole buffer as offsets are unreliable */ + read_offset = 0; + write_offset = buffer_size; + } + + /* Just copy the newly written data */ + if (read_offset <= write_offset) { + bytes_to_copy = write_offset - read_offset; + memcpy(dst_data_ptr + read_offset, +src_data_ptr + read_offset, bytes_to_copy); + } else { + bytes_to_copy = buffer_size - read_offset; + memcpy(dst_data_ptr + read_offset, +
[Intel-gfx] [PATCH 11/20] drm/i915: Optimization to reduce the sampling time of GuC log buffer
From: Akash Goel GuC firmware sends an interrupt to flush the log buffer when it becomes half full, so Driver doesn't really need to sample the complete buffer and can just copy only the newly written data by GuC into the local buffer, i.e. as per the read & write pointer values. Moreover the flush interrupt would generally come for one type of log buffer, when it becomes half full, so at that time the other 2 types of log buffer would comparatively have much lesser unread data in them. In case of overflow reported by GuC, Driver do need to copy the entire buffer as the whole buffer would contain the unread data. v2: Rebase. Signed-off-by: Akash Goel --- drivers/gpu/drm/i915/i915_guc_submission.c | 40 +- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c index 1ca1866..8e0f360 100644 --- a/drivers/gpu/drm/i915/i915_guc_submission.c +++ b/drivers/gpu/drm/i915/i915_guc_submission.c @@ -889,7 +889,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) struct guc_log_buffer_state *log_buffer_state, *log_buffer_snapshot_state; struct guc_log_buffer_state log_buffer_state_local; void *src_data_ptr, *dst_data_ptr; - u32 i, buffer_size; + bool new_overflow; + u32 i, buffer_size, read_offset, write_offset, bytes_to_copy; if (!guc->log.buf_addr) return; @@ -912,10 +913,13 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) memcpy(&log_buffer_state_local, log_buffer_state, sizeof(struct guc_log_buffer_state)); buffer_size = log_buffer_state_local.size; + read_offset = log_buffer_state_local.read_ptr; + write_offset = log_buffer_state_local.sampled_write_ptr; guc->log.flush_count[i] += log_buffer_state_local.flush_to_file; if (log_buffer_state_local.buffer_full_cnt != guc->log.prev_overflow_count[i]) { + new_overflow = 1; guc->log.total_overflow_count[i] += (log_buffer_state_local.buffer_full_cnt - guc->log.prev_overflow_count[i]); @@ -929,7 +933,8 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) guc->log.prev_overflow_count[i] = log_buffer_state_local.buffer_full_cnt; DRM_ERROR_RATELIMITED("GuC log buffer overflow\n"); - } + } else + new_overflow = 0; if (log_buffer_snapshot_state) { /* First copy the state structure in local buffer */ @@ -941,13 +946,37 @@ static void guc_read_update_log_buffer(struct intel_guc *guc) * for consistency set the write pointer value to same * value of sampled_write_ptr in the snapshot buffer. */ - log_buffer_snapshot_state->write_ptr = - log_buffer_snapshot_state->sampled_write_ptr; + log_buffer_snapshot_state->write_ptr = write_offset; log_buffer_snapshot_state++; /* Now copy the actual logs */ memcpy(dst_data_ptr, src_data_ptr, buffer_size); + if (unlikely(new_overflow)) { + /* copy the whole buffer in case of overflow */ + read_offset = 0; + write_offset = buffer_size; + } else if (unlikely((read_offset > buffer_size) || + (write_offset > buffer_size))) { + DRM_ERROR("invalid log buffer state\n"); + /* copy whole buffer as offsets are unreliable */ + read_offset = 0; + write_offset = buffer_size; + } + + /* Just copy the newly written data */ + if (read_offset <= write_offset) { + bytes_to_copy = write_offset - read_offset; + memcpy(dst_data_ptr + read_offset, +src_data_ptr + read_offset, bytes_to_copy); + } else { + bytes_to_copy = buffer_size - read_offset; + memcpy(dst_data_ptr + read_offset, +src_data_ptr + read_offset, bytes_to_copy); + + bytes_to_copy = write_offset; + memcpy(dst_data_ptr, src_data_ptr, bytes_to_copy); +