Re: [Intel-gfx] [PATCH v5 27/35] drm/i915: Added debug state dump facilities to scheduler

2016-03-15 Thread Joonas Lahtinen
Hi,

On pe, 2016-03-11 at 16:38 +, John Harrison wrote:
> The intention of the state dump code is not really for it to be a user 
> accessible debugfs entry (although one of the later patches does add a 
> debugfs interface). It is more intended for debugging lock ups and 
> unexpected behaviour by adding a dump function call to whatever WARN_ON 
> or similar is being hit. So in theory, the code should not be necessary 
> and there is no point upstreaming it. On the other hand, it does come in 
> very handy when debugging scheduler related issues and it is complicated 
> enough that you can't just knock it up in five minutes when a bug report 
> is logged.
> 
> The code could certainly be moved into a separate file, e.g. 
> i915_scheduler_debug.c. I don't think it would be good to move it to 
> debugfs and make it all 'seq_printf' style output only. It is much more 
> use as ordinary printk style output so you can call it directly at the 
> point of detecting an inconsistent state and get the dmesg output prior 
> to the kernel panic.
> 

In that case, it could be moved to it's own file and enabled
conditionally through a config parameter?

If we want to keep feature debugging code, there should be a configure switch 
within i915 configuration to enable the bits.

CC'd Daniel and Chris as this relates to the fault injection code too.
Something along CONFIG_I915_DEBUG_SCHEDULER and CONFIG_I915_DEBUG_FAULT_INJECT 
could be added?

Regards, Joonas

> 
> On 07/03/2016 12:31, Joonas Lahtinen wrote:
> > 
> > Hi,
> > 
> > On to, 2016-02-18 at 14:27 +, john.c.harri...@intel.com wrote:
> > > 
> > > From: John Harrison 
> > > 
> > > When debugging batch buffer submission issues, it is useful to be able
> > > to see what the current state of the scheduler is. This change adds
> > > functions for decoding the internal scheduler state and reporting it.
> > > 
> > > v3: Updated a debug message with the new state_str() function.
> > > 
> > > v4: Wrapped some long lines to keep the style checker happy. Removed
> > > the fence/sync code as that will now be part of a separate patch series.
> > > 
> > > v5: Removed forward declarations and white space. Added documentation.
> > > [Joonas Lahtinen]
> > > 
> > > Also squashed in later patch to add seqno information from the start.
> > > It was only being added in a separate patch due to historical reasons
> > > which have since gone away.
> > > 
> > > For: VIZ-1587
> > > Signed-off-by: John Harrison 
> > > Cc: Joonas Lahtinen 
> > > ---
> > >   drivers/gpu/drm/i915/i915_scheduler.c | 302 
> > > +-
> > >   drivers/gpu/drm/i915/i915_scheduler.h |  15 ++
> > >   2 files changed, 315 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
> > > b/drivers/gpu/drm/i915/i915_scheduler.c
> > > index f7f29d5..d0eed52 100644
> > > --- a/drivers/gpu/drm/i915/i915_scheduler.c
> > > +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> > > @@ -40,6 +40,117 @@ bool i915_scheduler_is_enabled(struct drm_device *dev)
> > >   return dev_priv->scheduler != NULL;
> > >   }
> > > 
> > These sort of functions should be in i915_debugfs.c, so that nobody
> > even thinks of using them for other purposes.
> > 
> > > 
> > >   
> > > +const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node)
> > > +{
> > > + static char str[50];
> > > + char*ptr = str;
> > > +
> > > + *(ptr++) = node->bumped ? 'B' : '-',
> > > + *(ptr++) = i915_gem_request_completed(node->params.request) ? 'C' : '-';
> > > +
> > Especially this kind of code needs to be in i915_debugfs.c. I'd
> > implement new code more along lines of i915_sseu_status(), but I see
> > this kind of code was previously merged.
> > 
> > > 
> > > + *ptr = 0;
> > > +
> > > + return str;
> > > +}
> > > +
> > > +char i915_scheduler_queue_status_chr(enum i915_scheduler_queue_status 
> > > status)
> > > +{
> > > + switch (status) {
> > > + case i915_sqs_none:
> > > + return 'N';
> > > +
> > > + case i915_sqs_queued:
> > > + return 'Q';
> > > +
> > > + case i915_sqs_popped:
> > > + return 'X';
> > > +
> > > + case i915_sqs_flying:
> > > + return 'F';
> > > +
> > > + case i915_sqs_complete:
> > > + return 'C';
> > > +
> > > + case i915_sqs_dead:
> > > + return 'D';
> > > +
> > > + default:
> > > + break;
> > > + }
> > > +
> > Bad indent.
> > 
> > > 
> > > + return '?';
> > > +}
> > > +
> > > +const char *i915_scheduler_queue_status_str(
> > > + enum i915_scheduler_queue_status status)
> > > +{
> > > + static char str[50];
> > > +
> > > + switch (status) {
> > > + case i915_sqs_none:
> > > + return "None";
> > > +
> > > + case i915_sqs_queued:
> > > + return "Queued";
> > > +
> > > + case i915_sqs_popped:
> > > + return "Popped";
> > > +
> > > + case i915_sqs_flying:
> > > + return "Flying";
> > > +
> > > + case i915_sqs_complete:
> > > + return "Complete";
> > > +
> > > + case i915_sqs_dead:
> > > + return "Dead";
> > > +
> > > + def

Re: [Intel-gfx] [PATCH v5 27/35] drm/i915: Added debug state dump facilities to scheduler

2016-03-11 Thread John Harrison
The intention of the state dump code is not really for it to be a user 
accessible debugfs entry (although one of the later patches does add a 
debugfs interface). It is more intended for debugging lock ups and 
unexpected behaviour by adding a dump function call to whatever WARN_ON 
or similar is being hit. So in theory, the code should not be necessary 
and there is no point upstreaming it. On the other hand, it does come in 
very handy when debugging scheduler related issues and it is complicated 
enough that you can't just knock it up in five minutes when a bug report 
is logged.


The code could certainly be moved into a separate file, e.g. 
i915_scheduler_debug.c. I don't think it would be good to move it to 
debugfs and make it all 'seq_printf' style output only. It is much more 
use as ordinary printk style output so you can call it directly at the 
point of detecting an inconsistent state and get the dmesg output prior 
to the kernel panic.



On 07/03/2016 12:31, Joonas Lahtinen wrote:

Hi,

On to, 2016-02-18 at 14:27 +, john.c.harri...@intel.com wrote:

From: John Harrison 

When debugging batch buffer submission issues, it is useful to be able
to see what the current state of the scheduler is. This change adds
functions for decoding the internal scheduler state and reporting it.

v3: Updated a debug message with the new state_str() function.

v4: Wrapped some long lines to keep the style checker happy. Removed
the fence/sync code as that will now be part of a separate patch series.

v5: Removed forward declarations and white space. Added documentation.
[Joonas Lahtinen]

Also squashed in later patch to add seqno information from the start.
It was only being added in a separate patch due to historical reasons
which have since gone away.

For: VIZ-1587
Signed-off-by: John Harrison 
Cc: Joonas Lahtinen 
---
  drivers/gpu/drm/i915/i915_scheduler.c | 302 +-
  drivers/gpu/drm/i915/i915_scheduler.h |  15 ++
  2 files changed, 315 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index f7f29d5..d0eed52 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -40,6 +40,117 @@ bool i915_scheduler_is_enabled(struct drm_device *dev)
return dev_priv->scheduler != NULL;
  }


These sort of functions should be in i915_debugfs.c, so that nobody
even thinks of using them for other purposes.

  
+const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node)

+{
+   static char str[50];
+   char*ptr = str;
+
+   *(ptr++) = node->bumped ? 'B' : '-',
+   *(ptr++) = i915_gem_request_completed(node->params.request) ? 'C' : '-';
+

Especially this kind of code needs to be in i915_debugfs.c. I'd
implement new code more along lines of i915_sseu_status(), but I see
this kind of code was previously merged.


+   *ptr = 0;
+
+   return str;
+}
+
+char i915_scheduler_queue_status_chr(enum i915_scheduler_queue_status status)
+{
+   switch (status) {
+   case i915_sqs_none:
+   return 'N';
+
+   case i915_sqs_queued:
+   return 'Q';
+
+   case i915_sqs_popped:
+   return 'X';
+
+   case i915_sqs_flying:
+   return 'F';
+
+   case i915_sqs_complete:
+   return 'C';
+
+   case i915_sqs_dead:
+   return 'D';
+
+   default:
+   break;
+   }
+

Bad indent.


+   return '?';
+}
+
+const char *i915_scheduler_queue_status_str(
+   enum i915_scheduler_queue_status status)
+{
+   static char str[50];
+
+   switch (status) {
+   case i915_sqs_none:
+   return "None";
+
+   case i915_sqs_queued:
+   return "Queued";
+
+   case i915_sqs_popped:
+   return "Popped";
+
+   case i915_sqs_flying:
+   return "Flying";
+
+   case i915_sqs_complete:
+   return "Complete";
+
+   case i915_sqs_dead:
+   return "Dead";
+
+   default:
+   break;
+   }
+
+   sprintf(str, "[Unknown_%d!]", status);
+   return str;
+}
+
+const char *i915_scheduler_flag_str(uint32_t flags)
+{
+   static char str[100];
+   char *ptr = str;
+
+   *ptr = 0;
+
+#define TEST_FLAG(flag, msg)   \
+   do {\
+   if (flags & (flag)) {   \
+   strcpy(ptr, msg);   \
+   ptr += strlen(ptr); \
+   flags &= ~(flag);   \
+   }   \
+   } while (0)
+
+   TEST_FLAG(i915_sf_interrupts_enabled, "IntOn|");
+   TEST_FLAG(i915_sf_submitting, "Submitting|");
+   TEST_FLAG(i915_sf_dump_force, "DumpForce|");
+   TEST_FLAG(i91

Re: [Intel-gfx] [PATCH v5 27/35] drm/i915: Added debug state dump facilities to scheduler

2016-03-07 Thread Joonas Lahtinen
Hi,

On to, 2016-02-18 at 14:27 +, john.c.harri...@intel.com wrote:
> From: John Harrison 
> 
> When debugging batch buffer submission issues, it is useful to be able
> to see what the current state of the scheduler is. This change adds
> functions for decoding the internal scheduler state and reporting it.
> 
> v3: Updated a debug message with the new state_str() function.
> 
> v4: Wrapped some long lines to keep the style checker happy. Removed
> the fence/sync code as that will now be part of a separate patch series.
> 
> v5: Removed forward declarations and white space. Added documentation.
> [Joonas Lahtinen]
> 
> Also squashed in later patch to add seqno information from the start.
> It was only being added in a separate patch due to historical reasons
> which have since gone away.
> 
> For: VIZ-1587
> Signed-off-by: John Harrison 
> Cc: Joonas Lahtinen 
> ---
>  drivers/gpu/drm/i915/i915_scheduler.c | 302 
> +-
>  drivers/gpu/drm/i915/i915_scheduler.h |  15 ++
>  2 files changed, 315 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
> b/drivers/gpu/drm/i915/i915_scheduler.c
> index f7f29d5..d0eed52 100644
> --- a/drivers/gpu/drm/i915/i915_scheduler.c
> +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> @@ -40,6 +40,117 @@ bool i915_scheduler_is_enabled(struct drm_device *dev)
>   return dev_priv->scheduler != NULL;
>  }
> 

These sort of functions should be in i915_debugfs.c, so that nobody
even thinks of using them for other purposes.

>  
> +const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node)
> +{
> + static char str[50];
> + char*ptr = str;
> +
> + *(ptr++) = node->bumped ? 'B' : '-',
> + *(ptr++) = i915_gem_request_completed(node->params.request) ? 'C' : '-';
> +

Especially this kind of code needs to be in i915_debugfs.c. I'd
implement new code more along lines of i915_sseu_status(), but I see
this kind of code was previously merged.

> + *ptr = 0;
> +
> + return str;
> +}
> +
> +char i915_scheduler_queue_status_chr(enum i915_scheduler_queue_status status)
> +{
> + switch (status) {
> + case i915_sqs_none:
> + return 'N';
> +
> + case i915_sqs_queued:
> + return 'Q';
> +
> + case i915_sqs_popped:
> + return 'X';
> +
> + case i915_sqs_flying:
> + return 'F';
> +
> + case i915_sqs_complete:
> + return 'C';
> +
> + case i915_sqs_dead:
> + return 'D';
> +
> + default:
> + break;
> + }
> +

Bad indent.

> + return '?';
> +}
> +
> +const char *i915_scheduler_queue_status_str(
> + enum i915_scheduler_queue_status status)
> +{
> + static char str[50];
> +
> + switch (status) {
> + case i915_sqs_none:
> + return "None";
> +
> + case i915_sqs_queued:
> + return "Queued";
> +
> + case i915_sqs_popped:
> + return "Popped";
> +
> + case i915_sqs_flying:
> + return "Flying";
> +
> + case i915_sqs_complete:
> + return "Complete";
> +
> + case i915_sqs_dead:
> + return "Dead";
> +
> + default:
> + break;
> + }
> +
> + sprintf(str, "[Unknown_%d!]", status);
> + return str;
> +}
> +
> +const char *i915_scheduler_flag_str(uint32_t flags)
> +{
> + static char str[100];
> + char *ptr = str;
> +
> + *ptr = 0;
> +
> +#define TEST_FLAG(flag, msg) \
> + do {\
> + if (flags & (flag)) {   \
> + strcpy(ptr, msg);   \
> + ptr += strlen(ptr); \
> + flags &= ~(flag);   \
> + }   \
> + } while (0)
> +
> + TEST_FLAG(i915_sf_interrupts_enabled, "IntOn|");
> + TEST_FLAG(i915_sf_submitting, "Submitting|");
> + TEST_FLAG(i915_sf_dump_force, "DumpForce|");
> + TEST_FLAG(i915_sf_dump_details,   "DumpDetails|");
> + TEST_FLAG(i915_sf_dump_dependencies,  "DumpDeps|");
> + TEST_FLAG(i915_sf_dump_seqno, "DumpSeqno|");
> +
> +#undef TEST_FLAG
> +
> + if (flags) {
> + sprintf(ptr, "Unknown_0x%X!", flags);
> + ptr += strlen(ptr);
> + }
> +
> + if (ptr == str)
> + strcpy(str, "-");
> + else
> + ptr[-1] = 0;
> +
> + return str;
> +};
> +
>  /**
>   * i915_scheduler_init - Initialise the scheduler.
>   * @dev: DRM device
> @@ -1024,6 +1135,193 @@ void i915_scheduler_work_handler(struct work_struct 
> *work)
>   i915_scheduler_process_work(ring);
>  }
>  
> +static int i915_scheduler_dump_locked(struct intel_engine_cs *ring,
> +   const char *msg)
> +{
> + struct drm_i915_private *dev_priv = ring->dev->dev_privat

[Intel-gfx] [PATCH v5 27/35] drm/i915: Added debug state dump facilities to scheduler

2016-02-18 Thread John . C . Harrison
From: John Harrison 

When debugging batch buffer submission issues, it is useful to be able
to see what the current state of the scheduler is. This change adds
functions for decoding the internal scheduler state and reporting it.

v3: Updated a debug message with the new state_str() function.

v4: Wrapped some long lines to keep the style checker happy. Removed
the fence/sync code as that will now be part of a separate patch series.

v5: Removed forward declarations and white space. Added documentation.
[Joonas Lahtinen]

Also squashed in later patch to add seqno information from the start.
It was only being added in a separate patch due to historical reasons
which have since gone away.

For: VIZ-1587
Signed-off-by: John Harrison 
Cc: Joonas Lahtinen 
---
 drivers/gpu/drm/i915/i915_scheduler.c | 302 +-
 drivers/gpu/drm/i915/i915_scheduler.h |  15 ++
 2 files changed, 315 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
b/drivers/gpu/drm/i915/i915_scheduler.c
index f7f29d5..d0eed52 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -40,6 +40,117 @@ bool i915_scheduler_is_enabled(struct drm_device *dev)
return dev_priv->scheduler != NULL;
 }
 
+const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node)
+{
+   static char str[50];
+   char*ptr = str;
+
+   *(ptr++) = node->bumped ? 'B' : '-',
+   *(ptr++) = i915_gem_request_completed(node->params.request) ? 'C' : '-';
+
+   *ptr = 0;
+
+   return str;
+}
+
+char i915_scheduler_queue_status_chr(enum i915_scheduler_queue_status status)
+{
+   switch (status) {
+   case i915_sqs_none:
+   return 'N';
+
+   case i915_sqs_queued:
+   return 'Q';
+
+   case i915_sqs_popped:
+   return 'X';
+
+   case i915_sqs_flying:
+   return 'F';
+
+   case i915_sqs_complete:
+   return 'C';
+
+   case i915_sqs_dead:
+   return 'D';
+
+   default:
+   break;
+   }
+
+   return '?';
+}
+
+const char *i915_scheduler_queue_status_str(
+   enum i915_scheduler_queue_status status)
+{
+   static char str[50];
+
+   switch (status) {
+   case i915_sqs_none:
+   return "None";
+
+   case i915_sqs_queued:
+   return "Queued";
+
+   case i915_sqs_popped:
+   return "Popped";
+
+   case i915_sqs_flying:
+   return "Flying";
+
+   case i915_sqs_complete:
+   return "Complete";
+
+   case i915_sqs_dead:
+   return "Dead";
+
+   default:
+   break;
+   }
+
+   sprintf(str, "[Unknown_%d!]", status);
+   return str;
+}
+
+const char *i915_scheduler_flag_str(uint32_t flags)
+{
+   static char str[100];
+   char *ptr = str;
+
+   *ptr = 0;
+
+#define TEST_FLAG(flag, msg)   \
+   do {\
+   if (flags & (flag)) {   \
+   strcpy(ptr, msg);   \
+   ptr += strlen(ptr); \
+   flags &= ~(flag);   \
+   }   \
+   } while (0)
+
+   TEST_FLAG(i915_sf_interrupts_enabled, "IntOn|");
+   TEST_FLAG(i915_sf_submitting, "Submitting|");
+   TEST_FLAG(i915_sf_dump_force, "DumpForce|");
+   TEST_FLAG(i915_sf_dump_details,   "DumpDetails|");
+   TEST_FLAG(i915_sf_dump_dependencies,  "DumpDeps|");
+   TEST_FLAG(i915_sf_dump_seqno, "DumpSeqno|");
+
+#undef TEST_FLAG
+
+   if (flags) {
+   sprintf(ptr, "Unknown_0x%X!", flags);
+   ptr += strlen(ptr);
+   }
+
+   if (ptr == str)
+   strcpy(str, "-");
+   else
+   ptr[-1] = 0;
+
+   return str;
+};
+
 /**
  * i915_scheduler_init - Initialise the scheduler.
  * @dev: DRM device
@@ -1024,6 +1135,193 @@ void i915_scheduler_work_handler(struct work_struct 
*work)
i915_scheduler_process_work(ring);
 }
 
+static int i915_scheduler_dump_locked(struct intel_engine_cs *ring,
+ const char *msg)
+{
+   struct drm_i915_private *dev_priv = ring->dev->dev_private;
+   struct i915_scheduler *scheduler = dev_priv->scheduler;
+   struct i915_scheduler_queue_entry *node;
+   int flying = 0, queued = 0, complete = 0, other = 0;
+   static int old_flying = -1, old_queued = -1, old_complete = -1;
+   bool b_dump;
+   char brkt[2] = { '<', '>' };
+
+   if (!ring)
+   return -EINVAL;
+
+   list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+   if (I915_SQS_IS_QUEUED(node))
+   queued++;
+   else if (I915_SQS_IS