Hi,

On pe, 2016-03-11 at 16:38 +0000, John Harrison wrote:
> The intention of the state dump code is not really for it to be a user 
> accessible debugfs entry (although one of the later patches does add a 
> debugfs interface). It is more intended for debugging lock ups and 
> unexpected behaviour by adding a dump function call to whatever WARN_ON 
> or similar is being hit. So in theory, the code should not be necessary 
> and there is no point upstreaming it. On the other hand, it does come in 
> very handy when debugging scheduler related issues and it is complicated 
> enough that you can't just knock it up in five minutes when a bug report 
> is logged.
> 
> The code could certainly be moved into a separate file, e.g. 
> i915_scheduler_debug.c. I don't think it would be good to move it to 
> debugfs and make it all 'seq_printf' style output only. It is much more 
> use as ordinary printk style output so you can call it directly at the 
> point of detecting an inconsistent state and get the dmesg output prior 
> to the kernel panic.
> 

In that case, it could be moved to it's own file and enabled
conditionally through a config parameter?

If we want to keep feature debugging code, there should be a configure switch 
within i915 configuration to enable the bits.

CC'd Daniel and Chris as this relates to the fault injection code too.
Something along CONFIG_I915_DEBUG_SCHEDULER and CONFIG_I915_DEBUG_FAULT_INJECT 
could be added?

Regards, Joonas

> 
> On 07/03/2016 12:31, Joonas Lahtinen wrote:
> > 
> > Hi,
> > 
> > On to, 2016-02-18 at 14:27 +0000, john.c.harri...@intel.com wrote:
> > > 
> > > From: John Harrison <john.c.harri...@intel.com>
> > > 
> > > When debugging batch buffer submission issues, it is useful to be able
> > > to see what the current state of the scheduler is. This change adds
> > > functions for decoding the internal scheduler state and reporting it.
> > > 
> > > v3: Updated a debug message with the new state_str() function.
> > > 
> > > v4: Wrapped some long lines to keep the style checker happy. Removed
> > > the fence/sync code as that will now be part of a separate patch series.
> > > 
> > > v5: Removed forward declarations and white space. Added documentation.
> > > [Joonas Lahtinen]
> > > 
> > > Also squashed in later patch to add seqno information from the start.
> > > It was only being added in a separate patch due to historical reasons
> > > which have since gone away.
> > > 
> > > For: VIZ-1587
> > > Signed-off-by: John Harrison <john.c.harri...@intel.com>
> > > Cc: Joonas Lahtinen <joonas.lahti...@linux.intel.com>
> > > ---
> > >   drivers/gpu/drm/i915/i915_scheduler.c | 302 
> > > +++++++++++++++++++++++++++++++++-
> > >   drivers/gpu/drm/i915/i915_scheduler.h |  15 ++
> > >   2 files changed, 315 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/i915/i915_scheduler.c 
> > > b/drivers/gpu/drm/i915/i915_scheduler.c
> > > index f7f29d5..d0eed52 100644
> > > --- a/drivers/gpu/drm/i915/i915_scheduler.c
> > > +++ b/drivers/gpu/drm/i915/i915_scheduler.c
> > > @@ -40,6 +40,117 @@ bool i915_scheduler_is_enabled(struct drm_device *dev)
> > >           return dev_priv->scheduler != NULL;
> > >   }
> > > 
> > These sort of functions should be in i915_debugfs.c, so that nobody
> > even thinks of using them for other purposes.
> > 
> > > 
> > >   
> > > +const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node)
> > > +{
> > > + static char     str[50];
> > > + char            *ptr = str;
> > > +
> > > + *(ptr++) = node->bumped ? 'B' : '-',
> > > + *(ptr++) = i915_gem_request_completed(node->params.request) ? 'C' : '-';
> > > +
> > Especially this kind of code needs to be in i915_debugfs.c. I'd
> > implement new code more along lines of i915_sseu_status(), but I see
> > this kind of code was previously merged.
> > 
> > > 
> > > + *ptr = 0;
> > > +
> > > + return str;
> > > +}
> > > +
> > > +char i915_scheduler_queue_status_chr(enum i915_scheduler_queue_status 
> > > status)
> > > +{
> > > + switch (status) {
> > > + case i915_sqs_none:
> > > + return 'N';
> > > +
> > > + case i915_sqs_queued:
> > > + return 'Q';
> > > +
> > > + case i915_sqs_popped:
> > > + return 'X';
> > > +
> > > + case i915_sqs_flying:
> > > + return 'F';
> > > +
> > > + case i915_sqs_complete:
> > > + return 'C';
> > > +
> > > + case i915_sqs_dead:
> > > + return 'D';
> > > +
> > > + default:
> > > + break;
> > > + }
> > > +
> > Bad indent.
> > 
> > > 
> > > + return '?';
> > > +}
> > > +
> > > +const char *i915_scheduler_queue_status_str(
> > > +                         enum i915_scheduler_queue_status status)
> > > +{
> > > + static char     str[50];
> > > +
> > > + switch (status) {
> > > + case i915_sqs_none:
> > > + return "None";
> > > +
> > > + case i915_sqs_queued:
> > > + return "Queued";
> > > +
> > > + case i915_sqs_popped:
> > > + return "Popped";
> > > +
> > > + case i915_sqs_flying:
> > > + return "Flying";
> > > +
> > > + case i915_sqs_complete:
> > > + return "Complete";
> > > +
> > > + case i915_sqs_dead:
> > > + return "Dead";
> > > +
> > > + default:
> > > + break;
> > > + }
> > > +
> > > + sprintf(str, "[Unknown_%d!]", status);
> > > + return str;
> > > +}
> > > +
> > > +const char *i915_scheduler_flag_str(uint32_t flags)
> > > +{
> > > + static char str[100];
> > > + char *ptr = str;
> > > +
> > > + *ptr = 0;
> > > +
> > > +#define TEST_FLAG(flag, msg)                                             
> > > \
> > > + do {                                                            \
> > > +         if (flags & (flag)) {                                   \
> > > +                 strcpy(ptr, msg);                               \
> > > +                 ptr += strlen(ptr);                             \
> > > +                 flags &= ~(flag);                               \
> > > +         }                                                       \
> > > + } while (0)
> > > +
> > > + TEST_FLAG(i915_sf_interrupts_enabled, "IntOn|");
> > > + TEST_FLAG(i915_sf_submitting,         "Submitting|");
> > > + TEST_FLAG(i915_sf_dump_force,         "DumpForce|");
> > > + TEST_FLAG(i915_sf_dump_details,       "DumpDetails|");
> > > + TEST_FLAG(i915_sf_dump_dependencies,  "DumpDeps|");
> > > + TEST_FLAG(i915_sf_dump_seqno,         "DumpSeqno|");
> > > +
> > > +#undef TEST_FLAG
> > > +
> > > + if (flags) {
> > > +         sprintf(ptr, "Unknown_0x%X!", flags);
> > > +         ptr += strlen(ptr);
> > > + }
> > > +
> > > + if (ptr == str)
> > > +         strcpy(str, "-");
> > > + else
> > > +         ptr[-1] = 0;
> > > +
> > > + return str;
> > > +};
> > > +
> > >   /**
> > >    * i915_scheduler_init - Initialise the scheduler.
> > >    * @dev: DRM device
> > > @@ -1024,6 +1135,193 @@ void i915_scheduler_work_handler(struct 
> > > work_struct *work)
> > >                   i915_scheduler_process_work(ring);
> > >   }
> > >   
> > > +static int i915_scheduler_dump_locked(struct intel_engine_cs *ring,
> > > +                               const char *msg)
> > > +{
> > > + struct drm_i915_private *dev_priv = ring->dev->dev_private;
> > > + struct i915_scheduler *scheduler = dev_priv->scheduler;
> > > + struct i915_scheduler_queue_entry *node;
> > > + int flying = 0, queued = 0, complete = 0, other = 0;
> > > + static int old_flying = -1, old_queued = -1, old_complete = -1;
> > > + bool b_dump;
> > > + char brkt[2] = { '<', '>' };
> > > +
> > > + if (!ring)
> > > +         return -EINVAL;
> > > +
> > > + list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
> > > +         if (I915_SQS_IS_QUEUED(node))
> > > +                 queued++;
> > > +         else if (I915_SQS_IS_FLYING(node))
> > > +                 flying++;
> > > +         else if (I915_SQS_IS_COMPLETE(node))
> > > +                 complete++;
> > > +         else
> > > +                 other++;
> > > + }
> > > +
> > > + b_dump = (flying != old_flying) ||
> > > +          (queued != old_queued) ||
> > > +          (complete != old_complete);
> > > + if (scheduler->flags[ring->id] & i915_sf_dump_force) {
> > > +         if (!b_dump) {
> > > +                 b_dump = true;
> > > +                 brkt[0] = '{';
> > > +                 brkt[1] = '}';
> > > +         }
> > > +
> > > +         scheduler->flags[ring->id] &= ~i915_sf_dump_force;
> > > + }
> > > +
> > > + if (b_dump) {
> > > +         old_flying   = flying;
> > > +         old_queued   = queued;
> > > +         old_complete = complete;
> > > +         DRM_DEBUG_DRIVER("<%s> Q:%02d, F:%02d, C:%02d, O:%02d, "
> > > +                          "Flags = %s, Next = %d:%d %c%s%c\n",
> > > +                          ring->name, queued, flying, complete, other,
> > > +                          
> > > i915_scheduler_flag_str(scheduler->flags[ring->id]),
> > > +                          dev_priv->request_uniq, dev_priv->next_seqno,
> > > +                          brkt[0], msg, brkt[1]);
> > Convert to debugfs, it's the de-facto.
> > 
> > > 
> > > + } else {
> > > +         /*DRM_DEBUG_DRIVER("<%s> Q:%02d, F:%02d, C:%02d, O:%02d"
> > > +                          ", Flags = %s, Next = %d:%d [%s]\n",
> > > +                          ring->name,
> > > +                          queued, flying, complete, other,
> > > +                          
> > > i915_scheduler_flag_str(scheduler->flags[ring->id]),
> > > +                          dev_priv->request_uniq, dev_priv->next_seqno, 
> > > msg); */
> > > +
> > Dead code to be removed from patches.
> > 
> > > 
> > > +         return 0;
> > > + }
> > > +
> > > + if (scheduler->flags[ring->id] & i915_sf_dump_seqno) {
> > > +         uint32_t seqno;
> > > +
> > > +         seqno = ring->get_seqno(ring, true);
> > > +
> > > +         DRM_DEBUG_DRIVER("<%s> Seqno = %d\n", ring->name, seqno);
> > > + }
> > > +
> > > + if (scheduler->flags[ring->id] & i915_sf_dump_details) {
> > > +         int i, deps;
> > > +         uint32_t count, counts[i915_sqs_MAX];
> > > +
> > > +         memset(counts, 0x00, sizeof(counts));
> > > +
> > > +         list_for_each_entry(node, &scheduler->node_queue[ring->id], 
> > > link) {
> > > +                 if (node->status < i915_sqs_MAX) {
> > > +                         count = counts[node->status]++;
> > > +                 } else {
> > > +                         DRM_DEBUG_DRIVER("<%s>   Unknown status: %d!\n",
> > > +                                          ring->name, node->status);
> > > +                         count = -1;
> > > +                 }
> > > +
> > > +                 deps = 0;
> > > +                 for (i = 0; i < node->num_deps; i++)
> > > +                         if (i915_scheduler_is_dependency_valid(node, i))
> > > +                                 deps++;
> > > +
> > > +                 DRM_DEBUG_DRIVER("<%s>   %c:%02d> uniq = %d, seqno"
> > > +                                  " = %d/%s, deps = %d / %d, %s [pri = "
> > > +                                  "%4d]\n", ring->name,
> > > +                                  
> > > i915_scheduler_queue_status_chr(node->status),
> > > +                                  count,
> > > +                                  node->params.request->uniq,
> > > +                                  node->params.request->seqno,
> > > +                                  node->params.ring->name,
> > > +                                  deps, node->num_deps,
> > > +                                  i915_qe_state_str(node),
> > > +                                  node->priority);
> > > +
> > > +                 if ((scheduler->flags[ring->id] & 
> > > i915_sf_dump_dependencies)
> > > +                         == 0)
> > > +                         continue;
> > > +
> > > +                 for (i = 0; i < node->num_deps; i++)
> > > +                         if (node->dep_list[i])
> > > +                                 DRM_DEBUG_DRIVER("<%s>       |-%c:"
> > > +                                         "%02d%c uniq = %d, seqno = 
> > > %d/%s, %s [pri = %4d]\n",
> > > +                                         ring->name,
> > > +                                         
> > > i915_scheduler_queue_status_chr(node->dep_list[i]->status),
> > > +                                         i,
> > > +                                         
> > > i915_scheduler_is_dependency_valid(node, i)
> > > +                                                 ? '>' : '#',
> > > +                                         
> > > node->dep_list[i]->params.request->uniq,
> > > +                                         
> > > node->dep_list[i]->params.request->seqno,
> > > +                                         
> > > node->dep_list[i]->params.ring->name,
> > > +                                         
> > > i915_qe_state_str(node->dep_list[i]),
> > > +                                         node->dep_list[i]->priority);
> > > +         }
> > > + }
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +/**
> > > + * i915_scheduler_dump - dump the scheduler's internal state to the 
> > > debug log.
> > > + * @ring: Ring to dump info for
> > > + * @msg: A reason why it is being dumped
> > > + * For debugging purposes, it can be very useful to see the internal 
> > > state of
> > > + * the scheduler for a given ring.
> > > + */
> > > +int i915_scheduler_dump(struct intel_engine_cs *ring, const char *msg)
> > > +{
> > > + struct drm_i915_private *dev_priv = ring->dev->dev_private;
> > > + struct i915_scheduler *scheduler = dev_priv->scheduler;
> > > + unsigned long flags;
> > > + int ret;
> > > +
> > > + spin_lock_irqsave(&scheduler->lock, flags);
> > > + ret = i915_scheduler_dump_locked(ring, msg);
> > > + spin_unlock_irqrestore(&scheduler->lock, flags);
> > > +
> > > + return ret;
> > > +}
> > > +
> > This kind of stuff especially should be static functions in
> > i915_debugfs.c. And then they don't need kerneldoc either, because we
> > expect no sane invidual to call them.
> > 
> > > 
> > > +static int i915_scheduler_dump_all_locked(struct drm_device *dev,
> > > +                                   const char *msg)
> > > +{
> > > + struct drm_i915_private *dev_priv = dev->dev_private;
> > > + struct i915_scheduler *scheduler = dev_priv->scheduler;
> > > + struct intel_engine_cs *ring;
> > > + int i, r, ret = 0;
> > > +
> > > + for_each_ring(ring, dev_priv, i) {
> > > +         scheduler->flags[ring->id] |= i915_sf_dump_force   |
> > > +                                       i915_sf_dump_details |
> > > +                                       i915_sf_dump_seqno   |
> > > +                                       i915_sf_dump_dependencies;
> > This looks really bad to me, invasive debugging. Rather to be carried
> > as function parameters in the debug functions, and they should be
> > uppercase enum in i915_debugfs.c too.
> > 
> > > 
> > > +         r = i915_scheduler_dump_locked(ring, msg);
> > > +         if (ret == 0)
> > > +                 ret = r;
> > > + }
> > > +
> > > + return ret;
> > > +}
> > > +
> > > +/**
> > > + * i915_scheduler_dump_all - dump the scheduler's internal state to the 
> > > debug
> > > + * log.
> > > + * @dev: DRM device
> > > + * @msg: A reason why it is being dumped
> > > + * For debugging purposes, it can be very useful to see the internal 
> > > state of
> > > + * the scheduler.
> > > + */
> > > +int i915_scheduler_dump_all(struct drm_device *dev, const char *msg)
> > > +{
> > > + struct drm_i915_private *dev_priv = dev->dev_private;
> > > + struct i915_scheduler *scheduler = dev_priv->scheduler;
> > > + unsigned long flags;
> > > + int ret;
> > > +
> > > + spin_lock_irqsave(&scheduler->lock, flags);
> > > + ret = i915_scheduler_dump_all_locked(dev, msg);
> > > + spin_unlock_irqrestore(&scheduler->lock, flags);
> > > +
> > > + return ret;
> > > +}
> > > +
> > >   static int i915_scheduler_submit_max_priority(struct intel_engine_cs 
> > > *ring,
> > >                                                 bool is_locked)
> > >   {
> > > @@ -1246,10 +1544,10 @@ int i915_scheduler_closefile(struct drm_device 
> > > *dev, struct drm_file *file)
> > >                                   continue;
> > >   
> > >                           if (!I915_SQS_IS_COMPLETE(node))
> > > -                         DRM_DEBUG_DRIVER("Closing file handle with 
> > > outstanding work: %d:%d/%d on %s\n",
> > > +                         DRM_DEBUG_DRIVER("Closing file handle with 
> > > outstanding work: %d:%d/%s on %s\n",
> > >                                                    
> > > node->params.request->uniq,
> > >                                                    
> > > node->params.request->seqno,
> > > -                                          node->status,
> > > +                                          i915_qe_state_str(node),
> > If the code is to be called from outside of i915_debugfs.c, it should
> > really be written in a more safe manner than static char, especially as
> > it is not much of an effort to make a table to index.
> > 
> > Regards, Joonas
> > 
> > > 
> > >                                                    ring->name);
> > >   
> > >                           
> > > i915_scheduler_file_queue_dec(node->params.file);
> > > diff --git a/drivers/gpu/drm/i915/i915_scheduler.h 
> > > b/drivers/gpu/drm/i915/i915_scheduler.h
> > > index b78de12..a071ebb 100644
> > > --- a/drivers/gpu/drm/i915/i915_scheduler.h
> > > +++ b/drivers/gpu/drm/i915/i915_scheduler.h
> > > @@ -41,6 +41,9 @@ enum i915_scheduler_queue_status {
> > >           /* Limit value for use with arrays/loops */
> > >           i915_sqs_MAX
> > >   };
> > > +char i915_scheduler_queue_status_chr(enum i915_scheduler_queue_status 
> > > status);
> > > +const char *i915_scheduler_queue_status_str(
> > > +                         enum i915_scheduler_queue_status status);
> > >   
> > >   #define I915_SQS_IS_QUEUED(node)        (((node)->status == 
> > > i915_sqs_queued))
> > >   #define I915_SQS_IS_FLYING(node)        (((node)->status == 
> > > i915_sqs_flying))
> > > @@ -65,6 +68,7 @@ struct i915_scheduler_queue_entry {
> > >           unsigned long                       stamp;
> > >           struct list_head                    link;
> > >   };
> > > +const char *i915_qe_state_str(struct i915_scheduler_queue_entry *node);
> > >   
> > >   struct i915_scheduler {
> > >           struct list_head    node_queue[I915_NUM_RINGS];
> > > @@ -82,9 +86,17 @@ struct i915_scheduler {
> > >   
> > >   /* Flag bits for i915_scheduler::flags */
> > >   enum {
> > > + /* Internal state */
> > >           i915_sf_interrupts_enabled  = (1 << 0),
> > >           i915_sf_submitting          = (1 << 1),
> > > +
> > > + /* Dump/debug flags */
> > > + i915_sf_dump_force          = (1 << 8),
> > > + i915_sf_dump_details        = (1 << 9),
> > > + i915_sf_dump_dependencies   = (1 << 10),
> > > + i915_sf_dump_seqno          = (1 << 11),
> > >   };
> > > +const char *i915_scheduler_flag_str(uint32_t flags);
> > >   
> > >   bool i915_scheduler_is_enabled(struct drm_device *dev);
> > >   int i915_scheduler_init(struct drm_device *dev);
> > > @@ -99,6 +111,9 @@ void i915_scheduler_work_handler(struct work_struct 
> > > *work);
> > >   int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked);
> > >   int i915_scheduler_flush_stamp(struct intel_engine_cs *ring,
> > >                                  unsigned long stamp, bool is_locked);
> > > +int i915_scheduler_dump(struct intel_engine_cs *ring,
> > > +                 const char *msg);
> > > +int i915_scheduler_dump_all(struct drm_device *dev, const char *msg);
> > >   bool i915_scheduler_is_request_tracked(struct drm_i915_gem_request *req,
> > >                                          bool *completed, bool *busy);
> > >   bool i915_scheduler_file_queue_is_full(struct drm_file *file);
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

Reply via email to