Re: [Intel-gfx] [PATCH] drm/i915: Add support for resetting the SO write pointers on gen7.

2012-01-03 Thread Ben Widawsky

On 12/29/2011 05:52 PM, Eric Anholt wrote:

These registers are automatically incremented by the hardware during
transform feedback to track where the next streamed vertex output
should go.  Unlike the previous generation, which had a packet for
setting the corresponding registers to a defined value, gen7 only has
MI_LOAD_REGISTER_IMM to do so.  That's a secure packet (since it loads
an arbitrary register), so we need to do it from the kernel, and it
needs to be settable atomically with the batchbuffer execution so that
two clients doing transform feedback don't stomp on each others'
state.

Instead of building a more complicated interface involcing setting the
registers to a specific value, just set them to 0 when asked and
userland can tweak its pointers accordingly.

Signed-off-by: Eric Anholt
---
  drivers/gpu/drm/i915/i915_dma.c|3 ++
  drivers/gpu/drm/i915/i915_gem_execbuffer.c |   31 
  drivers/gpu/drm/i915/i915_reg.h|6 +
  include/drm/i915_drm.h |4 +++
  4 files changed, 44 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a9ae374..1add685 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -781,6 +781,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_RELAXED_DELTA:
value = 1;
break;
+   case I915_PARAM_HAS_GEN7_SOL_RESET:
+   value = 1;
+   break;
default:
DRM_DEBUG_DRIVER("Unknown parameter %d\n",
 param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c 
b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a4e4f3a..126144a 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -971,6 +971,31 @@ i915_gem_execbuffer_retire_commands(struct drm_device *dev,
  }

  static int
+i915_reset_gen7_sol_offsets(struct drm_device *dev,
+   struct intel_ring_buffer *ring)
+{
+   drm_i915_private_t *dev_priv = dev->dev_private;
+   int ret, i;
+
+   if (!IS_GEN7(dev) || ring !=&dev_priv->ring[RCS])
+   return 0;
+
+   ret = intel_ring_begin(ring, 4 * 3);
+   if (ret)
+   return ret;
+
+   for (i = 0; i<  4; i++) {
+   intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
+   intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(i));
+   intel_ring_emit(ring, 0);
+   }
+
+   intel_ring_advance(ring);
+
+   return 0;
+}
+
+static int
  i915_gem_do_execbuffer(struct drm_device *dev, void *data,
   struct drm_file *file,
   struct drm_i915_gem_execbuffer2 *args,
@@ -1182,6 +1207,12 @@ i915_gem_do_execbuffer(struct drm_device *dev, void 
*data,
goto err;
}

+   if (args->flags&  I915_EXEC_GEN7_SOL_RESET) {
+   ret = i915_reset_gen7_sol_offsets(dev, ring);
+   if (ret)
+   goto err;
+   }
+
trace_i915_gem_ring_dispatch(ring, seqno);

exec_start = batch_obj->gtt_offset + args->batch_start_offset;
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 9d15474..54a18a4 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3583,4 +3583,10 @@
  #define GEN7_AUD_CNTRL_ST_A   0xE50B4
  #define GEN7_AUD_CNTRL_ST20xE50C0

+/* These are the 4 32-bit write offset registers for each stream
+ * output buffer.  It determines the offset from the
+ * 3DSTATE_SO_BUFFERs that the next streamed vertex output goes to.
+ */
+#define GEN7_SO_WRITE_OFFSET(n)(0x5280 + (n) * 4)
+
  #endif /* _I915_REG_H_ */
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 28c0d11..5bb6a6a 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -291,6 +291,7 @@ typedef struct drm_i915_irq_wait {
  #define I915_PARAM_HAS_COHERENT_RINGS  13
  #define I915_PARAM_HAS_EXEC_CONSTANTS  14
  #define I915_PARAM_HAS_RELAXED_DELTA   15
+#define I915_PARAM_HAS_GEN7_SOL_RESET   16

  typedef struct drm_i915_getparam {
int param;
@@ -653,6 +654,9 @@ struct drm_i915_gem_execbuffer2 {
__u64 rsvd2;
  };

+/** Resets the SO write offset registers for transform feedback on gen7. */
+#define I915_EXEC_GEN7_SOL_RESET   (1<<8)
+
  struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;


Is this something we want to carry long term wrt ABI. I really want to 
get context/ppgtt stuff out the door relatively soon (context should be 
ready to test really soon actually).


I'm totally unfamiliar with this register, but is this what you want the 
interface to be permanently?


Ben
___
Intel-gfx mailing list
Intel-gfx@li

[Intel-gfx] [PATCH] intel: Update for new i915_drm.h defines.

2012-01-03 Thread Eric Anholt
---

This syncs the code up to the kernel as of the gen7 SOL changes.  It
would be nice if doing this was just a straight copy of the kernel
code -- there are two diffs left out.  One is this hunk:

-#ifdef __KERNEL__
-/* For use by IPS driver */
-extern unsigned long i915_read_mch_val(void);
-extern bool i915_gpu_raise(void);
-extern bool i915_gpu_lower(void);
-extern bool i915_gpu_busy(void);
-extern bool i915_gpu_turbo_disable(void);
-#endif

Does anyone see any real reason to be dropping this?

The other is removing the sparse __user annotations on the structs.
Could we just patch the kernel to #define it away for userland, so we
could make updating this file just a matter of "cp"?

 include/drm/i915_drm.h |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index adc2392..846897c 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -228,7 +228,7 @@ typedef struct _drm_i915_sarea {
 #define DRM_IOCTL_I915_GEM_GET_APERTUREDRM_IOR  (DRM_COMMAND_BASE + 
DRM_I915_GEM_GET_APERTURE, struct drm_i915_gem_get_aperture)
 #define DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_GET_PIPE_FROM_CRTC_ID, struct drm_i915_get_pipe_from_crtc_id)
 #define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
-#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE   DRM_IOW(DRM_COMMAND_BASE + 
DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
+#define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE   DRM_IOW(DRM_COMMAND_BASE + 
DRM_I915_OVERLAY_PUT_IMAGE, struct drm_intel_overlay_put_image)
 #define DRM_IOCTL_I915_OVERLAY_ATTRS   DRM_IOWR(DRM_COMMAND_BASE + 
DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
 
 /* Allow drivers to submit batchbuffers directly to hardware, relying
@@ -282,6 +282,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_COHERENT_RINGS   13
 #define I915_PARAM_HAS_EXEC_CONSTANTS   14
 #define I915_PARAM_HAS_RELAXED_DELTA15
+#define I915_PARAM_HAS_GEN7_SOL_RESET   16
 
 typedef struct drm_i915_getparam {
int param;
@@ -644,6 +645,9 @@ struct drm_i915_gem_execbuffer2 {
__u64 rsvd2;
 };
 
+/** Resets the SO write offset registers for transform feedback on gen7. */
+#define I915_EXEC_GEN7_SOL_RESET   (1<<8)
+
 struct drm_i915_gem_pin {
/** Handle of the buffer to be pinned. */
__u32 handle;
-- 
1.7.7.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/3] intel: Add regression tests for batch decode.

2012-01-03 Thread Eric Anholt
On Wed, 4 Jan 2012 00:24:28 +0100, Daniel Vetter  wrote:
> On Tue, Jan 03, 2012 at 03:05:26PM -0800, Eric Anholt wrote:
> > The .batch was generated using the dump-a-batch branch of
> > 
> > git://people.freedesktop.org/~anholt/mesa
> > 
> > using glxgears on gen7 hardware, using INTEL_DEVID_OVERRIDE for
> > non-gen7 (this means that offsets in the buffers for non-gen7 are 0!).
> > The .ref was generated by:
> > 
> > ./test_decode tests/gen7-3d.batch -dump.
> > 
> > The .sh exists because you can't supply arguments to tests using the
> > simple automake tests driver.  Something reasonable could be done
> > using automake's parallel-tests driver (in fact, a previous version of
> > the patch did that), but I was concerned that:
> > 
> > 1) The parallel-tests driver is documented to be unstable -- they may
> >change interfaces on us later.
> > 2) The parallel-tests driver hides the output of tests in .log files
> >scattered all over the tree, which was ugly and more painful to
> >work with.
> > ---
> >  intel/Makefile.am |   17 +
> >  intel/tests/gen7-3d.batch |  Bin 0 -> 4504 bytes
> >  intel/tests/gen7-3d.batch-ref.txt | 1350 
> > +
> 
> I'm missing the *.batch* stuff for gen4-gen6 mentioned in the Makefile.am.
> From looking throught the code I've also expected the *.batch to be a
> symlink to test-batch.sh.

Failure to git add.  Fixed locally.


pgpO6FKsCf0fD.pgp
Description: PGP signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH] drm/i915: Add support for resetting the SO write pointers on gen7.

2012-01-03 Thread Eric Anholt
On Mon, 2 Jan 2012 13:04:37 -0200, Eugeni Dodonov  wrote:
> On Thu, Dec 29, 2011 at 23:52, Eric Anholt  wrote:
> 
> > These registers are automatically incremented by the hardware during
> > transform feedback to track where the next streamed vertex output
> > should go.  Unlike the previous generation, which had a packet for
> > setting the corresponding registers to a defined value, gen7 only has
> > MI_LOAD_REGISTER_IMM to do so.  That's a secure packet (since it loads
> > an arbitrary register), so we need to do it from the kernel, and it
> > needs to be settable atomically with the batchbuffer execution so that
> > two clients doing transform feedback don't stomp on each others'
> > state.
> >
> > Instead of building a more complicated interface involcing setting the
> > registers to a specific value, just set them to 0 when asked and
> > userland can tweak its pointers accordingly.

> > diff --git a/drivers/gpu/drm/i915/i915_dma.c
> > b/drivers/gpu/drm/i915/i915_dma.c
> > index a9ae374..1add685 100644
> > --- a/drivers/gpu/drm/i915/i915_dma.c
> > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > @@ -781,6 +781,9 @@ static int i915_getparam(struct drm_device *dev, void
> > *data,
> >case I915_PARAM_HAS_RELAXED_DELTA:
> >value = 1;
> >break;
> > +   case I915_PARAM_HAS_GEN7_SOL_RESET:
> > +   value = 1;
> >
> 
> Wouldn't it be better to have:
>value = IS_GEN7(dev);
> 
> as it is gen7+-specific item. This way, userspace could check for this
> support early, and avoid setting the flag on the batchbuffer in vain on
> pre-gen7 architectures.
> 
> Either way, it will work, so:
> 
> Reviewed-by: Eugeni Dodonov 

The flag only gets set by userland in the gen7 code, so it wouldn't
change anything.


pgpAW9GPx0rPu.pgp
Description: PGP signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [RFC] [PATCH] drm/i915: Busy-wait for the seqno to update after an IRQ

2012-01-03 Thread Chris Wilson
One issue we face is where the chipset is not yet as coherent as it
should be, and the interrupt arrives before the seqno is written into
the CPU cache. (NB, this is usually due to a missing bit of register
setup and should be resolved in the future.) In the meantime, in order
to begin performance testing, albeit at the cost of extra power
consumption, enable the driver to spin upon the seqno after the irq
arrives.

Signed-off-by: Chris Wilson 
---

An experimental patch to sketch out Keith's variation of spinning only
upon receipt of the interrupt. Please review and test.
-Chris

---
 drivers/gpu/drm/i915/i915_drv.c |2 +-
 drivers/gpu/drm/i915/i915_drv.h |1 +
 drivers/gpu/drm/i915/i915_irq.c |3 ++
 drivers/gpu/drm/i915/intel_ringbuffer.c |   33 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h |1 +
 5 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 36e2938..7d6f078 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -107,7 +107,7 @@ unsigned int i915_irq_notify __read_mostly = 
I915_IRQ_NOTIFY_INTERRUPT;
 module_param_named(irq_notify, i915_irq_notify, int, 0600);
 MODULE_PARM_DESC(irq_notify,
"Choose the request notification method, can be any "
-   "combination of irq (0x1) or polling (0x2). "
+   "combination of irq (0x1), polling (0x2) or spinning-on-irq 
(0x4). "
"WARNING: Selecting no method will result in busy-waits. "
"(default: is to use irq only)");
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 49fa8e9..96a7e00 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1009,6 +1009,7 @@ extern bool i915_enable_hangcheck __read_mostly;
 extern unsigned int i915_irq_notify __read_mostly;
 #define I915_IRQ_NOTIFY_INTERRUPT  0x1
 #define I915_IRQ_NOTIFY_POLL   0x2
+#define I915_IRQ_NOTIFY_SPIN   0x4
 
 extern int i915_suspend(struct drm_device *dev, pm_message_t state);
 extern int i915_resume(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index b52bbf0..50f95fd 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -370,6 +370,9 @@ static void notify_ring(struct drm_device *dev,
  jiffies +
  msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
}
+
+   if (ring->irq_notify & I915_IRQ_NOTIFY_SPIN)
+   queue_work(dev_priv->wq, &ring->irq_work);
 }
 
 static void gen6_pm_rps_work(struct work_struct *work)
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c 
b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 5b8db53..b653d76 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -261,6 +261,25 @@ u32 intel_ring_get_active_head(struct intel_ring_buffer 
*ring)
return I915_READ(acthd_reg);
 }
 
+static void intel_ring_irq_work(struct work_struct *work)
+{
+   struct intel_ring_buffer *ring =
+   container_of(work, struct intel_ring_buffer, irq_work);
+   int spin = 100;
+
+   do {
+   /* XXX spot the missing memory barriers */
+   u32 seqno = ring->waiting_seqno;
+   if (seqno == 0)
+   break;
+
+   if (i915_seqno_passed(ring->get_seqno(ring), seqno)) {
+   wake_up_all(&ring->irq_queue);
+   break;
+   }
+   } while (--spin && ring->irq_notify);
+}
+
 static void intel_ring_irq_elapsed(unsigned long data)
 {
struct intel_ring_buffer *ring = (struct intel_ring_buffer *)data;
@@ -335,6 +354,8 @@ static int init_ring_common(struct intel_ring_buffer *ring)
 
setup_timer(&ring->irq_timer, intel_ring_irq_elapsed,
(unsigned long) ring);
+   INIT_WORK(&ring->irq_work, intel_ring_irq_work);
+
return 0;
 }
 
@@ -1566,10 +1587,18 @@ void intel_ring_put_irq(struct intel_ring_buffer *ring)
 {
spin_lock(&ring->irq_lock);
if (--ring->irq_refcount == 0) {
-   if (ring->irq_notify & I915_IRQ_NOTIFY_POLL)
+   unsigned irq_notify;
+
+   irq_notify = ring->irq_notify;
+   ring->irq_notify = 0;
+
+   if (irq_notify & I915_IRQ_NOTIFY_SPIN)
+   cancel_work_sync(&ring->irq_work);
+
+   if (irq_notify & I915_IRQ_NOTIFY_POLL)
del_timer(&ring->irq_timer);
 
-   if (ring->irq_notify & I915_IRQ_NOTIFY_INTERRUPT)
+   if (irq_notify & I915_IRQ_NOTIFY_INTERRUPT)
ring->irq_put(ring);
}
spin_unlock(&ring->irq_lock);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
b/drivers/gpu/drm/i915/intel_ringbuffer.h
inde

Re: [Intel-gfx] gen7 missed IRQ workaround series.

2012-01-03 Thread Keith Packard
On Wed, 4 Jan 2012 00:04:18 +0100, Daniel Vetter  wrote:

> I kinda prefer Chris' approach of sticking with irqs, but backing it up
> with a timer in the msec range. Can't find that patch though atm (iirc
> it's in bugzilla somewhere).

I think we've resolved that the interrupt arrives, but that it is not
serialized wrt the memory write. So, what I'd love to see is whether
waiting for the irq and then busy looping for 'a while' works. This
would allow for long-running operations to idle the CPU and then hit the
interrupt and spin until the memory write is recognized.

Any solution which can leave operations unacknowledged for 'ms'
timeframes seems like a potential for serious performance
problems. Eric's simple spinning approach seems fine for the BLT ring
which we don't use that often; something more complicated and yet not
entirely timer-based might be more efficient for longer-running operations.

-- 
keith.pack...@intel.com


pgpCYfLNh5Ipj.pgp
Description: PGP signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 06/43] drm/i915: protect force_wake_(get|put) with the gt_lock

2012-01-03 Thread Keith Packard
On Tue, 3 Jan 2012 22:49:52 +0100, Daniel Vetter  wrote:

> Nope, current hangcheck blows up, and we have an i-g-t testcase for it
> (which the commit msg clearly states). There are also numerous bug
> reports where a dying gpu results in tons of
> WARN_ON(!mutex_locked(dev->struct_mutex)) noise in dmesg (which drowns
> out the gpu hang warning). The locking change fixes this.

Ah, ok, that makes sense. Of course, hangcheck *could* have just taken
struct_mutex were it run in a suitable context.

> The patch adds the required locking to i915_reset.

No, the spinlock protects the forcewake_count access and not the actual
register access, which leaves all kinds of potential for races in
threads not also holding struct_mutex while accessing registers.

If you want a spinlock to protect the register access, it must surround
the whole operation.

-- 
keith.pack...@intel.com


pgpgrBG3ePrUe.pgp
Description: PGP signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 3/3] intel: Add regression tests for batch decode.

2012-01-03 Thread Daniel Vetter
On Tue, Jan 03, 2012 at 03:05:26PM -0800, Eric Anholt wrote:
> The .batch was generated using the dump-a-batch branch of
> 
> git://people.freedesktop.org/~anholt/mesa
> 
> using glxgears on gen7 hardware, using INTEL_DEVID_OVERRIDE for
> non-gen7 (this means that offsets in the buffers for non-gen7 are 0!).
> The .ref was generated by:
> 
> ./test_decode tests/gen7-3d.batch -dump.
> 
> The .sh exists because you can't supply arguments to tests using the
> simple automake tests driver.  Something reasonable could be done
> using automake's parallel-tests driver (in fact, a previous version of
> the patch did that), but I was concerned that:
> 
> 1) The parallel-tests driver is documented to be unstable -- they may
>change interfaces on us later.
> 2) The parallel-tests driver hides the output of tests in .log files
>scattered all over the tree, which was ugly and more painful to
>work with.
> ---
>  intel/Makefile.am |   17 +
>  intel/tests/gen7-3d.batch |  Bin 0 -> 4504 bytes
>  intel/tests/gen7-3d.batch-ref.txt | 1350 
> +

I'm missing the *.batch* stuff for gen4-gen6 mentioned in the Makefile.am.
>From looking throught the code I've also expected the *.batch to be a
symlink to test-batch.sh.

Otherwise this looks nice. I'm not really sold on this catching bugs, but
it certainly helps in self-documenting patches to the decoder by including
the relevant output changes in the diff.
-Daniel
-- 
Daniel Vetter
Mail: dan...@ffwll.ch
Mobile: +41 (0)79 365 57 48
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 2/3] intel: Add a regression test program for intel_decode.c.

2012-01-03 Thread Eric Anholt
---
 configure.ac|2 +
 intel/.gitignore|1 +
 intel/Makefile.am   |5 ++
 intel/test_decode.c |  190 +++
 4 files changed, 198 insertions(+), 0 deletions(-)
 create mode 100644 intel/.gitignore
 create mode 100644 intel/test_decode.c

diff --git a/configure.ac b/configure.ac
index 5f144bc..540622f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -114,6 +114,8 @@ AC_CHECK_FUNCS([clock_gettime], [CLOCK_LIB=],
  [AC_MSG_ERROR([Couldn't find clock_gettime])])])
 AC_SUBST([CLOCK_LIB])
 
+AC_CHECK_FUNCS([open_memstream], [HAVE_OPEN_MEMSTREAM=yes])
+
 dnl Use lots of warning flags with with gcc and compatible compilers
 
 dnl Note: if you change the following variable, the cache is automatically
diff --git a/intel/.gitignore b/intel/.gitignore
new file mode 100644
index 000..528b408
--- /dev/null
+++ b/intel/.gitignore
@@ -0,0 +1 @@
+test_decode
diff --git a/intel/Makefile.am b/intel/Makefile.am
index 801210f..2d3d8c4 100644
--- a/intel/Makefile.am
+++ b/intel/Makefile.am
@@ -54,4 +54,9 @@ libdrm_intelincludedir = ${includedir}/libdrm
 libdrm_intelinclude_HEADERS = intel_bufmgr.h \
  intel_debug.h
 
+# This may be interesting even outside of "make check", due to the -dump 
option.
+noinst_PROGRAMS = test_decode
+
+test_decode_LDADD = libdrm_intel.la
+
 pkgconfig_DATA = libdrm_intel.pc
diff --git a/intel/test_decode.c b/intel/test_decode.c
new file mode 100644
index 000..d41b0b2
--- /dev/null
+++ b/intel/test_decode.c
@@ -0,0 +1,190 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "config.h"
+#include "intel_bufmgr.h"
+#include "intel_chipset.h"
+
+#define HW_OFFSET 0x1230
+
+static void
+usage(void)
+{
+   fprintf(stderr, "usage:\n");
+   fprintf(stderr, "  test_decode \n");
+   fprintf(stderr, "  test_decode  -dump\n");
+   exit(1);
+}
+
+static void
+read_file(const char *filename, void **ptr, size_t *size)
+{
+   int fd, ret;
+   struct stat st;
+
+   fd = open(filename, O_RDONLY);
+   if (fd == -1)
+   errx(1, "couldn't open `%s'", filename);
+
+   ret = fstat(fd, &st);
+   if (ret)
+   errx(1, "couldn't stat `%s'", filename);
+
+   *size = st.st_size;
+   *ptr = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+   if (*ptr == MAP_FAILED)
+   errx(1, "couldn't map `%s'", filename);
+
+   close(fd);
+}
+
+static void
+dump_batch(struct drm_intel_decode *ctx, const char *batch_filename)
+{
+   void *batch_ptr;
+   size_t batch_size;
+
+   read_file(batch_filename, &batch_ptr, &batch_size);
+
+   drm_intel_decode_set_batch_pointer(ctx, batch_ptr, HW_OFFSET,
+  batch_size / 4);
+   drm_intel_decode_set_output_file(ctx, stdout);
+
+   drm_intel_decode(ctx);
+}
+
+static void
+compare_batch(struct drm_intel_decode *ctx, const char *batch_filename)
+{
+   FILE *out = NULL;
+   void *ptr, *ref_ptr, *batch_ptr;
+   size_t size, ref_size, batch_size;
+   const char *ref_suffix = "-ref.txt";
+   char *ref_filename;
+
+   ref_filename = malloc(strlen(batch_filename) + strlen(ref_suffix) + 1);
+   sprintf(ref_filename, "%s%s", batch_filename, ref_suffix);
+
+   /* Read the batch and reference. */
+   read_file(batch_filename, &batch_ptr, &batch_size);
+   read_file(ref_filename, &ref_ptr, &ref_size);
+
+   /* Set up our decode output in memory, because I don't want to
+* figure out how to output to a file in a safe and sane way
+* inside of an automake project's test infrastructure.
+*/
+#ifdef HAVE_OPEN_MEMSTREAM
+   out = open_memstream(

[Intel-gfx] intel-decode regression testing

2012-01-03 Thread Eric Anholt
I tried to make this as easy as possible for updating the decode
output -- it should be a matter of looking at the diff produced by
make check, approving of it, and copying the -new.txt over the
-ref.txt.

Tested with make check and make distcheck, including with corrupting a
ref file.

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] [PATCH 1/3] intel: Add an interface for setting the output file for decode.

2012-01-03 Thread Eric Anholt
Consumers often want to choose stdout vs stderr, and for testing I
want to output to an open_memstream file.
---
 intel/intel_bufmgr.h |2 ++
 intel/intel_decode.c |   14 --
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 68017a5..85da8b9 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -34,6 +34,7 @@
 #ifndef INTEL_BUFMGR_H
 #define INTEL_BUFMGR_H
 
+#include 
 #include 
 
 struct drm_clip_rect;
@@ -199,6 +200,7 @@ void drm_intel_decode_set_dump_past_end(struct 
drm_intel_decode *ctx,
int dump_past_end);
 void drm_intel_decode_set_head_tail(struct drm_intel_decode *ctx,
uint32_t head, uint32_t tail);
+void drm_intel_decode_set_output_file(struct drm_intel_decode *ctx, FILE *out);
 void drm_intel_decode(struct drm_intel_decode *ctx);
 
 
diff --git a/intel/intel_decode.c b/intel/intel_decode.c
index e80e840..81ef712 100644
--- a/intel/intel_decode.c
+++ b/intel/intel_decode.c
@@ -33,6 +33,9 @@
 
 /* Struct for tracking drm_intel_decode state. */
 struct drm_intel_decode {
+   /** stdio file where the output should land.  Defaults to stdout. */
+   FILE *out;
+
/** PCI device ID. */
uint32_t devid;
 
@@ -3558,6 +3561,7 @@ drm_intel_decode_context_alloc(uint32_t devid)
return NULL;
 
ctx->devid = devid;
+   ctx->out = stdout;
 
return ctx;
 }
@@ -3592,6 +3596,13 @@ drm_intel_decode_set_head_tail(struct drm_intel_decode 
*ctx,
ctx->tail = tail;
 }
 
+void
+drm_intel_decode_set_output_file(struct drm_intel_decode *ctx,
+FILE *out)
+{
+   ctx->out = out;
+}
+
 /**
  * Decodes an i830-i915 batch buffer, writing the output to stdout.
  *
@@ -3618,12 +3629,11 @@ drm_intel_decode(struct drm_intel_decode *ctx)
devid = ctx->devid;
head_offset = ctx->head;
tail_offset = ctx->tail;
+   out = ctx->out;
 
saved_s2_set = 0;
saved_s4_set = 1;
 
-   out = stdout;
-
while (index < count) {
switch ((data[index] & 0xe000) >> 29) {
case 0x0:
-- 
1.7.7.3

___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] gen7 missed IRQ workaround series.

2012-01-03 Thread Daniel Vetter
On Thu, Dec 22, 2011 at 02:54:58PM -0800, Eric Anholt wrote:
> This is the minimal patchset I have for working around the missed
> IRQs.  I've been running it since Monday doing test runs for other
> work, and it appears to be stable.

I'm a bit late to the party with two things:
- The bsd ring is similarly broken and
- we can easily wait for a few msec (probably not when running piglit
  though) here, so busylooping is imo not appropriate.

I kinda prefer Chris' approach of sticking with irqs, but backing it up
with a timer in the msec range. Can't find that patch though atm (iirc
it's in bugzilla somewhere).

Cheers, Daniel
-- 
Daniel Vetter
Mail: dan...@ffwll.ch
Mobile: +41 (0)79 365 57 48
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] The latest status of intel graphics on kernel 3.2

2012-01-03 Thread Jesse Barnes
On Sat, 31 Dec 2011 08:41:34 +
"Sun, Yi"  wrote:

> The platforms we covered are IvyBridge, SandyBridge, IronLake, PineView and 
> GM965
> The version of Kernel: 
> (drm-intel-next)64a742fac3a22f57303d8f1b7e347350a1c48254
> 
> General speaking, the issue is mainly related to 3 pipe and eDP on IvyBridge.
> 
> 4 new bugs are filed:
> Bug 44250 - 
> [IVB]"[drm:intel_dsm_platform_mux_info] *ERROR* MUX INFO call failed" while 
> booting with monitor
> Bug 44304 - [ivb 3pipe] 
> "*ERROR* failed to set mode on [CRTC:5]" while plug in the 3th monitor
> Bug 44305 - [IVB]The Edp 
> can't work while booting with a monitor
> Bug 44309 - [IVB eDP] 3 
> pipe doesn't work with eDP monitor

> Old bugs which still exists or just need code merge:
> Bug 42263 - [ILK] Plug in 
> a monitor will make the eDP black screen
> 41976 [IVB] screen turn 
> to be black while switching between console and x-window with 3-pipe active
> 41917 [IVB] all three 
> screens are frozen while starting gnome-session with 3-pipe active
> 42731 [IVB] The whole 
> monitor connected to DP port is black screen while hotplugin VGA

Just to confirm, these are fixed by
 [PATCH 1/2] drm/i915: don't disable a PCH DPLL that's in use
and 
 [PATCH 2/2] drm/i915: only set the intel_crtc DPMS mode to on if the
   mode set succeeded

right?  Or are there other patches needed as well?  Can you reply to
them with your tested-by so Keith can pick them up?

Thanks,
-- 
Jesse Barnes, Intel Open Source Technology Center


signature.asc
Description: PGP signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 06/43] drm/i915: protect force_wake_(get|put) with the gt_lock

2012-01-03 Thread Chris Wilson
On Tue, 03 Jan 2012 13:49:36 -0800, Ben Widawsky  wrote:
> The atomic ops stuff was simply there to help reduce the races (even if
> we don't have the lock, we can still safely increment the variable). It
> should be safe to get rid of with the spinlock in place.
> 
> My only gripe here is Chris shot down my earlier version of this patch
> many moons ago :(

The other way of tackling it would be not to take the forcewake during
hangcheck at all, and engineer the hangcheck not to rely on the
ring reads. For example, use seqno as the primary activity monitor,
which only leaves the case of trying not to fire spuriously during a
long batchbuffer. To counter that, you could optimistically do a raw
read of ACTHD or simply rely on long timeouts. Any error recover should
be moved to the error handling workqueue, so that we never attempt to
write a register or modify the stuct without the struct_mutex.

Reducing the granularity of struct_mutex and solving the contention
with mode_config.lock over register access is the ultimate goal when
reviewing the locking mess.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 06/43] drm/i915: protect force_wake_(get|put) with the gt_lock

2012-01-03 Thread Daniel Vetter
On Tue, Jan 3, 2012 at 22:13, Keith Packard  wrote:
> On Tue, 3 Jan 2012 20:12:35 +0100, Daniel Vetter  
> wrote:
>
>> I'm a bit confused by this. With the current code forcewake is
>> protected by dev->struct_mutex. Which doesn't work out because we need
>> to access registers that require the forcewake dance from non-process
>> context.
>
> Right, I like adding a spinlock around this to allow it to be called
> without needing to be able to lock the struct_mutex. (I remember
> suggesting that a spinlock would be necessary when the force wake code
> first showed up...)
>
> However, the commit message talks about the error capture and
> hang check code, but doesn't appear to change them at all.
>
> I think all this patch does is replace the locking for forcewake_count
> From struct_mutex to a new irq-safe spinlock, the commit message makes
> it sound like it's actually fixing stuff, which it isn't; it just
> enables fixing stuff in future patches, right?

Nope, current hangcheck blows up, and we have an i-g-t testcase for it
(which the commit msg clearly states). There are also numerous bug
reports where a dying gpu results in tons of
WARN_ON(!mutex_locked(dev->struct_mutex)) noise in dmesg (which drowns
out the gpu hang warning). The locking change fixes this.

> Reading through this a bit more, I think your patch opens up a hole in
> i915_reset. i915_reset takes struct_mutex, then resets the chip and
> restores the forcewake status. If we aren't relying on struct_mutex to
> protect the forcewake bits, then there's nothing preventing a thread
> From accessing the registers with the chip sleeping between the reset
> and the force wake reset.

The patch adds the required locking to i915_reset.

>> Afaik the atomic ops stuff is just ducttape for paranoia reasons.
>
> The atomic ops stuff would allow reading of the value without holding
> struct_mutex, if that were actually useful.

... but is currently unused and inherently racy. Which is why the
patch drops it.
-- 
Daniel Vetter
daniel.vet...@ffwll.ch - +41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 06/43] drm/i915: protect force_wake_(get|put) with the gt_lock

2012-01-03 Thread Ben Widawsky
On 01/03/2012 01:13 PM, Keith Packard wrote:
> On Tue, 3 Jan 2012 20:12:35 +0100, Daniel Vetter  
> wrote:
> 
>> I'm a bit confused by this. With the current code forcewake is
>> protected by dev->struct_mutex. Which doesn't work out because we need
>> to access registers that require the forcewake dance from non-process
>> context.
> 
> Right, I like adding a spinlock around this to allow it to be called
> without needing to be able to lock the struct_mutex. (I remember
> suggesting that a spinlock would be necessary when the force wake code
> first showed up...)
> 
> However, the commit message talks about the error capture and
> hang check code, but doesn't appear to change them at all.
> 
> I think all this patch does is replace the locking for forcewake_count
> From struct_mutex to a new irq-safe spinlock, the commit message makes
> it sound like it's actually fixing stuff, which it isn't; it just
> enables fixing stuff in future patches, right?

As Daniel mentioned in the commit message, it fixes existing bugs simply
by using a spinlock. In the timer, we do not grab struct_mutex and there
is currently a race there (which we've known about since day 1).

>> Afaik the atomic ops stuff is just ducttape for paranoia reasons.
> 
> The atomic ops stuff would allow reading of the value without holding
> struct_mutex, if that were actually useful.

The atomic ops stuff was simply there to help reduce the races (even if
we don't have the lock, we can still safely increment the variable). It
should be safe to get rid of with the spinlock in place.

My only gripe here is Chris shot down my earlier version of this patch
many moons ago :(

Ben
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 0/1] uxa/glamor: Route some drawing function to glamor.

2012-01-03 Thread Chris Wilson
On Sat, 31 Dec 2011 21:18:14 +0800, zhigang.g...@linux.intel.com wrote:
> From: Zhigang Gong 
> 
> I agree to pending the last patch which is to create glamor pixmap by default.
> This patch is based on the other 3 patch. After apply this patch, and use
> the latest glamor.

I've pushed those 4 patches (all bar to use
intel_glamor_create_textured_pixmap by default). I've a new segfault for
you in glamor_core.c::glamor_validate_gc(), line 425 as pixmap_priv is
NULL, which I only hit just as I thought I had completed testing the
code.

Afaics, the remaining big topics for the ddx are how to enable glx and
integration between glamor/uxa render paths, right?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 06/43] drm/i915: protect force_wake_(get|put) with the gt_lock

2012-01-03 Thread Keith Packard
On Tue, 3 Jan 2012 20:12:35 +0100, Daniel Vetter  wrote:

> I'm a bit confused by this. With the current code forcewake is
> protected by dev->struct_mutex. Which doesn't work out because we need
> to access registers that require the forcewake dance from non-process
> context.

Right, I like adding a spinlock around this to allow it to be called
without needing to be able to lock the struct_mutex. (I remember
suggesting that a spinlock would be necessary when the force wake code
first showed up...)

However, the commit message talks about the error capture and
hang check code, but doesn't appear to change them at all.

I think all this patch does is replace the locking for forcewake_count
From struct_mutex to a new irq-safe spinlock, the commit message makes
it sound like it's actually fixing stuff, which it isn't; it just
enables fixing stuff in future patches, right?

Reading through this a bit more, I think your patch opens up a hole in
i915_reset. i915_reset takes struct_mutex, then resets the chip and
restores the forcewake status. If we aren't relying on struct_mutex to
protect the forcewake bits, then there's nothing preventing a thread
From accessing the registers with the chip sleeping between the reset
and the force wake reset.

> Afaik the atomic ops stuff is just ducttape for paranoia reasons.

The atomic ops stuff would allow reading of the value without holding
struct_mutex, if that were actually useful.

-- 
keith.pack...@intel.com


pgpXJ9jHvesmI.pgp
Description: PGP signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


[Intel-gfx] Problem Intel i915 driver, i3 2010T, HDMI output modes problems

2012-01-03 Thread paulo louro

Dear intel-gfx developers.
After spending a huge amount of time searching for a solution for me problem 
and haven't been able to find one, i decided to send a email to this mailing 
list hoping someone can help me or point me on the right direction. 
Hardware: Motherboard : ASRock H67M-GEProcessor:   Intel Core i3 2010T AV 
Receiver: Onkyo TX-NR808TV: Panasonic 50VT20
Software: Ubuntu 11.10 with xorg-edgers ppa.
Problem: When starting ubuntu without the AV receiver or the TV being on, the 
xorg start with a resolution of 720x576. When turning the TV on and selecting 
the AV-Receiver. The AV receiver reports that there is a signal but noting is 
display on the TV, by using my receiver Display/Information menu i can see im 
receiving a signal with 1920x1080i@120hz. The receiver pass's this direct to 
the television and ofc since its 120hz it cant be handled. As so only a black 
screen is display, not even the Onkyo GUI can be displayed.
By running the xrandr -display :0 --verbose i get:HDMI3 connected 1920x1080+0+0 
(0x42) normal (normal left inverted right x axis y axis) 708mm x 398mm  
Identifier: 0x41Timestamp:  111744  Subpixel:   unknown Gamma:  
1.0:1.0:1.0 Brightness: 1.0 Clones: CRTC:   0   CRTCs:  0 1 
Transform:  1.00 0.00 0.00  0.00 1.00 0.00  
0.00 0.00 1.00 filter:  EDID:   
00003dcb820a
0014010380780a0dc9a057479827
12484c0001010101010101010101
010101010101011d8018711c1620582c
2500c48e219e011d80d0721c1620
102c2580c48e219e00fc0054
582d4e523830380a2020202000fd
0017f00f7e11000a202020202020016c
02033e7255850403020e0f0723241094
1312111d1e162526011f38097f070f7f
071707503f06c04d02005706005f7e01
675400834f66030c002100808c0a
d08a20e02d10103e9600c48e2118
8c0ad090204031200c405500c48e2100
0018011d007251d01e206e285500c48e
211e0091Broadcast RGB:  Full
supported: Full Limited 16:2audio:  autosupported: off  
auto on1920x1080@60 (0x42)  148.5MHz +HSync +VSync 
*current +preferredh: width  1920 start 2008 end 2052 total 2200 skew   
 0 clock   67.5KHzv: height 1080 start 1084 end 1089 total 1125 
  clock   60.0Hz  720x576 (0x43)   27.0MHz -HSync -VSynch: width   720 
start  732 end  796 total  864 skew0 clock   31.2KHzv: height  576 
start  581 end  586 total  625   clock   50.0Hz  720x480 (0x44)   
27.0MHz -HSync -VSynch: width   720 start  736 end  798 total  858 skew 
   0 clock   31.5KHzv: height  480 start  489 end  495 total  525   
clock   59.9Hz
There i can see that the modeline is current selected to 1920x1080@60hz.  If i 
select the mode as 720x576@50hz, them gnome desktop shows but its like the 
screen has 2 desktops splitting the screen in half. If i move the mouse to the 
top of the screen i can see it in the top and lower part of my TV.  The funny 
part is my TV and Receiver now report a signal of 1920x1080@50hz.
Once i change to this mode my xrandr -display :0 --verbose shows 2 new modes 
that weren't there before:
Screen 0: minimum 320 x 200, current 720 x 576, maximum 8192 x 8192HDMI3 
connected 720x576+0+0 (0x43) normal (normal left inverted right x axis y axis) 
698mm x 392mm   Identifier: 0x41Timestamp:  277649  Subpixel:   
unknown Gamma:  1.0:1.0:1.0 Brightness: 1.0 Clones: CRTC:   
0   CRTCs:  0 1 Transform:  1.00 0.00 0.00  
0.00 1.00 0.00  0.00 0.00 1.00 
filter:  EDID:   00003dcb820a
0014010380780adaffa3584aa229
17494b0001010101010101010101
010101010101023a80d072382d40102c
4580ba88211e023a801871382d40
582c4500ba88211e00fc0054
582d4e523830380a2020202000fd
00173d0f440f000a202020202020013e
020362725c9f90140520130412031102
16071506011e0f1d0e1a0b190a262425
2338097f070f7f071707503f06c04d02
005706005f7e01675400834f7f03
0c002100b826e08011060800
1618002030480053580063680070e305
1f01011d80d0721c1620102c2580ba88
219e00ffBroadcast RGB:  Full
supported: Full Limited 16:2audio:  autosupported: off  
auto on1920x1080@60 (0x42)  148.5MHz +HSync +VSync 
+preferredh: width  1920 start 2008 end 2052 total 2200 skew0 

Re: [Intel-gfx] [PATCH 06/43] drm/i915: protect force_wake_(get|put) with the gt_lock

2012-01-03 Thread Daniel Vetter
On Tue, Jan 3, 2012 at 19:51, Keith Packard  wrote:
> On Wed, 14 Dec 2011 13:57:03 +0100, Daniel Vetter  
> wrote:
>
>> The problem this patch solves is that the forcewake accounting
>> necessary for register reads is protected by dev->struct_mutex. But the
>> hangcheck and error_capture code need to access registers without
>> grabbing this mutex because we hold it while waiting for the gpu.
>> So a new lock is required. Because currently the error_state capture
>> is called from the error irq handler and the hangcheck code runs from
>> a timer, it needs to be an irqsafe spinlock (note that the registers
>> used by the irq handler (neglecting the error handling part) only uses
>> registers that don't need the forcewake dance).
>
> I think this description is wrong -- the only difference between using
> atomic objects and using a spinlock is that with the spinlock the call
> to ->force_wake_get is correctly serialized so that no register access
> can occur without the chip being awoken. Without a spinlock, a second
> thread can pass right through gen6_gt_force_wake_get and then go touch
> registers while the first thread is busy waking the chip up.

I'm a bit confused by this. With the current code forcewake is
protected by dev->struct_mutex. Which doesn't work out because we need
to access registers that require the forcewake dance from non-process
context.

Afaik the atomic ops stuff is just ducttape for paranoia reasons.
-Daniel
-- 
Daniel Vetter
daniel.vet...@ffwll.ch - +41 (0) 79 365 57 48 - http://blog.ffwll.ch
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx


Re: [Intel-gfx] [PATCH 06/43] drm/i915: protect force_wake_(get|put) with the gt_lock

2012-01-03 Thread Keith Packard
On Wed, 14 Dec 2011 13:57:03 +0100, Daniel Vetter  
wrote:

> The problem this patch solves is that the forcewake accounting
> necessary for register reads is protected by dev->struct_mutex. But the
> hangcheck and error_capture code need to access registers without
> grabbing this mutex because we hold it while waiting for the gpu.
> So a new lock is required. Because currently the error_state capture
> is called from the error irq handler and the hangcheck code runs from
> a timer, it needs to be an irqsafe spinlock (note that the registers
> used by the irq handler (neglecting the error handling part) only uses
> registers that don't need the forcewake dance).

I think this description is wrong -- the only difference between using
atomic objects and using a spinlock is that with the spinlock the call
to ->force_wake_get is correctly serialized so that no register access
can occur without the chip being awoken. Without a spinlock, a second
thread can pass right through gen6_gt_force_wake_get and then go touch
registers while the first thread is busy waking the chip up.

-- 
keith.pack...@intel.com


pgpuWLYZdZV8S.pgp
Description: PGP signature
___
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx