[Nouveau] [PATCH 1/2] Unreference state/buffer objects on context/screen destruction

2009-12-27 Thread Krzysztof Smiechowicz
(resending as git patch)

- unreference state objects so that buffer objects are unreferenced and
eventually destroyed
- free channel at screen's destruction

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index e4cf91c..be7735b 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -239,5 +239,6 @@ nouveau_screen_init(struct nouveau_screen *screen, 
struct nouveau_device *dev)
  void
  nouveau_screen_fini(struct nouveau_screen *screen)
  {
+nouveau_channel_free(screen-channel);
  }

diff --git a/src/gallium/drivers/nv10/nv10_screen.c 
b/src/gallium/drivers/nv10/nv10_screen.c
index ee5901e..8d4d7d7 100644
--- a/src/gallium/drivers/nv10/nv10_screen.c
+++ b/src/gallium/drivers/nv10/nv10_screen.c
@@ -116,6 +116,8 @@ nv10_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(screen-sync);
nouveau_grobj_free(screen-celsius);

+   nouveau_screen_fini(screen-base);
+
FREE(pscreen);
  }

diff --git a/src/gallium/drivers/nv20/nv20_screen.c 
b/src/gallium/drivers/nv20/nv20_screen.c
index 4eeacd1..7187f52 100644
--- a/src/gallium/drivers/nv20/nv20_screen.c
+++ b/src/gallium/drivers/nv20/nv20_screen.c
@@ -116,6 +116,8 @@ nv20_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(screen-sync);
nouveau_grobj_free(screen-kelvin);

+   nouveau_screen_fini(screen-base);
+
FREE(pscreen);
  }

diff --git a/src/gallium/drivers/nv30/nv30_context.c 
b/src/gallium/drivers/nv30/nv30_context.c
index 46a821a..4bf4d81 100644
--- a/src/gallium/drivers/nv30/nv30_context.c
+++ b/src/gallium/drivers/nv30/nv30_context.c
@@ -25,6 +25,12 @@ static void
  nv30_destroy(struct pipe_context *pipe)
  {
struct nv30_context *nv30 = nv30_context(pipe);
+unsigned i;
+
+for (i = 0; i  NV30_STATE_MAX; i++) {
+if (nv30-state.hw[i])
+so_ref(NULL, nv30-state.hw[i]);
+}

if (nv30-draw)
draw_destroy(nv30-draw);
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c 
b/src/gallium/drivers/nv30/nv30_fragprog.c
index 40965a9..bf1c314 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -870,6 +870,12 @@ void
  nv30_fragprog_destroy(struct nv30_context *nv30,
  struct nv30_fragment_program *fp)
  {
+if (fp-buffer)
+pipe_buffer_reference(fp-buffer, NULL);
+
+if (fp-so)
+so_ref(NULL, fp-so);
+
if (fp-insn_len)
FREE(fp-insn);
  }
diff --git a/src/gallium/drivers/nv30/nv30_screen.c 
b/src/gallium/drivers/nv30/nv30_screen.c
index 7cd3690..e17e1ab 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -156,6 +156,12 @@ static void
  nv30_screen_destroy(struct pipe_screen *pscreen)
  {
struct nv30_screen *screen = nv30_screen(pscreen);
+unsigned i;
+
+for (i = 0; i  NV30_STATE_MAX; i++) {
+if (screen-state[i])
+so_ref(NULL, screen-state[i]);
+}

nouveau_resource_free(screen-vp_exec_heap);
nouveau_resource_free(screen-vp_data_heap);
@@ -164,6 +170,8 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
nouveau_notifier_free(screen-sync);
nouveau_grobj_free(screen-rankine);

+   nouveau_screen_fini(screen-base);
+   
FREE(pscreen);
  }

diff --git a/src/gallium/drivers/nv40/nv40_context.c 
b/src/gallium/drivers/nv40/nv40_context.c
index eb9cce4..d668f31 100644
--- a/src/gallium/drivers/nv40/nv40_context.c
+++ b/src/gallium/drivers/nv40/nv40_context.c
@@ -25,6 +25,12 @@ static void
  nv40_destroy(struct pipe_context *pipe)
  {
struct nv40_context *nv40 = nv40_context(pipe);
+unsigned i;
+
+for (i = 0; i  NV40_STATE_MAX; i++) {
+if (nv40-state.hw[i])
+so_ref(NULL, nv40-state.hw[i]);
+}

if (nv40-draw)
draw_destroy(nv40-draw);
diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c 
b/src/gallium/drivers/nv40/nv40_fragprog.c
index 1bf1672..52ba888 100644
--- a/src/gallium/drivers/nv40/nv40_fragprog.c
+++ b/src/gallium/drivers/nv40/nv40_fragprog.c
@@ -948,6 +948,12 @@ void
  nv40_fragprog_destroy(struct nv40_context *nv40,
  struct nv40_fragment_program *fp)
  {
+if (fp-buffer)
+pipe_buffer_reference(fp-buffer, NULL);
+
+if (fp-so)
+so_ref(NULL, fp-so);
+
if (fp-insn_len)
FREE(fp-insn);
  }
diff --git a/src/gallium/drivers/nv40/nv40_screen.c 
b/src/gallium/drivers/nv40/nv40_screen.c
index bd13dfd..bbe7509 100644
--- a/src/gallium/drivers/nv40/nv40_screen.c
+++ b/src/gallium/drivers/nv40/nv40_screen.c
@@ -140,6 +140,12 @@ static void
  nv40_screen_destroy(struct pipe_screen *pscreen)
  {
struct nv40_screen *screen = nv40_screen(pscreen);
+unsigned i;
+
+for (i = 0; i  NV40_STATE_MAX; i++) {
+if (screen-state[i])
+ 

[Nouveau] [PATCH 2/2] libdrm: Unreference pushbuf objects on channel destruction

2009-12-27 Thread Krzysztof Smiechowicz
(resending as git patch)

- unreference pushbuf objects on channel destruction


diff --git a/nouveau/nouveau_channel.c b/nouveau/nouveau_channel.c
index 674c5c3..6f71f89 100644
--- a/nouveau/nouveau_channel.c
+++ b/nouveau/nouveau_channel.c
@@ -111,6 +111,8 @@ nouveau_channel_free(struct nouveau_channel **chan)

FIRE_RING(nvchan-base);

+   nouveau_pushbuf_fini(nvchan-base);
+
nouveau_bo_unmap(nvchan-notifier_bo);
nouveau_bo_ref(NULL, nvchan-notifier_bo);

diff --git a/nouveau/nouveau_private.h b/nouveau/nouveau_private.h
index 784afc9..de21a6b 100644
--- a/nouveau/nouveau_private.h
+++ b/nouveau/nouveau_private.h
@@ -65,6 +65,9 @@ struct nouveau_pushbuf_priv {
  int
  nouveau_pushbuf_init(struct nouveau_channel *);

+void
+nouveau_pushbuf_fini(struct nouveau_channel *);
+
  struct nouveau_channel_priv {
struct nouveau_channel base;

diff --git a/nouveau/nouveau_pushbuf.c b/nouveau/nouveau_pushbuf.c
index b90e923..c4053ed 100644
--- a/nouveau/nouveau_pushbuf.c
+++ b/nouveau/nouveau_pushbuf.c
@@ -170,6 +170,12 @@ nouveau_pushbuf_init(struct nouveau_channel *chan)
return 0;
  }

+void
+nouveau_pushbuf_fini(struct nouveau_channel *chan)
+{
+   nouveau_pushbuf_fini_call(chan);
+}
+
  int
  nouveau_pushbuf_flush(struct nouveau_channel *chan, unsigned min)
  {
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 1/2] drm/nv50: align size of buffer object to the right boundaries.

2009-12-27 Thread Maarten Maathuis
- Depth and stencil buffers are supposed to be large enough in general.

Signed-off-by: Maarten Maathuis madman2...@gmail.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c |9 -
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index e342a41..9fc4bd6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -65,8 +65,9 @@ nouveau_bo_fixup_align(struct drm_device *dev,
 
/*
 * Some of the tile_flags have a periodic structure of N*4096 bytes,
-* align to to that as well as the page size. Overallocate memory to
-* avoid corruption of other buffer objects.
+* align to to that as well as the page size. Align the size to the
+* appropriate boundaries. This does imply that sizes are rounded up
+* 3-7 pages, so make sure your special buffer sizes are large enough.
 */
if (dev_priv-card_type == NV_50) {
uint32_t block_size = nouveau_mem_fb_amount(dev)  15;
@@ -77,22 +78,20 @@ nouveau_bo_fixup_align(struct drm_device *dev,
case 0x2800:
case 0x4800:
case 0x7a00:
-   *size = roundup(*size, block_size);
if (is_power_of_2(block_size)) {
-   *size += 3 * block_size;
for (i = 1; i  10; i++) {
*align = 12 * i * block_size;
if (!(*align % 65536))
break;
}
} else {
-   *size += 6 * block_size;
for (i = 1; i  10; i++) {
*align = 8 * i * block_size;
if (!(*align % 65536))
break;
}
}
+   *size = roundup(*size, *align);
break;
default:
break;
-- 
1.6.6.rc4

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 2/2] drm/nv50: synchronize user channel after buffer object move on kernel channel

2009-12-27 Thread Maarten Maathuis
- This is not yet a generic implementation that will work everywhere, but it's
a start.
- This will fix the corruption surrounding pixmap/texture bo moves on nv50.

Signed-off-by: Maarten Maathuis madman2...@gmail.com
---
 drivers/gpu/drm/nouveau/nouveau_bo.c  |7 ++
 drivers/gpu/drm/nouveau/nouveau_channel.c |9 ++-
 drivers/gpu/drm/nouveau/nouveau_dma.c |   26 +++
 drivers/gpu/drm/nouveau/nouveau_dma.h |   11 ++-
 drivers/gpu/drm/nouveau/nouveau_drv.h |   11 +++
 drivers/gpu/drm/nouveau/nouveau_fence.c   |  103 +
 drivers/gpu/drm/nouveau/nouveau_object.c  |2 +-
 drivers/gpu/drm/nouveau/nv50_graph.c  |   16 +
 8 files changed, 179 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 9fc4bd6..66f83a1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -466,6 +466,13 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
if (ret)
return ret;
 
+   /* Make the user channel wait for the kernel channel to be done. */
+   if (nvbo-channel  chan != nvbo-channel) {
+   ret = nouveau_fence_sync(nvbo-channel, fence);
+   if (ret)
+   return ret;
+   }
+
ret = ttm_bo_move_accel_cleanup(nvbo-bo, fence, NULL,
evict, no_wait, new_mem);
nouveau_fence_unref((void *)fence);
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c 
b/drivers/gpu/drm/nouveau/nouveau_channel.c
index 9aaa972..c1ac34b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -414,7 +414,14 @@ nouveau_ioctl_fifo_alloc(struct drm_device *dev, void 
*data,
init-subchan[0].grclass = 0x0039;
else
init-subchan[0].grclass = 0x5039;
-   init-nr_subchan = 1;
+   if (dev_priv-card_type = NV_50) {
+   init-subchan[1].handle = NvSw;
+   init-subchan[1].grclass = NV50_NVSW;
+   }
+   if (dev_priv-card_type  NV_50)
+   init-nr_subchan = 1;
+   else
+   init-nr_subchan = 2;
 
/* Named memory object area */
ret = drm_gem_handle_create(file_priv, chan-notifier_bo-gem,
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c 
b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 7035536..7e66cc8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -52,6 +52,23 @@ nouveau_dma_init(struct nouveau_channel *chan)
if (ret)
return ret;
 
+   /* Allocate what we need for (simple) cross channel synchronisation. */
+   if (dev_priv-card_type = NV_50) {
+   struct nouveau_gpuobj *nvsw = NULL;
+
+   ret = nouveau_gpuobj_sw_new(chan, NV50_NVSW, nvsw);
+   if (ret)
+   return ret;
+
+   ret = nouveau_gpuobj_ref_add(dev, chan, NvSw, nvsw, NULL);
+   if (ret)
+   return ret;
+
+   ret = nouveau_notifier_alloc(chan, NvNotify1, 32, 
chan-sync_ntfy);
+   if (ret)
+   return ret;
+   }
+
/* Map push buffer */
ret = nouveau_bo_map(chan-pushbuf_bo);
if (ret)
@@ -87,6 +104,15 @@ nouveau_dma_init(struct nouveau_channel *chan)
BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
OUT_RING(chan, NvNotify0);
 
+   /* Bind NvSw to channel. */
+   if (dev_priv-card_type = NV_50) {
+   ret = RING_SPACE(chan, 2);
+   if (ret)
+   return ret;
+   BEGIN_RING(chan, NvSubSw, 0, 1);
+   OUT_RING(chan, NvSw);
+   }
+
/* Sit back and pray the channel works.. */
FIRE_RING(chan);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h 
b/drivers/gpu/drm/nouveau/nouveau_dma.h
index 04e85d8..3c74902 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -46,10 +46,11 @@
 /* Hardcoded object assignments to subchannels (subchannel id). */
 enum {
NvSubM2MF   = 0,
-   NvSub2D = 1,
-   NvSubCtxSurf2D  = 1,
-   NvSubGdiRect= 2,
-   NvSubImageBlit  = 3
+   NvSubSw= 1,
+   NvSub2D = 2,
+   NvSubCtxSurf2D  = 2,
+   NvSubGdiRect= 3,
+   NvSubImageBlit  = 4
 };
 
 /* Object handles. */
@@ -67,6 +68,8 @@ enum {
NvClipRect  = 0x800b,
NvGdiRect   = 0x800c,
NvImageBlit = 0x800d,
+   NvSw= 0x800e,
+   NvNotify1   = 0x800f,
 
/* G80+ display objects */
NvEvoVRAM   = 0x0100,
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h 
b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 7da88a9..75b2454 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h

[Nouveau] Lockdep spew with nouveau on 2.6.33-rc2

2009-12-27 Thread Dmitry Torokhov
Hi,

Every time I boot my laptop (Fedora 12) I get the following complint from
lockdep:

[  703.936365] ===
[  703.936369] [ INFO: possible circular locking dependency detected ]
[  703.936372] 2.6.33-rc2 #154
[  703.936374] ---
[  703.936376] X/1365 is trying to acquire lock:
[  703.936379]  (mm-mmap_sem){++}, at: [810f9672] 
might_fault+0x72/0xd0
[  703.936388]
[  703.936389] but task is already holding lock:
[  703.936397]  (dev-struct_mutex){+.+.+.}, at: [a003202d] 
nouveau_gem_ioctl_pushbuf_call+0xfd/0x7d0 [nouveau]
[  703.936412]
[  703.936413] which lock already depends on the new lock.
[  703.936413]
[  703.936415]
[  703.936416] the existing dependency chain (in reverse order) is:
[  703.936418]
[  703.936418] - #1 (dev-struct_mutex){+.+.+.}:
[  703.936423][81081262] check_prev_add+0x242/0x570
[  703.936428][81081be9] validate_chain+0x659/0x750
[  703.936432][81081ff7] __lock_acquire+0x317/0x4b0
[  703.936436][81082229] lock_acquire+0x99/0x140
[  703.936440][8149c9c5] __mutex_lock_common+0x65/0x4a0
[  703.936445][8149cede] mutex_lock_nested+0x3e/0x50
[  703.936448][8130d67a] drm_mmap+0x3a/0x70
[  703.936453][a0032d33] nouveau_ttm_mmap+0x63/0xa0 [nouveau]
[  703.936461][81102954] mmap_region+0x404/0x5a0
[  703.936466][81102e3c] do_mmap_pgoff+0x34c/0x3a0
[  703.936470][810f496f] sys_mmap_pgoff+0x1cf/0x2a0
[  703.936474][810079e9] sys_mmap+0x29/0x30
[  703.936479][81002f6b] system_call_fastpath+0x16/0x1b
[  703.936483]
[  703.936484] - #0 (mm-mmap_sem){++}:
[  703.936488][8108155e] check_prev_add+0x53e/0x570
[  703.936492][81081be9] validate_chain+0x659/0x750
[  703.936496][81081ff7] __lock_acquire+0x317/0x4b0
[  703.936500][81082229] lock_acquire+0x99/0x140
[  703.936503][810f969f] might_fault+0x9f/0xd0
[  703.936507][a0030ebe] validate_list+0x1be/0x290 [nouveau]
[  703.936515][a003171f] 
nouveau_gem_pushbuf_validate+0xbf/0x150 [nouveau]
[  703.936524][a0032051] 
nouveau_gem_ioctl_pushbuf_call+0x121/0x7d0 [nouveau]
[  703.936532][a0032726] 
nouveau_gem_ioctl_pushbuf_call2+0x26/0x30 [nouveau]
[  703.936541][81306b25] drm_ioctl+0x1c5/0x490
[  703.936549][811315c8] vfs_ioctl+0x38/0xe0
[  703.936553][81131c65] do_vfs_ioctl+0x75/0x370
[  703.936557][81131fe1] sys_ioctl+0x81/0xa0
[  703.936560][81002f6b] system_call_fastpath+0x16/0x1b
[  703.936564]
[  703.936565] other info that might help us debug this:
[  703.936565]
[  703.936568] 1 lock held by X/1365:
[  703.936569]  #0:  (dev-struct_mutex){+.+.+.}, at: [a003202d] 
nouveau_gem_ioctl_pushbuf_call+0xfd/0x7d0 [nouveau]
[  703.936581]
[  703.936581] stack backtrace:
[  703.936584] Pid: 1365, comm: X Not tainted 2.6.33-rc2 #154
[  703.936587] Call Trace:
[  703.936591]  [8107f8c9] print_circular_bug+0xe9/0xf0
[  703.936595]  [8108155e] check_prev_add+0x53e/0x570
[  703.936599]  [81081be9] validate_chain+0x659/0x750
[  703.936603]  [81081ff7] __lock_acquire+0x317/0x4b0
[  703.936607]  [81082229] lock_acquire+0x99/0x140
[  703.936610]  [810f9672] ? might_fault+0x72/0xd0
[  703.936613]  [810f969f] might_fault+0x9f/0xd0
[  703.936617]  [810f9672] ? might_fault+0x72/0xd0
[  703.936624]  [a001253f] ? ttm_bo_validate+0x6f/0xf0 [ttm]
[  703.936632]  [a0030ebe] validate_list+0x1be/0x290 [nouveau]
[  703.936641]  [a003171f] nouveau_gem_pushbuf_validate+0xbf/0x150 
[nouveau]
[  703.936650]  [a0032051] nouveau_gem_ioctl_pushbuf_call+0x121/0x7d0 
[nouveau]
[  703.936653]  [810f9672] ? might_fault+0x72/0xd0
[  703.936662]  [a0032726] nouveau_gem_ioctl_pushbuf_call2+0x26/0x30 
[nouveau]
[  703.93]  [81306b25] drm_ioctl+0x1c5/0x490
[  703.936670]  [81214dbd] ? inode_has_perm+0x3d/0x70
[  703.936679]  [a0032700] ? nouveau_gem_ioctl_pushbuf_call2+0x0/0x30 
[nouveau]
[  703.936684]  [81214f41] ? file_has_perm+0xb1/0xc0
[  703.936687]  [811315c8] vfs_ioctl+0x38/0xe0
[  703.936690]  [81131c65] do_vfs_ioctl+0x75/0x370
[  703.936693]  [81131fe1] sys_ioctl+0x81/0xa0
[  703.936697]  [81002f6b] system_call_fastpath+0x16/0x1b

Thanks.

-- 
Dmitry
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH 1/2] drm/nv50: align size of buffer object to the right boundaries.

2009-12-27 Thread Christoph Bumiller
On 12/27/2009 12:41 PM, Maarten Maathuis wrote:
 - Depth and stencil buffers are supposed to be large enough in general.
 
 Signed-off-by: Maarten Maathuis madman2...@gmail.com
 ---
  drivers/gpu/drm/nouveau/nouveau_bo.c |9 -
  1 files changed, 4 insertions(+), 5 deletions(-)
 
 diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
 b/drivers/gpu/drm/nouveau/nouveau_bo.c
 index e342a41..9fc4bd6 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
 @@ -65,8 +65,9 @@ nouveau_bo_fixup_align(struct drm_device *dev,
  
   /*
* Some of the tile_flags have a periodic structure of N*4096 bytes,
 -  * align to to that as well as the page size. Overallocate memory to
 -  * avoid corruption of other buffer objects.
 +  * align to to that as well as the page size. Align the size to the
 +  * appropriate boundaries. This does imply that sizes are rounded up
 +  * 3-7 pages, so make sure your special buffer sizes are large enough.
*/
No - 16x16 depth textures or whatever crazy idea some app might have
won't be large enough.
Taking care of size in userspace and of alignment in kernel ... not nice
to split in my opinion.
   if (dev_priv-card_type == NV_50) {
   uint32_t block_size = nouveau_mem_fb_amount(dev)  15;
 @@ -77,22 +78,20 @@ nouveau_bo_fixup_align(struct drm_device *dev,
   case 0x2800:
   case 0x4800:
   case 0x7a00:
 - *size = roundup(*size, block_size);
   if (is_power_of_2(block_size)) {
 - *size += 3 * block_size;
   for (i = 1; i  10; i++) {
   *align = 12 * i * block_size;
   if (!(*align % 65536))
   break;
   }
   } else {
 - *size += 6 * block_size;
   for (i = 1; i  10; i++) {
   *align = 8 * i * block_size;
   if (!(*align % 65536))
   break;
   }
   }
 + *size = roundup(*size, *align);
   break;
   default:
   break;

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH 1/2] drm/nv50: align size of buffer object to the right boundaries.

2009-12-27 Thread Maarten Maathuis
On Sun, Dec 27, 2009 at 2:43 PM, Christoph Bumiller
e0425...@student.tuwien.ac.at wrote:
 On 12/27/2009 12:41 PM, Maarten Maathuis wrote:
 - Depth and stencil buffers are supposed to be large enough in general.

 Signed-off-by: Maarten Maathuis madman2...@gmail.com
 ---
  drivers/gpu/drm/nouveau/nouveau_bo.c |    9 -
  1 files changed, 4 insertions(+), 5 deletions(-)

 diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
 b/drivers/gpu/drm/nouveau/nouveau_bo.c
 index e342a41..9fc4bd6 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
 @@ -65,8 +65,9 @@ nouveau_bo_fixup_align(struct drm_device *dev,

       /*
        * Some of the tile_flags have a periodic structure of N*4096 bytes,
 -      * align to to that as well as the page size. Overallocate memory to
 -      * avoid corruption of other buffer objects.
 +      * align to to that as well as the page size. Align the size to the
 +      * appropriate boundaries. This does imply that sizes are rounded up
 +      * 3-7 pages, so make sure your special buffer sizes are large 
 enough.
        */
 No - 16x16 depth textures or whatever crazy idea some app might have
 won't be large enough.
 Taking care of size in userspace and of alignment in kernel ... not nice
 to split in my opinion.

Userspace doesn't know all constraints, so the kernel has to do
something. How to deal with these limitations is another question.
Either we over allocate (and hide the overallocation from everyone),
align the size, or we try to make the allocator smarter to group them
together based on their tile flags.

       if (dev_priv-card_type == NV_50) {
               uint32_t block_size = nouveau_mem_fb_amount(dev)  15;
 @@ -77,22 +78,20 @@ nouveau_bo_fixup_align(struct drm_device *dev,
               case 0x2800:
               case 0x4800:
               case 0x7a00:
 -                     *size = roundup(*size, block_size);
                       if (is_power_of_2(block_size)) {
 -                             *size += 3 * block_size;
                               for (i = 1; i  10; i++) {
                                       *align = 12 * i * block_size;
                                       if (!(*align % 65536))
                                               break;
                               }
                       } else {
 -                             *size += 6 * block_size;
                               for (i = 1; i  10; i++) {
                                       *align = 8 * i * block_size;
                                       if (!(*align % 65536))
                                               break;
                               }
                       }
 +                     *size = roundup(*size, *align);
                       break;
               default:
                       break;


___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] [PATCH 1/2] drm/nv50: align size of buffer object to the right boundaries.

2009-12-27 Thread Maarten Maathuis
To avoid any confusion, make sure your buffers are large enough
means you shouldn't go allocating very small depth/stencil buffers,
because that will waste space. Userspace should never have to worry
about the restrictions themselves.

Maarten.

On Sun, Dec 27, 2009 at 3:28 PM, Maarten Maathuis madman2...@gmail.com wrote:
 On Sun, Dec 27, 2009 at 2:43 PM, Christoph Bumiller
 e0425...@student.tuwien.ac.at wrote:
 On 12/27/2009 12:41 PM, Maarten Maathuis wrote:
 - Depth and stencil buffers are supposed to be large enough in general.

 Signed-off-by: Maarten Maathuis madman2...@gmail.com
 ---
  drivers/gpu/drm/nouveau/nouveau_bo.c |    9 -
  1 files changed, 4 insertions(+), 5 deletions(-)

 diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c 
 b/drivers/gpu/drm/nouveau/nouveau_bo.c
 index e342a41..9fc4bd6 100644
 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c
 +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
 @@ -65,8 +65,9 @@ nouveau_bo_fixup_align(struct drm_device *dev,

       /*
        * Some of the tile_flags have a periodic structure of N*4096 bytes,
 -      * align to to that as well as the page size. Overallocate memory to
 -      * avoid corruption of other buffer objects.
 +      * align to to that as well as the page size. Align the size to the
 +      * appropriate boundaries. This does imply that sizes are rounded up
 +      * 3-7 pages, so make sure your special buffer sizes are large 
 enough.
        */
 No - 16x16 depth textures or whatever crazy idea some app might have
 won't be large enough.
 Taking care of size in userspace and of alignment in kernel ... not nice
 to split in my opinion.

 Userspace doesn't know all constraints, so the kernel has to do
 something. How to deal with these limitations is another question.
 Either we over allocate (and hide the overallocation from everyone),
 align the size, or we try to make the allocator smarter to group them
 together based on their tile flags.

       if (dev_priv-card_type == NV_50) {
               uint32_t block_size = nouveau_mem_fb_amount(dev)  15;
 @@ -77,22 +78,20 @@ nouveau_bo_fixup_align(struct drm_device *dev,
               case 0x2800:
               case 0x4800:
               case 0x7a00:
 -                     *size = roundup(*size, block_size);
                       if (is_power_of_2(block_size)) {
 -                             *size += 3 * block_size;
                               for (i = 1; i  10; i++) {
                                       *align = 12 * i * block_size;
                                       if (!(*align % 65536))
                                               break;
                               }
                       } else {
 -                             *size += 6 * block_size;
                               for (i = 1; i  10; i++) {
                                       *align = 8 * i * block_size;
                                       if (!(*align % 65536))
                                               break;
                               }
                       }
 +                     *size = roundup(*size, *align);
                       break;
               default:
                       break;



___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [Bug 25806] New: NV40 vertex corruption (kernel BO deletion too early?)

2009-12-27 Thread bugzilla-daemon
http://bugs.freedesktop.org/show_bug.cgi?id=25806

   Summary: NV40 vertex corruption (kernel BO deletion too early?)
   Product: Mesa
   Version: git
  Platform: Other
OS/Version: All
Status: NEW
  Severity: normal
  Priority: medium
 Component: Drivers/DRI/nouveau
AssignedTo: nouveau@lists.freedesktop.org
ReportedBy: luca.barbi...@gmail.com


On my G71 system, several programs show vertex corruption issues. In
particular, vertices tend to be corrupted or randomly go to infinity, leading
to spiked triangles or random polygons, in several programs, such as
demos/engine, demos/dinoshade, Blender, Extreme Tux Racer.

The system is running:
Linux 2.6.33-rc2
libdrm 2.4.17
Mesa HEAD (b46bcd8e7b37aa2e9159e126c1cc88234a3c2790)
Detected an NV40 generation card (0x049800a2)
64 MB GART aperture
256 MB VRAM

The problem is solved by either of the following:
1. #define FORCE_SWTNL 1
2. Adding usleep(1) at the end of nv40_draw_arrays
3. Making nouveau_screen_bo_del do nothing

It seems that the issue is that Mesa deletes a buffer object used for vertex
data while the GPU is still drawing to it. The kernel actually performs the
deletion without waiting for the GPU drawing, the memory (or GART mapping) is
reused, and corruption ensues.

From Gallium tracing, Mesa is sending vertex data in 64 KB buffers, which are
created, written, drawn and then recreated upon reuse (which seems correct
behavior).

It seems, in other words, that the kernel is not keeping an extra reference to
buffers which are currently referenced by an in-flight pushbuffer, and
unreferencing them only once the GPU finished drawing.

Is the kernel already supposed to do so?
If yes, something is broken. If things work for others, maybe my system is
somehow more prone to reusing memory or GART mappings, so they don't see that?

If no, then how are things supposed to work?

(BTW, not freeing buffers leads to X freezing and the kernel oopsing on my
machine upon saturating memory, but that's another issue)


-- 
Configure bugmail: http://bugs.freedesktop.org/userprefs.cgi?tab=email
--- You are receiving this mail because: ---
You are the assignee for the bug.
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH] drm/nouveau: create function for dealing with gpu lockup

2009-12-27 Thread Marcin Slusarz
It's mostly a cleanup, but in nv50_fbcon_accel_init gpu lockup
message was printed, but HWACCEL_DISBALED flag was not set.

Signed-off-by: Marcin Slusarz marcin.slus...@gmail.com
---
 drivers/gpu/drm/nouveau/nouveau_fbcon.c |   15 +++
 drivers/gpu/drm/nouveau/nouveau_fbcon.h |2 ++
 drivers/gpu/drm/nouveau/nv04_fbcon.c|   15 +--
 drivers/gpu/drm/nouveau/nv50_fbcon.c|   17 +
 4 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c 
b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 84af25c..6438935 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -64,8 +64,7 @@ nouveau_fbcon_sync(struct fb_info *info)
return 0;
 
if (RING_SPACE(chan, 4)) {
-   NV_ERROR(dev, GPU lockup - switching to software fbcon\n);
-   info-flags |= FBINFO_HWACCEL_DISABLED;
+   NV_GPU_LOCKUP(info);
return 0;
}
 
@@ -86,8 +85,7 @@ nouveau_fbcon_sync(struct fb_info *info)
}
 
if (ret) {
-   NV_ERROR(dev, GPU lockup - switching to software fbcon\n);
-   info-flags |= FBINFO_HWACCEL_DISABLED;
+   NV_GPU_LOCKUP(info);
return 0;
}
 
@@ -380,3 +378,12 @@ nouveau_fbcon_remove(struct drm_device *dev, struct 
drm_framebuffer *fb)
 
return 0;
 }
+
+void nouveau_gpu_lockup(struct fb_info *info, const char *file, int line)
+{
+   struct nouveau_fbcon_par *par = info-par;
+   struct drm_device *dev = par-dev;
+
+   NV_ERROR(dev, GPU lockup - switching to software fbcon (%s:%d)\n, 
file, line);
+   info-flags |= FBINFO_HWACCEL_DISABLED;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h 
b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
index 8531140..52c87fb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
@@ -43,5 +43,7 @@ void nouveau_fbcon_zfill(struct drm_device *dev);
 int nv04_fbcon_accel_init(struct fb_info *info);
 int nv50_fbcon_accel_init(struct fb_info *info);
 
+#define NV_GPU_LOCKUP(info) nouveau_gpu_lockup(info, __FILE__, __LINE__)
+void nouveau_gpu_lockup(struct fb_info *info, const char *file, int line);
 #endif /* __NV50_FBCON_H__ */
 
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c 
b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index 09a3107..0ae7069 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -39,8 +39,7 @@ nv04_fbcon_copyarea(struct fb_info *info, const struct 
fb_copyarea *region)
return;
 
if (!(info-flags  FBINFO_HWACCEL_DISABLED)  RING_SPACE(chan, 4)) {
-   NV_ERROR(dev, GPU lockup - switching to software fbcon\n);
-   info-flags |= FBINFO_HWACCEL_DISABLED;
+   NV_GPU_LOCKUP(info);
}
 
if (info-flags  FBINFO_HWACCEL_DISABLED) {
@@ -68,8 +67,7 @@ nv04_fbcon_fillrect(struct fb_info *info, const struct 
fb_fillrect *rect)
return;
 
if (!(info-flags  FBINFO_HWACCEL_DISABLED)  RING_SPACE(chan, 7)) {
-   NV_ERROR(dev, GPU lockup - switching to software fbcon\n);
-   info-flags |= FBINFO_HWACCEL_DISABLED;
+   NV_GPU_LOCKUP(info);
}
 
if (info-flags  FBINFO_HWACCEL_DISABLED) {
@@ -109,8 +107,7 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct 
fb_image *image)
}
 
if (!(info-flags  FBINFO_HWACCEL_DISABLED)  RING_SPACE(chan, 8)) {
-   NV_ERROR(dev, GPU lockup - switching to software fbcon\n);
-   info-flags |= FBINFO_HWACCEL_DISABLED;
+   NV_GPU_LOCKUP(info);
}
 
if (info-flags  FBINFO_HWACCEL_DISABLED) {
@@ -144,8 +141,7 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct 
fb_image *image)
int iter_len = dsize  128 ? 128 : dsize;
 
if (RING_SPACE(chan, iter_len + 1)) {
-   NV_ERROR(dev, GPU lockup - switching to software 
fbcon\n);
-   info-flags |= FBINFO_HWACCEL_DISABLED;
+   NV_GPU_LOCKUP(info);
cfb_imageblit(info, image);
return;
}
@@ -242,8 +238,7 @@ nv04_fbcon_accel_init(struct fb_info *info)
return ret;
 
if (RING_SPACE(chan, 49)) {
-   NV_ERROR(dev, GPU lockup - switching to software fbcon\n);
-   info-flags |= FBINFO_HWACCEL_DISABLED;
+   NV_GPU_LOCKUP(info);
return 0;
}
 
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c 
b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index c966ef8..4aff98b 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -17,9 +17,7 @@ nv50_fbcon_fillrect(struct fb_info *info, const struct 
fb_fillrect *rect)
 
if (!(info-flags  FBINFO_HWACCEL_DISABLED) 
 

[Nouveau] [PATCH] drm/nouveau: printk something when nStatus == 0

2009-12-27 Thread Marcin Slusarz
when nStatus == 0, nouveau_graph_dump_trap_info printks
something like this:
PGRAPH_ERROR - nSource: PROTECTION_ERROR DMA_W_PROTECTION, nStatus:\n
without any information about status - printk additional 0

Signed-off-by: Marcin Slusarz marcin.slus...@gmail.com
---
 drivers/gpu/drm/nouveau/nouveau_irq.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c 
b/drivers/gpu/drm/nouveau/nouveau_irq.c
index 370c72c..57ee73a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_irq.c
+++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
@@ -284,6 +284,10 @@ nouveau_print_bitfield_names_(uint32_t value,
 * Also the caller is responsible for adding the newline.
 */
int i;
+   if (!value) {
+   printk( 0);
+   return;
+   }
for (i = 0; i  namelist_len; ++i) {
uint32_t mask = namelist[i].mask;
if (value  mask) {
-- 
1.6.6.rc3

___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [Bug 25806] NV40 vertex corruption (kernel BO deletion too early?)

2009-12-27 Thread bugzilla-daemon
http://bugs.freedesktop.org/show_bug.cgi?id=25806





--- Comment #1 from Luca Barbieri luca.barbi...@gmail.com  2009-12-27 
14:13:14 PST ---
Upon further examination, the kernel does seem to have the required logic:
sending a pushbuffer creates a fence, which is put in bo-sync_obj, which is
then checked on deletion and if non-null, the buffer is put on a delayed
destroy list.

However, it seems to be somehow not working.

Maybe fencing is broken on my card? (i.e. the kernel thinks fences are signaled
when they aren't)
Or possibly fences are being signaled before the vertex shader is finished
running?

How can I test that fencing is working correctly?


-- 
Configure bugmail: http://bugs.freedesktop.org/userprefs.cgi?tab=email
--- You are receiving this mail because: ---
You are the assignee for the bug.
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [Bug 25806] NV40 vertex corruption (kernel BO deletion too early?)

2009-12-27 Thread bugzilla-daemon
http://bugs.freedesktop.org/show_bug.cgi?id=25806


Francisco Jerez curroje...@riseup.net changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution||INVALID




--- Comment #2 from Francisco Jerez curroje...@riseup.net  2009-12-27 
14:25:30 PST ---
(In reply to comment #1)
 Upon further examination, the kernel does seem to have the required logic:
 sending a pushbuffer creates a fence, which is put in bo-sync_obj, which is
 then checked on deletion and if non-null, the buffer is put on a delayed
 destroy list.
 
 However, it seems to be somehow not working.
 
 Maybe fencing is broken on my card? (i.e. the kernel thinks fences are 
 signaled
 when they aren't)
 Or possibly fences are being signaled before the vertex shader is finished
 running?

That would be almost unprecedented... it's more likely that some caches in the
GPU aren't being flushed often enough (or maybe the ones in the CPU... a bug in
the kernel PAT code also used to cause the same symptoms, but that's hopefully
already fixed).

I'm marking this as invalid because that's the current policy, unfortunately
we're already aware of too many gallium bugs.

 
 How can I test that fencing is working correctly?
 


-- 
Configure bugmail: http://bugs.freedesktop.org/userprefs.cgi?tab=email
--- You are receiving this mail because: ---
You are the assignee for the bug.
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] [PATCH 3/5] drivers/gpu: Correct NULL test

2009-12-27 Thread Julia Lawall
From: Julia Lawall ju...@diku.dk

Test the just-allocated value for NULL rather than some other value.

The semantic patch that makes this change is as follows:
(http://coccinelle.lip6.fr/)

// smpl
@@
expression x,y;
statement S;
@@

x = \(kmalloc\|kcalloc\|kzalloc\)(...);
(
if ((x) == NULL) S
|
if (
-   y
+   x
   == NULL)
 S
)
// /smpl

Signed-off-by: Julia Lawall ju...@diku.dk

---
 drivers/gpu/drm/nouveau/nouveau_grctx.c|4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_grctx.c 
b/drivers/gpu/drm/nouveau/nouveau_grctx.c
index 419f4c2..c7ebec6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_grctx.c
+++ b/drivers/gpu/drm/nouveau/nouveau_grctx.c
@@ -97,8 +97,8 @@ nouveau_grctx_prog_load(struct drm_device *dev)
}
 
pgraph-ctxvals = kmalloc(fw-size, GFP_KERNEL);
-   if (!pgraph-ctxprog) {
-   NV_ERROR(dev, OOM copying ctxprog\n);
+   if (!pgraph-ctxvals) {
+   NV_ERROR(dev, OOM copying ctxvals\n);
release_firmware(fw);
nouveau_grctx_fini(dev);
return -ENOMEM;
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


[Nouveau] Synchronization mostly missing?

2009-12-27 Thread Luca Barbieri
It seems that Noveau is assuming that once the FIFO pointer is past a
command, that command has finished executing, and all the buffers it
used are no longer needed.

However, this seems to be false at least on G71.
In particular, the card may not have even finished reading the input
vertex buffers when the pushbuffer fence triggers.
While Mesa does not reuse the buffer object itself, the current
allocator tends to return memory that has just been freed, resulting
in the buffer actually been reused.
Thus Mesa will overwrite the vertices before the GPU has used them.

This results in all kinds of artifacts, such as vertices going to
infinity, and random polygons appearing.
This can be seen in progs/demos/engine, progs/demos/dinoshade,
Blender, Extreme Tux Racer and probably any non-trivial OpenGL
software.

The problem can be significantly reduced by just adding a waiting loop
at the end of draw_arrays and draw_elements, or by synchronizing
drawing by adding and calling the following function instead of
pipe-flush in nv40_vbo.c:
I think the remaining artifacts may be due to missing 2D engine
synchronization, but I'm not sure how that works.
Note that this causes the CPU to wait for rendering, which is not the
correct solution

static void nv40_sync(struct nv40_context *nv40)
{
nouveau_notifier_reset(nv40-screen-sync, 0);

//  BEGIN_RING(curie, 0x1d6c, 1);
//  OUT_RING(0x5c0);

//  static int value = 0x23;
//  BEGIN_RING(curie, 0x1d70, 1);
//  OUT_RING(value++);

BEGIN_RING(curie, NV40TCL_NOTIFY, 1);
OUT_RING(0);

BEGIN_RING(curie, NV40TCL_NOP, 1);
OUT_RING(0);

FIRE_RING(NULL);

nouveau_notifier_wait_status(nv40-screen-sync, 0, 0, 0);
}

It seems that NV40TCL_NOTIFY (which must be followed by a nop for some
reason) triggers a notification of rendering completion.
Furthermore, the card will probably put the value set with 0x1d70
somewhere, where 0x1d6c has an unknown use
The 1d70/1d6c is frequently used by the nVidia driver, with 0x1d70
being a sequence number, while 0x1d6c is always set to 0x5c0, while
NV40TCL_NOTIFY seems to be inserted on demand.
On my machine, setting 0x1d6c/0x1d70 like the nVidia driver does
causes a GPU lockup. That is probably because the location where the
GPU is supposed to put the value has not been setup correctly.

So it seems that the current model is wrong, and the current fence
should only be used to determine whether the pushbuffer itself can be
reused.
It seems that, after figuring out where the GPU writes the value and
how to use the mechanism properly, this should be used by the kernel
driver as the bo-sync_obj implementation.
This will delay destruction of the buffers, and thus prevent
reallocation of them, and artifacts, without synchronizing rendering.

I'm not sure why this hasn't been noticed before though.
Is everyone getting randomly misrendered OpenGL or is my machine
somehow more prone to reusing buffers?

What do you think? Is the analysis correct?
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Synchronization mostly missing?

2009-12-27 Thread Francisco Jerez
Hi,

Luca Barbieri l...@luca-barbieri.com writes:

 It seems that Noveau is assuming that once the FIFO pointer is past a
 command, that command has finished executing, and all the buffers it
 used are no longer needed.

 However, this seems to be false at least on G71.
 In particular, the card may not have even finished reading the input
 vertex buffers when the pushbuffer fence triggers.
 While Mesa does not reuse the buffer object itself, the current
 allocator tends to return memory that has just been freed, resulting
 in the buffer actually been reused.
 Thus Mesa will overwrite the vertices before the GPU has used them.

 This results in all kinds of artifacts, such as vertices going to
 infinity, and random polygons appearing.
 This can be seen in progs/demos/engine, progs/demos/dinoshade,
 Blender, Extreme Tux Racer and probably any non-trivial OpenGL
 software.


Can you reproduce this with your vertex buffers in VRAM instead of GART?
(to rule out that it's a fencing issue).

 The problem can be significantly reduced by just adding a waiting loop
 at the end of draw_arrays and draw_elements, or by synchronizing
 drawing by adding and calling the following function instead of
 pipe-flush in nv40_vbo.c:
 I think the remaining artifacts may be due to missing 2D engine
 synchronization, but I'm not sure how that works.
 Note that this causes the CPU to wait for rendering, which is not the
 correct solution

 static void nv40_sync(struct nv40_context *nv40)
 {
   nouveau_notifier_reset(nv40-screen-sync, 0);

 //BEGIN_RING(curie, 0x1d6c, 1);
 //OUT_RING(0x5c0);

 //static int value = 0x23;
 //BEGIN_RING(curie, 0x1d70, 1);
 //OUT_RING(value++);

   BEGIN_RING(curie, NV40TCL_NOTIFY, 1);
   OUT_RING(0);
   
   BEGIN_RING(curie, NV40TCL_NOP, 1);
   OUT_RING(0);
   
   FIRE_RING(NULL);

   nouveau_notifier_wait_status(nv40-screen-sync, 0, 0, 0);
 }

 It seems that NV40TCL_NOTIFY (which must be followed by a nop for some
 reason) triggers a notification of rendering completion.
 Furthermore, the card will probably put the value set with 0x1d70
 somewhere, where 0x1d6c has an unknown use
 The 1d70/1d6c is frequently used by the nVidia driver, with 0x1d70
 being a sequence number, while 0x1d6c is always set to 0x5c0, while
 NV40TCL_NOTIFY seems to be inserted on demand.
 On my machine, setting 0x1d6c/0x1d70 like the nVidia driver does
 causes a GPU lockup. That is probably because the location where the
 GPU is supposed to put the value has not been setup correctly.

 So it seems that the current model is wrong, and the current fence
 should only be used to determine whether the pushbuffer itself can be
 reused.
 It seems that, after figuring out where the GPU writes the value and
 how to use the mechanism properly, this should be used by the kernel
 driver as the bo-sync_obj implementation.
 This will delay destruction of the buffers, and thus prevent
 reallocation of them, and artifacts, without synchronizing rendering.

 I'm not sure why this hasn't been noticed before though.
 Is everyone getting randomly misrendered OpenGL or is my machine
 somehow more prone to reusing buffers?

 What do you think? Is the analysis correct?
 ___
 Nouveau mailing list
 Nouveau@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/nouveau


pgpRwKfHGCfxK.pgp
Description: PGP signature
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Synchronization mostly missing?

2009-12-27 Thread Luca Barbieri
I figured out the registers.

There is a fence/sync mechanism which apparently triggers after
rendering is finished.
There are two ways to use it, but they trigger at the same time
(spinning in a loop on the CPU checking them, they trigger at the same
iteration or in two successive iterations).

The first is the sync notifier, which involves a notifier object set
at NV40TCL_DMA_NOTIFY.
When NV40TCL_NOTIFY, with argument 0, followed by NV40TCL_NOP, with
argument 0 is inserted in the ring, the notifier object will be
notified when rendering is finished.
fbcon uses this to sync rendering.
Currently the Mesa driver sets an object but does not use it.
The renouveau traces use this mechanism only in the
EXT_framebuffer_object tests.
It's not clear what the purpose of the NOP is, but it seems necessary.

The second is the fence mechanism, which involves an object set at
NV40TCL_DMA_FENCE.
When register 0x1d70 is set, the value set there will be written to
the object at the offset programmed in 0x1d6c.
The offset in 0x1d6c must be 16-byte aligned, but the GPU seems to
only write 4 bytes with the sequence number.
Nouveau does not use this currently, and sets NV40TCL_DMA_FENCE to 0.
The nVidia driver uses this often. It allocates a 4KB object and asks
the GPU to put the sequence number always at offset 0x5c0. Why it does
this rather than allocating a 16 byte object and using offset 0 is
unknown.

IMHO the fence mechanism should be implemented in the kernel along
with the current FIFO fencing, and should protect the relocated buffer
object.
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Synchronization mostly missing?

2009-12-27 Thread Luca Barbieri
 Can you reproduce this with your vertex buffers in VRAM instead of GART?
 (to rule out that it's a fencing issue).

Putting the vertex buffers in VRAM makes things almost perfect, but
still with rare artifacts.
In particular, the yellow arrow in dinoshade sometimes becames a
yellow polygon on the floor, which happens almost every frame if I
move the window around.
It does fix demos/engine, blender and etracer is almost perfect.

Using my sync patch fixes demos/engine and demos/dinoshade, but still
leaves artifacts in blender when moving the rectangle and artifacts in
etracer.

Putting the vertex buffers in VRAM _AND_ adding my sync patch makes
things perfect on my system.

Using sync + a delay loop before drawing makes things better but still
problematic.

Also note that both adding wbinvd in the kernel at the start of push
buffer submission, running with nopat and synchronizing with the
current fence in the kernel had no effect on demos/engine artifacts.

Preventing loading of intel_agp did not seem to have any effect either
(but strangely, it still listed the aperture size, not sure what's up
there).

The last test I tried was, all together:
1. My nv40_sync patch
2. 3 wbinvd followed by spinning 1 times in the kernel at the
start of pushbuffer validation
3. Adding
BEGIN_RING(curie, NV40TCL_VTX_CACHE_INVALIDATE, 1);
OUT_RING(0);
before and after draw_elements and draw_arrays
4. Removing intel_agp

The logo on etracer's splash screen still, on some frames, flickered.
Only putting vertex buffers in VRAM fixed that.

I'm not really sure what is happening there.

It seems that there is the lack of synchronization plus some other problem.

Maybe there is indeed an on-GPU cache for AGP/PCI memory which isn't
getting flushed.
Maybe NV40TCL_VTX_CACHE_INVALIDATE should be used but not in the way I did.
I couldn't find it in renouveau traces, who did reverse engineer that?
What does that do?

Also, what happens when I remove intel_agp? Does it use PCI DMA?

BTW, it seems to me that adding the fencing mechanism I described is
necessary even if the vertices are read before the FIFO continues,
since rendering is not completed and currently I don't see anything
preventing TTM from, for instance, evicting the render buffer while it
is being rendered to.
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Synchronization mostly missing?

2009-12-27 Thread Younes Manton
On Mon, Dec 28, 2009 at 1:55 AM, Luca Barbieri l...@luca-barbieri.com wrote:
 Can you reproduce this with your vertex buffers in VRAM instead of GART?
 (to rule out that it's a fencing issue).

 Putting the vertex buffers in VRAM makes things almost perfect, but
 still with rare artifacts.
 In particular, the yellow arrow in dinoshade sometimes becames a
 yellow polygon on the floor, which happens almost every frame if I
 move the window around.
 It does fix demos/engine, blender and etracer is almost perfect.

 Using my sync patch fixes demos/engine and demos/dinoshade, but still
 leaves artifacts in blender when moving the rectangle and artifacts in
 etracer.

 Putting the vertex buffers in VRAM _AND_ adding my sync patch makes
 things perfect on my system.

 Using sync + a delay loop before drawing makes things better but still
 problematic.

 Also note that both adding wbinvd in the kernel at the start of push
 buffer submission, running with nopat and synchronizing with the
 current fence in the kernel had no effect on demos/engine artifacts.

 Preventing loading of intel_agp did not seem to have any effect either
 (but strangely, it still listed the aperture size, not sure what's up
 there).

 The last test I tried was, all together:
 1. My nv40_sync patch
 2. 3 wbinvd followed by spinning 1 times in the kernel at the
 start of pushbuffer validation
 3. Adding
 BEGIN_RING(curie, NV40TCL_VTX_CACHE_INVALIDATE, 1);
 OUT_RING(0);
 before and after draw_elements and draw_arrays
 4. Removing intel_agp

 The logo on etracer's splash screen still, on some frames, flickered.
 Only putting vertex buffers in VRAM fixed that.

 I'm not really sure what is happening there.

 It seems that there is the lack of synchronization plus some other problem.

 Maybe there is indeed an on-GPU cache for AGP/PCI memory which isn't
 getting flushed.
 Maybe NV40TCL_VTX_CACHE_INVALIDATE should be used but not in the way I did.
 I couldn't find it in renouveau traces, who did reverse engineer that?
 What does that do?

 Also, what happens when I remove intel_agp? Does it use PCI DMA?

 BTW, it seems to me that adding the fencing mechanism I described is
 necessary even if the vertices are read before the FIFO continues,
 since rendering is not completed and currently I don't see anything
 preventing TTM from, for instance, evicting the render buffer while it
 is being rendered to.

It's my understanding that once the FIFO get reg is past a certain
point all previous commands are guaranteed to be finished, which is
what our fencing is based on. I think we would all have corruption
issues if this wasn't the case. You can see that the FIFO get ptr
stops advancing after long running draw commands are submitted, and
the video decoder FIFO works similarly as well when the HW is lagging.

Anyhow, another person with a GF7 had the same problem and putting
vertex buffers in VRAM also improved things for him, so it could be a
hardware bug/quirk for some/all GF7s. We don't do it in general
because it's slower, but as a temporary workaround we can do that for
GF7 NV40s I guess. It likely also doesn't happen with immediate mode
vertex submission, which will be implemented sooner or later. I can't
reproduce it on my GF6 and I don't think anyone else has either.
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau


Re: [Nouveau] Synchronization mostly missing?

2009-12-27 Thread Krzysztof Smiechowicz
Luca Barbieri pisze:

 I'm not sure why this hasn't been noticed before though.
 Is everyone getting randomly misrendered OpenGL or is my machine
 somehow more prone to reusing buffers?

I reported a similar problem about 2 weeks ago. It first became apparent 
with NV40 but I also confirmed it with NV30 - in both cases it was 
visible in morph3d demo. As long as nothing changes in memory 
allocation, everything is fine. If I even move a window(which causes 
some allocations in the system) vertexes become damaged.


Some information from that previous emails:



I see this problem on morph3d demo. What it does is: for each frame 
create a call list and then call it 4 times.

ADDRVRAM OFFSET
AX
BY
CX

A,B,C is the memory offset of 32kb buffer created for vertex buffer when 
call lists are compiled. X,Y is the VRAM OFFSET (bo.mem.mm_node.start)

First buffer is created (X,A). When it gets full (after around 3 frames) 
second buffer is created (Y,B). Then first one is freed. When second 
buffer is full, third is created (X,C) - here the problem start:
according to my observations, the card seems to read vertexes not from 
address C but from address A as if it somehow remembered the initial 
address binding.

Other observations:
- the data during execution of gl commands actually seems to be put into 
location C - when I switch to software path, I could track down that it 
reads data from location C - rendering is done correctly in software path
- when I comment out freeing of memory manager node (bo.mem.mm_node), so 
that the third buffer is Z,C (paired with not yet used offset of VRAM) 
then hardware rendering behaves correctly - but this will make card run 
out of memory as no memory manager nodes will be deallocated
- when I switch the calls of glCallList into actual rendering code and 
disable invocation of glNewList/glEndList the hardware rendering also 
behaves correctly


Best regards,
Krzysztof
___
Nouveau mailing list
Nouveau@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/nouveau