[Mesa-dev] [PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some BOs in GTT

2014-07-17 Thread Michel Dänzer
From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 src/gallium/drivers/r300/r300_query.c |  2 +-
 src/gallium/drivers/r300/r300_render.c|  2 +-
 src/gallium/drivers/r300/r300_screen_buffer.c |  4 ++--
 src/gallium/drivers/r300/r300_texture.c   |  2 +-
 src/gallium/drivers/radeon/r600_buffer_common.c   |  9 ++--
 src/gallium/drivers/radeon/r600_texture.c |  2 ++
 src/gallium/drivers/radeon/radeon_uvd.c   |  8 +---
 src/gallium/drivers/radeon/radeon_vce.c   |  8 
 src/gallium/drivers/radeon/radeon_video.c | 11 ++
 src/gallium/drivers/radeon/radeon_video.h |  4 +++-
 src/gallium/drivers/radeonsi/si_state.c   |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 25 +++
 src/gallium/winsys/radeon/drm/radeon_drm_bo.h |  1 +
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 12 +++
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.h |  2 ++
 src/gallium/winsys/radeon/drm/radeon_winsys.h |  7 ++-
 17 files changed, 77 insertions(+), 26 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_query.c 
b/src/gallium/drivers/r300/r300_query.c
index 5305ebd..1679433 100644
--- a/src/gallium/drivers/r300/r300_query.c
+++ b/src/gallium/drivers/r300/r300_query.c
@@ -59,7 +59,7 @@ static struct pipe_query *r300_create_query(struct 
pipe_context *pipe,
 q->num_pipes = r300screen->info.r300_num_gb_pipes;
 
 q->buf = r300->rws->buffer_create(r300->rws, 4096, 4096, TRUE,
-  RADEON_DOMAIN_GTT);
+  RADEON_DOMAIN_GTT, 0);
 if (!q->buf) {
 FREE(q);
 return NULL;
diff --git a/src/gallium/drivers/r300/r300_render.c 
b/src/gallium/drivers/r300/r300_render.c
index 175b83a..6e5b381 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -907,7 +907,7 @@ static boolean r300_render_allocate_vertices(struct 
vbuf_render* render,
 r300->vbo = rws->buffer_create(rws,
MAX2(R300_MAX_DRAW_VBO_SIZE, size),
R300_BUFFER_ALIGNMENT, TRUE,
-   RADEON_DOMAIN_GTT);
+   RADEON_DOMAIN_GTT, 0);
 if (!r300->vbo) {
 return FALSE;
 }
diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c 
b/src/gallium/drivers/r300/r300_screen_buffer.c
index 86e4478..de557b5 100644
--- a/src/gallium/drivers/r300/r300_screen_buffer.c
+++ b/src/gallium/drivers/r300/r300_screen_buffer.c
@@ -103,7 +103,7 @@ r300_buffer_transfer_map( struct pipe_context *context,
 /* Create a new one in the same pipe_resource. */
 new_buf = r300->rws->buffer_create(r300->rws, rbuf->b.b.width0,
R300_BUFFER_ALIGNMENT, TRUE,
-   rbuf->domain);
+   rbuf->domain, 0);
 if (new_buf) {
 /* Discard the old buffer. */
 pb_reference(&rbuf->buf, NULL);
@@ -185,7 +185,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen 
*screen,
 rbuf->buf =
 r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.width0,
R300_BUFFER_ALIGNMENT, TRUE,
-   rbuf->domain);
+   rbuf->domain, 0);
 if (!rbuf->buf) {
 FREE(rbuf);
 return NULL;
diff --git a/src/gallium/drivers/r300/r300_texture.c 
b/src/gallium/drivers/r300/r300_texture.c
index 4ea69dc..ffe8c00 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -1042,7 +1042,7 @@ r300_texture_create_object(struct r300_screen *rscreen,
 /* Create the backing buffer if needed. */
 if (!tex->buf) {
 tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, TRUE,
-  tex->domain);
+  tex->domain, 0);
 
 if (!tex->buf) {
 goto fail;
diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index 0eaa817..4e6b897 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -107,11 +107,14 @@ bool r600_init_resource(struct r600_common_screen 
*rscreen,
 {
struct r600_texture *rtex = (struct r600_texture*)res;
struct pb_buffer *old_buf, *new_buf;
+   enum radeon_bo_flag flags = 0;
 
switch (res->b.b.usage) {
-   case PIPE_USAGE_STAGING:
case PIPE_USAGE_DYNAMIC:
case PIPE_USAGE_STREAM:
+   flags = RADEON_FLAG_GTT_WC;
+   /* fall through */
+   

[Mesa-dev] [PATCH 1/5] winsys/radeon: Use separate caching buffer managers for VRAM and GTT

2014-07-17 Thread Michel Dänzer
From: Michel Dänzer 

This should reduce overhead, because each caching buffer manager no longer
needs to consider buffers of the wrong type.

Signed-off-by: Michel Dänzer 
---
 src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 10 +++---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 16 +++-
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.h |  3 ++-
 3 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
index 0ebe196..d06bb34 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -800,10 +800,14 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
 desc.initial_domains = domain;
 
 /* Assign a buffer manager. */
-if (use_reusable_pool)
-provider = ws->cman;
-else
+if (use_reusable_pool) {
+if (domain == RADEON_DOMAIN_VRAM)
+provider = ws->cman_vram;
+else
+provider = ws->cman_gtt;
+} else {
 provider = ws->kman;
+}
 
 buffer = provider->create_buffer(provider, size, &desc.base);
 if (!buffer)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 576fea5..0834cbd 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -417,7 +417,8 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
 pipe_mutex_destroy(ws->cmask_owner_mutex);
 pipe_mutex_destroy(ws->cs_stack_lock);
 
-ws->cman->destroy(ws->cman);
+ws->cman_vram->destroy(ws->cman_vram);
+ws->cman_gtt->destroy(ws->cman_gtt);
 ws->kman->destroy(ws->kman);
 if (ws->gen >= DRV_R600) {
 radeon_surface_manager_free(ws->surf_man);
@@ -632,8 +633,11 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t 
screen_create)
 ws->kman = radeon_bomgr_create(ws);
 if (!ws->kman)
 goto fail;
-ws->cman = pb_cache_manager_create(ws->kman, 100, 2.0f, 0);
-if (!ws->cman)
+ws->cman_vram = pb_cache_manager_create(ws->kman, 100, 2.0f, 0);
+if (!ws->cman_vram)
+goto fail;
+ws->cman_gtt = pb_cache_manager_create(ws->kman, 100, 2.0f, 0);
+if (!ws->cman_gtt)
 goto fail;
 
 if (ws->gen >= DRV_R600) {
@@ -689,8 +693,10 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t 
screen_create)
 
 fail:
 pipe_mutex_unlock(fd_tab_mutex);
-if (ws->cman)
-ws->cman->destroy(ws->cman);
+if (ws->cman_gtt)
+ws->cman_gtt->destroy(ws->cman_gtt);
+if (ws->cman_vram)
+ws->cman_vram->destroy(ws->cman_vram);
 if (ws->kman)
 ws->kman->destroy(ws->kman);
 if (ws->surf_man)
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
index 18fe0ae..fc6f53b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -57,7 +57,8 @@ struct radeon_drm_winsys {
 uint32_t va_start;
 
 struct pb_manager *kman;
-struct pb_manager *cman;
+struct pb_manager *cman_vram;
+struct pb_manager *cman_gtt;
 struct radeon_surface_manager *surf_man;
 
 uint32_t num_cpus;  /* Number of CPUs. */
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT

2014-07-17 Thread Michel Dänzer
In order to try and improve X(Shm)PutImage performance with glamor, I
implemented support for write-combined CPU mappings of BOs in GTT.

This did provide a nice speedup, but to my surprise, using VRAM instead
of write-combined GTT turned out to be even faster in general on my
Kaveri machine, both for the internal GPU and for discrete GPUs.

However, I've kept the changes from GTT to VRAM separated, in case this
turns out to be a loss on other setups.

Kernel patches:

[PATCH 1/5] drm/radeon: Remove radeon_gart_restore()
[PATCH 2/5] drm/radeon: Pass GART page flags to radeon_gart_set_page() explicitly
[PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in GTT
[PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and IBs on >= SI
[PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI

Mesa patches:

[PATCH 1/5] winsys/radeon: Use separate caching buffer managers for VRAM and GTT
[PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some BOs in GTT
[PATCH 3/5] r600g/radeonsi: Prefer VRAM for CPU -> GPU streaming buffers
[PATCH 4/5] r600g,radeonsi: Use write-combined persistent GTT mappings
[PATCH 5/5] r600g,radeonsi: Prefer VRAM for persistent mappings
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings

2014-07-17 Thread Michel Dänzer
From: Michel Dänzer 

This is hopefully safe: The kernel makes sure writes to these mappings
finish before the GPU might start reading from them, and the GPU caches
are invalidated at the start of a command stream.

Signed-off-by: Michel Dänzer 
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index 40917f0..c8a0723 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -131,7 +131,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
  PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
res->domains = RADEON_DOMAIN_GTT;
-   flags = 0;
+   flags = RADEON_FLAG_GTT_WC;
}
 
/* Tiled textures are unmappable. Always put them in VRAM. */
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] drm/radeon: Pass GART page flags to radeon_gart_set_page() explicitly

2014-07-17 Thread Michel Dänzer
From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/radeon/r100.c|  2 +-
 drivers/gpu/drm/radeon/r300.c| 12 +---
 drivers/gpu/drm/radeon/radeon.h  | 12 +---
 drivers/gpu/drm/radeon/radeon_asic.h |  8 
 drivers/gpu/drm/radeon/radeon_gart.c |  9 ++---
 drivers/gpu/drm/radeon/radeon_ttm.c  |  8 ++--
 drivers/gpu/drm/radeon/rs400.c   | 13 ++---
 drivers/gpu/drm/radeon/rs600.c   | 16 +++-
 include/uapi/drm/radeon_drm.h|  4 +++-
 9 files changed, 59 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index ed1c53e..9241b89 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -682,7 +682,7 @@ void r100_pci_gart_disable(struct radeon_device *rdev)
 }
 
 void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i,
-   uint64_t addr)
+   uint64_t addr, uint32_t flags)
 {
u32 *gtt = rdev->gart.ptr;
gtt[i] = cpu_to_le32(lower_32_bits(addr));
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 8d14e66..75b3033 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -69,17 +69,23 @@ void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev)
mb();
 }
 
+#define R300_PTE_UNSNOOPED (1 << 0)
 #define R300_PTE_WRITEABLE (1 << 2)
 #define R300_PTE_READABLE  (1 << 3)
 
 void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i,
- uint64_t addr)
+ uint64_t addr, uint32_t flags)
 {
void __iomem *ptr = rdev->gart.ptr;
 
addr = (lower_32_bits(addr) >> 8) |
-  ((upper_32_bits(addr) & 0xff) << 24) |
-  R300_PTE_WRITEABLE | R300_PTE_READABLE;
+   ((upper_32_bits(addr) & 0xff) << 24);
+   if (flags & RADEON_GART_PAGE_READ)
+   addr |= R300_PTE_READABLE;
+   if (flags & RADEON_GART_PAGE_WRITE)
+   addr |= R300_PTE_WRITEABLE;
+   if (!(flags & RADEON_GART_PAGE_SNOOP))
+   addr |= R300_PTE_UNSNOOPED;
/* on x86 we want this to be CPU endian, on powerpc
 * on powerpc without HW swappers, it'll get swapped on way
 * into VRAM - so no need for cpu_to_le32 on VRAM tables */
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index f4869b4..4dd092e 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -589,6 +589,12 @@ struct radeon_mc;
 #define RADEON_GPU_PAGE_SHIFT 12
 #define RADEON_GPU_PAGE_ALIGN(a) (((a) + RADEON_GPU_PAGE_MASK) & 
~RADEON_GPU_PAGE_MASK)
 
+#define RADEON_GART_PAGE_DUMMY  0
+#define RADEON_GART_PAGE_VALID (1 << 0)
+#define RADEON_GART_PAGE_READ  (1 << 1)
+#define RADEON_GART_PAGE_WRITE (1 << 2)
+#define RADEON_GART_PAGE_SNOOP (1 << 3)
+
 struct radeon_gart {
dma_addr_t  table_addr;
struct radeon_bo*robj;
@@ -613,7 +619,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, 
unsigned offset,
int pages);
 int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 int pages, struct page **pagelist,
-dma_addr_t *dma_addr);
+dma_addr_t *dma_addr, uint32_t flags);
 
 
 /*
@@ -1775,7 +1781,7 @@ struct radeon_asic {
struct {
void (*tlb_flush)(struct radeon_device *rdev);
void (*set_page)(struct radeon_device *rdev, unsigned i,
-uint64_t addr);
+uint64_t addr, uint32_t flags);
} gart;
struct {
int (*init)(struct radeon_device *rdev);
@@ -2702,7 +2708,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t 
v);
 #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), 
(state))
 #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev))
 #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev))
-#define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), 
(i), (p))
+#define radeon_gart_set_page(rdev, i, p, f) 
(rdev)->asic->gart.set_page((rdev), (i), (p), (f))
 #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev))
 #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev))
 #define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) 
((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), 
(flags)))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h 
b/drivers/gpu/drm/radeon/radeon_asic.h
index 01e7c0a..f632e31 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -68,7 +68,7 @@ int r100_asic_reset(struct radeon_device *rdev);
 u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc);
 void r100_pci_gart_tlb_flush(struct radeon_device

[Mesa-dev] [PATCH 3/5] r600g/radeonsi: Prefer VRAM for CPU -> GPU streaming buffers

2014-07-17 Thread Michel Dänzer
From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index 4e6b897..40917f0 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -110,15 +110,13 @@ bool r600_init_resource(struct r600_common_screen 
*rscreen,
enum radeon_bo_flag flags = 0;
 
switch (res->b.b.usage) {
-   case PIPE_USAGE_DYNAMIC:
-   case PIPE_USAGE_STREAM:
-   flags = RADEON_FLAG_GTT_WC;
-   /* fall through */
case PIPE_USAGE_STAGING:
/* Transfers are likely to occur more often with these 
resources. */
res->domains = RADEON_DOMAIN_GTT;
break;
case PIPE_USAGE_DEFAULT:
+   case PIPE_USAGE_STREAM:
+   case PIPE_USAGE_DYNAMIC:
case PIPE_USAGE_IMMUTABLE:
default:
/* Not listing GTT here improves performance in some apps. */
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI

2014-07-17 Thread Michel Dänzer
From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/radeon/cik.c | 3 +++
 drivers/gpu/drm/radeon/cik_sdma.c| 2 ++
 drivers/gpu/drm/radeon/ni.c  | 3 +++
 drivers/gpu/drm/radeon/ni_dma.c  | 2 ++
 drivers/gpu/drm/radeon/radeon_ring.c | 2 +-
 5 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index df39095..8af5c9a 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -3846,6 +3846,9 @@ void cik_ring_ib_execute(struct radeon_device *rdev, 
struct radeon_ib *ib)
  (ib->gpu_addr & 0xFFFFFFFC));
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
radeon_ring_write(ring, control);
+
+   /* Flush HDP cache */
+   WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c 
b/drivers/gpu/drm/radeon/cik_sdma.c
index 3396b28..2ab873d 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -158,6 +158,8 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
radeon_ring_write(ring, upper_32_bits(ib->gpu_addr));
radeon_ring_write(ring, ib->length_dw);
 
+   /* Flush HDP cache */
+   WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index b589fe7..ea58e5b 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1397,6 +1397,9 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, 
struct radeon_ib *ib)
radeon_ring_write(ring, 0xFFFFFFFF);
radeon_ring_write(ring, 0);
radeon_ring_write(ring, ((ib->vm ? ib->vm->id : 0) << 24) | 10); /* 
poll interval */
+
+   /* Flush HDP cache (for SI) */
+   WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
 }
 
 static void cayman_cp_enable(struct radeon_device *rdev, bool enable)
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
index 119fc69..0e575ea 100644
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -148,6 +148,8 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev,
radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0));
radeon_ring_write(ring, (ib->length_dw << 12) | 
(upper_32_bits(ib->gpu_addr) & 0xFF));
 
+   /* Flush HDP cache (for SI) */
+   WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c 
b/drivers/gpu/drm/radeon/radeon_ring.c
index 62e9e57..31ac4fd 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -206,7 +206,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
  RADEON_IB_POOL_SIZE*64*1024,
  RADEON_GPU_PAGE_SIZE,
- RADEON_GEM_DOMAIN_GTT,
+ RADEON_GEM_DOMAIN_VRAM,
  RADEON_GEM_GTT_WC);
} else {
/* Without GPUVM, it's better to stick to cacheable GTT due
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in GTT

2014-07-17 Thread Michel Dänzer
From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/radeon/cik.c  |  4 ++--
 drivers/gpu/drm/radeon/cik_sdma.c |  3 ++-
 drivers/gpu/drm/radeon/evergreen.c| 12 
 drivers/gpu/drm/radeon/r600.c |  4 ++--
 drivers/gpu/drm/radeon/radeon.h   |  3 ++-
 drivers/gpu/drm/radeon/radeon_benchmark.c |  4 ++--
 drivers/gpu/drm/radeon/radeon_device.c|  3 ++-
 drivers/gpu/drm/radeon/radeon_fb.c|  2 +-
 drivers/gpu/drm/radeon/radeon_gart.c  |  2 +-
 drivers/gpu/drm/radeon/radeon_gem.c   | 16 ++--
 drivers/gpu/drm/radeon/radeon_object.c| 24 +++-
 drivers/gpu/drm/radeon/radeon_object.h|  5 +++--
 drivers/gpu/drm/radeon/radeon_prime.c |  2 +-
 drivers/gpu/drm/radeon/radeon_ring.c  |  4 ++--
 drivers/gpu/drm/radeon/radeon_sa.c|  4 ++--
 drivers/gpu/drm/radeon/radeon_test.c  |  4 ++--
 drivers/gpu/drm/radeon/radeon_ttm.c   |  2 +-
 drivers/gpu/drm/radeon/radeon_uvd.c   |  6 +++---
 drivers/gpu/drm/radeon/radeon_vce.c   |  2 +-
 drivers/gpu/drm/radeon/radeon_vm.c|  8 ++--
 drivers/gpu/drm/radeon/si_dma.c   |  3 ++-
 21 files changed, 70 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 1b0da66..a9fd3e7 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4374,7 +4374,7 @@ static int cik_mec_init(struct radeon_device *rdev)
r = radeon_bo_create(rdev,
 rdev->mec.num_mec *rdev->mec.num_pipe * 
MEC_HPD_SIZE * 2,
 PAGE_SIZE, true,
-RADEON_GEM_DOMAIN_GTT, NULL,
+RADEON_GEM_DOMAIN_GTT, 0, NULL,
 &rdev->mec.hpd_eop_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", 
r);
@@ -4544,7 +4544,7 @@ static int cik_cp_compute_resume(struct radeon_device 
*rdev)
r = radeon_bo_create(rdev,
 sizeof(struct bonaire_mqd),
 PAGE_SIZE, true,
-RADEON_GEM_DOMAIN_GTT, NULL,
+RADEON_GEM_DOMAIN_GTT, 0, NULL,
 &rdev->ring[idx].mqd_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create MQD bo 
failed\n", r);
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c 
b/drivers/gpu/drm/radeon/cik_sdma.c
index 8e9d0f1..a7f66c8 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -742,7 +742,8 @@ void cik_sdma_vm_set_page(struct radeon_device *rdev,
 
trace_radeon_vm_set_page(pe, addr, count, incr, flags);
 
-   if (flags == R600_PTE_GART) {
+   /* XXX: How to distinguish between GART and other system memory pages? 
*/
+   if (flags & R600_PTE_SYSTEM) {
uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8;
while (count) {
unsigned bytes = count * 8;
diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 39ada71..902334f 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -4022,7 +4022,8 @@ int sumo_rlc_init(struct radeon_device *rdev)
/* save restore block */
if (rdev->rlc.save_restore_obj == NULL) {
r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-RADEON_GEM_DOMAIN_VRAM, NULL, 
&rdev->rlc.save_restore_obj);
+RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+&rdev->rlc.save_restore_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create RLC sr bo 
failed\n", r);
return r;
@@ -4100,7 +4101,8 @@ int sumo_rlc_init(struct radeon_device *rdev)
 
if (rdev->rlc.clear_state_obj == NULL) {
r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true,
-RADEON_GEM_DOMAIN_VRAM, NULL, 
&rdev->rlc.clear_state_obj);
+RADEON_GEM_DOMAIN_VRAM, 0, NULL,
+&rdev->rlc.clear_state_obj);
if (r) {
dev_warn(rdev->dev, "(%d) create RLC c bo 
failed\n", r);
sumo_rlc_fini(rdev);
@@ -4174,8 +4176,10 @@ int sumo_rlc_init(struct radeon_device *rdev)
 
if (rdev->rlc.cp_table_size) {
if (rdev->rlc.cp_table_obj == NULL) {
-   r = radeon_bo_create(rdev, rdev->rlc.c

[Mesa-dev] [PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and IBs on >= SI

2014-07-17 Thread Michel Dänzer
From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/radeon/cik.c |  3 +++
 drivers/gpu/drm/radeon/cik_sdma.c|  4 
 drivers/gpu/drm/radeon/ni.c  |  3 +++
 drivers/gpu/drm/radeon/ni_dma.c  |  4 
 drivers/gpu/drm/radeon/radeon_ring.c | 22 +-
 5 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index a9fd3e7..df39095 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -4181,6 +4181,9 @@ u32 cik_gfx_get_wptr(struct radeon_device *rdev,
 void cik_gfx_set_wptr(struct radeon_device *rdev,
  struct radeon_ring *ring)
 {
+   /* Make IB/ring buffer writes land before the WPTR register write */
+   wmb();
+
WREG32(CP_RB0_WPTR, ring->wptr);
(void)RREG32(CP_RB0_WPTR);
 }
diff --git a/drivers/gpu/drm/radeon/cik_sdma.c 
b/drivers/gpu/drm/radeon/cik_sdma.c
index a7f66c8..3396b28 100644
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@@ -112,12 +112,16 @@ void cik_sdma_set_wptr(struct radeon_device *rdev,
 {
u32 reg;
 
+   /* Make IB/ring buffer writes land before the WPTR register write */
+   wmb();
+
if (ring->idx == R600_RING_TYPE_DMA_INDEX)
reg = SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET;
else
reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET;
 
WREG32(reg, (ring->wptr << 2) & 0x3fffc);
+   (void)RREG32(reg);
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 327b85f..b589fe7 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1449,6 +1449,9 @@ u32 cayman_gfx_get_wptr(struct radeon_device *rdev,
 void cayman_gfx_set_wptr(struct radeon_device *rdev,
 struct radeon_ring *ring)
 {
+   /* Make IB/ring buffer writes land before the WPTR register write */
+   wmb();
+
if (ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
WREG32(CP_RB0_WPTR, ring->wptr);
(void)RREG32(CP_RB0_WPTR);
diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c
index 6378e02..119fc69 100644
--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@@ -103,12 +103,16 @@ void cayman_dma_set_wptr(struct radeon_device *rdev,
 {
u32 reg;
 
+   /* Make IB/ring buffer writes land before the WPTR register write */
+   wmb();
+
if (ring->idx == R600_RING_TYPE_DMA_INDEX)
reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET;
else
reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET;
 
WREG32(reg, (ring->wptr << 2) & 0x3fffc);
+   (void)RREG32(reg);
 }
 
 /**
diff --git a/drivers/gpu/drm/radeon/radeon_ring.c 
b/drivers/gpu/drm/radeon/radeon_ring.c
index 71439f0..62e9e57 100644
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@@ -201,10 +201,22 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
if (rdev->ib_pool_ready) {
return 0;
}
-   r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
- RADEON_IB_POOL_SIZE*64*1024,
- RADEON_GPU_PAGE_SIZE,
- RADEON_GEM_DOMAIN_GTT, 0);
+
+   if (rdev->family >= CHIP_TAHITI) {
+   r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
+ RADEON_IB_POOL_SIZE*64*1024,
+ RADEON_GPU_PAGE_SIZE,
+ RADEON_GEM_DOMAIN_GTT,
+ RADEON_GEM_GTT_WC);
+   } else {
+   /* Without GPUVM, it's better to stick to cacheable GTT due
+* to the command stream patching
+*/
+   r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
+ RADEON_IB_POOL_SIZE*64*1024,
+ RADEON_GPU_PAGE_SIZE,
+ RADEON_GEM_DOMAIN_GTT, 0);
+   }
if (r) {
return r;
}
@@ -640,7 +652,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct 
radeon_ring *ring, unsig
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true,
-RADEON_GEM_DOMAIN_GTT, 0,
+RADEON_GEM_DOMAIN_GTT, RADEON_GEM_GTT_WC,
 NULL, &ring->ring_obj);
if (r) {
dev_err(rdev->dev, "(%d) ring create failed\n", r);
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/5] drm/radeon: Remove radeon_gart_restore()

2014-07-17 Thread Michel Dänzer
From: Michel Dänzer 

This doesn't seem necessary; the GART table memory should be persistent.

Signed-off-by: Michel Dänzer 
---
 drivers/gpu/drm/radeon/cik.c |  1 -
 drivers/gpu/drm/radeon/evergreen.c   |  1 -
 drivers/gpu/drm/radeon/ni.c  |  1 -
 drivers/gpu/drm/radeon/r100.c|  1 -
 drivers/gpu/drm/radeon/r300.c|  1 -
 drivers/gpu/drm/radeon/r600.c|  1 -
 drivers/gpu/drm/radeon/radeon.h  |  1 -
 drivers/gpu/drm/radeon/radeon_gart.c | 27 ---
 drivers/gpu/drm/radeon/rs400.c   |  1 -
 drivers/gpu/drm/radeon/rs600.c   |  1 -
 drivers/gpu/drm/radeon/rv770.c   |  1 -
 drivers/gpu/drm/radeon/si.c  |  1 -
 12 files changed, 38 deletions(-)

diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c
index 0b24711..1b0da66 100644
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@@ -5401,7 +5401,6 @@ static int cik_pcie_gart_enable(struct radeon_device 
*rdev)
r = radeon_gart_table_vram_pin(rdev);
if (r)
return r;
-   radeon_gart_restore(rdev);
/* Setup TLB control */
WREG32(MC_VM_MX_L1_TLB_CNTL,
   (0xA << 7) |
diff --git a/drivers/gpu/drm/radeon/evergreen.c 
b/drivers/gpu/drm/radeon/evergreen.c
index 250bac3..39ada71 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2424,7 +2424,6 @@ static int evergreen_pcie_gart_enable(struct 
radeon_device *rdev)
r = radeon_gart_table_vram_pin(rdev);
if (r)
return r;
-   radeon_gart_restore(rdev);
/* Setup L2 cache */
WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 5a33ca6..327b85f 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1229,7 +1229,6 @@ static int cayman_pcie_gart_enable(struct radeon_device 
*rdev)
r = radeon_gart_table_vram_pin(rdev);
if (r)
return r;
-   radeon_gart_restore(rdev);
/* Setup TLB control */
WREG32(MC_VM_MX_L1_TLB_CNTL,
   (0xA << 7) |
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 1544efc..ed1c53e 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -652,7 +652,6 @@ int r100_pci_gart_enable(struct radeon_device *rdev)
 {
uint32_t tmp;
 
-   radeon_gart_restore(rdev);
/* discard memory request outside of configured range */
tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS;
WREG32(RADEON_AIC_CNTL, tmp);
diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c
index 3c21d77..8d14e66 100644
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@@ -120,7 +120,6 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev)
r = radeon_gart_table_vram_pin(rdev);
if (r)
return r;
-   radeon_gart_restore(rdev);
/* discard memory request outside of configured range */
tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD;
WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp);
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index c66952d..e1be5ce 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -968,7 +968,6 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev)
r = radeon_gart_table_vram_pin(rdev);
if (r)
return r;
-   radeon_gart_restore(rdev);
 
/* Setup L2 cache */
WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING |
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 079eac7..f4869b4 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -614,7 +614,6 @@ void radeon_gart_unbind(struct radeon_device *rdev, 
unsigned offset,
 int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 int pages, struct page **pagelist,
 dma_addr_t *dma_addr);
-void radeon_gart_restore(struct radeon_device *rdev);
 
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c 
b/drivers/gpu/drm/radeon/radeon_gart.c
index 2e72365..b7d3e84 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -298,33 +298,6 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned 
offset,
 }
 
 /**
- * radeon_gart_restore - bind all pages in the gart page table
- *
- * @rdev: radeon_device pointer
- *
- * Binds all pages in the gart page table (all asics).
- * Used to rebuild the gart table on device startup or resume.
- */
-void radeon_gart_restore(struct radeon_device *rdev)
-{
-   int i, j, t;
-   u64 page_base;
-
-   if (!rdev->gart.ptr) {
-   return;
-   }

[Mesa-dev] [PATCH 5/5] r600g, radeonsi: Prefer VRAM for persistent mappings

2014-07-17 Thread Michel Dänzer
From: Michel Dänzer 

Signed-off-by: Michel Dänzer 
---
 src/gallium/drivers/radeon/r600_buffer_common.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
b/src/gallium/drivers/radeon/r600_buffer_common.c
index c8a0723..6f7fa29 100644
--- a/src/gallium/drivers/radeon/r600_buffer_common.c
+++ b/src/gallium/drivers/radeon/r600_buffer_common.c
@@ -125,12 +125,10 @@ bool r600_init_resource(struct r600_common_screen 
*rscreen,
break;
}
 
-   /* Use GTT for all persistent mappings, because they are
-* always cached and coherent. */
if (res->b.b.target == PIPE_BUFFER &&
res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
  PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
-   res->domains = RADEON_DOMAIN_GTT;
+   res->domains = RADEON_DOMAIN_VRAM;
flags = RADEON_FLAG_GTT_WC;
}
 
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT

2014-07-17 Thread Christian König

Am 17.07.2014 12:01, schrieb Michel Dänzer:

In order to try and improve X(Shm)PutImage performance with glamor, I
implemented support for write-combined CPU mappings of BOs in GTT.

This did provide a nice speedup, but to my surprise, using VRAM instead
of write-combined GTT turned out to be even faster in general on my
Kaveri machine, both for the internal GPU and for discrete GPUs.

However, I've kept the changes from GTT to VRAM separated, in case this
turns out to be a loss on other setups.

Kernel patches:

[PATCH 1/5] drm/radeon: Remove radeon_gart_restore()
[PATCH 2/5] drm/radeon: Pass GART page flags to
[PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in
[PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and


Those four are Reviewed-by: Christian König 


[PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI


I'm still not very keen on this change since I still don't understand 
the reason why it's faster than with GTT. Definitely needs more testing 
on a wider range of systems. Maybe limit it to APUs for now?


Regards,
Christian.



Mesa patches:

[PATCH 1/5] winsys/radeon: Use separate caching buffer managers for
[PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some
[PATCH 3/5] r600g/radeonsi: Prefer VRAM for CPU -> GPU streaming
[PATCH 4/5] r600g,radeonsi: Use write-combined persistent GTT
[PATCH 5/5] r600g,radeonsi: Prefer VRAM for persistent mappings
___
dri-devel mailing list
dri-de...@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings

2014-07-17 Thread Grigori Goronzy
On 17.07.2014 12:01, Michel Dänzer wrote:
> From: Michel Dänzer 
> 
> This is hopefully safe: The kernel makes sure writes to these mappings
> finish before the GPU might start reading from them, and the GPU caches
> are invalidated at the start of a command stream.
>

Aren't CPU reads from write-combined GTT memory extraordinarily slow,
because they're uncached? And don't you need the right access patterns
to make write combining perform well?

Grigori

> Signed-off-by: Michel Dänzer 
> ---
>  src/gallium/drivers/radeon/r600_buffer_common.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
> b/src/gallium/drivers/radeon/r600_buffer_common.c
> index 40917f0..c8a0723 100644
> --- a/src/gallium/drivers/radeon/r600_buffer_common.c
> +++ b/src/gallium/drivers/radeon/r600_buffer_common.c
> @@ -131,7 +131,7 @@ bool r600_init_resource(struct r600_common_screen 
> *rscreen,
>   res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
> PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
>   res->domains = RADEON_DOMAIN_GTT;
> - flags = 0;
> + flags = RADEON_FLAG_GTT_WC;
>   }
>  
>   /* Tiled textures are unmappable. Always put them in VRAM. */
> 




signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Fix crash in update_framebuffer_state

2014-07-17 Thread Marek Olšák
The patch is wrong. is_rtt is only set for textures, not
renderbuffers, so it breaks rendering to depth renderbuffers.

Marek

On Thu, Jul 17, 2014 at 12:02 AM, David Weber  wrote:
> Hi,
>
> GPU: Radeon HD 5770
> mesa: 10.2.2 with gallium/llvm backend
> llvm: 3.4.2
> linux: 3.15.3
> xf86-video-ati: 7.4.0
>
> Switching from the software to the OpenGL backend in Gwenview with EGL
> enabled QT4 crashes with the following BT:
> state_tracker/st_atom_framebuffer.c:60:update_framebuffer_size:
> Assertion `surface' failed.
>
> Program received signal SIGTRAP, Trace/breakpoint trap.
> 0x7fffe09e7ec1 in _debug_assert_fail (expr=0x7fffe0f9f85c
> "surface", file=0x7fffe0f9f838 "state_tracker/st_atom_framebuffer.c",
> line=60,
> function=0x7fffe0f9f9a0 <__func__.33915>
> "update_framebuffer_size") at util/u_debug.c:277
> 277 util/u_debug.c: Datei oder Verzeichnis nicht gefunden.
> (gdb) bt
> #0  0x7fffe09e7ec1 in _debug_assert_fail (expr=0x7fffe0f9f85c
> "surface", file=0x7fffe0f9f838 "state_tracker/st_atom_framebuffer.c",
> line=60,
> function=0x7fffe0f9f9a0 <__func__.33915>
> "update_framebuffer_size") at util/u_debug.c:277
> #1  0x7fffe0c8715d in update_framebuffer_size
> (framebuffer=0x17f82b0, surface=0x0) at
> state_tracker/st_atom_framebuffer.c:60
> #2  0x7fffe0c87446 in update_framebuffer_state (st=0x17f76d0) at
> state_tracker/st_atom_framebuffer.c:132
> #3  0x7fffe0c84457 in st_validate_state (st=0x17f76d0) at
> state_tracker/st_atom.c:213
> #4  0x7fffe0c91618 in st_Clear (ctx=0x17b3a30, mask=2) at
> state_tracker/st_cb_clear.c:446
> #5  0x7fffe0b10a39 in _mesa_Clear (mask=16384) at main/clear.c:226
> #6  0x720c9aaa in ?? () from /usr/lib64/qt4/libQtOpenGL.so.4
> #7  0x74a21cfb in QPainter::begin(QPaintDevice*) () from
> /usr/lib64/qt4/libQtGui.so.4
> #8  0x74a22768 in QPainter::QPainter(QPaintDevice*) () from
> /usr/lib64/qt4/libQtGui.so.4
> #9  0x74ec9544 in QGraphicsView::paintEvent(QPaintEvent*) ()
> from /usr/lib64/qt4/libQtGui.so.4
> #10 0x749221f0 in QWidget::event(QEvent*) () from
> /usr/lib64/qt4/libQtGui.so.4
> #11 0x74cb595e in QFrame::event(QEvent*) () from
> /usr/lib64/qt4/libQtGui.so.4
> #12 0x74ecd32b in QGraphicsView::viewportEvent(QEvent*) ()
> from /usr/lib64/qt4/libQtGui.so.4
> #13 0x76a9f223 in
> QCoreApplicationPrivate::sendThroughObjectEventFilters(QObject*,
> QEvent*) () from /usr/lib64/qt4/libQtCore.so.4
> #14 0x748d4bac in QApplicationPrivate::notify_helper(QObject*,
> QEvent*) () from /usr/lib64/qt4/libQtGui.so.4
> #15 0x748d7602 in QApplication::notify(QObject*, QEvent*) ()
> from /usr/lib64/qt4/libQtGui.so.4
> #16 0x75600a08 in KApplication::notify(QObject*, QEvent*) ()
> from /usr/lib64/libkdeui.so.5
> #17 0x76a9f0ad in QCoreApplication::notifyInternal(QObject*,
> QEvent*) () from /usr/lib64/qt4/libQtCore.so.4
> #18 0x7492705f in QWidgetPrivate::drawWidget(QPaintDevice*,
> QRegion const&, QPoint const&, int, QPainter*, QWidgetBackingStore*)
> () from /usr/lib64/qt4/libQtGui.so.4
> #19 0x74ae5639 in QWidgetPrivate::repaint_sys(QRegion const&)
> () from /usr/lib64/qt4/libQtGui.so.4
> #20 0x749159e4 in QWidgetPrivate::syncBackingStore() () from
> /usr/lib64/qt4/libQtGui.so.4
> #21 0x74922691 in QWidget::event(QEvent*) () from
> /usr/lib64/qt4/libQtGui.so.4
> #22 0x7209fd0a in QGLWidget::event(QEvent*) () from
> /usr/lib64/qt4/libQtOpenGL.so.4
> #23 0x748d4bcc in QApplicationPrivate::notify_helper(QObject*,
> QEvent*) () from /usr/lib64/qt4/libQtGui.so.4
> #24 0x748d7602 in QApplication::notify(QObject*, QEvent*) ()
> from /usr/lib64/qt4/libQtGui.so.4
> #25 0x75600a08 in KApplication::notify(QObject*, QEvent*) ()
> from /usr/lib64/libkdeui.so.5
> #26 0x76a9f0ad in QCoreApplication::notifyInternal(QObject*,
> QEvent*) () from /usr/lib64/qt4/libQtCore.so.4
> #27 0x76aa26e8 in
> QCoreApplicationPrivate::sendPostedEvents(QObject*, int, QThreadData*)
> () from /usr/lib64/qt4/libQtCore.so.4
> #28 0x76acd653 in ?? () from /usr/lib64/qt4/libQtCore.so.4
> #29 0x705e4a94 in g_main_context_dispatch () from
> /usr/lib64/libglib-2.0.so.0
> #30 0x705e4df0 in ?? () from /usr/lib64/libglib-2.0.so.0
> #31 0x705e4eac in g_main_context_iteration () from
> /usr/lib64/libglib-2.0.so.0
> #32 0x76acd7c6 in
> QEventDispatcherGlib::processEvents(QFlags<QEventLoop::ProcessEventsFlag>)
> () from /usr/lib64/qt4/libQtCore.so.4
> #33 0x74975f26 in ?? () from /usr/lib64/qt4/libQtGui.so.4
> #34 0x76a9dcef in
> QEventLoop::processEvents(QFlags<QEventLoop::ProcessEventsFlag>) ()
> from /usr/lib64/qt4/libQtCore.so.4
> #35 0x76a9dfd0 in
> QEventLoop::exec(QFlags<QEventLoop::ProcessEventsFlag>) () from
> /usr/lib64/qt4/libQtCore.so.4
> #36 0x74d6e147 in QDialog::exec() () from /usr/lib64/qt4/libQtGui.so.4
> #37 0x0044c7f3 in Gwenview::MainWindow::showConfigDialog
> (this=0xaf1e30) at /home/weber/work/gwenview/app/mainwin

Re: [Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings

2014-07-17 Thread Marek Olšák
The resource flags actually tell you what you can do. If the COHERENT
flag is set, the mapping must be cached. If it's unset, it's up to
you.

If write-combining is faster for vertex uploads, then Glamor shouldn't
set the coherent flag.

Marek

On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> This is hopefully safe: The kernel makes sure writes to these mappings
> finish before the GPU might start reading from them, and the GPU caches
> are invalidated at the start of a command stream.
>
> Signed-off-by: Michel Dänzer 
> ---
>  src/gallium/drivers/radeon/r600_buffer_common.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
> b/src/gallium/drivers/radeon/r600_buffer_common.c
> index 40917f0..c8a0723 100644
> --- a/src/gallium/drivers/radeon/r600_buffer_common.c
> +++ b/src/gallium/drivers/radeon/r600_buffer_common.c
> @@ -131,7 +131,7 @@ bool r600_init_resource(struct r600_common_screen 
> *rscreen,
> res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
>   PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
> res->domains = RADEON_DOMAIN_GTT;
> -   flags = 0;
> +   flags = RADEON_FLAG_GTT_WC;
> }
>
> /* Tiled textures are unmappable. Always put them in VRAM. */
> --
> 2.0.0
>
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] clover: Call end_query before getting timestamp result v2

2014-07-17 Thread Francisco Jerez
Tom Stellard  writes:

> v2:
>   - Move the end_query() call into the timestamp constructor.
>   - Still pass false as the wait parameter to get_query_result().

Reviewed-by: Francisco Jerez 

> ---
>  src/gallium/state_trackers/clover/core/timestamp.cpp | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/gallium/state_trackers/clover/core/timestamp.cpp 
> b/src/gallium/state_trackers/clover/core/timestamp.cpp
> index 481c4f9..3fd341f 100644
> --- a/src/gallium/state_trackers/clover/core/timestamp.cpp
> +++ b/src/gallium/state_trackers/clover/core/timestamp.cpp
> @@ -30,6 +30,7 @@ using namespace clover;
>  timestamp::query::query(command_queue &q) :
> q(q),
> _query(q.pipe->create_query(q.pipe, PIPE_QUERY_TIMESTAMP, 0)) {
> +   q.pipe->end_query(q.pipe, _query);
>  }
>  
>  timestamp::query::query(query &&other) :
> -- 
> 1.8.1.5


pgp6BHrKR3yXH.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 5/5] r600g, radeonsi: Prefer VRAM for persistent mappings

2014-07-17 Thread Marek Olšák
Like I said at patch 4, this would be okay if the COHERENT flag wasn't set.

If you removed the PERSISTENT flag from the conditional, the placement
of persistent non-coherent buffers would be driven by the "usage",
meaning that you would be able to get any kind of placement you want.

Marek

On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer  wrote:
> From: Michel Dänzer 
>
> Signed-off-by: Michel Dänzer 
> ---
>  src/gallium/drivers/radeon/r600_buffer_common.c | 4 +---
>  1 file changed, 1 insertion(+), 3 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c 
> b/src/gallium/drivers/radeon/r600_buffer_common.c
> index c8a0723..6f7fa29 100644
> --- a/src/gallium/drivers/radeon/r600_buffer_common.c
> +++ b/src/gallium/drivers/radeon/r600_buffer_common.c
> @@ -125,12 +125,10 @@ bool r600_init_resource(struct r600_common_screen 
> *rscreen,
> break;
> }
>
> -   /* Use GTT for all persistent mappings, because they are
> -* always cached and coherent. */
> if (res->b.b.target == PIPE_BUFFER &&
> res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
>   PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
> -   res->domains = RADEON_DOMAIN_GTT;
> +   res->domains = RADEON_DOMAIN_VRAM;
> flags = RADEON_FLAG_GTT_WC;
> }
>
> --
> 2.0.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] clover: Use 1 as default value for CL_DEVICE_PROFILING_TIMER_RESOLUTION

2014-07-17 Thread Francisco Jerez
Tom Stellard  writes:

> We use PIPE_QUERY_TIMESTAMP for profiling events, and gallium specifies
> that the timestamp be in nanoseconds.
> ---
>  src/gallium/state_trackers/clover/api/device.cpp | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/src/gallium/state_trackers/clover/api/device.cpp 
> b/src/gallium/state_trackers/clover/api/device.cpp
> index 1176668..25d29f5 100644
> --- a/src/gallium/state_trackers/clover/api/device.cpp
> +++ b/src/gallium/state_trackers/clover/api/device.cpp
> @@ -249,7 +249,9 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param,
>break;
>  
> case CL_DEVICE_PROFILING_TIMER_RESOLUTION:
> -  buf.as_scalar() = 0;
> +  // PIPE_QUERY_TIMESTAMP returns a timestamp in units of nanoseconds,
> +  // so we default to 1 here.
> +  buf.as_scalar() = 1;
>break;

I guess we should use PIPE_QUERY_TIMESTAMP_DISJOINT to calculate the
timer resolution?

Thanks.

>  
> case CL_DEVICE_ENDIAN_LITTLE:
> -- 
> 1.8.1.5


pgpo4ygcLtAqk.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT

2014-07-17 Thread Marek Olšák
On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer  wrote:
> Mesa patches:
>
> [PATCH 1/5] winsys/radeon: Use separate caching buffer managers for
> [PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some
> [PATCH 3/5] r600g/radeonsi: Prefer VRAM for CPU -> GPU streaming

For these 3 patches:

Reviewed-by: Marek Olšák 

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT

2014-07-17 Thread Alex Deucher
On Thu, Jul 17, 2014 at 6:01 AM, Michel Dänzer  wrote:
> In order to try and improve X(Shm)PutImage performance with glamor, I
> implemented support for write-combined CPU mappings of BOs in GTT.
>
> This did provide a nice speedup, but to my surprise, using VRAM instead
> of write-combined GTT turned out to be even faster in general on my
> Kaveri machine, both for the internal GPU and for discrete GPUs.
>
> However, I've kept the changes from GTT to VRAM separated, in case this
> turns out to be a loss on other setups.
>
> Kernel patches:
>
> [PATCH 1/5] drm/radeon: Remove radeon_gart_restore()
> [PATCH 2/5] drm/radeon: Pass GART page flags to
> [PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in
> [PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and
> [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI

Applied 1-4 to my 3.17 tree.  thanks!

Alex

>
> Mesa patches:
>
> [PATCH 1/5] winsys/radeon: Use separate caching buffer managers for
> [PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some
> [PATCH 3/5] r600g/radeonsi: Prefer VRAM for CPU -> GPU streaming
> [PATCH 4/5] r600g,radeonsi: Use write-combined persistent GTT
> [PATCH 5/5] r600g,radeonsi: Prefer VRAM for persistent mappings
> ___
> dri-devel mailing list
> dri-de...@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k

2014-07-17 Thread Thorsten Glaser
On Thu, 17 Jul 2014, Eero Tamminen wrote:

> While effect of unaligned accesses is normally invisible,

No, the compiler is inserting padding here silently.
We call this “implicit padding”. The problem with it
is that this padding is architecture-dependent, and
some platforms have other alignment requirements than
other platforms.

Take this example:

struct {
char c;
int i;
} foo;

This looks like this to the programmer:

┌───┬───┬───┬───┬───┐
│ c │ i   i   i   i │
└───┴───┴───┴───┴───┘

But it looks like this on i386:

┌───┬───┬───┬───┬───┬───┬───┬───┐
│ c │ XpaddingX │ i   i   i   i │
└───┴───┴───┴───┴───┴───┴───┴───┘

And only like this on m68k:

┌───┬───┬───┬───┬───┬───┐
│ c │ X │ i   i   i   i │
└───┴───┴───┴───┴───┴───┘

This is because the compiler uses the architecture’s optimal
minimum alignment for “implicit” padding, to avoid the misalignment
you’re talking about. On i386, access to a 32-bit quantity is fast
if it’s 4-byte aligned; on m68k, 2-byte alignment is not only enough
for it to be fast (4-byte would have no benefit), but is also required
by the ABI.


To fix this, we use explicit padding:

struct {
char c;
char unused1[3];
int i;
} foo;

Now all cases look the same (except if you have a CPU which
wants to align its “int”s to 64 bit…).


The problem here is that the code in question uses arrays of
such structs with implicit padding, and checks their sizes
against its expectations. Maybe because the array is written
directly to the hardware.

What my patch does is to insert e̲x̲p̲l̲i̲c̲i̲t̲ padding to exactly
match the i̲m̲p̲l̲i̲c̲i̲t̲ padding present on the i386 architecture,
to make this the “minimum amount of padding” used. (Other
architectures may still insert implicit padding, e.g. if
they want their “int”s to be 64-bit aligned, but that’s
outside of the scope of this, and will fail with that code
anyway.)

bye,
//mirabilos
-- 
[16:04:33] bkix: "veni vidi violini"
[16:04:45] bkix: "ich kam, sah und vergeigte"...
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k

2014-07-17 Thread Ilia Mirkin
On Thu, Jul 17, 2014 at 4:21 AM, Thorsten Glaser  wrote:
> On Thu, 17 Jul 2014, Eero Tamminen wrote:
>
>> While effect of unaligned accesses is normally invisible,
>
> No, the compiler is inserting padding here silently.
> We call this “implicit padding”. The problem with it

[ strip excellent explanation of the problem ]

Yes, implicit padding is definitely bad. I didn't realize I even had
it in there, although I'm glad my STATIC_ASSERT did its job. There are
various references to patches, however I haven't seen any. Perhaps
they're lost in my inbox, or they were never sent to mesa-dev, or
something else. Could the patch author (or another interested party)
send it as a proper patch to mesa-dev? Assuming it doesn't break x86,
happy to check it in.

Cheers,

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k

2014-07-17 Thread Jan Vesely
On Thu, 2014-07-17 at 10:21 +0200, Thorsten Glaser wrote:
> On Thu, 17 Jul 2014, Eero Tamminen wrote:
> 
> > While effect of unaligned accesses is normally invisible,
> 
> No, the compiler is inserting padding here silently.
> We call this “implicit padding”. The problem with it
> is that this padding is architecture-dependent, and
> some platforms have other alignment requirements than
> other platforms.
> 
> Take this example:
> 
> struct {
>   char c;
>   int i;
> } foo;
> 
> This looks like this to the programmer:
> 
> ┌───┬───┬───┬───┬───┐
> │ c │ i   i   i   i │
> └───┴───┴───┴───┴───┘
> 
> But it looks like this on i386:
> 
> ┌───┬───┬───┬───┬───┬───┬───┬───┐
> │ c │ XpaddingX │ i   i   i   i │
> └───┴───┴───┴───┴───┴───┴───┴───┘
> 
> And only like this on m68k:
> 
> ┌───┬───┬───┬───┬───┬───┐
> │ c │ X │ i   i   i   i │
> └───┴───┴───┴───┴───┴───┘
> 
> This is because the compiler uses the architecture’s optimal
> minimum alignment for “implicit” padding, to avoid the misalignment
> you’re talking about. On i386, access to a 32-bit quantity is fast
> if it’s 4-byte aligned; on m68k, 2-byte alignment is not only enough
> for it to be fast (4-byte would have no benefit), but is also required
> by the ABI.
> 
> 
> To fix this, we use explicit padding:
> 
> struct {
>   char c;
>   char unused1[3];
>   int i;
> } foo;
> 
> Now all cases look the same (except if you have a CPU which
> wants to align its “int”s to 64 bit…).
> 
> 
> The problem here is that the code in question uses arrays of
> such structs with implicit padding, and checks their sizes
> against its expectations. Maybe because the array is written
> directly to the hardware.
> 
> What my patch does is to insert e̲x̲p̲l̲i̲c̲i̲t̲ padding to exactly
> match the i̲m̲p̲l̲i̲c̲i̲t̲ padding present on the i386 architecture,
> to make this the “minimum amount of padding” used. (Other
> architectures may still insert implicit padding, e.g. if
> they want their “int”s to be 64-bit aligned, but that’s
> outside of the scope of this, and will fail with that code
> anyway.)

just a question
why not use __attribute__ ((aligned(X))) for explicit padding?
the attached program produces the following output on my x64 machine:

natural: size 8, offset c: 0, offset i: 4 
explicit 8: size 16, offset c: 0, offset i: 8 
explicit 2: size 8, offset c: 0, offset i: 4

and I get the same output on arm32.

regards,
Jan

> 
> bye,
> //mirabilos

-- 
Jan Vesely 

#include <stdio.h>
#include <stddef.h>

/* Baseline layout: no alignment attributes, so the padding between
 * c and i is whatever the compiler picks for the target. */
struct foo {
	char c;
	int i;
};

/* Same members as the baseline, but i carries the GCC aligned(8)
 * attribute, i.e. it asks for 8-byte alignment of that member. */
struct bar {
	char c;
	int i __attribute__ ((aligned (8)));
};

/* Same members again, with i asking for aligned(2).
 * NOTE(review): GCC documents that aligned alone can only raise a
 * member's alignment (packed is needed to lower it), so this is not
 * expected to move i below its natural offset -- confirm per target. */
struct baz {
	char c;
	int i __attribute__ ((aligned (2)));
};


int main(void)
{
	/* Collect the size and member offsets of each layout up front ... */
	const size_t natural_size = sizeof(struct foo);
	const size_t natural_off_c = offsetof(struct foo, c);
	const size_t natural_off_i = offsetof(struct foo, i);

	const size_t align8_size = sizeof(struct bar);
	const size_t align8_off_c = offsetof(struct bar, c);
	const size_t align8_off_i = offsetof(struct bar, i);

	const size_t align2_size = sizeof(struct baz);
	const size_t align2_off_c = offsetof(struct baz, c);
	const size_t align2_off_i = offsetof(struct baz, i);

	/* ... then report them, one line per struct, in declaration order. */
	printf("natural: size %zu, offset c: %zu, offset i: %zu \n",
		natural_size, natural_off_c, natural_off_i);
	printf("explicit 8: size %zu, offset c: %zu, offset i: %zu \n",
		align8_size, align8_off_c, align8_off_i);
	printf("explicit 2: size %zu, offset c: %zu, offset i: %zu \n",
		align2_size, align2_off_c, align2_off_i);

	return 0;
}


signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k

2014-07-17 Thread Ilia Mirkin
On Thu, Jul 17, 2014 at 10:24 AM, Thorsten Glaser  wrote:
> On Thu, 17 Jul 2014, Ilia Mirkin wrote:
>
>> Yes, implicit padding is definitely bad. I didn't realize I even had
>> it in there, although I'm glad my STATIC_ASSERT did its job. There are
>
> ;-)
>
>> various references to patches, however I haven't seen any. Perhaps
>> they're lost in my inbox, or they were never sent to mesa-dev, or
>> something else. Could the patch author (or another interested party)
>> send it as a proper patch to mesa-dev? Assuming it doesn't break x86,
>> happy to check it in.
>
> Oh fun. I’m the patch author, and it was here:
> https://bugs.debian.org/cgi-bin/bugreport.cgi?msg=37;filename=mesa_10.2.3-1%2Bm68k.1.debdiff;att=1;bug=728053
>
> I’ve attached the part you’re probably most interested in.

Thanks! Munged the commit description a little and pushed to the
master branch. I think the patches for 10.2.4 have already been
selected, so it should make its way into 10.2.5.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k

2014-07-17 Thread Thorsten Glaser
On Thu, 17 Jul 2014, Ilia Mirkin wrote:

> Yes, implicit padding is definitely bad. I didn't realize I even had
> it in there, although I'm glad my STATIC_ASSERT did its job. There are

;-)

> various references to patches, however I haven't seen any. Perhaps
> they're lost in my inbox, or they were never sent to mesa-dev, or
> something else. Could the patch author (or another interested party)
> send it as a proper patch to mesa-dev? Assuming it doesn't break x86,
> happy to check it in.

Oh fun. I’m the patch author, and it was here:
https://bugs.debian.org/cgi-bin/bugreport.cgi?msg=37;filename=mesa_10.2.3-1%2Bm68k.1.debdiff;att=1;bug=728053

I’ve attached the part you’re probably most interested in.

Thanks,
//mirabilos
-- 
Sometimes they [people] care too much: pretty printers [and syntax highligh-
ting, d.A.] mechanically produce pretty output that accentuates irrelevant
detail in the program, which is as sensible as putting all the prepositions
in English text in bold font.   -- Rob Pike in "Notes on Programming in C"
From: Thorsten Glaser
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k

2014-07-17 Thread Thorsten Glaser
On Thu, 17 Jul 2014, Jan Vesely wrote:

> why not use __attribute__ ((aligned(X))) for explicit padding?

That’s ① GCC-specific and ② relies on environmental guarantees
that cannot always be given (e.g. you cannot align a struct
more than the stack alignment if it is ever passed on the
stack; for static or auto storage, it relies on the linker;
for malloc, it relies on libc).

The approach here is not to make the struct aligned, but
rather, to make it consistent within itself. In a portable
way, even (although we do use GCC 4.9 on Debian/m68k).

bye,
//mirabilos
-- 
«MyISAM tables -will- get corrupted eventually. This is a fact of life. »
“mysql is about as much database as ms access” – “MSSQL at least descends
from a database” “it's a rebranded SyBase” “MySQL however was born from a
flatfile and went downhill from there” – “at least jetDB doesn’t claim to
be a database”  ‣‣‣ Please, http://deb.li/mysql and MariaDB, finally die!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k

2014-07-17 Thread John Paul Adrian Glaubitz
Hi Ilia!

On 07/17/2014 04:35 PM, Ilia Mirkin wrote:
> Thanks! Munged the commit description a little and pushed to the
> master branch. I think the patches for 10.2.4 have already been
> selected, so it should make its way into 10.2.5.

Awesome, thank you so much! Glad we could finally fix this one :).

Cheers,

Adrian

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaub...@debian.org
`. `'   Freie Universitaet Berlin - glaub...@physik.fu-berlin.de
  `-GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/2] Make a llvmpipe context basically thread safe.

2014-07-17 Thread Jose Fonseca

Hi Mathias,

Thanks for doing this.

Patch 2 looks great.

For patch 1 I'd prefer that, instead of keeping a global list of
contexts, these are passed by the caller as an argument to
gallivm_create().  It will be a more invasive change, but it will be
cleaner.  And in particular it will guarantee that when llvmpipe
contexts are destroyed there will be no lingering LLVM contexts either.


Jose


On 13/07/14 17:13, Mathias Fröhlich wrote:

Hi Jose,

This makes llvmpipe thread safe as mandated by the OpenGL standard.
The changes replace the use of two global data structures with
non global ones.
The changes pass piglit as of today without regressions.
The patchset deviates from your last suggestion. But lacking sufficient time
I just went ahead and implemented something that fixes this problem
preserving hopefully enough of the memory savings the shared
ShaderMemoryManager initially provided. The more extended solution
with a single jit memory manager per GL context could be implemented
on top of this change.

Please review!
Mathias

Mathias Fröhlich (2):
   llvmpipe: Pool the LLVMContexts in use.
   llvmpipe: Make a llvmpipe OpenGL context thread safe.

  src/gallium/auxiliary/gallivm/lp_bld_init.c   | 87 ---
  src/gallium/auxiliary/gallivm/lp_bld_init.h   |  1 +
  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 38 ++--
  src/gallium/auxiliary/gallivm/lp_bld_misc.h   |  3 +
  4 files changed, 100 insertions(+), 29 deletions(-)



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] mesa: Correctly use glDrawBuffers for multiple buffers and glDrawBuffer for one buffer.

2014-07-17 Thread Pavel Popov
According to the spec (OpenGL 4.0 specification, pages 254-255), different
bits may be set for a single buffer and for multiple buffers: for
glDrawBuffer up to four bits may be set, but for glDrawBuffers only one
bit can be set per buffer.

_mesa_drawbuffers is called with ctx->Const.MaxDrawBuffers and NULL
arguments whenever _mesa_update_framebuffer or _mesa_update_draw_buffers
is called. In this situation the glDrawBuffers implementation is used for
any number of buffers, even for one. But the glDrawBuffer implementation
has to be used for a single buffer instead of the glDrawBuffers one.

The Piglit test 'gl30basic' fails with the following assertion on a debug
build of Mesa and passes on a release build:
'main/buffers.c:520: _mesa_drawbuffers: Assertion `__builtin_popcount(destMask[buf]) == 1' failed.'
Some other tests are probably affected as well.

Signed-off-by: Pavel Popov 
---
 src/mesa/main/buffers.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index b13a7af..a640360 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -480,6 +480,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const 
GLenum *buffers,
struct gl_framebuffer *fb = ctx->DrawBuffer;
GLbitfield mask[MAX_DRAW_BUFFERS];
GLuint buf;
+   GLuint m = n;
 
if (!destMask) {
   /* compute destMask values now */
@@ -489,15 +490,17 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const 
GLenum *buffers,
  mask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]);
  ASSERT(mask[output] != BAD_MASK);
  mask[output] &= supportedMask;
+ if (mask[output] == 0)
+m--;
   }
   destMask = mask;
}
 
/*
-* If n==1, destMask[0] may have up to four bits set.
+* If m==1, destMask[0] may have up to four bits set.
 * Otherwise, destMask[x] can only have one bit set.
 */
-   if (n == 1) {
+   if (m == 1) {
   GLuint count = 0, destMask0 = destMask[0];
   while (destMask0) {
  GLint bufIndex = ffs(destMask0) - 1;
-- 
1.9.1



Closed Joint Stock Company Intel A/O
Registered legal address: Krylatsky Hills Business Park, 
17 Krylatskaya Str., Bldg 4, Moscow 121614, 
Russian Federation

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] radeon/llvm: enable unsafe math for graphics shaders

2014-07-17 Thread Grigori Goronzy
Accuracy of some operations was recently improved in the R600 backend,
at the cost of slower code. This is required for compute shaders,
but not for graphics shaders. Add unsafe-fp-math hint to make LLVM
generate faster but possibly less accurate code.

Piglit didn't indicate any regressions.
---
 src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 1b17dd4..171ccaa 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -26,6 +26,7 @@
 #include "radeon_llvm_emit.h"
 #include "radeon_elf_util.h"
 #include "util/u_memory.h"
+#include "pipe/p_shader_tokens.h"
 
 #include 
 #include 
@@ -50,6 +51,10 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
   sprintf(Str, "%1d", type);
 
   LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
+
+  if (type != TGSI_PROCESSOR_COMPUTE) {
+LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true");
+  }
 }
 
 static void init_r600_target() {
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] radeon/llvm: fix formatting

2014-07-17 Thread Grigori Goronzy
Use K&R and same indent as most other code. No functional change
intended.
---
 src/gallium/drivers/radeon/radeon_llvm_emit.c | 24 ++--
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 171ccaa..53694b7 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -47,17 +47,18 @@
  */
 void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
 {
-  char Str[2];
-  sprintf(Str, "%1d", type);
+   char Str[2];
+   sprintf(Str, "%1d", type);
 
-  LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
+   LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
 
-  if (type != TGSI_PROCESSOR_COMPUTE) {
-LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true");
-  }
+   if (type != TGSI_PROCESSOR_COMPUTE) {
+   LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true");
+   }
 }
 
-static void init_r600_target() {
+static void init_r600_target()
+{
static unsigned initialized = 0;
if (!initialized) {
LLVMInitializeR600TargetInfo();
@@ -68,7 +69,8 @@ static void init_r600_target() {
}
 }
 
-static LLVMTargetRef get_r600_target() {
+static LLVMTargetRef get_r600_target()
+{
LLVMTargetRef target = NULL;
 
for (target = LLVMGetFirstTarget(); target;
@@ -87,7 +89,8 @@ static LLVMTargetRef get_r600_target() {
 
 #if HAVE_LLVM >= 0x0305
 
-static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) {
+static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context)
+{
if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) {
unsigned int *diagnosticflag = (unsigned int *)context;
char *diaginfo_message = LLVMGetDiagInfoDescription(di);
@@ -106,7 +109,8 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef 
di, void *context) {
  * @returns 0 for success, 1 for failure
  */
 unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary 
*binary,
- const char * gpu_family, unsigned 
dump) {
+ const char *gpu_family, unsigned dump)
+{
 
LLVMTargetRef target;
LLVMTargetMachineRef tm;
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 80848] [dri3] Building mesa fails with dri3 enabled

2014-07-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=80848

--- Comment #8 from Emil Velikov  ---
Juha-Pekka, Bryan

Can you guys test the patch in comment 7 ? I feel slightly reluctant about
breaking my system in order to test it myself.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/12] main/formats: Remove IndexBits

2014-07-17 Thread Jason Ekstrand
Mesa hasn't supported color-indexed textures for some time.  This is 0 for
all texture formats, so we don't need to store it.

Signed-off-by: Jason Ekstrand 
---
 src/mesa/main/format_info.py | 2 +-
 src/mesa/main/formats.c  | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mesa/main/format_info.py b/src/mesa/main/format_info.py
index 9b63bfb..b8956a5 100644
--- a/src/mesa/main/format_info.py
+++ b/src/mesa/main/format_info.py
@@ -171,7 +171,7 @@ for fmat in formats:
 
bits = [ get_channel_bits(fmat, name) for name in ['r', 'g', 'b', 'a']]
print '  {0},'.format(', '.join(map(str, bits)))
-   bits = [ get_channel_bits(fmat, name) for name in ['l', 'i', 'I', 'z', 's']]
+   bits = [ get_channel_bits(fmat, name) for name in ['l', 'i', 'z', 's']]
print '  {0},'.format(', '.join(map(str, bits)))
 
print '  {0}, {1}, {2},'.format(fmat.block_width, fmat.block_height,
diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index e237064..39cc5f1 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -59,7 +59,6 @@ struct gl_format_info
GLubyte AlphaBits;
GLubyte LuminanceBits;
GLubyte IntensityBits;
-   GLubyte IndexBits;
GLubyte DepthBits;
GLubyte StencilBits;
 
@@ -145,7 +144,7 @@ _mesa_get_format_bits(mesa_format format, GLenum pname)
case GL_TEXTURE_LUMINANCE_SIZE:
   return info->LuminanceBits;
case GL_INDEX_BITS:
-  return info->IndexBits;
+  return 0;
case GL_DEPTH_BITS:
case GL_TEXTURE_DEPTH_SIZE_ARB:
case GL_RENDERBUFFER_DEPTH_SIZE_EXT:
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/12] main: Add a format description CSV file

2014-07-17 Thread Jason Ekstrand
Signed-off-by: Jason Ekstrand 
---
 src/mesa/main/formats.csv | 282 ++
 1 file changed, 282 insertions(+)
 create mode 100644 src/mesa/main/formats.csv

diff --git a/src/mesa/main/formats.csv b/src/mesa/main/formats.csv
new file mode 100644
index 000..5abb706
--- /dev/null
+++ b/src/mesa/main/formats.csv
@@ -0,0 +1,282 @@
+###
+#
+# Copyright 2009-2010 VMware, Inc.
+# Copyright 2014 Intel Corporation
+# All Rights Reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the
+# "Software"), to deal in the Software without restriction, including
+# without limitation the rights to use, copy, modify, merge, publish,
+# distribute, sub license, and/or sell copies of the Software, and to
+# permit persons to whom the Software is furnished to do so, subject to
+# the following conditions:
+#
+# The above copyright notice and this permission notice (including the
+# next paragraph) shall be included in all copies or substantial portions
+# of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+###
+
+# This CSV file has the input data for gen_format.h and gen_format.c
+#
+# Each format entry contains:
+# - name, per enum mesa_format
+# - layout
+# - pixel block's width
+# - pixel block's height
+# - channel encoding (only meaningful for array or packed layout), containing 
for each
+#   channel the following information:
+#   - type, one of
+# - 'x': void
+# - 'u': unsigned
+# - 's': signed
+# - 'h': fixed
+# - 'f': FLOAT
+#   - optionally followed by 'n' if it is normalized
+#   - number of bits
+# - channel swizzle
+# - color space: rgb, srgb, yuv, sz
+
+# None
+# Described as regular uint_8 bytes, i.e. MESA_FORMAT_R8_USCALED
+MESA_FORMAT_NONE  , other , 1, 1, x8  , , ,
 , 0001, rgb
+
+# Packed unorm formats
+MESA_FORMAT_A8B8G8R8_UNORM, packed, 1, 1, un8 , un8 , un8 , 
un8 , wzyx, rgb
+MESA_FORMAT_X8B8G8R8_UNORM, packed, 1, 1, x8  , un8 , un8 , 
un8 , wzy1, rgb
+MESA_FORMAT_R8G8B8A8_UNORM, packed, 1, 1, un8 , un8 , un8 , 
un8 , xyzw, rgb
+MESA_FORMAT_R8G8B8X8_UNORM, packed, 1, 1, un8 , un8 , un8 , x8 
 , xyz1, rgb
+MESA_FORMAT_B8G8R8A8_UNORM, packed, 1, 1, un8 , un8 , un8 , 
un8 , zyxw, rgb
+MESA_FORMAT_B8G8R8X8_UNORM, packed, 1, 1, un8 , un8 , un8 , x8 
 , zyx1, rgb
+MESA_FORMAT_A8R8G8B8_UNORM, packed, 1, 1, un8 , un8 , un8 , 
un8 , yzwx, rgb
+MESA_FORMAT_X8R8G8B8_UNORM, packed, 1, 1, x8  , un8 , un8 , 
un8 , yzw1, rgb
+MESA_FORMAT_L16A16_UNORM  , packed, 1, 1, un16, un16, ,
 , xxxy, rgb
+MESA_FORMAT_A16L16_UNORM  , packed, 1, 1, un16, un16, ,
 , yyyx, rgb
+MESA_FORMAT_B5G6R5_UNORM  , packed, 1, 1, un5 , un6 , un5 ,
 , zyx1, rgb
+MESA_FORMAT_R5G6B5_UNORM  , packed, 1, 1, un5 , un6 , un5 ,
 , xyz1, rgb
+MESA_FORMAT_B4G4R4A4_UNORM, packed, 1, 1, un4 , un4 , un4 , 
un4 , zyxw, rgb
+MESA_FORMAT_B4G4R4X4_UNORM, packed, 1, 1, un4 , un4 , un4 , x4 
 , zyx1, rgb
+MESA_FORMAT_A4R4G4B4_UNORM, packed, 1, 1, un4 , un4 , un4 , 
un4 , yzwx, rgb
+MESA_FORMAT_A1B5G5R5_UNORM, packed, 1, 1, un1 , un5 , un5 , 
un5 , wzyx, rgb
+MESA_FORMAT_B5G5R5A1_UNORM, packed, 1, 1, un5 , un5 , un5 , 
un1 , zyxw, rgb
+MESA_FORMAT_B5G5R5X1_UNORM, packed, 1, 1, un5 , un5 , un5 , x1 
 , zyx1, rgb
+MESA_FORMAT_A1R5G5B5_UNORM, packed, 1, 1, un1 , un5 , un5 , 
un5 , yzwx, rgb
+MESA_FORMAT_L8A8_UNORM, packed, 1, 1, un8 , un8 , ,
 , xxxy, rgb
+MESA_FORMAT_A8L8_UNORM, packed, 1, 1, un8 , un8 , ,
 , yyyx, rgb
+MESA_FORMAT_R8G8_UNORM, packed, 1, 1, un8 , un8 , ,
 , xy01, rgb
+MESA_FORMAT_G8R8_UNORM, packed, 1, 1, un8 , un8 , ,
 , yx01, rgb
+MESA_FORMAT_L4A4_UNORM, packed, 1, 1, un4 , un4 , ,
 , xxxy, rgb
+
+MESA_FORMAT_B2G3R3_UNORM  , packed, 1, 1, un2 , un3 , un3 ,
 , zyx1, rgb
+MESA_FORMAT_R16G16_UNORM  , packed, 1, 1, un16, un16, ,
 , xy01, rgb
+MESA_FORMAT_G16R16_UNORM   

[Mesa-dev] [PATCH 04/12] main/formats: Add layout and swizzle information

2014-07-17 Thread Jason Ekstrand
Signed-off-by: Jason Ekstrand 
---
 src/mesa/main/format_info.py | 11 +++
 src/mesa/main/formats.c  | 46 
 src/mesa/main/formats.h  | 29 
 3 files changed, 86 insertions(+)

diff --git a/src/mesa/main/format_info.py b/src/mesa/main/format_info.py
index b8956a5..448bd00 100644
--- a/src/mesa/main/format_info.py
+++ b/src/mesa/main/format_info.py
@@ -96,6 +96,14 @@ def get_gl_data_type(fmat):
else:
   assert False
 
+def get_mesa_layout(fmat):
+   if fmat.layout == 'array':
+  return 'MESA_FORMAT_LAYOUT_ARRAY'
+   elif fmat.layout == 'packed':
+  return 'MESA_FORMAT_LAYOUT_PACKED'
+   else:
+  return 'MESA_FORMAT_LAYOUT_OTHER'
+
 def get_channel_bits(fmat, chan_name):
if fmat.is_compressed():
   # These values are pretty-much bogus, but OpenGL requires that we
@@ -166,6 +174,7 @@ for fmat in formats:
print '   {'
print '  {0},'.format(fmat.name)
print '  "{0}",'.format(fmat.name)
+   print '  {0},'.format(get_mesa_layout(fmat))
print '  {0},'.format(get_gl_base_format(fmat))
print '  {0},'.format(get_gl_data_type(fmat))
 
@@ -176,6 +185,8 @@ for fmat in formats:
 
print '  {0}, {1}, {2},'.format(fmat.block_width, fmat.block_height,
int(fmat.block_size() / 8))
+
+   print '  {{ {0} }},'.format(', '.join(map(str, fmat.swizzle)))
print '   },'
 
 print '};'
diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
index 39cc5f1..f03425e 100644
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -40,6 +40,8 @@ struct gl_format_info
/** text name for debugging */
const char *StrName;
 
+   enum mesa_format_layout Layout;
+
/**
 * Base format is one of GL_RED, GL_RG, GL_RGB, GL_RGBA, GL_ALPHA,
 * GL_LUMINANCE, GL_LUMINANCE_ALPHA, GL_INTENSITY, GL_YCBCR_MESA,
@@ -67,6 +69,8 @@ struct gl_format_info
 */
GLubyte BlockWidth, BlockHeight;
GLubyte BytesPerBlock;
+
+   uint8_t Swizzle[4];
 };
 
 #include "format_info.c"
@@ -178,6 +182,21 @@ _mesa_get_format_max_bits(mesa_format format)
 
 
 /**
+ * Return the layout type of the given format.
+ * The return value will be one of:
+ *MESA_FORMAT_LAYOUT_ARRAY
+ *MESA_FORMAT_LAYOUT_PACKED
+ *MESA_FORMAT_LAYOUT_OTHER
+ */
+extern enum mesa_format_layout
+_mesa_get_format_layout(mesa_format format)
+{
+   const struct gl_format_info *info = _mesa_get_format_info(format);
+   return info->Layout;
+}
+
+
+/**
  * Return the data type (or more specifically, the data representation)
  * for the given format.
  * The return value will be one of:
@@ -224,6 +243,33 @@ _mesa_get_format_block_size(mesa_format format, GLuint 
*bw, GLuint *bh)
 }
 
 
+/**
+ * Returns an array of four numbers representing the transformation
+ * from the RGBA or SZ colorspace to the given format.  For array formats,
+ * the i'th RGBA component is given by:
+ *
+ * if (swizzle[i] <= MESA_FORMAT_SWIZZLE_W)
+ *comp = data[swizzle[i]];
+ * else if (swizzle[i] == MESA_FORMAT_SWIZZLE_ZERO)
+ *comp = 0;
+ * else if (swizzle[i] == MESA_FORMAT_SWIZZLE_ONE)
+ *comp = 1;
+ * else if (swizzle[i] == MESA_FORMAT_SWIZZLE_NONE)
+ *// data does not contain a channel of this format
+ *
+ * For packed formats, the swizzle gives the number of components left of
+ * the least significant bit.
+ *
+ * Compressed formats have no swizzle.
+ */
+void
+_mesa_get_format_swizzle(mesa_format format, uint8_t swizzle_out[4])
+{
+   const struct gl_format_info *info = _mesa_get_format_info(format);
+   memcpy(swizzle_out, info->Swizzle, sizeof(info->Swizzle));
+}
+
+
 /** Is the given format a compressed format? */
 GLboolean
 _mesa_is_format_compressed(mesa_format format)
diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
index dc50bc8..48aad44 100644
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -56,6 +56,15 @@ extern "C" {
  */
 #define MAX_PIXEL_BYTES 16
 
+/**
+ * Specifies the layout of a pixel format.  See the MESA_FORMAT
+ * documentation below.
+ */
+enum mesa_format_layout {
+   MESA_FORMAT_LAYOUT_ARRAY,
+   MESA_FORMAT_LAYOUT_PACKED,
+   MESA_FORMAT_LAYOUT_OTHER,
+};
 
 /**
  * Mesa texture/renderbuffer image formats.
@@ -419,6 +428,9 @@ _mesa_get_format_bits(mesa_format format, GLenum pname);
 extern GLuint
 _mesa_get_format_max_bits(mesa_format format);
 
+extern enum mesa_format_layout
+_mesa_get_format_layout(mesa_format format);
+
 extern GLenum
 _mesa_get_format_datatype(mesa_format format);
 
@@ -428,6 +440,23 @@ _mesa_get_format_base_format(mesa_format format);
 extern void
 _mesa_get_format_block_size(mesa_format format, GLuint *bw, GLuint *bh);
 
+/**
+ * An enum representing different possible swizzling values.  This is used
+ * to interpret the output of _mesa_get_format_swizzle
+ */
+enum {
+   MESA_FORMAT_SWIZZLE_X = 0,
+   MESA_FORMAT_SWIZZLE_Y = 1,
+   MESA_FORMAT_SWIZZLE_Z = 2,
+   MESA_FORMAT_SW

[Mesa-dev] [PATCH 00/12] Rework texture upload code

2014-07-17 Thread Jason Ekstrand
This is the first installment of some work I've been doing over the past
couple of weeks to refactor mesa's texture conversion/storage code.  There
is more to be done and more that I have done but have not included in this
series.  These are the first mailing-list-ready fruits of my efforts.  The
important bits here include:

 1) Using a human-readable CSV file to describe texture formats similar to
the way it is currently done in gallium.  This is much easier to
read/edit than the structure in formats.c.  The guts of formats.c is
then autogenerated from this CSV file.

 2) Adding a very generic yet efficient _mesa_swizzle_and_convert function
that is capable of performing the vast majority of texture format
conversions in one function.  It has also been fairly carefully tuned
to be even faster than the _mesa_swizzle_ubyte_image special-case that
we had before for ubyte textures.  Unlike that special case, it also works
on the other datatypes and can even do type conversions as it swizzles.

 3) Refactoring of texstore.c including the use of the above
_mesa_swizzle_and_convert function along with the already-existing
packing functions to remove a lot of hand-written special-case code.

Thanks to the format CSV file, there's even more that we can now do.
Things I hope to accomplish in the future include:

 1) Autogenerate the bulk of main/format_pack.c, main/format_unpack.c, and
main/pack.c from CSV files.  There's some refactoring that will be
required first, but it shouldn't be that hard and I already have the
python code to do the generation; it's just not part of this patch
series.
 2) Find a general way to do depth-stencil formats.  I'm a bit dubious as
to whether or not this will turn out to be practical, but I haven't had
a chance to look into it too much yet.
 3) Do similar refactors for GetTexImage, ReadPixels, and DrawPixels.

Happy Reviewing!
--Jason Ekstrand

Jason Ekstrand (12):
  main: Add a format description CSV file
  main/formats: Autogenerate the format_info structure from a CSV file
  main/formats: Remove IndexBits
  main/formats: Add layout and swizzle information
  main: Gather some common format conversion functions into a single
format_utils file
  main/format_utils: Add a general format conversion function
  main/format_utils: Add a function for determining if a format is
actually an array format and computing the array format
parameters
  main/texstore: Split texture storage into three functions:
texstore_depth_stencil, texstore_compressed, and texstore_rgba
  main/texstore: Use _mesa_swizzle_and_convert when possible
  main/format_pack: Fix a wrong datatype in pack_ubyte_R8G8_UNORM
  main/texstore: Add a general texture upload path for rgba
floating-point and normalized textures
  mesa/texstore: Add a generic rgba integer texture upload path

 src/mesa/Makefile.am |   10 +
 src/mesa/Makefile.sources|1 +
 src/mesa/main/.gitignore |1 +
 src/mesa/main/format_info.py |  192 +++
 src/mesa/main/format_pack.c  |  104 +-
 src/mesa/main/format_parser.py   |  406 +
 src/mesa/main/format_unpack.c|   69 +-
 src/mesa/main/format_unpack.h|3 -
 src/mesa/main/format_utils.c |  715 +
 src/mesa/main/format_utils.h |   90 ++
 src/mesa/main/formats.c  | 1786 +
 src/mesa/main/formats.csv|  282 
 src/mesa/main/formats.h  |   29 +
 src/mesa/main/texcompress_etc.c  |   20 +-
 src/mesa/main/texcompress_s3tc.c |   26 +-
 src/mesa/main/texstore.c | 3172 +++---
 16 files changed, 2412 insertions(+), 4494 deletions(-)
 create mode 100644 src/mesa/main/format_info.py
 create mode 100755 src/mesa/main/format_parser.py
 create mode 100644 src/mesa/main/format_utils.c
 create mode 100644 src/mesa/main/format_utils.h
 create mode 100644 src/mesa/main/formats.csv

-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/12] main/format_pack: Fix a wrong datatype in pack_ubyte_R8G8_UNORM

2014-07-17 Thread Jason Ekstrand
Before it was only storing one of the color components due to truncation.
With this patch it now properly stores all of them.

Signed-off-by: Jason Ekstrand 
---
 src/mesa/main/format_pack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/format_pack.c b/src/mesa/main/format_pack.c
index fb3feb5..4b52405 100644
--- a/src/mesa/main/format_pack.c
+++ b/src/mesa/main/format_pack.c
@@ -856,7 +856,7 @@ pack_float_R_UNORM8(const GLfloat src[4], void *dst)
 static void
 pack_ubyte_R8G8_UNORM(const GLubyte src[4], void *dst)
 {
-   GLubyte *d = ((GLubyte *) dst);
+   GLushort *d = ((GLushort *) dst);
*d = PACK_COLOR_88(src[GCOMP], src[RCOMP]);
 }
 
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/12] main/format_utils: Add a general format conversion function

2014-07-17 Thread Jason Ekstrand
Most format conversion operations required by GL can be performed by
converting one channel at a time, shuffling the channels around, and
optionally filling missing channels with zeros and ones.  This adds a
function to do just that in a general, yet efficient, way.

Signed-off-by: Jason Ekstrand 
---
 src/mesa/main/format_utils.c | 566 +++
 src/mesa/main/format_utils.h |  18 ++
 2 files changed, 584 insertions(+)

diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
index 241c158..0cb3eae 100644
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@@ -54,3 +54,569 @@ _mesa_srgb_ubyte_to_linear_float(uint8_t cl)
 
return lut[cl];
 }
+
+static bool
+swizzle_convert_try_memcpy(void *dst, GLenum dst_type, int num_dst_channels,
+   const void *src, GLenum src_type, int 
num_src_channels,
+   const uint8_t swizzle[4], bool normalized, int 
count)
+{
+   int i;
+
+   if (src_type != dst_type)
+  return false;
+   if (num_src_channels != num_dst_channels)
+  return false;
+
+   for (i = 0; i < num_dst_channels; ++i)
+  if (swizzle[i] != i && swizzle[i] != MESA_FORMAT_SWIZZLE_NONE)
+ return false;
+
+   memcpy(dst, src, count * num_src_channels * _mesa_sizeof_type(src_type));
+
+   return true;
+}
+
+/* Note: This loop is carefully crafted for performance.  Be careful when
+ * changing it and run some benchmarks to ensure no performance regressions
+ * if you do.
+ */
+#define SWIZZLE_CONVERT_LOOP(DST_TYPE, SRC_TYPE, CONV)   \
+   do {  \
+  const SRC_TYPE *typed_src = void_src;  \
+  DST_TYPE *typed_dst = void_dst;\
+  DST_TYPE tmp[7];   \
+  tmp[4] = 0;\
+  tmp[5] = one;  \
+  for (s = 0; s < count; ++s) {  \
+ for (j = 0; j < num_src_channels; ++j) {\
+SRC_TYPE src = typed_src[j]; \
+tmp[j] = CONV;   \
+ }   \
+ \
+ typed_dst[0] = tmp[swizzle_x];  \
+ if (num_dst_channels > 1) { \
+typed_dst[1] = tmp[swizzle_y];   \
+if (num_dst_channels > 2) {  \
+   typed_dst[2] = tmp[swizzle_z];\
+   if (num_dst_channels > 3) {   \
+  typed_dst[3] = tmp[swizzle_w]; \
+   } \
+}\
+ }   \
+ typed_src += num_src_channels;  \
+ typed_dst += num_dst_channels;  \
+  }  \
+   } while (0);
+
+/**
+ * Convert between array-based color formats.
+ *
+ * Most format conversion operations required by GL can be performed by
+ * converting one channel at a time, shuffling the channels around, and
+ * optionally filling missing channels with zeros and ones.  This function
+ * does just that in a general, yet efficient, way.
+ *
+ * Most of the parameters are self-explanatory.  The swizzle parameter is
+ * an array of 4 numbers (see _mesa_get_format_swizzle) that describes
+ * where each channel in the destination should come from in the source.
+ *
+ * Under most circumstances, the source and destination images must be
+ * different as no care is taken not to clobber one with the other.
+ * However, if they have the same number of bits per pixel, it is safe to
+ * do an in-place conversion.
+ */
+void
+_mesa_swizzle_and_convert(void *void_dst, GLenum dst_type, int 
num_dst_channels,
+  const void *void_src, GLenum src_type, int 
num_src_channels,
+  const uint8_t swizzle[4], bool normalized, int count)
+{
+   int s, j;
+   register uint8_t swizzle_x, swizzle_y, swizzle_z, swizzle_w;
+
+   if (swizzle_convert_try_memcpy(void_dst, dst_type, num_dst_channels,
+  void_src, src_type, num_src_channels,
+  swizzle, normalized, count))
+  return;
+
+   swizzle_x = swizzle[0];
+   swizzle_y = swizzle[1];
+   swizzle_z = swizzle[2];
+   swizzle_w = swizzle[3];
+
+   switch (dst_type) {
+   case GL_FLOAT:
+   {
+  const float one = 1.0f;
+  switch (src_type) {
+  case GL_FLOAT:
+ SWIZZLE_CONVERT_LOOP(float, float, src)
+ break;
+  case GL_HALF_FLOAT:
+ SWIZZLE_CONVERT_LOOP(float, uint16_t, _mesa_half_to_float(src))
+ break;
+  case GL_UNSIGNED_BYTE:
+ if (normalized) {
+SWIZZLE

[Mesa-dev] [PATCH 08/12] main/texstore: Split texture storage into three functions: texstore_depth_stencil, texstore_compressed, and texstore_rgba

2014-07-17 Thread Jason Ekstrand
Signed-off-by: Jason Ekstrand 
---
 src/mesa/main/texstore.c | 171 +++
 1 file changed, 100 insertions(+), 71 deletions(-)

diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index d363f9f..e1f2284 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -3540,35 +3540,95 @@ _mesa_texstore_abgr2101010(TEXSTORE_PARAMS)
return GL_TRUE;
 }
 
+
 static GLboolean
-_mesa_texstore_null(TEXSTORE_PARAMS)
+texstore_depth_stencil(TEXSTORE_PARAMS)
 {
-   (void) ctx; (void) dims;
-   (void) baseInternalFormat;
-   (void) dstFormat;
-   (void) dstRowStride; (void) dstSlices,
-   (void) srcWidth; (void) srcHeight; (void) srcDepth;
-   (void) srcFormat; (void) srcType;
-   (void) srcAddr;
-   (void) srcPacking;
-
-   /* should never happen */
-   _mesa_problem(NULL, "_mesa_texstore_null() is called");
-   return GL_FALSE;
+   static StoreTexImageFunc table[MESA_FORMAT_COUNT];
+   static GLboolean initialized = GL_FALSE;
+
+   if (!initialized) {
+  memset(table, 0, sizeof table);
+
+  table[MESA_FORMAT_S8_UINT_Z24_UNORM] = _mesa_texstore_z24_s8;
+  table[MESA_FORMAT_Z24_UNORM_S8_UINT] = _mesa_texstore_s8_z24;
+  table[MESA_FORMAT_Z_UNORM16] = _mesa_texstore_z16;
+  table[MESA_FORMAT_Z24_UNORM_X8_UINT] = _mesa_texstore_x8_z24;
+  table[MESA_FORMAT_X8_UINT_Z24_UNORM] = _mesa_texstore_z24_x8;
+  table[MESA_FORMAT_Z_UNORM32] = _mesa_texstore_z32;
+  table[MESA_FORMAT_S_UINT8] = _mesa_texstore_s8;
+  table[MESA_FORMAT_Z_FLOAT32] = _mesa_texstore_z32;
+  table[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = _mesa_texstore_z32f_x24s8;
+
+  initialized = GL_TRUE;
+   }
+
+   ASSERT(table[dstFormat]);
+   return table[dstFormat](ctx, dims, baseInternalFormat,
+   dstFormat, dstRowStride, dstSlices,
+   srcWidth, srcHeight, srcDepth,
+   srcFormat, srcType, srcAddr, srcPacking);
 }
 
+static GLboolean
+texstore_compressed(TEXSTORE_PARAMS)
+{
+   static StoreTexImageFunc table[MESA_FORMAT_COUNT];
+   static GLboolean initialized = GL_FALSE;
+
+   if (!initialized) {
+  memset(table, 0, sizeof table);
 
-/**
- * Return the StoreTexImageFunc pointer to store an image in the given format.
- */
-static StoreTexImageFunc
-_mesa_get_texstore_func(mesa_format format)
+  table[MESA_FORMAT_SRGB_DXT1] = _mesa_texstore_rgb_dxt1;
+  table[MESA_FORMAT_SRGBA_DXT1] = _mesa_texstore_rgba_dxt1;
+  table[MESA_FORMAT_SRGBA_DXT3] = _mesa_texstore_rgba_dxt3;
+  table[MESA_FORMAT_SRGBA_DXT5] = _mesa_texstore_rgba_dxt5;
+  table[MESA_FORMAT_RGB_FXT1] = _mesa_texstore_rgb_fxt1;
+  table[MESA_FORMAT_RGBA_FXT1] = _mesa_texstore_rgba_fxt1;
+  table[MESA_FORMAT_RGB_DXT1] = _mesa_texstore_rgb_dxt1;
+  table[MESA_FORMAT_RGBA_DXT1] = _mesa_texstore_rgba_dxt1;
+  table[MESA_FORMAT_RGBA_DXT3] = _mesa_texstore_rgba_dxt3;
+  table[MESA_FORMAT_RGBA_DXT5] = _mesa_texstore_rgba_dxt5;
+  table[MESA_FORMAT_R_RGTC1_UNORM] = _mesa_texstore_red_rgtc1;
+  table[MESA_FORMAT_R_RGTC1_SNORM] = _mesa_texstore_signed_red_rgtc1;
+  table[MESA_FORMAT_RG_RGTC2_UNORM] = _mesa_texstore_rg_rgtc2;
+  table[MESA_FORMAT_RG_RGTC2_SNORM] = _mesa_texstore_signed_rg_rgtc2;
+  table[MESA_FORMAT_L_LATC1_UNORM] = _mesa_texstore_red_rgtc1;
+  table[MESA_FORMAT_L_LATC1_SNORM] = _mesa_texstore_signed_red_rgtc1;
+  table[MESA_FORMAT_LA_LATC2_UNORM] = _mesa_texstore_rg_rgtc2;
+  table[MESA_FORMAT_LA_LATC2_SNORM] = _mesa_texstore_signed_rg_rgtc2;
+  table[MESA_FORMAT_ETC1_RGB8] = _mesa_texstore_etc1_rgb8;
+  table[MESA_FORMAT_ETC2_RGB8] = _mesa_texstore_etc2_rgb8;
+  table[MESA_FORMAT_ETC2_SRGB8] = _mesa_texstore_etc2_srgb8;
+  table[MESA_FORMAT_ETC2_RGBA8_EAC] = _mesa_texstore_etc2_rgba8_eac;
+  table[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = 
_mesa_texstore_etc2_srgb8_alpha8_eac;
+  table[MESA_FORMAT_ETC2_R11_EAC] = _mesa_texstore_etc2_r11_eac;
+  table[MESA_FORMAT_ETC2_RG11_EAC] = _mesa_texstore_etc2_rg11_eac;
+  table[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = 
_mesa_texstore_etc2_signed_r11_eac;
+  table[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = 
_mesa_texstore_etc2_signed_rg11_eac;
+  table[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] =
+ _mesa_texstore_etc2_rgb8_punchthrough_alpha1;
+  table[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] =
+ _mesa_texstore_etc2_srgb8_punchthrough_alpha1;
+
+  initialized = GL_TRUE;
+   }
+
+   ASSERT(table[dstFormat]);
+   return table[dstFormat](ctx, dims, baseInternalFormat,
+   dstFormat, dstRowStride, dstSlices,
+   srcWidth, srcHeight, srcDepth,
+   srcFormat, srcType, srcAddr, srcPacking);
+}
+
+static GLboolean
+texstore_rgba(TEXSTORE_PARAMS)
 {
static StoreTexImageFunc table[MESA_FORMAT_COUNT];
static GLboolean initialized = GL_FALSE;
 
if (!initialized) {
-  table[MESA_FORMAT_

[Mesa-dev] [PATCH 07/12] main/format_utils: Add a function for determining if a format is actually an array format and computing the array format parameters

2014-07-17 Thread Jason Ekstrand
This is a direct helper function for using _mesa_swizzle_and_convert

Signed-off-by: Jason Ekstrand 
---
 src/mesa/main/format_utils.c | 93 
 src/mesa/main/format_utils.h |  4 ++
 2 files changed, 97 insertions(+)

diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
index 0cb3eae..b9c7a54 100644
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@@ -55,6 +55,99 @@ _mesa_srgb_ubyte_to_linear_float(uint8_t cl)
return lut[cl];
 }
 
+static const uint8_t map_identity[7] = { 0, 1, 2, 3, 4, 5, 6 };
+static const uint8_t map_3210[7] = { 3, 2, 1, 0, 4, 5, 6 };
+static const uint8_t map_1032[7] = { 1, 0, 3, 2, 4, 5, 6 };
+
+/**
+ * A helper function for figuring out if a (possibly packed) format is
+ * actually an array format and how to work with it.  If the format can not
+ * be used as an array format, this function returns false.
+ */
+bool
+_mesa_format_to_array(mesa_format format, GLenum *type, int *num_components,
+  uint8_t swizzle[4], bool *normalized)
+{
+   int i;
+   GLuint format_components;
+   uint8_t packed_swizzle[4];
+   const uint8_t *endian;
+
+   if (_mesa_is_format_compressed(format))
+  return false;
+
+   *normalized = !_mesa_is_format_integer(format);
+
+   _mesa_format_to_type_and_comps(format, type, &format_components);
+   switch (_mesa_get_format_layout(format)) {
+   case MESA_FORMAT_LAYOUT_ARRAY:
+  *num_components = format_components;
+  _mesa_get_format_swizzle(format, swizzle);
+  return true;
+   case MESA_FORMAT_LAYOUT_PACKED:
+  switch (*type) {
+  case GL_UNSIGNED_BYTE:
+  case GL_BYTE:
+ if (_mesa_get_format_max_bits(format) != 8)
+return false;
+ *num_components = _mesa_get_format_bytes(format);
+ switch (*num_components) {
+ case 1:
+endian = map_identity;
+break;
+ case 2:
+endian = _mesa_little_endian() ? map_identity : map_1032;
+break;
+ case 4:
+endian = _mesa_little_endian() ? map_identity : map_3210;
+break;
+ default:
+assert(!"Invalid number of components");
+ }
+ break;
+  case GL_UNSIGNED_SHORT:
+  case GL_SHORT:
+  case GL_HALF_FLOAT:
+ if (_mesa_get_format_max_bits(format) != 16)
+return false;
+ *num_components = _mesa_get_format_bytes(format) / 2;
+ switch (*num_components) {
+ case 1:
+endian = map_identity;
+break;
+ case 2:
+endian = _mesa_little_endian() ? map_identity : map_1032;
+break;
+ default:
+assert(!"Invalid number of components");
+ }
+ break;
+  case GL_UNSIGNED_INT:
+  case GL_INT:
+  case GL_FLOAT:
+ /* This isn't packed.  At least not really. */
+ assert(format_components == 1);
+ if (_mesa_get_format_max_bits(format) != 32)
+return false;
+ *num_components = format_components;
+ endian = map_identity;
+ break;
+  default:
+ return false;
+  }
+
+  _mesa_get_format_swizzle(format, packed_swizzle);
+
+  for (i = 0; i < 4; ++i)
+ swizzle[i] = endian[packed_swizzle[i]];
+
+  return true;
+   case MESA_FORMAT_LAYOUT_OTHER:
+   default:
+  return false;
+   }
+}
+
 static bool
 swizzle_convert_try_memcpy(void *dst, GLenum dst_type, int num_dst_channels,
const void *src, GLenum src_type, int 
num_src_channels,
diff --git a/src/mesa/main/format_utils.h b/src/mesa/main/format_utils.h
index c5dab7b..990c3f2 100644
--- a/src/mesa/main/format_utils.h
+++ b/src/mesa/main/format_utils.h
@@ -78,6 +78,10 @@ _mesa_srgb_to_linear(float cs)
 
 float _mesa_srgb_ubyte_to_linear_float(uint8_t cl);
 
+bool
+_mesa_format_to_array(mesa_format, GLenum *type, int *num_components,
+  uint8_t swizzle[4], bool *normalized);
+
 void
 _mesa_swizzle_and_convert(void *dst, GLenum dst_type, int num_dst_channels,
   const void *src, GLenum src_type, int 
num_src_channels,
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/12] main: Gather some common format conversion functions into a single format_utils file

2014-07-17 Thread Jason Ekstrand
This initial commit puts all of the RGB <-> sRGB conversion functions in
one place.

Signed-off-by: Jason Ekstrand 
---
 src/mesa/Makefile.sources|   1 +
 src/mesa/main/format_pack.c  | 102 ++-
 src/mesa/main/format_unpack.c|  69 --
 src/mesa/main/format_unpack.h|   3 --
 src/mesa/main/format_utils.c |  56 +
 src/mesa/main/format_utils.h |  68 ++
 src/mesa/main/texcompress_etc.c  |  20 
 src/mesa/main/texcompress_s3tc.c |  26 +-
 8 files changed, 204 insertions(+), 141 deletions(-)
 create mode 100644 src/mesa/main/format_utils.c
 create mode 100644 src/mesa/main/format_utils.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index f4904fb..a261113 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -48,6 +48,7 @@ MAIN_FILES = \
$(SRCDIR)main/formats.c \
$(SRCDIR)main/format_pack.c \
$(SRCDIR)main/format_unpack.c \
+   $(SRCDIR)main/format_utils.c \
$(SRCDIR)main/framebuffer.c \
$(SRCDIR)main/get.c \
$(SRCDIR)main/genmipmap.c \
diff --git a/src/mesa/main/format_pack.c b/src/mesa/main/format_pack.c
index 6b28592..fb3feb5 100644
--- a/src/mesa/main/format_pack.c
+++ b/src/mesa/main/format_pack.c
@@ -38,6 +38,7 @@
 
 #include "colormac.h"
 #include "format_pack.h"
+#include "format_utils.h"
 #include "macros.h"
 #include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
 #include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
@@ -58,39 +59,6 @@ typedef void (*pack_float_rgba_row_func)(GLuint n,
  const GLfloat src[][4], void *dst);
 
 
-
-static inline GLfloat
-linear_to_srgb(GLfloat cl)
-{
-   if (cl < 0.0f)
-  return 0.0f;
-   else if (cl < 0.0031308f)
-  return 12.92f * cl;
-   else if (cl < 1.0f)
-  return 1.055f * powf(cl, 0.41666f) - 0.055f;
-   else
-  return 1.0f;
-}
-
-
-static inline GLubyte
-linear_float_to_srgb_ubyte(GLfloat cl)
-{
-   GLubyte res = FLOAT_TO_UBYTE(linear_to_srgb(cl));
-   return res;
-}
-
-
-static inline GLubyte
-linear_ubyte_to_srgb_ubyte(GLubyte cl)
-{
-   GLubyte res = FLOAT_TO_UBYTE(linear_to_srgb(cl / 255.0f));
-   return res;
-}
-
-
-
-
 /*
  * MESA_FORMAT_A8B8G8R8_UNORM
  */
@@ -1043,18 +1011,18 @@ static void
 pack_ubyte_BGR_SRGB8(const GLubyte src[4], void *dst)
 {
GLubyte *d = ((GLubyte *) dst);
-   d[2] = linear_ubyte_to_srgb_ubyte(src[RCOMP]);
-   d[1] = linear_ubyte_to_srgb_ubyte(src[GCOMP]);
-   d[0] = linear_ubyte_to_srgb_ubyte(src[BCOMP]);
+   d[2] = _mesa_linear_ubyte_to_srgb_ubyte(src[RCOMP]);
+   d[1] = _mesa_linear_ubyte_to_srgb_ubyte(src[GCOMP]);
+   d[0] = _mesa_linear_ubyte_to_srgb_ubyte(src[BCOMP]);
 }
 
 static void
 pack_float_BGR_SRGB8(const GLfloat src[4], void *dst)
 {
GLubyte *d = ((GLubyte *) dst);
-   d[2] = linear_float_to_srgb_ubyte(src[RCOMP]);
-   d[1] = linear_float_to_srgb_ubyte(src[GCOMP]);
-   d[0] = linear_float_to_srgb_ubyte(src[BCOMP]);
+   d[2] = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[RCOMP]));
+   d[1] = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[GCOMP]));
+   d[0] = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[BCOMP]));
 }
 
 
@@ -1064,9 +1032,9 @@ static void
 pack_ubyte_A8B8G8R8_SRGB(const GLubyte src[4], void *dst)
 {
GLuint *d = ((GLuint *) dst);
-   GLubyte r = linear_ubyte_to_srgb_ubyte(src[RCOMP]);
-   GLubyte g = linear_ubyte_to_srgb_ubyte(src[GCOMP]);
-   GLubyte b = linear_ubyte_to_srgb_ubyte(src[BCOMP]);
+   GLubyte r = _mesa_linear_ubyte_to_srgb_ubyte(src[RCOMP]);
+   GLubyte g = _mesa_linear_ubyte_to_srgb_ubyte(src[GCOMP]);
+   GLubyte b = _mesa_linear_ubyte_to_srgb_ubyte(src[BCOMP]);
*d = PACK_COLOR_(r, g, b, src[ACOMP]);
 }
 
@@ -1075,9 +1043,9 @@ pack_float_A8B8G8R8_SRGB(const GLfloat src[4], void *dst)
 {
GLuint *d = ((GLuint *) dst);
GLubyte r, g, b, a;
-   r = linear_float_to_srgb_ubyte(src[RCOMP]);
-   g = linear_float_to_srgb_ubyte(src[GCOMP]);
-   b = linear_float_to_srgb_ubyte(src[BCOMP]);
+   r = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[RCOMP]));
+   g = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[GCOMP]));
+   b = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[BCOMP]));
UNCLAMPED_FLOAT_TO_UBYTE(a, src[ACOMP]);
*d = PACK_COLOR_(r, g, b, a);
 }
@@ -1089,9 +1057,9 @@ static void
 pack_ubyte_B8G8R8A8_SRGB(const GLubyte src[4], void *dst)
 {
GLuint *d = ((GLuint *) dst);
-   GLubyte r = linear_ubyte_to_srgb_ubyte(src[RCOMP]);
-   GLubyte g = linear_ubyte_to_srgb_ubyte(src[GCOMP]);
-   GLubyte b = linear_ubyte_to_srgb_ubyte(src[BCOMP]);
+   GLubyte r = _mesa_linear_ubyte_to_srgb_ubyte(src[RCOMP]);
+   GLubyte g = _mesa_linear_ubyte_to_srgb_ubyte(src[GCOMP]);
+   GLubyte b = _mesa_linear_ubyte_to_srgb_ubyte(src[BCOMP]);
*d = PACK_COLOR_(src[ACOMP], r, g, b);
 }
 
@@ -1100,9 +1068,9 @@ pack_float_B8G8R8A8_SRGB(const GLfloat src[4], void *dst)
 {
GLuint *d = ((GLuint *) dst);
GLubyt

[Mesa-dev] [PATCH 12/12] mesa/texstore: Add a generic rgba integer texture upload path

2014-07-17 Thread Jason Ekstrand
Again, we delete a lot of functions that aren't really doing anything
interesting anymore.

Signed-off-by: Jason Ekstrand 
---
 src/mesa/main/texstore.c | 545 ++-
 1 file changed, 66 insertions(+), 479 deletions(-)

diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 31317cb..ae6b286 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -1194,423 +1194,6 @@ _mesa_texstore_s8(TEXSTORE_PARAMS)
 }
 
 
-/* non-normalized, signed int8 */
-static GLboolean
-_mesa_texstore_rgba_int8(TEXSTORE_PARAMS)
-{
-   GLenum baseFormat = _mesa_get_format_base_format(dstFormat);
-   GLint components = _mesa_components_in_format(baseFormat);
-
-   /* this forces alpha to 1 in make_temp_uint_image */
-   if (dstFormat == MESA_FORMAT_RGBX_SINT8) {
-  baseFormat = GL_RGBA;
-  components = 4;
-   }
-
-   ASSERT(dstFormat == MESA_FORMAT_R_SINT8 ||
-  dstFormat == MESA_FORMAT_RG_SINT8 ||
-  dstFormat == MESA_FORMAT_RGB_SINT8 ||
-  dstFormat == MESA_FORMAT_RGBA_SINT8 ||
-  dstFormat == MESA_FORMAT_A_SINT8 ||
-  dstFormat == MESA_FORMAT_I_SINT8 ||
-  dstFormat == MESA_FORMAT_L_SINT8 ||
-  dstFormat == MESA_FORMAT_LA_SINT8 ||
-  dstFormat == MESA_FORMAT_RGBX_SINT8);
-   ASSERT(baseInternalFormat == GL_RGBA ||
-  baseInternalFormat == GL_RGB ||
-  baseInternalFormat == GL_RG ||
-  baseInternalFormat == GL_RED ||
-  baseInternalFormat == GL_ALPHA ||
-  baseInternalFormat == GL_LUMINANCE ||
-  baseInternalFormat == GL_LUMINANCE_ALPHA ||
-  baseInternalFormat == GL_INTENSITY);
-   ASSERT(_mesa_get_format_bytes(dstFormat) == components * sizeof(GLbyte));
-
-   {
-  /* general path */
-  const GLuint *tempImage = make_temp_uint_image(ctx, dims,
-baseInternalFormat,
-baseFormat,
-srcWidth, srcHeight, 
srcDepth,
-srcFormat, srcType,
-srcAddr,
-srcPacking);
-  const GLuint *src = tempImage;
-  GLint img, row;
-  GLboolean is_unsigned = _mesa_is_type_unsigned(srcType);
-  if (!tempImage)
- return GL_FALSE;
-  for (img = 0; img < srcDepth; img++) {
- GLubyte *dstRow = dstSlices[img];
- for (row = 0; row < srcHeight; row++) {
-GLbyte *dstTexel = (GLbyte *) dstRow;
-GLint i;
-if (is_unsigned) {
-   for (i = 0; i < srcWidth * components; i++) {
-  dstTexel[i] = (GLbyte) MIN2(src[i], 0x7f);
-   }
-} else {
-   for (i = 0; i < srcWidth * components; i++) {
-  dstTexel[i] = (GLbyte) CLAMP((GLint) src[i], -0x80, 0x7f);
-   }
-}
-dstRow += dstRowStride;
-src += srcWidth * components;
- }
-  }
-
-  free((void *) tempImage);
-   }
-   return GL_TRUE;
-}
-
-
-/* non-normalized, signed int16 */
-static GLboolean
-_mesa_texstore_rgba_int16(TEXSTORE_PARAMS)
-{
-   GLenum baseFormat = _mesa_get_format_base_format(dstFormat);
-   GLint components = _mesa_components_in_format(baseFormat);
-
-   /* this forces alpha to 1 in make_temp_uint_image */
-   if (dstFormat == MESA_FORMAT_RGBX_SINT16) {
-  baseFormat = GL_RGBA;
-  components = 4;
-   }
-
-   ASSERT(dstFormat == MESA_FORMAT_R_SINT16 ||
-  dstFormat == MESA_FORMAT_RG_SINT16 ||
-  dstFormat == MESA_FORMAT_RGB_SINT16 ||
-  dstFormat == MESA_FORMAT_RGBA_SINT16 ||
-  dstFormat == MESA_FORMAT_A_SINT16 ||
-  dstFormat == MESA_FORMAT_L_SINT16 ||
-  dstFormat == MESA_FORMAT_I_SINT16 ||
-  dstFormat == MESA_FORMAT_LA_SINT16 ||
-  dstFormat == MESA_FORMAT_RGBX_SINT16);
-   ASSERT(baseInternalFormat == GL_RGBA ||
-  baseInternalFormat == GL_RGB ||
-  baseInternalFormat == GL_RG ||
-  baseInternalFormat == GL_RED ||
-  baseInternalFormat == GL_ALPHA ||
-  baseInternalFormat == GL_LUMINANCE ||
-  baseInternalFormat == GL_LUMINANCE_ALPHA ||
-  baseInternalFormat == GL_INTENSITY);
-   ASSERT(_mesa_get_format_bytes(dstFormat) == components * sizeof(GLshort));
-
-   {
-  /* general path */
-  const GLuint *tempImage = make_temp_uint_image(ctx, dims,
-baseInternalFormat,
-baseFormat,
-srcWidth, srcHeight, 
srcDepth,
-srcFormat, srcType,
-srcAddr,
-

[Mesa-dev] [PATCH 09/12] main/texstore: Use _mesa_swizzle_and_convert when possible

2014-07-17 Thread Jason Ekstrand
This should be both faster and more accurate than our general slow-path of
converting everything to float.

Signed-off-by: Jason Ekstrand 
---
 src/mesa/main/texstore.c | 179 +++
 1 file changed, 164 insertions(+), 15 deletions(-)

diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index e1f2284..13fb3a8 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -55,6 +55,7 @@
 #include "bufferobj.h"
 #include "colormac.h"
 #include "format_pack.h"
+#include "format_utils.h"
 #include "image.h"
 #include "macros.h"
 #include "mipmap.h"
@@ -233,21 +234,44 @@ static int
 get_map_idx(GLenum value)
 {
switch (value) {
-   case GL_LUMINANCE: return IDX_LUMINANCE;
-   case GL_ALPHA: return IDX_ALPHA;
-   case GL_INTENSITY: return IDX_INTENSITY;
-   case GL_LUMINANCE_ALPHA: return IDX_LUMINANCE_ALPHA;
-   case GL_RGB: return IDX_RGB;
-   case GL_RGBA: return IDX_RGBA;
-   case GL_RED: return IDX_RED;
-   case GL_GREEN: return IDX_GREEN;
-   case GL_BLUE: return IDX_BLUE;
-   case GL_BGR: return IDX_BGR;
-   case GL_BGRA: return IDX_BGRA;
-   case GL_ABGR_EXT: return IDX_ABGR;
-   case GL_RG: return IDX_RG;
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_INTEGER_EXT:
+  return IDX_LUMINANCE;
+   case GL_ALPHA:
+   case GL_ALPHA_INTEGER:
+  return IDX_ALPHA;
+   case GL_INTENSITY:
+  return IDX_INTENSITY;
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
+  return IDX_LUMINANCE_ALPHA;
+   case GL_RGB:
+   case GL_RGB_INTEGER:
+  return IDX_RGB;
+   case GL_RGBA:
+   case GL_RGBA_INTEGER:
+  return IDX_RGBA;
+   case GL_RED:
+   case GL_RED_INTEGER:
+  return IDX_RED;
+   case GL_GREEN:
+  return IDX_GREEN;
+   case GL_BLUE:
+  return IDX_BLUE;
+   case GL_BGR:
+   case GL_BGR_INTEGER:
+  return IDX_BGR;
+   case GL_BGRA:
+   case GL_BGRA_INTEGER:
+  return IDX_BGRA;
+   case GL_ABGR_EXT:
+  return IDX_ABGR;
+   case GL_RG:
+   case GL_RG_INTEGER:
+  return IDX_RG;
default:
-  _mesa_problem(NULL, "Unexpected inFormat");
+  _mesa_problem(NULL, "Unexpected inFormat %s",
+_mesa_lookup_enum_by_nr(value));
   return 0;
}
 }   
@@ -789,6 +813,7 @@ swizzle_copy(GLubyte *dst, GLuint dstComponents, const 
GLubyte *src,
 
 static const GLubyte map_identity[6] = { 0, 1, 2, 3, ZERO, ONE };
 static const GLubyte map_3210[6] = { 3, 2, 1, 0, ZERO, ONE };
+static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE };
 
 
 /**
@@ -826,6 +851,12 @@ byteswap_mapping( GLboolean swapBytes,
switch (srcType) {
case GL_BYTE:
case GL_UNSIGNED_BYTE:
+   case GL_SHORT:
+   case GL_UNSIGNED_SHORT:
+   case GL_INT:
+   case GL_UNSIGNED_INT:
+   case GL_FLOAT:
+   case GL_HALF_FLOAT:
   return map_identity;
case GL_UNSIGNED_INT_8_8_8_8:
case GL_UNSIGNED_INT_8_8_8_8_REV:
@@ -3621,6 +3652,117 @@ texstore_compressed(TEXSTORE_PARAMS)
srcFormat, srcType, srcAddr, srcPacking);
 }
 
+static void
+invert_swizzle(uint8_t dst[4], const uint8_t src[4])
+{
+   int i, j;
+
+   dst[0] = MESA_FORMAT_SWIZZLE_NONE;
+   dst[1] = MESA_FORMAT_SWIZZLE_NONE;
+   dst[2] = MESA_FORMAT_SWIZZLE_NONE;
+   dst[3] = MESA_FORMAT_SWIZZLE_NONE;
+
+   for (i = 0; i < 4; ++i)
+  for (j = 0; j < 4; ++j)
+ if (src[j] == i && dst[i] == MESA_FORMAT_SWIZZLE_NONE)
+dst[i] = j;
+}
+
+static GLboolean
+texstore_swizzle(TEXSTORE_PARAMS)
+{
+   const GLint srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth,
+ srcFormat, srcType);
+   const GLint srcImageStride = _mesa_image_image_stride(srcPacking,
+  srcWidth, srcHeight, srcFormat, srcType);
+   const GLubyte *srcImage = (const GLubyte *) _mesa_image_address(dims,
+srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0);
+   const int src_components = _mesa_components_in_format(srcFormat);
+
+   GLubyte swizzle[4], rgba2base[6], base2src[6], rgba2dst[4], dst2rgba[4];
+   const GLubyte *swap;
+   GLenum dst_type;
+   int dst_components;
+   bool is_array, normalized, need_swap;
+   GLint i, img, row;
+   const GLubyte *src_row;
+   GLubyte *dst_row;
+
+   is_array = _mesa_format_to_array(dstFormat, &dst_type, &dst_components,
+rgba2dst, &normalized);
+
+   if (!is_array)
+  return GL_FALSE;
+
+   switch (srcType) {
+   case GL_FLOAT:
+   case GL_UNSIGNED_BYTE:
+   case GL_BYTE:
+   case GL_UNSIGNED_SHORT:
+   case GL_SHORT:
+   case GL_UNSIGNED_INT:
+   case GL_INT:
+  /* If we have to swap bytes in a multi-byte datatype, that means
+   * we're not doing an array conversion anymore */
+  if (srcPacking->SwapBytes)
+ return GL_FALSE;
+  need_swap = false;
+  break;
+   case GL_UNSIGNED_INT_8_8_8_8:
+  need_swap = srcPacking->SwapBytes;
+  if (_mesa_little_endian())
+ need_swap = !need_

[Mesa-dev] [PATCH] i965: Add missing persample_shading field to brw_wm_debug_recompile.

2014-07-17 Thread Kenneth Graunke
Otherwise, the performance warning for shader recompiles will just say
"something else".

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_wm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index d716e6f..d5a28dc 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -275,6 +275,8 @@ brw_wm_debug_recompile(struct brw_context *brw,
   old_key->stats_wm, key->stats_wm);
found |= key_debug(brw, "flat shading",
   old_key->flat_shade, key->flat_shade);
+   found |= key_debug(brw, "per-sample shading",
+  old_key->persample_shading, key->persample_shading);
found |= key_debug(brw, "number of color buffers",
   old_key->nr_color_regions, key->nr_color_regions);
found |= key_debug(brw, "MRT alpha test or alpha-to-coverage",
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Fix z_offset computation in intel_miptree_unmap_depthstencil()

2014-07-17 Thread Jordan Justen
Reviewed-by: Jordan Justen 

On Wed, Jul 16, 2014 at 3:32 PM, Anuj Phogat  wrote:
> The bug is triggered by using glTexSubImage2d() with GL_DEPTH_STENCIL
> as base internal format and non-zero x, y offsets. Currently x, y
> offsets are ignored while updating the texture image.
>
> Fixes Khronos GLES3 CTS tests:
> npot_tex_sub_image_2d
> npot_tex_sub_image_3d
> npot_pbo_tex_sub_image_2d
> npot_pbo_tex_sub_image_2d
>
> Cc: 
> Signed-off-by: Anuj Phogat 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 2ab0faa..b36ffc7 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -2129,9 +2129,9 @@ intel_miptree_unmap_depthstencil(struct brw_context 
> *brw,
>  x + s_image_x + map->x,
>  y + s_image_y + map->y,
>  brw->has_swizzling);
> -   ptrdiff_t z_offset = ((y + z_image_y) *
> +   ptrdiff_t z_offset = ((y + z_image_y + map->y) *
>(z_mt->pitch / 4) +
> - (x + z_image_x));
> + (x + z_image_x + map->x));
>
> if (map_z32f_x24s8) {
>z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0];
> --
> 1.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] radeon/llvm: enable unsafe math for graphics shaders

2014-07-17 Thread Tom Stellard
On Thu, Jul 17, 2014 at 06:44:25PM +0200, Grigori Goronzy wrote:
> Accuracy of some operations was recently improved in the R600 backend,
> at the cost of slower code. This is required for compute shaders,
> but not for graphics shaders. Add unsafe-fp-math hint to make LLVM
> generate faster but possibly less accurate code.
> 
> Piglit didn't indicate any regressions.

Both patches are:
Reviewed-by: Tom Stellard 

> ---
>  src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 +
>  1 file changed, 5 insertions(+)
> 
> diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c 
> b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> index 1b17dd4..171ccaa 100644
> --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
> +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
> @@ -26,6 +26,7 @@
>  #include "radeon_llvm_emit.h"
>  #include "radeon_elf_util.h"
>  #include "util/u_memory.h"
> +#include "pipe/p_shader_tokens.h"
>  
>  #include 
>  #include 
> @@ -50,6 +51,10 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
>sprintf(Str, "%1d", type);
>  
>LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str);
> +
> +  if (type != TGSI_PROCESSOR_COMPUTE) {
> +LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true");
> +  }
>  }
>  
>  static void init_r600_target() {
> -- 
> 1.8.3.2
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Release-candidate branch for upcoming 10.2.4

2014-07-17 Thread Marek Olšák
Radeonsi was indeed broken with those fixes (only CubeShadow
samplers). The attached patch fixes it. Please add it to 10.2. Thanks.

Marek

On Tue, Jul 15, 2014 at 6:49 AM, Carl Worth  wrote:
> Hi folks,
>
> I've pushed out an update to the 10.2 branch and I need some specific
> testing in the next three days.
>
> I've tested the branch on Intel (Haswell) as well as both swrast and
> Gallium softpipe and found no piglit regressions compared to the 10.2.3
> release.
>
> The branch includes a few patches to nouveau and radeonsi which I have
> not been able to test. If someone will test one of these drivers with
> piglit and let me know that all looks good, I'll be happy to include the
> patches in the release. Otherwise, I'll drop any untested patches before
> making the final release on Friday.
>
> Also, there's still time in the next three days for someone to nominate
> further driver-specific changes. I'll just need positive piglit test
> results for any such patches, (on top of the branch as it stands now),
> before I'll accept them.
>
> Thanks, all.
>
> -Carl
>
> --
> carl.d.wo...@intel.com
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
From e7467178faeab5c6bd5e6c4952fbce74ea3ff9e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= 
Date: Thu, 17 Jul 2014 22:24:50 +0200
Subject: [PATCH] radeonsi: add support for TXB2

This is needed by latest fixes for samplerCubeShadow with bias.
Otherwise, a crash occurs.
---
 src/gallium/drivers/radeonsi/si_shader.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 09c99eb..397b6ee 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1539,6 +1539,8 @@ static void tex_fetch_args(
 	/* Pack LOD bias value */
 	if (opcode == TGSI_OPCODE_TXB)
 		address[count++] = coords[3];
+	if (opcode == TGSI_OPCODE_TXB2)
+		address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0);
 
 	/* Pack depth comparison value */
 	switch (target) {
@@ -2497,6 +2499,7 @@ int si_pipe_shader_create(
 
 	bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXB] = txb_action;
+	bld_base->op_actions[TGSI_OPCODE_TXB2] = txb_action;
 #if HAVE_LLVM >= 0x0304
 	bld_base->op_actions[TGSI_OPCODE_TXD] = txd_action;
 #endif
-- 
1.9.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Add missing persample_shading field to brw_wm_debug_recompile.

2014-07-17 Thread Jordan Justen
Reviewed-by: Jordan Justen 

On Thu, Jul 17, 2014 at 11:18 AM, Kenneth Graunke  wrote:
> Otherwise, the performance warning for shader recompiles will just say
> "something else".
>
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/drivers/dri/i965/brw_wm.c | 2 ++
>  1 file changed, 2 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
> b/src/mesa/drivers/dri/i965/brw_wm.c
> index d716e6f..d5a28dc 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -275,6 +275,8 @@ brw_wm_debug_recompile(struct brw_context *brw,
>old_key->stats_wm, key->stats_wm);
> found |= key_debug(brw, "flat shading",
>old_key->flat_shade, key->flat_shade);
> +   found |= key_debug(brw, "per-sample shading",
> +  old_key->persample_shading, key->persample_shading);
> found |= key_debug(brw, "number of color buffers",
>old_key->nr_color_regions, key->nr_color_regions);
> found |= key_debug(brw, "MRT alpha test or alpha-to-coverage",
> --
> 2.0.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i915: Fix up intelInitScreen2 for DRI3

2014-07-17 Thread Kenneth Graunke
On Thursday, July 03, 2014 10:13:53 PM Adel Gadllah wrote:
> Commit 442442026eb updated both i915 and i965 for DRI3 support,
> but one check in intelInitScreen2 was missed for i915 causing crashes
> when trying to use i915 with DRI3.
> 
> So fix that up.
> 
> Reported-by: Igor Gnatenko 
> Tested-by: František Zatloukal 
> Reference: https://bugzilla.redhat.com/show_bug.cgi?id=1115323
> Cc: "10.2" 
> Signed-off-by: Adel Gadllah 
> ---
>  src/mesa/drivers/dri/i915/intel_screen.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
> 
> diff --git a/src/mesa/drivers/dri/i915/intel_screen.c 
b/src/mesa/drivers/dri/i915/intel_screen.c
> index 9b4e490..4c9726c 100644
> --- a/src/mesa/drivers/dri/i915/intel_screen.c
> +++ b/src/mesa/drivers/dri/i915/intel_screen.c
> @@ -1152,7 +1152,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
>  {
> struct intel_screen *intelScreen;
>  
> -   if (psp->dri2.loader->base.version <= 2 ||
> +   if (psp->image.loader) {
> +   } else if (psp->dri2.loader->base.version <= 2 ||
> psp->dri2.loader->getBuffersWithFormat == NULL) {
>fprintf(stderr,
> "\nERROR!  DRI2 loader with getBuffersWithFormat() "
> 

Pushed, thanks.

I didn't actually test it, but it does look like the image loader support is 
there (in commit 442442026eb), and this really was just missed.

signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] Release-candidate branch for upcoming 10.2.4

2014-07-17 Thread Carl Worth
Marek Olšák  writes:
> Radeonsi was indeed broken with those fixes (only CubeShadow
> samplers). The attached patch fixes it. Please add it to 10.2. Thanks.

Thanks, Marek!

This is exactly the kind of breakage I was worried about, and exactly
the kind of test reporting I was hoping for. So thanks, again.

I'm glad that the new plan with a window for testing is proving useful.

I've pushed out an update to the 10.2 branch with this fix.

-Carl


pgpAA2lGvIT30.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] Add support for RGBA8 and RGBX8 textures in intel_texsubimage_tiled_memcpy

2014-07-17 Thread Jason Ekstrand
Signed-off-by: Jason Ekstrand 
---
 src/mesa/drivers/dri/i965/intel_tex_subimage.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c 
b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 04cbc4c..4043eb7 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -585,6 +585,16 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
   } else if (format == GL_RGBA) {
  mem_copy = rgba8_copy;
   }
+   } else if ((texImage->TexFormat == MESA_FORMAT_R8G8B8A8_UNORM) ||
+  (texImage->TexFormat == MESA_FORMAT_R8G8B8X8_UNORM)) {
+  cpp = 4;
+  if (format == GL_BGRA) {
+ /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can
+  * use the same function. */
+ mem_copy = rgba8_copy;
+  } else if (format == GL_RGBA) {
+ mem_copy = memcpy;
+  }
}
if (!mem_copy)
   return false;
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: Improve debug output in intelTexImage and intelTexSubimage

2014-07-17 Thread Jason Ekstrand
Signed-off-by: Jason Ekstrand 
---
 src/mesa/drivers/dri/i965/intel_tex_image.c| 4 +++-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c | 6 ++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c 
b/src/mesa/drivers/dri/i965/intel_tex_image.c
index de0546a..029d59b 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -173,8 +173,10 @@ intelTexImage(struct gl_context * ctx,
 {
bool ok;
 
-   DBG("%s target %s level %d %dx%dx%d\n", __FUNCTION__,
+   DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
+   __FUNCTION__, _mesa_get_format_name(texImage->TexFormat),
_mesa_lookup_enum_by_nr(texImage->TexObject->Target),
+   _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type),
texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
 
ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage,
diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c 
b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
index 03e69ae..04cbc4c 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -667,6 +667,12 @@ intelTexSubImage(struct gl_context * ctx,
 {
bool ok;
 
+   DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n",
+   __FUNCTION__, _mesa_get_format_name(texImage->TexFormat),
+   _mesa_lookup_enum_by_nr(texImage->TexObject->Target),
+   _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type),
+   texImage->Level, texImage->Width, texImage->Height, texImage->Depth);
+
ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage,
xoffset, yoffset, zoffset,
width, height, depth,
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Improve debug output in intelTexImage and intelTexSubimage

2014-07-17 Thread Matt Turner
These two are

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/7] i965: Replace cfg instances with calls to calculate_cfg().

2014-07-17 Thread Matt Turner
Avoids regenerating it unnecessarily.

Every program in shader-db improved, none by an amount less than a 1/3
reduction. One Dota2 shader decreased from 62 -> 24.

cfg calculations: 429492 -> 193197 (-55.02%)
---
 .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 20 +++-
 .../dri/i965/brw_fs_peephole_predicated_break.cpp|  6 +++---
 src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp|  6 +++---
 src/mesa/drivers/dri/i965/brw_vec4.cpp   |  6 +++---
 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp   |  6 ++
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 10e19d8..8a54969 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -594,31 +594,33 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, 
bblock_t *block,
 bool
 fs_visitor::opt_copy_propagate()
 {
+   calculate_cfg();
+
bool progress = false;
void *copy_prop_ctx = ralloc_context(NULL);
-   cfg_t cfg(&instructions);
-   exec_list *out_acp[cfg.num_blocks];
-   for (int i = 0; i < cfg.num_blocks; i++)
+   exec_list *out_acp[cfg->num_blocks];
+
+   for (int i = 0; i < cfg->num_blocks; i++)
   out_acp[i] = new exec_list [ACP_HASH_SIZE];
 
/* First, walk through each block doing local copy propagation and getting
 * the set of copies available at the end of the block.
 */
-   for (int b = 0; b < cfg.num_blocks; b++) {
-  bblock_t *block = cfg.blocks[b];
+   for (int b = 0; b < cfg->num_blocks; b++) {
+  bblock_t *block = cfg->blocks[b];
 
   progress = opt_copy_propagate_local(copy_prop_ctx, block,
   out_acp[b]) || progress;
}
 
/* Do dataflow analysis for those available copies. */
-   fs_copy_prop_dataflow dataflow(copy_prop_ctx, &cfg, out_acp);
+   fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, out_acp);
 
/* Next, re-run local copy propagation, this time with the set of copies
 * provided by the dataflow analysis available at the start of a block.
 */
-   for (int b = 0; b < cfg.num_blocks; b++) {
-  bblock_t *block = cfg.blocks[b];
+   for (int b = 0; b < cfg->num_blocks; b++) {
+  bblock_t *block = cfg->blocks[b];
   exec_list in_acp[ACP_HASH_SIZE];
 
   for (int i = 0; i < dataflow.num_acp; i++) {
@@ -631,7 +633,7 @@ fs_visitor::opt_copy_propagate()
   progress = opt_copy_propagate_local(copy_prop_ctx, block, in_acp) || 
progress;
}
 
-   for (int i = 0; i < cfg.num_blocks; i++)
+   for (int i = 0; i < cfg->num_blocks; i++)
   delete [] out_acp[i];
ralloc_free(copy_prop_ctx);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
index bb0a2ac..3ba0b26 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp
@@ -45,10 +45,10 @@ fs_visitor::opt_peephole_predicated_break()
 {
bool progress = false;
 
-   cfg_t cfg(&instructions);
+   calculate_cfg();
 
-   for (int b = 0; b < cfg.num_blocks; b++) {
-  bblock_t *block = cfg.blocks[b];
+   for (int b = 0; b < cfg->num_blocks; b++) {
+  bblock_t *block = cfg->blocks[b];
 
   /* BREAK and CONTINUE instructions, by definition, can only be found at
* the ends of basic blocks.
diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
index db0be19..cf47cb5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp
@@ -127,10 +127,10 @@ fs_visitor::opt_peephole_sel()
 {
bool progress = false;
 
-   cfg_t cfg(&instructions);
+   calculate_cfg();
 
-   for (int b = 0; b < cfg.num_blocks; b++) {
-  bblock_t *block = cfg.blocks[b];
+   for (int b = 0; b < cfg->num_blocks; b++) {
+  bblock_t *block = cfg->blocks[b];
 
   /* IF instructions, by definition, can only be found at the ends of
* basic blocks.
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 9ea0b14..49a4e9b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -748,13 +748,13 @@ vec4_visitor::opt_set_dependency_control()
vec4_instruction *last_mrf_write[BRW_MAX_GRF];
uint8_t mrf_channels_written[BRW_MAX_GRF];
 
-   cfg_t cfg(&instructions);
+   calculate_cfg();
 
assert(prog_data->total_grf ||
   !"Must be called after register allocation");
 
-   for (int i = 0; i < cfg.num_blocks; i++) {
-  bblock_t *bblock = cfg.blocks[i];
+   for (int i = 0; i < cfg->num_blocks; i++) {
+  bblock_t *bblock = cfg->blocks[i];
   vec4_instruction *inst;
 
   memset(last_grf_write, 0, sizeof(last_grf_write));
diff

[Mesa-dev] [PATCH 7/7] i965: Pass a cfg pointer to generate_{code, assembly}.

2014-07-17 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp   |  4 +++-
 src/mesa/drivers/dri/i965/brw_fs.cpp  | 10 ++
 src/mesa/drivers/dri/i965/brw_fs.h| 12 ++--
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp| 22 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp|  6 --
 src/mesa/drivers/dri/i965/brw_vec4.h  |  8 
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp  | 12 
 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 10 +-
 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp   | 22 +-
 src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp | 12 
 src/mesa/drivers/dri/i965/intel_asm_annotation.c  |  2 +-
 src/mesa/drivers/dri/i965/intel_asm_annotation.h  |  2 +-
 12 files changed, 56 insertions(+), 66 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index c1676a9..8fa2e0e 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -24,6 +24,7 @@
 #include "glsl/ralloc.h"
 #include "brw_blorp_blit_eu.h"
 #include "brw_blorp.h"
+#include "brw_cfg.h"
 
 brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw,
bool debug_flag)
@@ -43,7 +44,8 @@ brw_blorp_eu_emitter::~brw_blorp_eu_emitter()
 const unsigned *
 brw_blorp_eu_emitter::get_program(unsigned *program_size)
 {
-   return generator.generate_assembly(NULL, &insts, program_size);
+   cfg_t cfg(&insts);
+   return generator.generate_assembly(NULL, &cfg, program_size);
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 56a0183..3ce909b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3202,6 +3202,8 @@ fs_visitor::run()
 */
assert(sanity_param_count == fp->Base.Parameters->NumParameters);
 
+   calculate_cfg();
+
return !failed;
 }
 
@@ -3245,7 +3247,7 @@ brw_wm_fs_emit(struct brw_context *brw,
   return NULL;
}
 
-   exec_list *simd16_instructions = NULL;
+   cfg_t *simd16_cfg = NULL;
fs_visitor v2(brw, mem_ctx, key, prog_data, prog, fp, 16);
if (brw->gen >= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16))) {
   if (!v.simd16_unsupported) {
@@ -3255,7 +3257,7 @@ brw_wm_fs_emit(struct brw_context *brw,
 perf_debug("SIMD16 shader failed to compile, falling back to "
"SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg);
  } else {
-simd16_instructions = &v2.instructions;
+simd16_cfg = v2.cfg;
  }
   } else {
  perf_debug("SIMD16 shader unsupported, falling back to "
@@ -3266,12 +3268,12 @@ brw_wm_fs_emit(struct brw_context *brw,
const unsigned *assembly = NULL;
if (brw->gen >= 8) {
   gen8_fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, 
v.do_dual_src);
-  assembly = g.generate_assembly(&v.instructions, simd16_instructions,
+  assembly = g.generate_assembly(v.cfg, simd16_cfg,
  final_assembly_size);
} else {
   fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src,
  v.runtime_check_aads_emit, INTEL_DEBUG & DEBUG_WM);
-  assembly = g.generate_assembly(&v.instructions, simd16_instructions,
+  assembly = g.generate_assembly(v.cfg, simd16_cfg,
  final_assembly_size);
}
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 9ba3f38..009a6d5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -578,12 +578,12 @@ public:
 bool debug_flag);
~fs_generator();
 
-   const unsigned *generate_assembly(exec_list *simd8_instructions,
- exec_list *simd16_instructions,
+   const unsigned *generate_assembly(const cfg_t *simd8_cfg,
+ const cfg_t *simd16_cfg,
  unsigned *assembly_size);
 
 private:
-   void generate_code(exec_list *instructions);
+   void generate_code(const cfg_t *cfg);
void fire_fb_write(fs_inst *inst,
   GLuint base_reg,
   struct brw_reg implied_header,
@@ -706,12 +706,12 @@ public:
  bool dual_source_output);
~gen8_fs_generator();
 
-   const unsigned *generate_assembly(exec_list *simd8_instructions,
- exec_list *simd16_instructions,
+   const unsigned *generate_assembly(const cfg_t *simd8_cfg,
+ const cfg_t *simd16_cfg,
  unsigned *assembly_size);
 
 private:
-   void generate_code(exec_list *instructions);
+   void generate_code(const cfg_t *cfg);
void generate_fb_write(fs_inst *inst);
   

[Mesa-dev] [PATCH 0/7] i965: Preserve the CFG (first pass)

2014-07-17 Thread Matt Turner
We'd like to avoid regenerating the control flow graph for every
pass that uses it. This series adds a cfg pointer to the backend
visitor class that we use to save the CFG across optimization
passes. It's invalidated and recreated by invalidate/calculate_cfg
and these functions are called by the similarly named live intervals
functions.

Just by doing this, we reduce the number of times we calculate the
CFG in a shader-db run by 55%.

Ultimately I'd like the CFG to be a fundamental data structure in our
backend, where each basic block contains its own list of instructions
rather than basic blocks containing pointers into a large list.

My WIP branch is getting a bit big, so here's a digestible chunk
that's ready for master.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/7] i965/vec4: Use foreach_inst_in_block a couple more places.

2014-07-17 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 5 +
 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 5 +
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 49a4e9b..045e1c5 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -755,14 +755,11 @@ vec4_visitor::opt_set_dependency_control()
 
for (int i = 0; i < cfg->num_blocks; i++) {
   bblock_t *bblock = cfg->blocks[i];
-  vec4_instruction *inst;
 
   memset(last_grf_write, 0, sizeof(last_grf_write));
   memset(last_mrf_write, 0, sizeof(last_mrf_write));
 
-  for (inst = (vec4_instruction *)bblock->start;
-   inst != (vec4_instruction *)bblock->end->next;
-   inst = (vec4_instruction *)inst->next) {
+  foreach_inst_in_block (vec4_instruction, inst, bblock) {
  /* If we read from a register that we were doing dependency control
   * on, don't do dependency control across the read.
   */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 86360d2..29d2e02 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -132,10 +132,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
void *cse_ctx = ralloc_context(NULL);
 
int ip = block->start_ip;
-   for (vec4_instruction *inst = (vec4_instruction *)block->start;
-inst != block->end->next;
-inst = (vec4_instruction *) inst->next) {
-
+   foreach_inst_in_block (vec4_instruction, inst, block) {
   /* Skip some cases. */
   if (is_expression(inst) && !inst->predicate && inst->mlen == 0 &&
   (inst->dst.file != HW_REG || inst->dst.is_null()))
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/7] i965/cfg: Add a foreach_block_and_inst macro.

2014-07-17 Thread Matt Turner
Will let us abstract how the instructions are stored.
---
 src/mesa/drivers/dri/i965/brw_cfg.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
b/src/mesa/drivers/dri/i965/brw_cfg.h
index cdbadde..01fcc1b 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -103,6 +103,10 @@ struct cfg_t {
int num_blocks;
 };
 
+#define foreach_block_and_inst(__block, __type, __inst, __cfg) \
+   foreach_block (__block, __cfg)  \
+  foreach_inst_in_block (__type, __inst, __block)
+
 #define foreach_inst_in_block(__type, __inst, __block) \
for (__type *__inst = (__type *)__block->start; \
 __inst != __block->end->next;  \
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/7] i965: Add and use foreach_block macro.

2014-07-17 Thread Matt Turner
Use this as an opportunity to rename 'block_num' to 'num'. block->num is
clear, and block->block_num has always been redundant.
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp  | 17 ++---
 src/mesa/drivers/dri/i965/brw_cfg.h|  5 +-
 .../drivers/dri/i965/brw_dead_control_flow.cpp |  3 +-
 .../drivers/dri/i965/brw_fs_copy_propagation.cpp   | 89 ++
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp   |  4 +-
 .../dri/i965/brw_fs_dead_code_eliminate.cpp|  5 +-
 .../drivers/dri/i965/brw_fs_live_variables.cpp | 50 ++--
 .../dri/i965/brw_fs_peephole_predicated_break.cpp  |  9 +--
 .../dri/i965/brw_fs_saturate_propagation.cpp   |  5 +-
 src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp  |  4 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp |  6 +-
 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp |  4 +-
 .../drivers/dri/i965/brw_vec4_live_variables.cpp   | 50 ++--
 src/mesa/drivers/dri/i965/intel_asm_annotation.c   |  8 +-
 14 files changed, 116 insertions(+), 143 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 4a5c912..d806b83 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -51,7 +51,7 @@ link(void *mem_ctx, bblock_t *block)
 }
 
 bblock_t::bblock_t() :
-   start_ip(0), end_ip(0), block_num(0)
+   start_ip(0), end_ip(0), num(0)
 {
start = NULL;
end = NULL;
@@ -284,7 +284,7 @@ cfg_t::set_next_block(bblock_t **cur, bblock_t *block, int 
ip)
}
 
block->start_ip = ip;
-   block->block_num = num_blocks++;
+   block->num = num_blocks++;
block_list.push_tail(&block->link);
*cur = block;
 }
@@ -295,7 +295,7 @@ cfg_t::make_block_array()
blocks = ralloc_array(mem_ctx, bblock_t *, num_blocks);
 
int i = 0;
-   foreach_list_typed(bblock_t, block, link, &block_list) {
+   foreach_block (block, this) {
   blocks[i++] = block;
}
assert(i == num_blocks);
@@ -304,19 +304,18 @@ cfg_t::make_block_array()
 void
 cfg_t::dump(backend_visitor *v)
 {
-   for (int b = 0; b < this->num_blocks; b++) {
-bblock_t *block = this->blocks[b];
-  fprintf(stderr, "START B%d", b);
+   foreach_block (block, this) {
+  fprintf(stderr, "START B%d", block->num);
   foreach_list_typed(bblock_link, link, link, &block->parents) {
  fprintf(stderr, " <-B%d",
- link->block->block_num);
+ link->block->num);
   }
   fprintf(stderr, "\n");
   block->dump(v);
-  fprintf(stderr, "END B%d", b);
+  fprintf(stderr, "END B%d", block->num);
   foreach_list_typed(bblock_link, link, link, &block->children) {
  fprintf(stderr, " ->B%d",
- link->block->block_num);
+ link->block->num);
   }
   fprintf(stderr, "\n");
}
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
b/src/mesa/drivers/dri/i965/brw_cfg.h
index 324df6c..f7203e2 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -71,7 +71,7 @@ struct bblock_t {
 
struct exec_list parents;
struct exec_list children;
-   int block_num;
+   int num;
 
/* If the current basic block ends in an IF, ELSE, or ENDIF instruction,
 * these pointers will hold the locations of the other associated control
@@ -109,6 +109,9 @@ struct cfg_t {
foreach_block (__block, __cfg)  \
   foreach_inst_in_block (__type, __inst, __block)
 
+#define foreach_block(__block, __cfg)  \
+   foreach_list_typed (bblock_t, __block, link, &(__cfg)->block_list)
+
 #define foreach_inst_in_block(__type, __inst, __block) \
for (__type *__inst = (__type *)__block->start; \
 __inst != __block->end->next;  \
diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp 
b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
index 14c6898..1591991 100644
--- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
+++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
@@ -41,8 +41,7 @@ dead_control_flow_eliminate(backend_visitor *v)
 
v->calculate_cfg();
 
-   for (int b = 0; b < v->cfg->num_blocks; b++) {
-  bblock_t *block = v->cfg->blocks[b];
+   foreach_block (block, v->cfg) {
   bool found = false;
 
   /* ENDIF instructions, by definition, can only be found at the start of
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 8a54969..52d3328 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -104,9 +104,9 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, 
cfg_t *cfg,
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
 
num_acp = 0;
-   for (int b = 0; b < cfg->num_blocks; b++) {
+   foreach_block (block, cfg) {
 

[Mesa-dev] [PATCH 1/7] i965: Add cfg to backend_visitor.

2014-07-17 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp |  6 +++---
 src/mesa/drivers/dri/i965/brw_fs.h  |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp|  7 +++
 .../drivers/dri/i965/brw_fs_dead_code_eliminate.cpp |  8 +++-
 src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp | 12 +---
 .../drivers/dri/i965/brw_fs_saturate_propagation.cpp|  8 +++-
 src/mesa/drivers/dri/i965/brw_shader.cpp| 17 -
 src/mesa/drivers/dri/i965/brw_shader.h  |  5 +
 src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp   | 16 +---
 9 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp 
b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
index 63a3e5b..14c6898 100644
--- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
+++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
@@ -39,10 +39,10 @@ dead_control_flow_eliminate(backend_visitor *v)
 {
bool progress = false;
 
-   cfg_t cfg(&v->instructions);
+   v->calculate_cfg();
 
-   for (int b = 0; b < cfg.num_blocks; b++) {
-  bblock_t *block = cfg.blocks[b];
+   for (int b = 0; b < v->cfg->num_blocks; b++) {
+  bblock_t *block = v->cfg->blocks[b];
   bool found = false;
 
   /* ENDIF instructions, by definition, can only be found at the start of
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 9c76bd2..9ba3f38 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -330,7 +330,7 @@ public:
void assign_constant_locations();
void demote_pull_constants();
void invalidate_live_intervals();
-   void calculate_live_intervals(const cfg_t *cfg = NULL);
+   void calculate_live_intervals();
void calculate_register_pressure();
bool opt_algebraic();
bool opt_cse();
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index d435d84..63d87f9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -315,11 +315,10 @@ fs_visitor::opt_cse()
 {
bool progress = false;
 
-   cfg_t cfg(&instructions);
-   calculate_live_intervals(&cfg);
+   calculate_live_intervals();
 
-   for (int b = 0; b < cfg.num_blocks; b++) {
-  bblock_t *block = cfg.blocks[b];
+   for (int b = 0; b < cfg->num_blocks; b++) {
+  bblock_t *block = cfg->blocks[b];
 
   progress = opt_cse_local(block) || progress;
}
diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
index d41a42c..c00ec1b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -39,15 +39,13 @@ fs_visitor::dead_code_eliminate()
 {
bool progress = false;
 
-   cfg_t cfg(&instructions);
-
-   calculate_live_intervals(&cfg);
+   calculate_live_intervals();
 
int num_vars = live_intervals->num_vars;
BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
 
-   for (int b = 0; b < cfg.num_blocks; b++) {
-  bblock_t *block = cfg.blocks[b];
+   for (int b = 0; b < cfg->num_blocks; b++) {
+  bblock_t *block = cfg->blocks[b];
   memcpy(live, live_intervals->bd[b].liveout,
  sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 585dc3d..57f3ce4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -295,6 +295,8 @@ fs_visitor::invalidate_live_intervals()
 {
ralloc_free(live_intervals);
live_intervals = NULL;
+
+   invalidate_cfg();
 }
 
 /**
@@ -304,7 +306,7 @@ fs_visitor::invalidate_live_intervals()
  * information about whole VGRFs.
  */
 void
-fs_visitor::calculate_live_intervals(const cfg_t *cfg)
+fs_visitor::calculate_live_intervals()
 {
if (this->live_intervals)
   return;
@@ -320,12 +322,8 @@ fs_visitor::calculate_live_intervals(const cfg_t *cfg)
   virtual_grf_end[i] = -1;
}
 
-   if (cfg) {
-  this->live_intervals = new(mem_ctx) fs_live_variables(this, cfg);
-   } else {
-  cfg_t cfg(&instructions);
-  this->live_intervals = new(mem_ctx) fs_live_variables(this, &cfg);
-   }
+   calculate_cfg();
+   this->live_intervals = new(mem_ctx) fs_live_variables(this, cfg);
 
/* Merge the per-component live ranges to whole VGRF live ranges. */
for (int i = 0; i < live_intervals->num_vars; i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
index 1287adb..0e04d3f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp
@@ -93,12 +93,10 @

[Mesa-dev] [PATCH 5/7] i965/cfg: Embed link in bblock_t for main block list.

2014-07-17 Thread Matt Turner
The next patch adds a foreach_block (block, cfg) macro, which works
better if it provides a direct bblock_t pointer, rather than a
bblock_link pointer that you have to use to find the actual block.
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp | 10 +-
 src/mesa/drivers/dri/i965/brw_cfg.h   |  2 ++
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 07111f5..4a5c912 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -67,8 +67,8 @@ bblock_t::bblock_t() :
 void
 bblock_t::add_successor(void *mem_ctx, bblock_t *successor)
 {
-   successor->parents.push_tail(link(mem_ctx, this));
-   children.push_tail(link(mem_ctx, successor));
+   successor->parents.push_tail(::link(mem_ctx, this));
+   children.push_tail(::link(mem_ctx, successor));
 }
 
 void
@@ -285,7 +285,7 @@ cfg_t::set_next_block(bblock_t **cur, bblock_t *block, int 
ip)
 
block->start_ip = ip;
block->block_num = num_blocks++;
-   block_list.push_tail(link(mem_ctx, block));
+   block_list.push_tail(&block->link);
*cur = block;
 }
 
@@ -295,8 +295,8 @@ cfg_t::make_block_array()
blocks = ralloc_array(mem_ctx, bblock_t *, num_blocks);
 
int i = 0;
-   foreach_list_typed(bblock_link, link, link, &block_list) {
-  blocks[i++] = link->block;
+   foreach_list_typed(bblock_t, block, link, &block_list) {
+  blocks[i++] = block;
}
assert(i == num_blocks);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
b/src/mesa/drivers/dri/i965/brw_cfg.h
index 01fcc1b..324df6c 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -61,6 +61,8 @@ struct bblock_t {
void dump(backend_visitor *v);
 #endif
 
+   struct exec_node link;
+
struct backend_instruction *start;
struct backend_instruction *end;
 
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/6] Enable Pixman for Mesa

2014-07-17 Thread Jason Ekstrand
Juha-Pekka,
The patch I just sent to the list here:

http://lists.freedesktop.org/archives/mesa-dev/2014-July/063501.html

Should also help with chrome performance.  We have had a fast-path inside
the intel driver for some time to do BGRA/RGBA -> BGRA conversions.  For
some reason, chrome seems to have switched to using RGBA textures (they
used to use BGRA).  The above patch enables the fastpath for converting to
RGBA as well as BGRA.  Because this fastpath is in the driver itself and
handles tiling directly, it's much faster than anything that goes through
the paths in main/texstore.c. It would be interesting to note what that
does to power consumption.
--Jason Ekstrand



On Wed, Jun 25, 2014 at 5:38 AM, Juha-Pekka Heikkila <
juhapekka.heikk...@gmail.com> wrote:

> This is my old patch set, which enables using Pixman in Mesa for some texture
> conversion fast paths. As is, this passes the Piglit quick set on my IVB.
>
> In the general case this does not offer noticeably better fps or the like, but
> these patches were recently tested on a Chromebook with HSW inside, where they
> showed a promising drop in power draw. Both with and without Pixman, Mesa was
> compiled with march=corei7.
>
> The numbers here indicate the % change in Watts; averages of multiple runs
> were used for the comparison. While power draw was reduced, performance
> remained at roughly the same level.
>
> Javascript - Octane v2                0.457%
> Page Rendering - Page Loading        -0.280%
> HTML5 Games - JSGameBench            -2.275%
> HTML5 Games - AngryBirds             -4.483%
> Canvas 2D - CanvasEarth              -4.003%
> Canvas2D - FishTank 250 fish         -3.176%
> WebGL - EarthScreen                  -2.959%
> Browser Performance - RoboHornet     -5.833%
> WebGL - Aquarium 50 fish             -4.448%
> WebGL - Aquarium 100 fish            -4.094%
>
> /Juha-Pekka
>
> Juha-Pekka Heikkila (6):
>   mesa: Add Pixman library to configure.ac
>   mesa: Add Pixman usage to texstore.c
>   mesa: Pixman texture conversion rgb to abgr
>   mesa: Pixman texture conversion argb to abgr
>   mesa: Pixman texture conversion argb to rgb
>   mesa: Pixman texture conversion rgb to rgb565 and rgb565_rev
>
>  configure.ac |  32 +++
>  src/mesa/main/texstore.c | 105
> +--
>  2 files changed, 134 insertions(+), 3 deletions(-)
>
> --
> 1.8.1.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965/disasm: Don't disassemble the URB complete field on Broadwell.

2014-07-17 Thread Kenneth Graunke
It doesn't exist, so attempting to read it will trigger generation
assertions in the brw_inst API.

Signed-off-by: Kenneth Graunke 
---
 src/mesa/drivers/dri/i965/brw_disasm.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c 
b/src/mesa/drivers/dri/i965/brw_disasm.c
index d7362c1..f66865f 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -1410,8 +1410,10 @@ brw_disassemble_inst(FILE *file, struct brw_context 
*brw, brw_inst *inst,
err |= control(file, "urb used", urb_used,
   brw_inst_urb_used(brw, inst), &space);
 }
-err |= control(file, "urb complete", urb_complete,
-   brw_inst_urb_complete(brw, inst), &space);
+if (brw->gen < 8) {
+   err |= control(file, "urb complete", urb_complete,
+  brw_inst_urb_complete(brw, inst), &space);
+}
 break;
  case BRW_SFID_THREAD_SPAWNER:
 break;
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 79949] [DRI3] GTK+ Programs Not Updating Correctly

2014-07-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=79949

Mau  changed:

   What|Removed |Added

 CC||mav...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 79949] [DRI3] GTK+ Programs Not Updating Correctly

2014-07-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=79949

--- Comment #7 from Axel Davy  ---
It could be solved by this commit:
http://cgit.freedesktop.org/mesa/mesa/commit/?h=10.2&id=3ca21195939477fa1f9b113ce4f6da36a8480321

Please test mesa git or mesa 10.2.3

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] Add support for RGBA8 and RGBX8 textures in intel_texsubimage_tiled_memcpy

2014-07-17 Thread Kenneth Graunke
On Thursday, July 17, 2014 02:41:31 PM Jason Ekstrand wrote:
> Signed-off-by: Jason Ekstrand 
> ---
>  src/mesa/drivers/dri/i965/intel_tex_subimage.c | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c 
> b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
> index 04cbc4c..4043eb7 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
> @@ -585,6 +585,16 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
>} else if (format == GL_RGBA) {
>   mem_copy = rgba8_copy;
>}
> +   } else if ((texImage->TexFormat == MESA_FORMAT_R8G8B8A8_UNORM) ||
> +  (texImage->TexFormat == MESA_FORMAT_R8G8B8X8_UNORM)) {
> +  cpp = 4;
> +  if (format == GL_BGRA) {
> + /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can
> +  * use the same function. */

*/ goes on a separate line.

Patch 2 is:
Reviewed-by: Kenneth Graunke 

(patch 1 looks fine too but I didn't check it very thoroughly)

> + mem_copy = rgba8_copy;
> +  } else if (format == GL_RGBA) {
> + mem_copy = memcpy;
> +  }
> }
> if (!mem_copy)
>return false;
> 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Disable hex offset printing in disassembly.

2014-07-17 Thread Kenneth Graunke
Printing the hex offsets makes it basically impossible to diff assembly:
if you add even a single instruction, the entire shader shows up as a
difference.  So, every time I want to compare assembly, I have to strip
this out.

The hex offsets might be useful when debugging compaction, or when
inspecting the program cache buffer.  Since it's occasionally useful,
but uncommon, this patch disables it by default, but makes it easy to
re-enable it temporarily when the need arises.

Signed-off-by: Kenneth Graunke 
Cc: Matt Turner 
---
 src/mesa/drivers/dri/i965/brw_eu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.c 
b/src/mesa/drivers/dri/i965/brw_eu.c
index f4c7495..a400b44 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -241,7 +241,8 @@ brw_disassemble(struct brw_context *brw,
   brw_inst *insn = assembly + offset;
   brw_inst uncompacted;
   bool compacted = brw_inst_cmpt_control(brw, insn);
-  fprintf(out, "0x%08x: ", offset);
+  if (0)
+ fprintf(out, "0x%08x: ", offset);
 
   if (compacted) {
  brw_compact_inst *compacted = (void *)insn;
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/23] glsl/glcpp: Stop using a lexer start condition (<SKIP>) for token skipping.

2014-07-17 Thread Jordan Justen
On Thu, Jun 26, 2014 at 3:19 PM, Carl Worth  wrote:
> Here, "skipping" refers to the lexer not emitting any tokens for portions of
> the file within an #if condition (or similar) that evaluates to false.
>
> Previously, the lexer had a special <SKIP> start condition used to control
> this skipping. This start condition was not handled like a normal start
> condition. Instead, there was a particularly ugly block of code set to be
> included at the top of the generated lexing loop that would change from
> <INITIAL> to <SKIP> or from <SKIP> to <INITIAL> depending on various pieces of
> parser state, (such as parser->skip_state and parser->lexing_directive).
>
> Not only was that an ugly approach, but the <SKIP> start condition was
> complicating several glcpp bug fixes I attempted recently that want to use
> start conditions for other purposes, (such as a new <HASH> start condition).
>
> The recently added RETURN_TOKEN macro gives us a convenient way to implement
> skipping without using a lexer start condition. Now, at the top of the
> generated lexer, we examine all the necessary parser state and set a new
> parser->skipping bit. Then, in RETURN_TOKEN, we examine parser->skipping to
> determine whether to actually emit the token or not.
>
> Besides this, there are only a couple of other places where we need to examine
> the skipping bit (other than when returning a token):
>
> * To avoid emitting an error for #error if skipped.
>
> * To avoid entering the <DEFINE> start condition for a #define that is
>   skipped.
>
> With all of this in place in the present commit, there are hopefully no
> behavioral changes with this patch, ("make check" still passes all of the
> glcpp tests at least).
> ---
>  src/glsl/glcpp/glcpp-lex.l   | 160 
> ++-
>  src/glsl/glcpp/glcpp-parse.y |   1 +
>  src/glsl/glcpp/glcpp.h   |   1 +
>  3 files changed, 99 insertions(+), 63 deletions(-)
>
> diff --git a/src/glsl/glcpp/glcpp-lex.l b/src/glsl/glcpp/glcpp-lex.l
> index 37fcc84..cb06bb8 100644
> --- a/src/glsl/glcpp/glcpp-lex.l
> +++ b/src/glsl/glcpp/glcpp-lex.l
> @@ -61,19 +61,52 @@ void glcpp_set_column (int  column_no , yyscan_t 
> yyscanner);
> yylloc->source = 0; \
> } while(0)
>
> -#define RETURN_TOKEN(token)\
> +/* It's ugly to have macros that have return statements inside of
> + * them, but flex-based lexer generation is all built around the
> + * return statement.
> + *
> + * To mitigate the ugliness, we defer as much of the logic as possible
> + * to an actual function, not a macro (see
> + * glcpplex_update_state_per_token) and we make the word RETURN
> + * prominent in all of the macros which may return.
> + *
> + * The most-commonly-used macro is RETURN_TOKEN which will perform all
> + * necessary state updates based on the provided token,, then
> + * conditionally return the token. It will not return a token if the
> + * parser is currently skipping tokens, (such as within #if
> + * 0...#else).
> + *
> + * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that
> + * makes the token returning unconditional. This is needed for things
> + * like #if and the tokens of its condition, (since these must be
> + * evaluated by the parser even when otherwise skipping).
> + *
> + * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top
> + * of RETURN_TOKEN that performs a string copy of yytext before the
> + * return.
> + */
> +#define RETURN_TOKEN_NEVER_SKIP(token) \
> do {\
> if (token == NEWLINE)   \
> parser->last_token_was_newline = 1; \
> else\
> parser->last_token_was_newline = 0; \
> return (token); \
> +   } while (0)
> +
> +#define RETURN_TOKEN(token)\
> +   do {\
> +   if (! parser->skipping) {   \
> +   RETURN_TOKEN_NEVER_SKIP(token); \

It looks like parser->last_token_was_newline will not be updated while
skipping. Should we update that during the skipping even though we're
not returning tokens?

> +   }   \
> } while(0)
>
> -#define RETURN_STRING_TOKEN(token) \
> -   do {\
> -   yylval->str = ralloc_strdup (yyextra, yytext);  \
> -   RETURN_TOKEN (token);   \
> +#define RETURN_STRING_TOKEN(token) \
> +   do {\
> +   if (! parser->skipping) 

[Mesa-dev] [PATCH] i965: Don't print WE_normal in disassembly.

2014-07-17 Thread Kenneth Graunke
Dropping this helps most lines fit in an 80 column terminal.  The
absence of WE_normal also helps call attention to WE_all, where
something unusual is going on.

Signed-off-by: Kenneth Graunke 
Cc: Matt Turner 
---
 src/mesa/drivers/dri/i965/brw_disasm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c 
b/src/mesa/drivers/dri/i965/brw_disasm.c
index f66865f..c6f981a 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -213,7 +213,7 @@ static const char *const accwr[2] = {
 };
 
 static const char *const wectrl[2] = {
-   [0] = "WE_normal",
+   [0] = "",
[1] = "WE_all"
 };
 
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Don't print WE_normal in disassembly.

2014-07-17 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Disable hex offset printing in disassembly.

2014-07-17 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/disasm: Don't disassemble the URB complete field on Broadwell.

2014-07-17 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] glsl/glcpp: A bunch of pre-processor cleanups

2014-07-17 Thread Jordan Justen
Made it ~25% through. :) I'll be busy for a bit, but I'll continue
looking at the rest later.

01/23 glsl/glcpp: Emit proper error for #define with a non-identifier
  Reviewed-by: Jordan Justen 

02/23 glsl/glcpp: Add support for comments between #define and macro identifier
  Reviewed-by: Jordan Justen 

03/23 glsl/glcpp: Remove some un-needed calls to NEWLINE_CATCHUP
  * Reference 6005e9cb in comment?
  Reviewed-by: Jordan Justen 

04/23 glsl/glcpp: Add testing for EOF sans newline (and fix for
<DEFINE>, <COMMENT>)
  Reviewed-by: Jordan Justen 

05/23 glsl/glcpp: Drop extra, final newline from most output
  * In the "\n {" section, you set
"parser->last_token_was_newline = 1;"
Doesn't "RETURN_TOKEN (NEWLINE);" do this as well?
  Reviewed-by: Jordan Justen 

06/23 glsl/glcpp: Abstract a bit of common code for returning string tokens
  Reviewed-by: Jordan Justen 

On Thu, Jun 26, 2014 at 3:19 PM, Carl Worth  wrote:
> Here's my latest series of patches to improve conformance of glcpp, (the glsl
> preprocessor in mesa).
>
> Most of these changes are fixes that only a test-suite author could love. Most
> fix nit-picky tests that do things that no sane application would actually
> do. They're all reasonable things to do, but few are likely to impact many
> real applications.
>
> The entire series (as well as some earlier patches already reviewed) can be
> found on the glcpp-fixup branch of my mesa tree:
>
> git://people.freedesktop.org/~cworth/mesa
>
> Here's a run-down of what the changes are in this series:
>
> Patch 01: Give an error for "#define 123" or similar non-identifier
>
> Not a useful thing to do, of course, but an error we need.
>
> Patch 02: Support comment here: "#define /* Ha! */ FOO"
>
> Patches 03-12: Many cleanups/rewriting while working on the next patch
>
> Patch 13: Support comment here: "# /* Tricky! */ define FOO"
>
> Comments appearing in these places are not likely, but are clearly
> valid according to the language specification. There was a bunch of
> work necessary to make this fix easy, (and even with all the
> preliminary work, the final patch was longer than I wanted).
>
> I am happy that the lexer state at the end of this cleanup is much
> simpler and easier to read than it was before.
>
> Patch 14: Emit internal error for unrecognized character
>
> This is to make un-subtle all classes of subtle bugs where the default
> flex rule was simply printing unrecognized characters to stdout and
> dropping them from the GLSL source.
>
> This is not actually in glcpp but in the lexer for the main glsl
> compiler.
>
> Patch 15: Emit error for bogus extra characters after #extension
>
> This is an example of a fix for one of those subtle bugs from the flex
> default-rule. This is a patch from Ken that was sent some time ago.
>
> Patches 16-17: Trivial fixups (renaming of token identifiers and new comment)
>
> Patch 18: Emit an error for duplicate macro parameter, eg "#define FOO(a, a)"
>
> Patch 19: Emit error if "++" or "--" appear in preprocessor condition
>
> Patches 20-21: Two new tests for bugs that I wrote (and fixed) while working
>on some of the above.
>
> Patch 22: Emit internal error for unrecognized character
>
> This is just like patch 14, but for the lexer in glcpp itself.
>
> Patch 23: Treat '\r' as equivalent to '\n'
>
> The '\r' character was previously hitting the default lex, "print and
> throw away" rule so was being entirely ignored. With patch 22, '\r'
> would instead generate an internal error. Fix this by making '\r'
> equivalent to '\n'.
>
> I'd like to be even more spec-compliant for '\r', but I think this is
> OK for now. I'd also like to add some more-exhaustive testing for
> '\r', (such as running all of glcpp-test on the test cases with '\n'
> changed to "\r\n").
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] gbm: Log at least one dlerror() when we fail to open any drivers.

2014-07-17 Thread Eric Anholt
We don't want to log every single error (such as all the ones where the file
wasn't even present in our list of search paths), but if you didn't find any
driver, then seeing at least one error is useful (since the common case as a
developer is a single DEFAULT_DRIVER_DIR or GBM_DRIVERS_PATH entry).
---
 src/gbm/backends/dri/gbm_dri.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index f421a03..4a19011 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -245,6 +245,7 @@ dri_load_driver(struct gbm_dri_device *dri)
if (dri->driver == NULL) {
   fprintf(stderr, "gbm: failed to open any driver (search paths %s)\n",
   search_paths);
+  fprintf(stderr, "gbm: Last dlopen error: %s\n", dlerror());
   return -1;
}
 
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] gallium: Add a uif() helper function to complement fui()

2014-07-17 Thread Eric Anholt
I found myself often wanting this when I'm printing out a uint32_t mapping
of some GPU data, and I want to put in an interpretation of that value as
a float.
---
 src/gallium/auxiliary/util/u_math.h | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index b9ed197..68ca3f2 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -616,6 +616,14 @@ fui( float f )
return fi.ui;
 }
 
+static INLINE float
+uif(uint32_t ui)
+{
+union fi fi;
+fi.ui = ui;
+return fi.f;
+}
+
 
 /**
  * Convert ubyte to float in [0, 1].
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] gbm: Fix a debug log message

2014-07-17 Thread Eric Anholt
---
 src/gbm/backends/dri/gbm_dri.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 347bc99..f421a03 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -243,7 +243,7 @@ dri_load_driver(struct gbm_dri_device *dri)
}
 
if (dri->driver == NULL) {
-  fprintf(stderr, "gbm: failed to open any driver (search paths %s)",
+  fprintf(stderr, "gbm: failed to open any driver (search paths %s)\n",
   search_paths);
   return -1;
}
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] common-code patches before vc4 driver

2014-07-17 Thread Eric Anholt
Here are the patches I have for common code in my vc4 driver tree.  I
think they should be obvious enough.

I'm curious what people feel about merging vc4.  I've got a series at this
point that's clean enough in my opinion (copyrights fixed up, and I think
a tolerable mix of "giant code dump for starting out the project" and
"mostly sensible incremental work from there".)  Should I just go ahead
whenever?  Does anybody want to engage in a review process with me, or
should I just go ahead on my own, like Rob's doing with freedreno?

Of course, the driver code I've written so far isn't using an actual
stable kernel ABI -- I still need to handle little things like validating
shaders and uniforms for security (and to relocate sampler configuration
parameters), and asynchronous execution of command lists.  So anything I
land would be getting ABI-breaking reworks later on.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCHi v3] r600g: Implement GL_ARB_texture_gather

2014-07-17 Thread Marek Olšák
Pushed. Thanks.

Marek

On Wed, Jul 16, 2014 at 4:31 PM, Glenn Kennard  wrote:
> Only supported on evergreen and later. Currently limited
> to single component textures as the hardware GATHER4
> instruction ignores texture swizzles.
>
> Piglit quick run passes on radeon 6670 with all
> applicable textureGather tests, no regressions.
>
> Signed-off-by: Glenn Kennard 
> ---
> Changes from v2:
>  Remove accidental disabling of unrelated caps that snuck in.
>  Oddly enough not caught by comparing piglit "quick" runs.
> Changes from v1:
>  Removed PIPE_CAP_TEXTURE_GATHER_SM5 cap
>
>  docs/GL3.txt   |  2 +-
>  docs/relnotes/10.3.html|  2 +-
>  src/gallium/drivers/r600/r600_pipe.c   |  2 +-
>  src/gallium/drivers/r600/r600_shader.c | 47 
> +-
>  4 files changed, 44 insertions(+), 9 deletions(-)
>
> diff --git a/docs/GL3.txt b/docs/GL3.txt
> index a2f438b..20e57b0 100644
> --- a/docs/GL3.txt
> +++ b/docs/GL3.txt
> @@ -118,7 +118,7 @@ GL 4.0:
>GL_ARB_tessellation_shader   started (Fabian)
>GL_ARB_texture_buffer_object_rgb32   DONE (i965, nvc0, 
> r600, radeonsi, softpipe)
>GL_ARB_texture_cube_map_arrayDONE (i965, nv50, 
> nvc0, r600, radeonsi, softpipe)
> -  GL_ARB_texture_gatherDONE (i965, nv50, 
> nvc0, radeonsi)
> +  GL_ARB_texture_gatherDONE (i965, nv50, 
> nvc0, radeonsi, r600)
>GL_ARB_texture_query_lod DONE (i965, nv50, 
> nvc0, radeonsi)
>GL_ARB_transform_feedback2   DONE (i965, nv50, 
> nvc0, r600, radeonsi)
>GL_ARB_transform_feedback3   DONE (i965, nv50, 
> nvc0, r600, radeonsi)
> diff --git a/docs/relnotes/10.3.html b/docs/relnotes/10.3.html
> index 2e718fc..1c0fab6 100644
> --- a/docs/relnotes/10.3.html
> +++ b/docs/relnotes/10.3.html
> @@ -49,7 +49,7 @@ Note: some of the new features are only available with 
> certain drivers.
>  GL_ARB_sample_shading on radeonsi
>  GL_ARB_stencil_texturing on nv50, nvc0, r600, and radeonsi
>  GL_ARB_texture_cube_map_array on radeonsi
> -GL_ARB_texture_gather on radeonsi
> +GL_ARB_texture_gather on radeonsi, r600
>  GL_ARB_texture_query_levels on nv50, nvc0, llvmpipe, r600, radeonsi, 
> softpipe
>  GL_ARB_texture_query_lod on radeonsi
>  GL_ARB_viewport_array on nvc0
> diff --git a/src/gallium/drivers/r600/r600_pipe.c 
> b/src/gallium/drivers/r600/r600_pipe.c
> index ca6399f..5bf9c00 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -303,6 +303,7 @@ static int r600_get_param(struct pipe_screen* pscreen, 
> enum pipe_cap param)
> case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
> case PIPE_CAP_CUBE_MAP_ARRAY:
> case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT:
> +   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
> return family >= CHIP_CEDAR ? 1 : 0;
>
> /* Unsupported features. */
> @@ -312,7 +313,6 @@ static int r600_get_param(struct pipe_screen* pscreen, 
> enum pipe_cap param)
> case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
> case PIPE_CAP_VERTEX_COLOR_CLAMPED:
> case PIPE_CAP_USER_VERTEX_BUFFERS:
> -   case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
> case PIPE_CAP_TEXTURE_GATHER_SM5:
> case PIPE_CAP_TEXTURE_QUERY_LOD:
> case PIPE_CAP_SAMPLE_SHADING:
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index 6952e3c..db928f3 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -4477,7 +4477,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
>
> if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 ||
> inst->Instruction.Opcode == TGSI_OPCODE_TXB2 ||
> -   inst->Instruction.Opcode == TGSI_OPCODE_TXL2)
> +   inst->Instruction.Opcode == TGSI_OPCODE_TXL2 ||
> +   inst->Instruction.Opcode == TGSI_OPCODE_TG4)
> sampler_src_reg = 2;
>
> src_gpr = tgsi_tex_get_src_gpr(ctx, 0);
> @@ -5079,6 +5080,13 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
> case FETCH_OP_SAMPLE_G:
> opcode = FETCH_OP_SAMPLE_C_G;
> break;
> +   /* Texture gather variants */
> +   case FETCH_OP_GATHER4:
> +   tex.op = FETCH_OP_GATHER4_C;
> +   break;
> +   case FETCH_OP_GATHER4_O:
> +   tex.op = FETCH_OP_GATHER4_C_O;
> +   break;
> }
> }
>
> @@ -5089,9 +5097,21 @@ static int tgsi_tex(struct r600_shader_ctx *ctx)
> tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS;
> tex.src_gpr = src_gpr;
> tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + 
> inst->Dst[0].Register.Index;
> -

Re: [Mesa-dev] [PATCH 2/2] Add support for RGBA8 and RGBX8 textures in intel_texsubimage_tiled_memcpy

2014-07-17 Thread Chad Versace
Both patches are
Reviewed-by: Chad Versace 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] gallium: Add a uif() helper function to complement fui()

2014-07-17 Thread Michel Dänzer
On 18.07.2014 08:56, Eric Anholt wrote:
> I found myself often wanting this when I'm printing out a uint32_t mapping
> of some GPU data, and I want to put in an interpretation of that value as
> a float.
> ---
>  src/gallium/auxiliary/util/u_math.h | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/src/gallium/auxiliary/util/u_math.h 
> b/src/gallium/auxiliary/util/u_math.h
> index b9ed197..68ca3f2 100644
> --- a/src/gallium/auxiliary/util/u_math.h
> +++ b/src/gallium/auxiliary/util/u_math.h
> @@ -616,6 +616,14 @@ fui( float f )
> return fi.ui;
>  }
>  
> +static INLINE float
> +uif(uint32_t ui)
> +{
> +union fi fi;
> +fi.ui = ui;
> +return fi.f;
> +}
> +
>  
>  /**
>   * Convert ubyte to float in [0, 1].
> 

Reviewed-by: Michel Dänzer 

The GBM changes look good to me as well.


-- 
Earthling Michel Dänzer|  http://www.amd.com
Libre software enthusiast  |Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Correctly use glDrawBuffers for multiple buffers and glDrawBuffer for one buffer.

2014-07-17 Thread Popov, Pavel E
Hi Kenneth, Ian,

Could you look through my patch?

Regards,
Pavel

-Original Message-
From: Popov, Pavel E 
Sent: Thursday, July 17, 2014 10:21 PM
To: mesa-dev@lists.freedesktop.org
Cc: Popov, Pavel E
Subject: [PATCH] mesa: Correctly use glDrawBuffers for multiple buffers and 
glDrawBuffer for one buffer.

According to spec (OpenGL 4.0 specification, pages 254-255) we have different 
bits set for one buffer and for multiple buffers. For glDrawBuffer we may have 
up to four bits set but for glDrawBuffers we can only have one bit set.

The _mesa_drawbuffers is called with ctx->Const.MaxDrawBuffers and NULL 
arguments when _mesa_update_framebuffer or _mesa_update_draw_buffers is called. 
In this situation the glDrawBuffers implementation is used for any number of 
buffers, even for one. But the glDrawBuffer path has to be used for one buffer 
instead of glDrawBuffers.

Piglit test 'gl30basic' fails with an assertion on debug Mesa builds and passes on release builds:
'main/buffers.c:520: _mesa_drawbuffers: Assertion 
`__builtin_popcount(destMask[buf]) == 1' failed.'
Probably some other tests also can be affected.

Signed-off-by: Pavel Popov 
---
 src/mesa/main/buffers.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index 
b13a7af..a640360 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -480,6 +480,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const 
GLenum *buffers,
struct gl_framebuffer *fb = ctx->DrawBuffer;
GLbitfield mask[MAX_DRAW_BUFFERS];
GLuint buf;
+   GLuint m = n;
 
if (!destMask) {
   /* compute destMask values now */ @@ -489,15 +490,17 @@ 
_mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers,
  mask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]);
  ASSERT(mask[output] != BAD_MASK);
  mask[output] &= supportedMask;
+ if (mask[output] == 0)
+m--;
   }
   destMask = mask;
}
 
/*
-* If n==1, destMask[0] may have up to four bits set.
+* If m==1, destMask[0] may have up to four bits set.
 * Otherwise, destMask[x] can only have one bit set.
 */
-   if (n == 1) {
+   if (m == 1) {
   GLuint count = 0, destMask0 = destMask[0];
   while (destMask0) {
  GLint bufIndex = ffs(destMask0) - 1;
--
1.9.1



Closed Joint Stock Company Intel A/O
Registered legal address: Krylatsky Hills Business Park, 
17 Krylatskaya Str., Bldg 4, Moscow 121614, 
Russian Federation

This e-mail and any attachments may contain confidential material for
the sole use of the intended recipient(s). Any review or distribution
by others is strictly prohibited. If you are not the intended
recipient, please contact the sender and delete all copies.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 36414] Screen mess "white smoke" when running vdrift

2014-07-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=36414

meng  changed:

   What|Removed |Added

 Status|RESOLVED|VERIFIED

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 36414] Screen mess "white smoke" when running vdrift

2014-07-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=36414

--- Comment #12 from meng  ---
As the issue is an application bug, I have marked it as verified.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] [RFC] r600g/compute: Adding support for defragmenting compute_memory_pool

2014-07-17 Thread Tom Stellard
On Wed, Jul 16, 2014 at 11:12:42PM +0200, Bruno Jiménez wrote:
> Hi,
> 
> This series finally adds support for defragmenting the pool for
> OpenCL buffers in the r600g driver. It is mostly a rewrite of
> the series that I wrote some months ago.
> 
> For defragmenting the pool I have thought of two different
> possibilities:
> 
> - Creating a new pool and moving every item here in the correct
> position. This has the advantage of being very simple to
> implement and that it allows the pool to be grown at the
> same time. But it has a couple of problems, namely that it
> has a high memory peak usage (sum of current pool + new pool)
> and that in the case of having a pool not very fragmented you
> have to copy every item to its new place.
> - Using the same pool by moving the items in it. This has the
> advantage of using less memory (sum of current pool + biggest
> item in it) and that it is easier to handle the case of
> only having few elements out of place. The disadvantages
> are that it doesn't allow growing the pool at the same time
> and that it may involve twice the number of item-copies in 
> the worst case.
> 
> I have chosen to implement the second option, but if you think
> that the first one is better I can rewrite the series for it.
> (^_^)
> 
> The worst case I have mentioned is this: Imagine that you have
> a series of items in which the first is, at least, 1 'unit'
> smaller than the rest. You now free this item and create a new
> one with the same size [why would anyone do this? I don't know]
> For now, the defragmenter code is so dumb that it will move
> every item to the front of the pool without trying first to
> put this new item in the available space.
> 
> Hopefully situations like this won't be very common.
> 
> If you want me to explain any detail about any of the patches
> just ask. And as said, if you prefer the first version of the
> defragmenter, just ask. [In fact, after having written this,
> I may add it for the case grow+defrag]
> 
> Also, no regressions found in piglit.
> 
> Thanks in advance!
> Bruno
> 
> Bruno Jiménez (5):
>   r600g/compute: Add a function for moving items in the pool
>   r600g/compute: Add a function for defragmenting the pool
>   r600g/compute: Defrag the pool if it's necesary
>   r600g/compute: Quick exit if there's nothing to add to the pool
>   r600g/compute: Remove unneeded code from compute_memory_promote_item
> 
>  src/gallium/drivers/r600/compute_memory_pool.c | 196 
> ++---
>  src/gallium/drivers/r600/compute_memory_pool.h |  13 +-
>  2 files changed, 156 insertions(+), 53 deletions(-)

Hi,

I took a brief look at these patches and they look pretty good.  I will
look at them again tomorrow and then commit if I don't see any issues.

-Tom

> 
> -- 
> 2.0.1
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 79949] [DRI3] GTK+ Programs Not Updating Correctly

2014-07-17 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=79949

--- Comment #8 from Joseph Booker  ---
Thanks. It doesn't seem to address this. With xf86-video-intel 2.99.912, the
same problem shows with mesa 10.2.3 and mesa master branch (on commit
f14d217f5c72651b9f24a83b11ace16837db4603 ).

If it helps, the screen flickers between old/new content occurs when I switch
tabs to a tab already loaded, or when I alternate page up/down (so a section of
the page recently loaded is reshown). Also, it stops (and doesn't seem to
start) when I scroll with my touchpad or use the arrow keys.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT

2014-07-17 Thread Michel Dänzer
On 17.07.2014 19:09, Christian König wrote:
> Am 17.07.2014 12:01, schrieb Michel Dänzer:
>> In order to try and improve X(Shm)PutImage performance with glamor, I
>> implemented support for write-combined CPU mappings of BOs in GTT.
>>
>> This did provide a nice speedup, but to my surprise, using VRAM instead
>> of write-combined GTT turned out to be even faster in general on my
>> Kaveri machine, both for the internal GPU and for discrete GPUs.
>>
>> However, I've kept the changes from GTT to VRAM separated, in case this
>> turns out to be a loss on other setups.
>>
>> Kernel patches:
>>
>> [PATCH 1/5] drm/radeon: Remove radeon_gart_restore()
>> [PATCH 2/5] drm/radeon: Pass GART page flags to
>> [PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in
>> [PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and
> 
> Those four are Reviewed-by: Christian König 

Thanks!


>> [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI
> 
> I'm still not very keen on this change since I still don't understand
> the reason why it's faster than with GTT. Definitely needs more testing
> on a wider range of systems.

Sure. If anyone wants to give this patch a spin and see if they can
measure any performance difference, good or bad, that would be interesting.

> Maybe limit it to APUs for now?

But IIRC, CPU writes to VRAM vs. write-combined GTT are actually an even
bigger win with dedicated GPUs than with the Kaveri built-in GPU on my
system. I suspect it may depend on the bandwidth available for PCIe vs.
system memory though.


-- 
Earthling Michel Dänzer|  http://www.amd.com
Libre software enthusiast  |Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings

2014-07-17 Thread Michel Dänzer
On 17.07.2014 21:00, Marek Olšák wrote:
> On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer  wrote:
>> From: Michel Dänzer 
>>
>> This is hopefully safe: The kernel makes sure writes to these mappings
>> finish before the GPU might start reading from them, and the GPU caches
>> are invalidated at the start of a command stream.
>>
> The resource flags actually tell you what you can do. If the COHERENT
> flag is set, the mapping must be cached.

Why is that required? As I explain above, we should satisfy the
requirements of the ARB_buffer_storage extension AFAICT.


As pointed out by you and Grigori in other posts, I should probably just
drop the special treatment of persistent mappings though, so the
placement and flags are derived from the buffer usage.


-- 
Earthling Michel Dänzer|  http://www.amd.com
Libre software enthusiast  |Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965: Set minimum point size to 1.0 for non-sprite non-aa points

2014-07-17 Thread Popov, Pavel E
Hi Ian,

Looks like this patch is not reasonable for Mesa which now concentrates on 
OpenGL 3.3 Core. Am I right?

I prepared this patch to pass Piglit test 'spec_OpenGL_2.0_vs-point_size-zero' 
which was created for earlier versions of OpenGL. 
Also I found these analogous patches for 'svga', 'r300g' and 'r600g' 
implementations from Marek:
2012-01-30  r600g: set minimum point size to 1.0 for non-sprite non-aa 
points   Marek Olšák
2012-01-30  r300g: set minimum point size to 1.0 for non-sprite non-aa 
points   Marek Olšák
2012-01-30  svga: set POINTSIZEMIN to 1.0 for non-sprite non-aa points  
Marek Olšák

Regards,
Pavel

-Original Message-
From: Popov, Pavel E 
Sent: Wednesday, June 04, 2014 7:34 PM
To: mesa-dev@lists.freedesktop.org
Cc: Popov, Pavel E
Subject: [PATCH] i965: Set minimum point size to 1.0 for non-sprite non-aa 
points

Both point size states were covered (glPointSize function and  gl_PointSize 
value). Piglit test 'spec_OpenGL_2.0_vs-point_size-zero' which  uses 
gl_PointSize value in a shader passes. Modification of this test which  uses 
glPointSize function also passes.

Signed-off-by: Pavel Popov 
---
 src/mesa/drivers/dri/i965/gen6_clip_state.c | 3 ++-
 src/mesa/drivers/dri/i965/gen6_sf_state.c   | 6 --
 src/mesa/drivers/dri/i965/gen7_sf_state.c   | 6 --
 src/mesa/drivers/dri/i965/gen8_sf_state.c   | 6 --
 4 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c 
b/src/mesa/drivers/dri/i965/gen6_clip_state.c
index 0ba190e..cf11331 100644
--- a/src/mesa/drivers/dri/i965/gen6_clip_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c
@@ -128,7 +128,8 @@ upload_clip_state(struct brw_context *brw)
 GEN6_CLIP_MODE_NORMAL |
 GEN6_CLIP_XY_TEST |
 dw2);
-   OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
+   /* Use min size 1.0 if antialiasing and point sprites are disabled (OpenGL 
3.0 and earlier) */
+   OUT_BATCH(U_FIXED(!(ctx->Point.SmoothFlag || ctx->Point.PointSprite) 
+ ? 1.0 : 0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT |
  U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT |
  (fb->MaxNumLayers > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) |
  ((ctx->Const.MaxViewports - 1) & GEN6_CLIP_MAX_VP_INDEX_MASK)); 
diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c 
b/src/mesa/drivers/dri/i965/gen6_sf_state.c
index ec14be4..b8d66ab 100644
--- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
@@ -346,8 +346,10 @@ upload_sf_state(struct brw_context *brw)
/* Clamp to ARB_point_parameters user limits */
point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
 
-   /* Clamp to the hardware limits and convert to fixed point */
-   dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+   /* Clamp to the hardware limits and convert to fixed point.
+* Use min size 1.0 if antialiasing and point sprites are disabled (OpenGL 
3.0 and earlier).
+*/
+   dw4 |= U_FIXED(CLAMP(point_size, !(ctx->Point.SmoothFlag || 
+ ctx->Point.PointSprite) ? 1.0 : 0.125, 255.875), 3);
 
/*
 * Window coordinates in an FBO are inverted, which means point diff --git 
a/src/mesa/drivers/dri/i965/gen7_sf_state.c 
b/src/mesa/drivers/dri/i965/gen7_sf_state.c
index 7fe1435..a192dcb 100644
--- a/src/mesa/drivers/dri/i965/gen7_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c
@@ -220,8 +220,10 @@ upload_sf_state(struct brw_context *brw)
/* Clamp to ARB_point_parameters user limits */
point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
 
-   /* Clamp to the hardware limits and convert to fixed point */
-   dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+   /* Clamp to the hardware limits and convert to fixed point.
+* Use min size 1.0 if antialiasing and point sprites are disabled (OpenGL 
3.0 and earlier).
+*/
+   dw3 |= U_FIXED(CLAMP(point_size, !(ctx->Point.SmoothFlag || 
+ ctx->Point.PointSprite) ? 1.0 : 0.125, 255.875), 3);
 
/* _NEW_LIGHT */
if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { diff --git 
a/src/mesa/drivers/dri/i965/gen8_sf_state.c 
b/src/mesa/drivers/dri/i965/gen8_sf_state.c
index 0a69e70..d130a00 100644
--- a/src/mesa/drivers/dri/i965/gen8_sf_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c
@@ -132,8 +132,10 @@ upload_sf(struct brw_context *brw)
/* Clamp to ARB_point_parameters user limits */
point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize);
 
-   /* Clamp to the hardware limits and convert to fixed point */
-   dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3);
+   /* Clamp to the hardware limits and convert to fixed point.
+* Use min size 1.0 if antialiasing and point sprites are disabled (OpenGL 
3.0 and earlier).
+*/
+   dw3 |= U_FIXED(CLAMP(point_size, !(ctx->Point.SmoothFlag || 
+ ctx->Point.PointSpr