[Mesa-dev] [PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some BOs in GTT
From: Michel Dänzer Signed-off-by: Michel Dänzer --- src/gallium/drivers/r300/r300_query.c | 2 +- src/gallium/drivers/r300/r300_render.c| 2 +- src/gallium/drivers/r300/r300_screen_buffer.c | 4 ++-- src/gallium/drivers/r300/r300_texture.c | 2 +- src/gallium/drivers/radeon/r600_buffer_common.c | 9 ++-- src/gallium/drivers/radeon/r600_texture.c | 2 ++ src/gallium/drivers/radeon/radeon_uvd.c | 8 +--- src/gallium/drivers/radeon/radeon_vce.c | 8 src/gallium/drivers/radeon/radeon_video.c | 11 ++ src/gallium/drivers/radeon/radeon_video.h | 4 +++- src/gallium/drivers/radeonsi/si_state.c | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 25 +++ src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 1 + src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 2 +- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 12 +++ src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 2 ++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 7 ++- 17 files changed, 77 insertions(+), 26 deletions(-) diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 5305ebd..1679433 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -59,7 +59,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->num_pipes = r300screen->info.r300_num_gb_pipes; q->buf = r300->rws->buffer_create(r300->rws, 4096, 4096, TRUE, - RADEON_DOMAIN_GTT); + RADEON_DOMAIN_GTT, 0); if (!q->buf) { FREE(q); return NULL; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 175b83a..6e5b381 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -907,7 +907,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, r300->vbo = rws->buffer_create(rws, MAX2(R300_MAX_DRAW_VBO_SIZE, size), R300_BUFFER_ALIGNMENT, TRUE, - RADEON_DOMAIN_GTT); + RADEON_DOMAIN_GTT, 0); if (!r300->vbo) { return FALSE; } diff --git 
a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 86e4478..de557b5 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -103,7 +103,7 @@ r300_buffer_transfer_map( struct pipe_context *context, /* Create a new one in the same pipe_resource. */ new_buf = r300->rws->buffer_create(r300->rws, rbuf->b.b.width0, R300_BUFFER_ALIGNMENT, TRUE, - rbuf->domain); + rbuf->domain, 0); if (new_buf) { /* Discard the old buffer. */ pb_reference(&rbuf->buf, NULL); @@ -185,7 +185,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.width0, R300_BUFFER_ALIGNMENT, TRUE, - rbuf->domain); + rbuf->domain, 0); if (!rbuf->buf) { FREE(rbuf); return NULL; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 4ea69dc..ffe8c00 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -1042,7 +1042,7 @@ r300_texture_create_object(struct r300_screen *rscreen, /* Create the backing buffer if needed. */ if (!tex->buf) { tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, TRUE, - tex->domain); + tex->domain, 0); if (!tex->buf) { goto fail; diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 0eaa817..4e6b897 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -107,11 +107,14 @@ bool r600_init_resource(struct r600_common_screen *rscreen, { struct r600_texture *rtex = (struct r600_texture*)res; struct pb_buffer *old_buf, *new_buf; + enum radeon_bo_flag flags = 0; switch (res->b.b.usage) { - case PIPE_USAGE_STAGING: case PIPE_USAGE_DYNAMIC: case PIPE_USAGE_STREAM: + flags = RADEON_FLAG_GTT_WC; + /* fall through */ +
[Mesa-dev] [PATCH 1/5] winsys/radeon: Use separate caching buffer managers for VRAM and GTT
From: Michel Dänzer Should reduce overhead because the caching buffer manager doesn't need to consider buffers of the wrong type. Signed-off-by: Michel Dänzer --- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 10 +++--- src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 16 +++- src/gallium/winsys/radeon/drm/radeon_drm_winsys.h | 3 ++- 3 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index 0ebe196..d06bb34 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -800,10 +800,14 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, desc.initial_domains = domain; /* Assign a buffer manager. */ -if (use_reusable_pool) -provider = ws->cman; -else +if (use_reusable_pool) { +if (domain == RADEON_DOMAIN_VRAM) +provider = ws->cman_vram; +else +provider = ws->cman_gtt; +} else { provider = ws->kman; +} buffer = provider->create_buffer(provider, size, &desc.base); if (!buffer) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 576fea5..0834cbd 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -417,7 +417,8 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws) pipe_mutex_destroy(ws->cmask_owner_mutex); pipe_mutex_destroy(ws->cs_stack_lock); -ws->cman->destroy(ws->cman); +ws->cman_vram->destroy(ws->cman_vram); +ws->cman_gtt->destroy(ws->cman_gtt); ws->kman->destroy(ws->kman); if (ws->gen >= DRV_R600) { radeon_surface_manager_free(ws->surf_man); @@ -632,8 +633,11 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create) ws->kman = radeon_bomgr_create(ws); if (!ws->kman) goto fail; -ws->cman = pb_cache_manager_create(ws->kman, 100, 2.0f, 0); -if (!ws->cman) +ws->cman_vram = pb_cache_manager_create(ws->kman, 100, 2.0f, 0); +if (!ws->cman_vram) +goto 
fail; +ws->cman_gtt = pb_cache_manager_create(ws->kman, 100, 2.0f, 0); +if (!ws->cman_gtt) goto fail; if (ws->gen >= DRV_R600) { @@ -689,8 +693,10 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create) fail: pipe_mutex_unlock(fd_tab_mutex); -if (ws->cman) -ws->cman->destroy(ws->cman); +if (ws->cman_gtt) +ws->cman_gtt->destroy(ws->cman_gtt); +if (ws->cman_vram) +ws->cman_vram->destroy(ws->cman_vram); if (ws->kman) ws->kman->destroy(ws->kman); if (ws->surf_man) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index 18fe0ae..fc6f53b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -57,7 +57,8 @@ struct radeon_drm_winsys { uint32_t va_start; struct pb_manager *kman; -struct pb_manager *cman; +struct pb_manager *cman_vram; +struct pb_manager *cman_gtt; struct radeon_surface_manager *surf_man; uint32_t num_cpus; /* Number of CPUs. */ -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT
In order to try and improve X(Shm)PutImage performance with glamor, I implemented support for write-combined CPU mappings of BOs in GTT. This did provide a nice speedup, but to my surprise, using VRAM instead of write-combined GTT turned out to be even faster in general on my Kaveri machine, both for the internal GPU and for discrete GPUs. However, I've kept the changes from GTT to VRAM separated, in case this turns out to be a loss on other setups. Kernel patches: [PATCH 1/5] drm/radeon: Remove radeon_gart_restore() [PATCH 2/5] drm/radeon: Pass GART page flags to radeon_gart_set_page() explicitly [PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in GTT [PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and IBs on >= SI [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI Mesa patches: [PATCH 1/5] winsys/radeon: Use separate caching buffer managers for VRAM and GTT [PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some BOs in GTT [PATCH 3/5] r600g/radeonsi: Prefer VRAM for CPU -> GPU streaming buffers [PATCH 4/5] r600g,radeonsi: Use write-combined persistent GTT mappings [PATCH 5/5] r600g,radeonsi: Prefer VRAM for persistent mappings ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings
From: Michel Dänzer This is hopefully safe: The kernel makes sure writes to these mappings finish before the GPU might start reading from them, and the GPU caches are invalidated at the start of a command stream. Signed-off-by: Michel Dänzer --- src/gallium/drivers/radeon/r600_buffer_common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 40917f0..c8a0723 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -131,7 +131,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen, res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | PIPE_RESOURCE_FLAG_MAP_COHERENT)) { res->domains = RADEON_DOMAIN_GTT; - flags = 0; + flags = RADEON_FLAG_GTT_WC; } /* Tiled textures are unmappable. Always put them in VRAM. */ -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] drm/radeon: Pass GART page flags to radeon_gart_set_page() explicitly
From: Michel Dänzer Signed-off-by: Michel Dänzer --- drivers/gpu/drm/radeon/r100.c| 2 +- drivers/gpu/drm/radeon/r300.c| 12 +--- drivers/gpu/drm/radeon/radeon.h | 12 +--- drivers/gpu/drm/radeon/radeon_asic.h | 8 drivers/gpu/drm/radeon/radeon_gart.c | 9 ++--- drivers/gpu/drm/radeon/radeon_ttm.c | 8 ++-- drivers/gpu/drm/radeon/rs400.c | 13 ++--- drivers/gpu/drm/radeon/rs600.c | 16 +++- include/uapi/drm/radeon_drm.h| 4 +++- 9 files changed, 59 insertions(+), 25 deletions(-) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index ed1c53e..9241b89 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -682,7 +682,7 @@ void r100_pci_gart_disable(struct radeon_device *rdev) } void r100_pci_gart_set_page(struct radeon_device *rdev, unsigned i, - uint64_t addr) + uint64_t addr, uint32_t flags) { u32 *gtt = rdev->gart.ptr; gtt[i] = cpu_to_le32(lower_32_bits(addr)); diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 8d14e66..75b3033 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -69,17 +69,23 @@ void rv370_pcie_gart_tlb_flush(struct radeon_device *rdev) mb(); } +#define R300_PTE_UNSNOOPED (1 << 0) #define R300_PTE_WRITEABLE (1 << 2) #define R300_PTE_READABLE (1 << 3) void rv370_pcie_gart_set_page(struct radeon_device *rdev, unsigned i, - uint64_t addr) + uint64_t addr, uint32_t flags) { void __iomem *ptr = rdev->gart.ptr; addr = (lower_32_bits(addr) >> 8) | - ((upper_32_bits(addr) & 0xff) << 24) | - R300_PTE_WRITEABLE | R300_PTE_READABLE; + ((upper_32_bits(addr) & 0xff) << 24); + if (flags & RADEON_GART_PAGE_READ) + addr |= R300_PTE_READABLE; + if (flags & RADEON_GART_PAGE_WRITE) + addr |= R300_PTE_WRITEABLE; + if (!(flags & RADEON_GART_PAGE_SNOOP)) + addr |= R300_PTE_UNSNOOPED; /* on x86 we want this to be CPU endian, on powerpc * on powerpc without HW swappers, it'll get swapped on way * into VRAM - so no need for cpu_to_le32 on VRAM tables */ diff --git 
a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index f4869b4..4dd092e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -589,6 +589,12 @@ struct radeon_mc; #define RADEON_GPU_PAGE_SHIFT 12 #define RADEON_GPU_PAGE_ALIGN(a) (((a) + RADEON_GPU_PAGE_MASK) & ~RADEON_GPU_PAGE_MASK) +#define RADEON_GART_PAGE_DUMMY 0 +#define RADEON_GART_PAGE_VALID (1 << 0) +#define RADEON_GART_PAGE_READ (1 << 1) +#define RADEON_GART_PAGE_WRITE (1 << 2) +#define RADEON_GART_PAGE_SNOOP (1 << 3) + struct radeon_gart { dma_addr_t table_addr; struct radeon_bo*robj; @@ -613,7 +619,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, int pages); int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, int pages, struct page **pagelist, -dma_addr_t *dma_addr); +dma_addr_t *dma_addr, uint32_t flags); /* @@ -1775,7 +1781,7 @@ struct radeon_asic { struct { void (*tlb_flush)(struct radeon_device *rdev); void (*set_page)(struct radeon_device *rdev, unsigned i, -uint64_t addr); +uint64_t addr, uint32_t flags); } gart; struct { int (*init)(struct radeon_device *rdev); @@ -2702,7 +2708,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v); #define radeon_vga_set_state(rdev, state) (rdev)->asic->vga_set_state((rdev), (state)) #define radeon_asic_reset(rdev) (rdev)->asic->asic_reset((rdev)) #define radeon_gart_tlb_flush(rdev) (rdev)->asic->gart.tlb_flush((rdev)) -#define radeon_gart_set_page(rdev, i, p) (rdev)->asic->gart.set_page((rdev), (i), (p)) +#define radeon_gart_set_page(rdev, i, p, f) (rdev)->asic->gart.set_page((rdev), (i), (p), (f)) #define radeon_asic_vm_init(rdev) (rdev)->asic->vm.init((rdev)) #define radeon_asic_vm_fini(rdev) (rdev)->asic->vm.fini((rdev)) #define radeon_asic_vm_set_page(rdev, ib, pe, addr, count, incr, flags) ((rdev)->asic->vm.set_page((rdev), (ib), (pe), (addr), (count), (incr), (flags))) diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h 
index 01e7c0a..f632e31 100644 --- a/drivers/gpu/drm/radeon/radeon_asic.h +++ b/drivers/gpu/drm/radeon/radeon_asic.h @@ -68,7 +68,7 @@ int r100_asic_reset(struct radeon_device *rdev); u32 r100_get_vblank_counter(struct radeon_device *rdev, int crtc); void r100_pci_gart_tlb_flush(struct radeon_device
[Mesa-dev] [PATCH 3/5] r600g/radeonsi: Prefer VRAM for CPU -> GPU streaming buffers
From: Michel Dänzer Signed-off-by: Michel Dänzer --- src/gallium/drivers/radeon/r600_buffer_common.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 4e6b897..40917f0 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -110,15 +110,13 @@ bool r600_init_resource(struct r600_common_screen *rscreen, enum radeon_bo_flag flags = 0; switch (res->b.b.usage) { - case PIPE_USAGE_DYNAMIC: - case PIPE_USAGE_STREAM: - flags = RADEON_FLAG_GTT_WC; - /* fall through */ case PIPE_USAGE_STAGING: /* Transfers are likely to occur more often with these resources. */ res->domains = RADEON_DOMAIN_GTT; break; case PIPE_USAGE_DEFAULT: + case PIPE_USAGE_STREAM: + case PIPE_USAGE_DYNAMIC: case PIPE_USAGE_IMMUTABLE: default: /* Not listing GTT here improves performance in some apps. */ -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI
From: Michel Dänzer Signed-off-by: Michel Dänzer --- drivers/gpu/drm/radeon/cik.c | 3 +++ drivers/gpu/drm/radeon/cik_sdma.c| 2 ++ drivers/gpu/drm/radeon/ni.c | 3 +++ drivers/gpu/drm/radeon/ni_dma.c | 2 ++ drivers/gpu/drm/radeon/radeon_ring.c | 2 +- 5 files changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index df39095..8af5c9a 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -3846,6 +3846,9 @@ void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) (ib->gpu_addr & 0xFFFC)); radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0x); radeon_ring_write(ring, control); + + /* Flush HDP cache */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0); } /** diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 3396b28..2ab873d 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -158,6 +158,8 @@ void cik_sdma_ring_ib_execute(struct radeon_device *rdev, radeon_ring_write(ring, upper_32_bits(ib->gpu_addr)); radeon_ring_write(ring, ib->length_dw); + /* Flush HDP cache */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0); } /** diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index b589fe7..ea58e5b 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1397,6 +1397,9 @@ void cayman_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib) radeon_ring_write(ring, 0x); radeon_ring_write(ring, 0); radeon_ring_write(ring, ((ib->vm ? 
ib->vm->id : 0) << 24) | 10); /* poll interval */ + + /* Flush HDP cache (for SI) */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); } static void cayman_cp_enable(struct radeon_device *rdev, bool enable) diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 119fc69..0e575ea 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ b/drivers/gpu/drm/radeon/ni_dma.c @@ -148,6 +148,8 @@ void cayman_dma_ring_ib_execute(struct radeon_device *rdev, radeon_ring_write(ring, (ib->gpu_addr & 0xFFE0)); radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); + /* Flush HDP cache (for SI) */ + WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1); } /** diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 62e9e57..31ac4fd 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -206,7 +206,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev) r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo, RADEON_IB_POOL_SIZE*64*1024, RADEON_GPU_PAGE_SIZE, - RADEON_GEM_DOMAIN_GTT, + RADEON_GEM_DOMAIN_VRAM, RADEON_GEM_GTT_WC); } else { /* Without GPUVM, it's better to stick to cacheable GTT due -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in GTT
From: Michel Dänzer Signed-off-by: Michel Dänzer --- drivers/gpu/drm/radeon/cik.c | 4 ++-- drivers/gpu/drm/radeon/cik_sdma.c | 3 ++- drivers/gpu/drm/radeon/evergreen.c| 12 drivers/gpu/drm/radeon/r600.c | 4 ++-- drivers/gpu/drm/radeon/radeon.h | 3 ++- drivers/gpu/drm/radeon/radeon_benchmark.c | 4 ++-- drivers/gpu/drm/radeon/radeon_device.c| 3 ++- drivers/gpu/drm/radeon/radeon_fb.c| 2 +- drivers/gpu/drm/radeon/radeon_gart.c | 2 +- drivers/gpu/drm/radeon/radeon_gem.c | 16 ++-- drivers/gpu/drm/radeon/radeon_object.c| 24 +++- drivers/gpu/drm/radeon/radeon_object.h| 5 +++-- drivers/gpu/drm/radeon/radeon_prime.c | 2 +- drivers/gpu/drm/radeon/radeon_ring.c | 4 ++-- drivers/gpu/drm/radeon/radeon_sa.c| 4 ++-- drivers/gpu/drm/radeon/radeon_test.c | 4 ++-- drivers/gpu/drm/radeon/radeon_ttm.c | 2 +- drivers/gpu/drm/radeon/radeon_uvd.c | 6 +++--- drivers/gpu/drm/radeon/radeon_vce.c | 2 +- drivers/gpu/drm/radeon/radeon_vm.c| 8 ++-- drivers/gpu/drm/radeon/si_dma.c | 3 ++- 21 files changed, 70 insertions(+), 47 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 1b0da66..a9fd3e7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4374,7 +4374,7 @@ static int cik_mec_init(struct radeon_device *rdev) r = radeon_bo_create(rdev, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2, PAGE_SIZE, true, -RADEON_GEM_DOMAIN_GTT, NULL, +RADEON_GEM_DOMAIN_GTT, 0, NULL, &rdev->mec.hpd_eop_obj); if (r) { dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r); @@ -4544,7 +4544,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) r = radeon_bo_create(rdev, sizeof(struct bonaire_mqd), PAGE_SIZE, true, -RADEON_GEM_DOMAIN_GTT, NULL, +RADEON_GEM_DOMAIN_GTT, 0, NULL, &rdev->ring[idx].mqd_obj); if (r) { dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r); diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index 8e9d0f1..a7f66c8 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ 
b/drivers/gpu/drm/radeon/cik_sdma.c @@ -742,7 +742,8 @@ void cik_sdma_vm_set_page(struct radeon_device *rdev, trace_radeon_vm_set_page(pe, addr, count, incr, flags); - if (flags == R600_PTE_GART) { + /* XXX: How to distinguish between GART and other system memory pages? */ + if (flags & R600_PTE_SYSTEM) { uint64_t src = rdev->gart.table_addr + (addr >> 12) * 8; while (count) { unsigned bytes = count * 8; diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 39ada71..902334f 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4022,7 +4022,8 @@ int sumo_rlc_init(struct radeon_device *rdev) /* save restore block */ if (rdev->rlc.save_restore_obj == NULL) { r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, -RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.save_restore_obj); +RADEON_GEM_DOMAIN_VRAM, 0, NULL, +&rdev->rlc.save_restore_obj); if (r) { dev_warn(rdev->dev, "(%d) create RLC sr bo failed\n", r); return r; @@ -4100,7 +4101,8 @@ int sumo_rlc_init(struct radeon_device *rdev) if (rdev->rlc.clear_state_obj == NULL) { r = radeon_bo_create(rdev, dws * 4, PAGE_SIZE, true, -RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->rlc.clear_state_obj); +RADEON_GEM_DOMAIN_VRAM, 0, NULL, +&rdev->rlc.clear_state_obj); if (r) { dev_warn(rdev->dev, "(%d) create RLC c bo failed\n", r); sumo_rlc_fini(rdev); @@ -4174,8 +4176,10 @@ int sumo_rlc_init(struct radeon_device *rdev) if (rdev->rlc.cp_table_size) { if (rdev->rlc.cp_table_obj == NULL) { - r = radeon_bo_create(rdev, rdev->rlc.c
[Mesa-dev] [PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and IBs on >= SI
From: Michel Dänzer Signed-off-by: Michel Dänzer --- drivers/gpu/drm/radeon/cik.c | 3 +++ drivers/gpu/drm/radeon/cik_sdma.c| 4 drivers/gpu/drm/radeon/ni.c | 3 +++ drivers/gpu/drm/radeon/ni_dma.c | 4 drivers/gpu/drm/radeon/radeon_ring.c | 22 +- 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index a9fd3e7..df39095 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -4181,6 +4181,9 @@ u32 cik_gfx_get_wptr(struct radeon_device *rdev, void cik_gfx_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring) { + /* Make IB/ring buffer writes land before the WPTR register write */ + wmb(); + WREG32(CP_RB0_WPTR, ring->wptr); (void)RREG32(CP_RB0_WPTR); } diff --git a/drivers/gpu/drm/radeon/cik_sdma.c b/drivers/gpu/drm/radeon/cik_sdma.c index a7f66c8..3396b28 100644 --- a/drivers/gpu/drm/radeon/cik_sdma.c +++ b/drivers/gpu/drm/radeon/cik_sdma.c @@ -112,12 +112,16 @@ void cik_sdma_set_wptr(struct radeon_device *rdev, { u32 reg; + /* Make IB/ring buffer writes land before the WPTR register write */ + wmb(); + if (ring->idx == R600_RING_TYPE_DMA_INDEX) reg = SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET; else reg = SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET; WREG32(reg, (ring->wptr << 2) & 0x3fffc); + (void)RREG32(reg); } /** diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 327b85f..b589fe7 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1449,6 +1449,9 @@ u32 cayman_gfx_get_wptr(struct radeon_device *rdev, void cayman_gfx_set_wptr(struct radeon_device *rdev, struct radeon_ring *ring) { + /* Make IB/ring buffer writes land before the WPTR register write */ + wmb(); + if (ring->idx == RADEON_RING_TYPE_GFX_INDEX) { WREG32(CP_RB0_WPTR, ring->wptr); (void)RREG32(CP_RB0_WPTR); diff --git a/drivers/gpu/drm/radeon/ni_dma.c b/drivers/gpu/drm/radeon/ni_dma.c index 6378e02..119fc69 100644 --- a/drivers/gpu/drm/radeon/ni_dma.c +++ 
b/drivers/gpu/drm/radeon/ni_dma.c @@ -103,12 +103,16 @@ void cayman_dma_set_wptr(struct radeon_device *rdev, { u32 reg; + /* Make IB/ring buffer writes land before the WPTR register write */ + wmb(); + if (ring->idx == R600_RING_TYPE_DMA_INDEX) reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; else reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; WREG32(reg, (ring->wptr << 2) & 0x3fffc); + (void)RREG32(reg); } /** diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c index 71439f0..62e9e57 100644 --- a/drivers/gpu/drm/radeon/radeon_ring.c +++ b/drivers/gpu/drm/radeon/radeon_ring.c @@ -201,10 +201,22 @@ int radeon_ib_pool_init(struct radeon_device *rdev) if (rdev->ib_pool_ready) { return 0; } - r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo, - RADEON_IB_POOL_SIZE*64*1024, - RADEON_GPU_PAGE_SIZE, - RADEON_GEM_DOMAIN_GTT, 0); + + if (rdev->family >= CHIP_TAHITI) { + r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo, + RADEON_IB_POOL_SIZE*64*1024, + RADEON_GPU_PAGE_SIZE, + RADEON_GEM_DOMAIN_GTT, + RADEON_GEM_GTT_WC); + } else { + /* Without GPUVM, it's better to stick to cacheable GTT due +* to the command stream patching +*/ + r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo, + RADEON_IB_POOL_SIZE*64*1024, + RADEON_GPU_PAGE_SIZE, + RADEON_GEM_DOMAIN_GTT, 0); + } if (r) { return r; } @@ -640,7 +652,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig /* Allocate ring buffer */ if (ring->ring_obj == NULL) { r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true, -RADEON_GEM_DOMAIN_GTT, 0, +RADEON_GEM_DOMAIN_GTT, RADEON_GEM_GTT_WC, NULL, &ring->ring_obj); if (r) { dev_err(rdev->dev, "(%d) ring create failed\n", r); -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/
[Mesa-dev] [PATCH 1/5] drm/radeon: Remove radeon_gart_restore()
From: Michel Dänzer Doesn't seem necessary, the GART table memory should be persistent. Signed-off-by: Michel Dänzer --- drivers/gpu/drm/radeon/cik.c | 1 - drivers/gpu/drm/radeon/evergreen.c | 1 - drivers/gpu/drm/radeon/ni.c | 1 - drivers/gpu/drm/radeon/r100.c| 1 - drivers/gpu/drm/radeon/r300.c| 1 - drivers/gpu/drm/radeon/r600.c| 1 - drivers/gpu/drm/radeon/radeon.h | 1 - drivers/gpu/drm/radeon/radeon_gart.c | 27 --- drivers/gpu/drm/radeon/rs400.c | 1 - drivers/gpu/drm/radeon/rs600.c | 1 - drivers/gpu/drm/radeon/rv770.c | 1 - drivers/gpu/drm/radeon/si.c | 1 - 12 files changed, 38 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 0b24711..1b0da66 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -5401,7 +5401,6 @@ static int cik_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; - radeon_gart_restore(rdev); /* Setup TLB control */ WREG32(MC_VM_MX_L1_TLB_CNTL, (0xA << 7) | diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 250bac3..39ada71 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -2424,7 +2424,6 @@ static int evergreen_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; - radeon_gart_restore(rdev); /* Setup L2 cache */ WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE | diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 5a33ca6..327b85f 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -1229,7 +1229,6 @@ static int cayman_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; - radeon_gart_restore(rdev); /* Setup TLB control */ WREG32(MC_VM_MX_L1_TLB_CNTL, (0xA << 7) | diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 
1544efc..ed1c53e 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -652,7 +652,6 @@ int r100_pci_gart_enable(struct radeon_device *rdev) { uint32_t tmp; - radeon_gart_restore(rdev); /* discard memory request outside of configured range */ tmp = RREG32(RADEON_AIC_CNTL) | RADEON_DIS_OUT_OF_PCI_GART_ACCESS; WREG32(RADEON_AIC_CNTL, tmp); diff --git a/drivers/gpu/drm/radeon/r300.c b/drivers/gpu/drm/radeon/r300.c index 3c21d77..8d14e66 100644 --- a/drivers/gpu/drm/radeon/r300.c +++ b/drivers/gpu/drm/radeon/r300.c @@ -120,7 +120,6 @@ int rv370_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; - radeon_gart_restore(rdev); /* discard memory request outside of configured range */ tmp = RADEON_PCIE_TX_GART_UNMAPPED_ACCESS_DISCARD; WREG32_PCIE(RADEON_PCIE_TX_GART_CNTL, tmp); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index c66952d..e1be5ce 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -968,7 +968,6 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev) r = radeon_gart_table_vram_pin(rdev); if (r) return r; - radeon_gart_restore(rdev); /* Setup L2 cache */ WREG32(VM_L2_CNTL, ENABLE_L2_CACHE | ENABLE_L2_FRAGMENT_PROCESSING | diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 079eac7..f4869b4 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -614,7 +614,6 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset, int radeon_gart_bind(struct radeon_device *rdev, unsigned offset, int pages, struct page **pagelist, dma_addr_t *dma_addr); -void radeon_gart_restore(struct radeon_device *rdev); /* diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c index 2e72365..b7d3e84 100644 --- a/drivers/gpu/drm/radeon/radeon_gart.c +++ b/drivers/gpu/drm/radeon/radeon_gart.c @@ -298,33 +298,6 @@ int radeon_gart_bind(struct 
radeon_device *rdev, unsigned offset, } /** - * radeon_gart_restore - bind all pages in the gart page table - * - * @rdev: radeon_device pointer - * - * Binds all pages in the gart page table (all asics). - * Used to rebuild the gart table on device startup or resume. - */ -void radeon_gart_restore(struct radeon_device *rdev) -{ - int i, j, t; - u64 page_base; - - if (!rdev->gart.ptr) { - return; - }
[Mesa-dev] [PATCH 5/5] r600g, radeonsi: Prefer VRAM for persistent mappings
From: Michel Dänzer Signed-off-by: Michel Dänzer --- src/gallium/drivers/radeon/r600_buffer_common.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index c8a0723..6f7fa29 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -125,12 +125,10 @@ bool r600_init_resource(struct r600_common_screen *rscreen, break; } - /* Use GTT for all persistent mappings, because they are -* always cached and coherent. */ if (res->b.b.target == PIPE_BUFFER && res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | PIPE_RESOURCE_FLAG_MAP_COHERENT)) { - res->domains = RADEON_DOMAIN_GTT; + res->domains = RADEON_DOMAIN_VRAM; flags = RADEON_FLAG_GTT_WC; } -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT
Am 17.07.2014 12:01, schrieb Michel Dänzer: In order to try and improve X(Shm)PutImage performance with glamor, I implemented support for write-combined CPU mappings of BOs in GTT. This did provide a nice speedup, but to my surprise, using VRAM instead of write-combined GTT turned out to be even faster in general on my Kaveri machine, both for the internal GPU and for discrete GPUs. However, I've kept the changes from GTT to VRAM separated, in case this turns out to be a loss on other setups. Kernel patches: [PATCH 1/5] drm/radeon: Remove radeon_gart_restore() [PATCH 2/5] drm/radeon: Pass GART page flags to radeon_gart_set_page() explicitly [PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in GTT [PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and IBs on >= SI Those four are Reviewed-by: Christian König [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI I'm still not very keen on this change since I still don't understand the reason why it's faster than with GTT. Definitely needs more testing on a wider range of systems. Maybe limit it to APUs for now? Regards, Christian. Mesa patches: [PATCH 1/5] winsys/radeon: Use separate caching buffer managers for VRAM and GTT [PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some BOs in GTT [PATCH 3/5] r600g/radeonsi: Prefer VRAM for CPU -> GPU streaming buffers [PATCH 4/5] r600g,radeonsi: Use write-combined persistent GTT mappings [PATCH 5/5] r600g,radeonsi: Prefer VRAM for persistent mappings ___ dri-devel mailing list dri-de...@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/dri-devel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings
On 17.07.2014 12:01, Michel Dänzer wrote: > From: Michel Dänzer > > This is hopefully safe: The kernel makes sure writes to these mappings > finish before the GPU might start reading from them, and the GPU caches > are invalidated at the start of a command stream. > Aren't CPU reads from write-combined GTT memory extraordinarily slow, because they're uncached? And don't you need the right access patterns to make write combining perform well? Grigori > Signed-off-by: Michel Dänzer > --- > src/gallium/drivers/radeon/r600_buffer_common.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c > b/src/gallium/drivers/radeon/r600_buffer_common.c > index 40917f0..c8a0723 100644 > --- a/src/gallium/drivers/radeon/r600_buffer_common.c > +++ b/src/gallium/drivers/radeon/r600_buffer_common.c > @@ -131,7 +131,7 @@ bool r600_init_resource(struct r600_common_screen > *rscreen, > res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | > PIPE_RESOURCE_FLAG_MAP_COHERENT)) { > res->domains = RADEON_DOMAIN_GTT; > - flags = 0; > + flags = RADEON_FLAG_GTT_WC; > } > > /* Tiled textures are unmappable. Always put them in VRAM. */ > signature.asc Description: OpenPGP digital signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] Fix crash in update_framebuffer_state
The patch is wrong. is_rtt is only set for textures, not renderbuffers, so it breaks rendering to depth renderbuffers. Marek On Thu, Jul 17, 2014 at 12:02 AM, David Weber wrote: > Hi, > > GPU: Radeon HD 5770 > mesa: 10.2.2 with gallium/llvm backend > llvm: 3.4.2 > linux: 3.15.3 > xf86-video-ati: 7.4.0 > > Switching from the software to the OpenGL backend in Gwenview with EGL > enabled QT4 crashes with the following BT: > state_tracker/st_atom_framebuffer.c:60:update_framebuffer_size: > Assertion `surface' failed. > > Program received signal SIGTRAP, Trace/breakpoint trap. > 0x7fffe09e7ec1 in _debug_assert_fail (expr=0x7fffe0f9f85c > "surface", file=0x7fffe0f9f838 "state_tracker/st_atom_framebuffer.c", > line=60, > function=0x7fffe0f9f9a0 <__func__.33915> > "update_framebuffer_size") at util/u_debug.c:277 > 277 util/u_debug.c: Datei oder Verzeichnis nicht gefunden. > (gdb) bt > #0 0x7fffe09e7ec1 in _debug_assert_fail (expr=0x7fffe0f9f85c > "surface", file=0x7fffe0f9f838 "state_tracker/st_atom_framebuffer.c", > line=60, > function=0x7fffe0f9f9a0 <__func__.33915> > "update_framebuffer_size") at util/u_debug.c:277 > #1 0x7fffe0c8715d in update_framebuffer_size > (framebuffer=0x17f82b0, surface=0x0) at > state_tracker/st_atom_framebuffer.c:60 > #2 0x7fffe0c87446 in update_framebuffer_state (st=0x17f76d0) at > state_tracker/st_atom_framebuffer.c:132 > #3 0x7fffe0c84457 in st_validate_state (st=0x17f76d0) at > state_tracker/st_atom.c:213 > #4 0x7fffe0c91618 in st_Clear (ctx=0x17b3a30, mask=2) at > state_tracker/st_cb_clear.c:446 > #5 0x7fffe0b10a39 in _mesa_Clear (mask=16384) at main/clear.c:226 > #6 0x720c9aaa in ?? 
() from /usr/lib64/qt4/libQtOpenGL.so.4 > #7 0x74a21cfb in QPainter::begin(QPaintDevice*) () from > /usr/lib64/qt4/libQtGui.so.4 > #8 0x74a22768 in QPainter::QPainter(QPaintDevice*) () from > /usr/lib64/qt4/libQtGui.so.4 > #9 0x74ec9544 in QGraphicsView::paintEvent(QPaintEvent*) () > from /usr/lib64/qt4/libQtGui.so.4 > #10 0x749221f0 in QWidget::event(QEvent*) () from > /usr/lib64/qt4/libQtGui.so.4 > #11 0x74cb595e in QFrame::event(QEvent*) () from > /usr/lib64/qt4/libQtGui.so.4 > #12 0x74ecd32b in QGraphicsView::viewportEvent(QEvent*) () > from /usr/lib64/qt4/libQtGui.so.4 > #13 0x76a9f223 in > QCoreApplicationPrivate::sendThroughObjectEventFilters(QObject*, > QEvent*) () from /usr/lib64/qt4/libQtCore.so.4 > #14 0x748d4bac in QApplicationPrivate::notify_helper(QObject*, > QEvent*) () from /usr/lib64/qt4/libQtGui.so.4 > #15 0x748d7602 in QApplication::notify(QObject*, QEvent*) () > from /usr/lib64/qt4/libQtGui.so.4 > #16 0x75600a08 in KApplication::notify(QObject*, QEvent*) () > from /usr/lib64/libkdeui.so.5 > #17 0x76a9f0ad in QCoreApplication::notifyInternal(QObject*, > QEvent*) () from /usr/lib64/qt4/libQtCore.so.4 > #18 0x7492705f in QWidgetPrivate::drawWidget(QPaintDevice*, > QRegion const&, QPoint const&, int, QPainter*, QWidgetBackingStore*) > () from /usr/lib64/qt4/libQtGui.so.4 > #19 0x74ae5639 in QWidgetPrivate::repaint_sys(QRegion const&) > () from /usr/lib64/qt4/libQtGui.so.4 > #20 0x749159e4 in QWidgetPrivate::syncBackingStore() () from > /usr/lib64/qt4/libQtGui.so.4 > #21 0x74922691 in QWidget::event(QEvent*) () from > /usr/lib64/qt4/libQtGui.so.4 > #22 0x7209fd0a in QGLWidget::event(QEvent*) () from > /usr/lib64/qt4/libQtOpenGL.so.4 > #23 0x748d4bcc in QApplicationPrivate::notify_helper(QObject*, > QEvent*) () from /usr/lib64/qt4/libQtGui.so.4 > #24 0x748d7602 in QApplication::notify(QObject*, QEvent*) () > from /usr/lib64/qt4/libQtGui.so.4 > #25 0x75600a08 in KApplication::notify(QObject*, QEvent*) () > from /usr/lib64/libkdeui.so.5 > #26 0x76a9f0ad 
in QCoreApplication::notifyInternal(QObject*, > QEvent*) () from /usr/lib64/qt4/libQtCore.so.4 > #27 0x76aa26e8 in > QCoreApplicationPrivate::sendPostedEvents(QObject*, int, QThreadData*) > () from /usr/lib64/qt4/libQtCore.so.4 > #28 0x76acd653 in ?? () from /usr/lib64/qt4/libQtCore.so.4 > #29 0x705e4a94 in g_main_context_dispatch () from > /usr/lib64/libglib-2.0.so.0 > #30 0x705e4df0 in ?? () from /usr/lib64/libglib-2.0.so.0 > #31 0x705e4eac in g_main_context_iteration () from > /usr/lib64/libglib-2.0.so.0 > #32 0x76acd7c6 in > QEventDispatcherGlib::processEvents(QFlags) > () from /usr/lib64/qt4/libQtCore.so.4 > #33 0x74975f26 in ?? () from /usr/lib64/qt4/libQtGui.so.4 > #34 0x76a9dcef in > QEventLoop::processEvents(QFlags) () > from /usr/lib64/qt4/libQtCore.so.4 > #35 0x76a9dfd0 in > QEventLoop::exec(QFlags) () from > /usr/lib64/qt4/libQtCore.so.4 > #36 0x74d6e147 in QDialog::exec() () from /usr/lib64/qt4/libQtGui.so.4 > #37 0x0044c7f3 in Gwenview::MainWindow::showConfigDialog > (this=0xaf1e30) at /home/weber/work/gwenview/app/mainwin
Re: [Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings
The resource flags actually tell you what you can do. If the COHERENT flag is set, the mapping must be cached. If it's unset, it's up to you. If write-combining is faster for vertex uploads, then Glamor shouldn't set the coherent flag. Marek On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer wrote: > From: Michel Dänzer > > This is hopefully safe: The kernel makes sure writes to these mappings > finish before the GPU might start reading from them, and the GPU caches > are invalidated at the start of a command stream. > > Signed-off-by: Michel Dänzer > --- > src/gallium/drivers/radeon/r600_buffer_common.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c > b/src/gallium/drivers/radeon/r600_buffer_common.c > index 40917f0..c8a0723 100644 > --- a/src/gallium/drivers/radeon/r600_buffer_common.c > +++ b/src/gallium/drivers/radeon/r600_buffer_common.c > @@ -131,7 +131,7 @@ bool r600_init_resource(struct r600_common_screen > *rscreen, > res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | > PIPE_RESOURCE_FLAG_MAP_COHERENT)) { > res->domains = RADEON_DOMAIN_GTT; > - flags = 0; > + flags = RADEON_FLAG_GTT_WC; > } > > /* Tiled textures are unmappable. Always put them in VRAM. */ > -- > 2.0.0 > > ___ > dri-devel mailing list > dri-de...@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/dri-devel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] clover: Call end_query before getting timestamp result v2
Tom Stellard writes: > v2: > - Move the end_query() call into the timestamp constructor. > - Still pass false as the wait parameter to get_query_result(). Reviewed-by: Francisco Jerez > --- > src/gallium/state_trackers/clover/core/timestamp.cpp | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/src/gallium/state_trackers/clover/core/timestamp.cpp > b/src/gallium/state_trackers/clover/core/timestamp.cpp > index 481c4f9..3fd341f 100644 > --- a/src/gallium/state_trackers/clover/core/timestamp.cpp > +++ b/src/gallium/state_trackers/clover/core/timestamp.cpp > @@ -30,6 +30,7 @@ using namespace clover; > timestamp::query::query(command_queue &q) : > q(q), > _query(q.pipe->create_query(q.pipe, PIPE_QUERY_TIMESTAMP, 0)) { > + q.pipe->end_query(q.pipe, _query); > } > > timestamp::query::query(query &&other) : > -- > 1.8.1.5 pgp6BHrKR3yXH.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 5/5] r600g, radeonsi: Prefer VRAM for persistent mappings
Like I said at patch 4, this would be okay if the COHERENT flag wasn't set. If you removed the PERSISTENT flag from the conditional, the placement of persistent non-coherent buffers would be driven by the "usage", meaning that you would be able to get any kind of placement you want. Marek On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer wrote: > From: Michel Dänzer > > Signed-off-by: Michel Dänzer > --- > src/gallium/drivers/radeon/r600_buffer_common.c | 4 +--- > 1 file changed, 1 insertion(+), 3 deletions(-) > > diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c > b/src/gallium/drivers/radeon/r600_buffer_common.c > index c8a0723..6f7fa29 100644 > --- a/src/gallium/drivers/radeon/r600_buffer_common.c > +++ b/src/gallium/drivers/radeon/r600_buffer_common.c > @@ -125,12 +125,10 @@ bool r600_init_resource(struct r600_common_screen > *rscreen, > break; > } > > - /* Use GTT for all persistent mappings, because they are > -* always cached and coherent. */ > if (res->b.b.target == PIPE_BUFFER && > res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | > PIPE_RESOURCE_FLAG_MAP_COHERENT)) { > - res->domains = RADEON_DOMAIN_GTT; > + res->domains = RADEON_DOMAIN_VRAM; > flags = RADEON_FLAG_GTT_WC; > } > > -- > 2.0.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] clover: Use 1 as default value for CL_DEVICE_PROFILING_TIMER_RESOLUTION
Tom Stellard writes: > We use PIPE_QUERY_TIMESTAMP for profiling events, and gallium specifies > that the timestamp be in nanoseconds. > --- > src/gallium/state_trackers/clover/api/device.cpp | 4 +++- > 1 file changed, 3 insertions(+), 1 deletion(-) > > diff --git a/src/gallium/state_trackers/clover/api/device.cpp > b/src/gallium/state_trackers/clover/api/device.cpp > index 1176668..25d29f5 100644 > --- a/src/gallium/state_trackers/clover/api/device.cpp > +++ b/src/gallium/state_trackers/clover/api/device.cpp > @@ -249,7 +249,9 @@ clGetDeviceInfo(cl_device_id d_dev, cl_device_info param, >break; > > case CL_DEVICE_PROFILING_TIMER_RESOLUTION: > - buf.as_scalar() = 0; > + // PIPE_QUERY_TIMESTAMP returns a timestamp in units of nanoseconds, > + // so we default to 1 here. > + buf.as_scalar() = 1; >break; I guess we should use PIPE_QUERY_TIMESTAMP_DISJOINT to calculate the timer resolution? Thanks. > > case CL_DEVICE_ENDIAN_LITTLE: > -- > 1.8.1.5 pgpo4ygcLtAqk.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT
On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer wrote: > Mesa patches: > > [PATCH 1/5] winsys/radeon: Use separate caching buffer managers for > [PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some > [PATCH 3/5] r600g/radeonsi: Prefer VRAM for CPU -> GPU streaming For these 3 patches: Reviewed-by: Marek Olšák Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT
On Thu, Jul 17, 2014 at 6:01 AM, Michel Dänzer wrote: > In order to try and improve X(Shm)PutImage performance with glamor, I > implemented support for write-combined CPU mappings of BOs in GTT. > > This did provide a nice speedup, but to my surprise, using VRAM instead > of write-combined GTT turned out to be even faster in general on my > Kaveri machine, both for the internal GPU and for discrete GPUs. > > However, I've kept the changes from GTT to VRAM separated, in case this > turns out to be a loss on other setups. > > Kernel patches: > > [PATCH 1/5] drm/radeon: Remove radeon_gart_restore() > [PATCH 2/5] drm/radeon: Pass GART page flags to > [PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in > [PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and > [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI Applied 1-4 to my 3.17 tree. thanks! Alex > > Mesa patches: > > [PATCH 1/5] winsys/radeon: Use separate caching buffer managers for > [PATCH 2/5] r600g/radeonsi: Use write-combined CPU mappings of some > [PATCH 3/5] r600g/radeonsi: Prefer VRAM for CPU -> GPU streaming > [PATCH 4/5] r600g,radeonsi: Use write-combined persistent GTT > [PATCH 5/5] r600g,radeonsi: Prefer VRAM for persistent mappings > ___ > dri-devel mailing list > dri-de...@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/dri-devel ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k
On Thu, 17 Jul 2014, Eero Tamminen wrote: > While effect of unaligned accesses is normally invisible, No, the compiler is inserting padding here silently. We call this “implicit padding”. The problem with it is that this padding is architecture-dependent, and some platforms have other alignment requirements than other platforms. Take this example: struct { char c; int i; } foo; This looks like this to the programmer: ┌───┬───┬───┬───┬───┐ │ c │ i i i i │ └───┴───┴───┴───┴───┘ But it looks like this on i386: ┌───┬───┬───┬───┬───┬───┬───┬───┐ │ c │ XpaddingX │ i i i i │ └───┴───┴───┴───┴───┴───┴───┴───┘ And only like this on m68k: ┌───┬───┬───┬───┬───┬───┐ │ c │ X │ i i i i │ └───┴───┴───┴───┴───┴───┘ This is because the compiler uses the architecture’s optimal minimum alignment for “implicit” padding, to avoid the misalignment you’re talking about. On i386, access to a 32-bit quantity is fast if it’s 4-byte aligned; on m68k, 2-byte alignment is not only enough for it to be fast (4-byte would have no benefit), but is also required by the ABI. To fix this, we use explicit padding: struct { char c; char unused1[3]; int i; } foo; Now all cases look the same (except if you have a CPU which wants to align its “int”s to 64 bit…). The problem here is that the code in question uses arrays of such structs with implicit padding, and checks their sizes against its expectations. Maybe because the array is written directly to the hardware. What my patch does is to insert e̲x̲p̲l̲i̲c̲i̲t̲ padding to exactly match the i̲m̲p̲l̲i̲c̲i̲t̲ padding present on the i386 architecture, to make this the “minimum amount of padding” used. (Other architectures may still insert implicit padding, e.g. if they want their “int”s to be 64-bit aligned, but that’s outside of the scope of this, and will fail with that code anyway.) bye, //mirabilos -- [16:04:33] bkix: "veni vidi violini" [16:04:45] bkix: "ich kam, sah und vergeigte"... 
___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k
On Thu, Jul 17, 2014 at 4:21 AM, Thorsten Glaser wrote: > On Thu, 17 Jul 2014, Eero Tamminen wrote: > >> While effect of unaligned accesses is normally invisible, > > No, the compiler is inserting padding here silently. > We call this “implicit padding”. The problem with it [ strip excellent explanation of the problem ] Yes, implicit padding is definitely bad. I didn't realize I even had it in there, although I'm glad my STATIC_ASSERT did its job. There are various references to patches, however I haven't seen any. Perhaps they're lost in my inbox, or they were never sent to mesa-dev, or something else. Could the patch author (or another interested party) send it as a proper patch to mesa-dev? Assuming it doesn't break x86, happy to check it in. Cheers, -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k
On Thu, 2014-07-17 at 10:21 +0200, Thorsten Glaser wrote: > On Thu, 17 Jul 2014, Eero Tamminen wrote: > > > While effect of unaligned accesses is normally invisible, > > No, the compiler is inserting padding here silently. > We call this “implicit padding”. The problem with it > is that this padding is architecture-dependent, and > some platforms have other alignment requirements than > other platforms. > > Take this example: > > struct { > char c; > int i; > } foo; > > This looks like this to the programmer: > > ┌───┬───┬───┬───┬───┐ > │ c │ i i i i │ > └───┴───┴───┴───┴───┘ > > But it looks like this on i386: > > ┌───┬───┬───┬───┬───┬───┬───┬───┐ > │ c │ XpaddingX │ i i i i │ > └───┴───┴───┴───┴───┴───┴───┴───┘ > > And only like this on m68k: > > ┌───┬───┬───┬───┬───┬───┐ > │ c │ X │ i i i i │ > └───┴───┴───┴───┴───┴───┘ > > This is because the compiler uses the architecture’s optimal > minimum alignment for “implicit” padding, to avoid the misalignment > you’re talking about. On i386, access to a 32-bit quantity is fast > if it’s 4-byte aligned; on m68k, 2-byte alignment is not only enough > for it to be fast (4-byte would have no benefit), but is also required > by the ABI. > > > To fix this, we use explicit padding: > > struct { > char c; > char unused1[3]; > int i; > } foo; > > Now all cases look the same (except if you have a CPU which > wants to align its “int”s to 64 bit…). > > > The problem here is that the code in question uses arrays of > such structs with implicit padding, and checks their sizes > against its expectations. Maybe because the array is written > directly to the hardware. > > What my patch does is to insert e̲x̲p̲l̲i̲c̲i̲t̲ padding to exactly > match the i̲m̲p̲l̲i̲c̲i̲t̲ padding present on the i386 architecture, > to make this the “minimum amount of padding” used. (Other > architectures may still insert implicit padding, e.g. 
if > they want their “int”s to be 64-bit aligned, but that’s > outside of the scope of this, and will fail with that code > anyway.) just a question why not use __attribute__ ((aligned(X))) for explicit padding? the attached program produces the following output on my x64 machine: natural: size 8, offset c: 0, offset i: 4 explicit 8: size 16, offset c: 0, offset i: 8 explicit 2: size 8, offset c: 0, offset i: 4 and I get the same output on arm32. regards, Jan > > bye, > //mirabilos -- Jan Vesely #include <stdio.h> #include <stddef.h> struct foo { char c; int i; }; struct bar { char c; int i __attribute__ ((aligned (8))); }; struct baz { char c; int i __attribute__ ((aligned (2))); }; int main(void) { printf("natural: size %zu, offset c: %zu, offset i: %zu \n", sizeof(struct foo), offsetof(struct foo, c), offsetof(struct foo, i)); printf("explicit 8: size %zu, offset c: %zu, offset i: %zu \n", sizeof(struct bar), offsetof(struct bar, c), offsetof(struct bar, i)); printf("explicit 2: size %zu, offset c: %zu, offset i: %zu \n", sizeof(struct baz), offsetof(struct baz, c), offsetof(struct baz, i)); return 0; } signature.asc Description: This is a digitally signed message part ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k
On Thu, Jul 17, 2014 at 10:24 AM, Thorsten Glaser wrote: > On Thu, 17 Jul 2014, Ilia Mirkin wrote: > >> Yes, implicit padding is definitely bad. I didn't realize I even had >> it in there, although I'm glad my STATIC_ASSERT did its job. There are > > ;-) > >> various references to patches, however I haven't seen any. Perhaps >> they're lost in my inbox, or they were never sent to mesa-dev, or >> something else. Could the patch author (or another interested party) >> send it as a proper patch to mesa-dev? Assuming it doesn't break x86, >> happy to check it in. > > Oh fun. I’m the patch author, and it was here: > https://bugs.debian.org/cgi-bin/bugreport.cgi?msg=37;filename=mesa_10.2.3-1%2Bm68k.1.debdiff;att=1;bug=728053 > > I’ve attached the part you’re probably most interested in. Thanks! Munged the commit description a little and pushed to the master branch. I think the patches for 10.2.4 have already been selected, so it should make its way into 10.2.5. -ilia ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k
On Thu, 17 Jul 2014, Ilia Mirkin wrote: > Yes, implicit padding is definitely bad. I didn't realize I even had > it in there, although I'm glad my STATIC_ASSERT did its job. There are ;-) > various references to patches, however I haven't seen any. Perhaps > they're lost in my inbox, or they were never sent to mesa-dev, or > something else. Could the patch author (or another interested party) > send it as a proper patch to mesa-dev? Assuming it doesn't break x86, > happy to check it in. Oh fun. I’m the patch author, and it was here: https://bugs.debian.org/cgi-bin/bugreport.cgi?msg=37;filename=mesa_10.2.3-1%2Bm68k.1.debdiff;att=1;bug=728053 I’ve attached the part you’re probably most interested in. Thanks, //mirabilos -- Sometimes they [people] care too much: pretty printers [and syntax highligh- ting, d.A.] mechanically produce pretty output that accentuates irrelevant detail in the program, which is as sensible as putting all the prepositions in English text in bold font. -- Rob Pike in "Notes on Programming in C"From: Thorsten Glaser ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k
On Thu, 17 Jul 2014, Jan Vesely wrote: > why not use __attribute__ ((aligned(X))) for explicit padding? That’s ① GCC-specific and ② relies on environmental guarantees that cannot always be given (e.g. you cannot align a struct more than the stack alignment if it is ever passed on the stack; for static or auto storage, it relies on the linker; for malloc, it relies on libc). The approach here is not to make the struct aligned, but rather, to make it consistent within itself. In a portable way, even (although we do use GCC 4.9 on Debian/m68k). bye, //mirabilos -- «MyISAM tables -will- get corrupted eventually. This is a fact of life. » “mysql is about as much database as ms access” – “MSSQL at least descends from a database” “it's a rebranded SyBase” “MySQL however was born from a flatfile and went downhill from there” – “at least jetDB doesn’t claim to be a database” ‣‣‣ Please, http://deb.li/mysql and MariaDB, finally die! ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Updated debdiff for mesa to compile on m68k
Hi Ilia! On 07/17/2014 04:35 PM, Ilia Mirkin wrote: > Thanks! Munged the commit description a little and pushed to the > master branch. I think the patches for 10.2.4 have already been > selected, so it should make its way into 10.2.5. Awesome, thank you so much! Glad we could finally fix this one :). Cheers, Adrian -- .''`. John Paul Adrian Glaubitz : :' : Debian Developer - glaub...@debian.org `. `' Freie Universitaet Berlin - glaub...@physik.fu-berlin.de `-GPG: 62FF 8A75 84E0 2956 9546 0006 7426 3B37 F5B5 F913 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/2] Make a llvmpipe context basically thread safe.
Hi Mathias, Thanks for doing this. Patch 2 looks great. For patch 1 I'd prefer that instead of keeping a global list of contexts, these are passed by the caller as argument to gallivm_create() . It will be a more invasive change, but it will be cleaner. And in particular it will guarantee that when llvmpipe contexts are destroyed there will be no lingering llvm contexts either. Jose On 13/07/14 17:13, Mathias Fröhlich wrote: Hi Jose, This makes llvmpipe thread safe as mandated by the OpenGL standard. The changes replace the use of two global data structures with non global ones. The changes pass piglit as of today without regressions. The patchset deviates from your last suggestion. But lacking sufficient time I just went ahead and implemented something that fixes this problem preserving hopefully enough of the memory savings the shared ShaderMemoryManager initially provided. The more extended solution with a single jit memory manager per GL context could be implemented on top of this change. Please review! Mathias Mathias Fröhlich (2): llvmpipe: Pool the LLVMContexts in use. llvmpipe: Make a llvmpipe OpenGL context thread safe. src/gallium/auxiliary/gallivm/lp_bld_init.c | 87 --- src/gallium/auxiliary/gallivm/lp_bld_init.h | 1 + src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 38 ++-- src/gallium/auxiliary/gallivm/lp_bld_misc.h | 3 + 4 files changed, 100 insertions(+), 29 deletions(-) ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: Correctly use glDrawBuffers for multiple buffers and glDrawBuffer for one buffer.
According to the spec (OpenGL 4.0 specification, pages 254-255), different bits are set for one buffer and for multiple buffers. For glDrawBuffer we may have up to four bits set but for glDrawBuffers we can only have one bit set. The _mesa_drawbuffers is called with ctx->Const.MaxDrawBuffers and NULL arguments when _mesa_update_framebuffer or _mesa_update_draw_buffers is called. In this situation the glDrawBuffers code path is used for any number of buffers, even for one. But the glDrawBuffer code path has to be used for one buffer instead of the glDrawBuffers one. Piglit test 'gl30basic' fails with an assertion with debug Mesa and passes with release: 'main/buffers.c:520: _mesa_drawbuffers: Assertion `__builtin_popcount(destMask[buf]) == 1' failed.' Probably some other tests are also affected. Signed-off-by: Pavel Popov --- src/mesa/main/buffers.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index b13a7af..a640360 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -480,6 +480,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, struct gl_framebuffer *fb = ctx->DrawBuffer; GLbitfield mask[MAX_DRAW_BUFFERS]; GLuint buf; + GLuint m = n; if (!destMask) { /* compute destMask values now */ @@ -489,15 +490,17 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, mask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]); ASSERT(mask[output] != BAD_MASK); mask[output] &= supportedMask; + if (mask[output] == 0) +m--; } destMask = mask; } /* -* If n==1, destMask[0] may have up to four bits set. +* If m==1, destMask[0] may have up to four bits set. * Otherwise, destMask[x] can only have one bit set. 
*/ - if (n == 1) { + if (m == 1) { GLuint count = 0, destMask0 = destMask[0]; while (destMask0) { GLint bufIndex = ffs(destMask0) - 1; -- 1.9.1 Closed Joint Stock Company Intel A/O Registered legal address: Krylatsky Hills Business Park, 17 Krylatskaya Str., Bldg 4, Moscow 121614, Russian Federation This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] radeon/llvm: enable unsafe math for graphics shaders
Accuracy of some operations was recently improved in the R600 backend, at the cost of slower code. This is required for compute shaders, but not for graphics shaders. Add unsafe-fp-math hint to make LLVM generate faster but possibly less accurate code. Piglit didn't indicate any regressions. --- src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 1b17dd4..171ccaa 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -26,6 +26,7 @@ #include "radeon_llvm_emit.h" #include "radeon_elf_util.h" #include "util/u_memory.h" +#include "pipe/p_shader_tokens.h" #include #include @@ -50,6 +51,10 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) sprintf(Str, "%1d", type); LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str); + + if (type != TGSI_PROCESSOR_COMPUTE) { +LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true"); + } } static void init_r600_target() { -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] radeon/llvm: fix formatting
Use K&R and same indent as most other code. No functional change intended. --- src/gallium/drivers/radeon/radeon_llvm_emit.c | 24 ++-- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c index 171ccaa..53694b7 100644 --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c @@ -47,17 +47,18 @@ */ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) { - char Str[2]; - sprintf(Str, "%1d", type); + char Str[2]; + sprintf(Str, "%1d", type); - LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str); + LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str); - if (type != TGSI_PROCESSOR_COMPUTE) { -LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true"); - } + if (type != TGSI_PROCESSOR_COMPUTE) { + LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true"); + } } -static void init_r600_target() { +static void init_r600_target() +{ static unsigned initialized = 0; if (!initialized) { LLVMInitializeR600TargetInfo(); @@ -68,7 +69,8 @@ static void init_r600_target() { } } -static LLVMTargetRef get_r600_target() { +static LLVMTargetRef get_r600_target() +{ LLVMTargetRef target = NULL; for (target = LLVMGetFirstTarget(); target; @@ -87,7 +89,8 @@ static LLVMTargetRef get_r600_target() { #if HAVE_LLVM >= 0x0305 -static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) { +static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) +{ if (LLVMGetDiagInfoSeverity(di) == LLVMDSError) { unsigned int *diagnosticflag = (unsigned int *)context; char *diaginfo_message = LLVMGetDiagInfoDescription(di); @@ -106,7 +109,8 @@ static void radeonDiagnosticHandler(LLVMDiagnosticInfoRef di, void *context) { * @returns 0 for success, 1 for failure */ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary, - const char * gpu_family, unsigned dump) { + const char 
*gpu_family, unsigned dump) +{ LLVMTargetRef target; LLVMTargetMachineRef tm; -- 1.8.3.2 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 80848] [dri3] Building mesa fails with dri3 enabled
https://bugs.freedesktop.org/show_bug.cgi?id=80848 --- Comment #8 from Emil Velikov --- Juha-Pekka, Bryan Can you guys test the patch in comment 7 ? I feel slightly reluctant about breaking my system in order to test it myself. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/12] main/formats: Remove IndexBits
Mesa hasn't supported color-indexed textures for some time. This is 0 for all texture formats, so we don't need to store it. Signed-off-by: Jason Ekstrand --- src/mesa/main/format_info.py | 2 +- src/mesa/main/formats.c | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/mesa/main/format_info.py b/src/mesa/main/format_info.py index 9b63bfb..b8956a5 100644 --- a/src/mesa/main/format_info.py +++ b/src/mesa/main/format_info.py @@ -171,7 +171,7 @@ for fmat in formats: bits = [ get_channel_bits(fmat, name) for name in ['r', 'g', 'b', 'a']] print ' {0},'.format(', '.join(map(str, bits))) - bits = [ get_channel_bits(fmat, name) for name in ['l', 'i', 'I', 'z', 's']] + bits = [ get_channel_bits(fmat, name) for name in ['l', 'i', 'z', 's']] print ' {0},'.format(', '.join(map(str, bits))) print ' {0}, {1}, {2},'.format(fmat.block_width, fmat.block_height, diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index e237064..39cc5f1 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -59,7 +59,6 @@ struct gl_format_info GLubyte AlphaBits; GLubyte LuminanceBits; GLubyte IntensityBits; - GLubyte IndexBits; GLubyte DepthBits; GLubyte StencilBits; @@ -145,7 +144,7 @@ _mesa_get_format_bits(mesa_format format, GLenum pname) case GL_TEXTURE_LUMINANCE_SIZE: return info->LuminanceBits; case GL_INDEX_BITS: - return info->IndexBits; + return 0; case GL_DEPTH_BITS: case GL_TEXTURE_DEPTH_SIZE_ARB: case GL_RENDERBUFFER_DEPTH_SIZE_EXT: -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/12] main: Add a format description CSV file
Signed-off-by: Jason Ekstrand --- src/mesa/main/formats.csv | 282 ++ 1 file changed, 282 insertions(+) create mode 100644 src/mesa/main/formats.csv diff --git a/src/mesa/main/formats.csv b/src/mesa/main/formats.csv new file mode 100644 index 000..5abb706 --- /dev/null +++ b/src/mesa/main/formats.csv @@ -0,0 +1,282 @@ +### +# +# Copyright 2009-2010 VMware, Inc. +# Copyright 2014 Intel Corporation +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+# +### + +# This CSV file has the input data for gen_format.h and gen_format.c +# +# Each format entry contains: +# - name, per enum mesa_format +# - layout +# - pixel block's width +# - pixel block's height +# - channel encoding (only meaningful for array or packed layout), containing for each +# channel the following information: +# - type, one of +# - 'x': void +# - 'u': unsigned +# - 's': signed +# - 'h': fixed +# - 'f': FLOAT +# - optionally followed by 'n' if it is normalized +# - number of bits +# - channel swizzle +# - color space: rgb, srgb, yub, sz + +# None +# Described as regular uint_8 bytes, i.e. MESA_FORMAT_R8_USCALED +MESA_FORMAT_NONE , other , 1, 1, x8 , , , , 0001, rgb + +# Packed unorm formats +MESA_FORMAT_A8B8G8R8_UNORM, packed, 1, 1, un8 , un8 , un8 , un8 , wzyx, rgb +MESA_FORMAT_X8B8G8R8_UNORM, packed, 1, 1, x8 , un8 , un8 , un8 , wzy1, rgb +MESA_FORMAT_R8G8B8A8_UNORM, packed, 1, 1, un8 , un8 , un8 , un8 , xyzw, rgb +MESA_FORMAT_R8G8B8X8_UNORM, packed, 1, 1, un8 , un8 , un8 , x8 , xyz1, rgb +MESA_FORMAT_B8G8R8A8_UNORM, packed, 1, 1, un8 , un8 , un8 , un8 , zyxw, rgb +MESA_FORMAT_B8G8R8X8_UNORM, packed, 1, 1, un8 , un8 , un8 , x8 , zyx1, rgb +MESA_FORMAT_A8R8G8B8_UNORM, packed, 1, 1, un8 , un8 , un8 , un8 , yzwx, rgb +MESA_FORMAT_X8R8G8B8_UNORM, packed, 1, 1, x8 , un8 , un8 , un8 , yzw1, rgb +MESA_FORMAT_L16A16_UNORM , packed, 1, 1, un16, un16, , , xxxy, rgb +MESA_FORMAT_A16L16_UNORM , packed, 1, 1, un16, un16, , , yyyx, rgb +MESA_FORMAT_B5G6R5_UNORM , packed, 1, 1, un5 , un6 , un5 , , zyx1, rgb +MESA_FORMAT_R5G6B5_UNORM , packed, 1, 1, un5 , un6 , un5 , , xyz1, rgb +MESA_FORMAT_B4G4R4A4_UNORM, packed, 1, 1, un4 , un4 , un4 , un4 , zyxw, rgb +MESA_FORMAT_B4G4R4X4_UNORM, packed, 1, 1, un4 , un4 , un4 , x4 , zyx1, rgb +MESA_FORMAT_A4R4G4B4_UNORM, packed, 1, 1, un4 , un4 , un4 , un4 , yzwx, rgb +MESA_FORMAT_A1B5G5R5_UNORM, packed, 1, 1, un1 , un5 , un5 , un5 , wzyx, rgb +MESA_FORMAT_B5G5R5A1_UNORM, packed, 1, 1, un5 , un5 , un5 , un1 , zyxw, rgb 
+MESA_FORMAT_B5G5R5X1_UNORM, packed, 1, 1, un5 , un5 , un5 , x1 , zyx1, rgb +MESA_FORMAT_A1R5G5B5_UNORM, packed, 1, 1, un1 , un5 , un5 , un5 , yzwx, rgb +MESA_FORMAT_L8A8_UNORM, packed, 1, 1, un8 , un8 , , , xxxy, rgb +MESA_FORMAT_A8L8_UNORM, packed, 1, 1, un8 , un8 , , , yyyx, rgb +MESA_FORMAT_R8G8_UNORM, packed, 1, 1, un8 , un8 , , , xy01, rgb +MESA_FORMAT_G8R8_UNORM, packed, 1, 1, un8 , un8 , , , yx01, rgb +MESA_FORMAT_L4A4_UNORM, packed, 1, 1, un4 , un4 , , , xxxy, rgb + +MESA_FORMAT_B2G3R3_UNORM , packed, 1, 1, un2 , un3 , un3 , , zyx1, rgb +MESA_FORMAT_R16G16_UNORM , packed, 1, 1, un16, un16, , , xy01, rgb +MESA_FORMAT_G16R16_UNORM
[Mesa-dev] [PATCH 04/12] main/formats: Add layout and swizzle information
Signed-off-by: Jason Ekstrand --- src/mesa/main/format_info.py | 11 +++ src/mesa/main/formats.c | 46 src/mesa/main/formats.h | 29 3 files changed, 86 insertions(+) diff --git a/src/mesa/main/format_info.py b/src/mesa/main/format_info.py index b8956a5..448bd00 100644 --- a/src/mesa/main/format_info.py +++ b/src/mesa/main/format_info.py @@ -96,6 +96,14 @@ def get_gl_data_type(fmat): else: assert False +def get_mesa_layout(fmat): + if fmat.layout == 'array': + return 'MESA_FORMAT_LAYOUT_ARRAY' + elif fmat.layout == 'packed': + return 'MESA_FORMAT_LAYOUT_PACKED' + else: + return 'MESA_FORMAT_LAYOUT_OTHER' + def get_channel_bits(fmat, chan_name): if fmat.is_compressed(): # These values are pretty-much bogus, but OpenGL requires that we @@ -166,6 +174,7 @@ for fmat in formats: print ' {' print ' {0},'.format(fmat.name) print ' "{0}",'.format(fmat.name) + print ' {0},'.format(get_mesa_layout(fmat)) print ' {0},'.format(get_gl_base_format(fmat)) print ' {0},'.format(get_gl_data_type(fmat)) @@ -176,6 +185,8 @@ for fmat in formats: print ' {0}, {1}, {2},'.format(fmat.block_width, fmat.block_height, int(fmat.block_size() / 8)) + + print ' {{ {0} }},'.format(', '.join(map(str, fmat.swizzle))) print ' },' print '};' diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c index 39cc5f1..f03425e 100644 --- a/src/mesa/main/formats.c +++ b/src/mesa/main/formats.c @@ -40,6 +40,8 @@ struct gl_format_info /** text name for debugging */ const char *StrName; + enum mesa_format_layout Layout; + /** * Base format is one of GL_RED, GL_RG, GL_RGB, GL_RGBA, GL_ALPHA, * GL_LUMINANCE, GL_LUMINANCE_ALPHA, GL_INTENSITY, GL_YCBCR_MESA, @@ -67,6 +69,8 @@ struct gl_format_info */ GLubyte BlockWidth, BlockHeight; GLubyte BytesPerBlock; + + uint8_t Swizzle[4]; }; #include "format_info.c" @@ -178,6 +182,21 @@ _mesa_get_format_max_bits(mesa_format format) /** + * Return the layout type of the given format. 
+ * The return value will be one of: + *MESA_FORMAT_LAYOUT_ARRAY + *MESA_FORMAT_LAYOUT_PACKED + *MESA_FORMAT_LAYOUT_OTHER + */ +extern enum mesa_format_layout +_mesa_get_format_layout(mesa_format format) +{ + const struct gl_format_info *info = _mesa_get_format_info(format); + return info->Layout; +} + + +/** * Return the data type (or more specifically, the data representation) * for the given format. * The return value will be one of: @@ -224,6 +243,33 @@ _mesa_get_format_block_size(mesa_format format, GLuint *bw, GLuint *bh) } +/** + * Returns the an array of four numbers representing the transformation + * from the RGBA or SZ colorspace to the given format. For array formats, + * the i'th RGBA component is given by: + * + * if (swizzle[i] <= MESA_FORMAT_SWIZZLE_W) + *comp = data[swizzle[i]]; + * else if (swizzle[i] == MESA_FORMAT_SWIZZLE_ZERO) + *comp = 0; + * else if (swizzle[i] == MESA_FORMAT_SWIZZLE_ONE) + *comp = 1; + * else if (swizzle[i] == MESA_FORMAT_SWIZZLE_NONE) + *// data does not contain a channel of this format + * + * For packed formats, the swizzle gives the number of components left of + * the least significant bit. + * + * Compressed formats have no swizzle. + */ +void +_mesa_get_format_swizzle(mesa_format format, uint8_t swizzle_out[4]) +{ + const struct gl_format_info *info = _mesa_get_format_info(format); + memcpy(swizzle_out, info->Swizzle, sizeof(info->Swizzle)); +} + + /** Is the given format a compressed format? */ GLboolean _mesa_is_format_compressed(mesa_format format) diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h index dc50bc8..48aad44 100644 --- a/src/mesa/main/formats.h +++ b/src/mesa/main/formats.h @@ -56,6 +56,15 @@ extern "C" { */ #define MAX_PIXEL_BYTES 16 +/** + * Specifies the layout of a pixel format. See the MESA_FORMAT + * documentation below. 
+ */ +enum mesa_format_layout { + MESA_FORMAT_LAYOUT_ARRAY, + MESA_FORMAT_LAYOUT_PACKED, + MESA_FORMAT_LAYOUT_OTHER, +}; /** * Mesa texture/renderbuffer image formats. @@ -419,6 +428,9 @@ _mesa_get_format_bits(mesa_format format, GLenum pname); extern GLuint _mesa_get_format_max_bits(mesa_format format); +extern enum mesa_format_layout +_mesa_get_format_layout(mesa_format format); + extern GLenum _mesa_get_format_datatype(mesa_format format); @@ -428,6 +440,23 @@ _mesa_get_format_base_format(mesa_format format); extern void _mesa_get_format_block_size(mesa_format format, GLuint *bw, GLuint *bh); +/** + * An enum representing different possible swizzling values. This is used + * to interpret the output of _mesa_get_format_swizzle + */ +enum { + MESA_FORMAT_SWIZZLE_X = 0, + MESA_FORMAT_SWIZZLE_Y = 1, + MESA_FORMAT_SWIZZLE_Z = 2, + MESA_FORMAT_SW
[Mesa-dev] [PATCH 00/12] Rework texture upload code
This is the first installment of some work I've been doing over the past couple of weeks to refactor mesa's texture conversion/storage code. There is more to be done and more that I have done but have not included in this series. These are the first mailing-list-ready fruits of my efforts. The important bits here include: 1) Using a human-readable CSV file to describe texture formats similar to the way it is currently done in gallium. This is much easier to read/edit than the structure in formats.c. The guts of formats.c are then autogenerated from this CSV file. 2) Adding a very generic yet efficient _mesa_swizzle_and_convert function that is capable of performing the vast majority of texture format conversions in one function. It has also been fairly carefully tuned to be even faster than the _mesa_swizzle_ubyte_image special-case that we had before for ubyte textures only; it also works on the other datatypes and can even do type conversions as it swizzles. 3) Refactoring of texstore.c including the use of the above _mesa_swizzle_and_convert function along with the already-existing packing functions to remove a lot of hand-written special-case code. Thanks to the format CSV file, there's even more that we can now do. Things I hope to accomplish in the future include: 1) Autogenerate the bulk of main/format_pack.c, main/format_unpack.c, and main/pack.c from CSV files. There's some refactoring that will be required first, but it shouldn't be that hard and I already have the python code to do the generation; it's just not part of this patch series. 2) Find a general way to do depth-stencil formats. I'm a bit dubious as to whether or not this will turn out to be practical, but I haven't had a chance to look into it too much yet. 3) Do similar refactors for GetTexImage, ReadPixels, and DrawPixels. Happy Reviewing! 
--Jason Ekstrand Jason Ekstrand (12): main: Add a format description CSV file main/formats: Autogenerate the format_info structure from a CSV file main/formats: Remove IndexBits main/formats: Add layout and swizzle information main: Gather some common format conversion functions into a single format_utils file main/format_utils: Add a general format conversion function main/format_utils: Add a function for determining if a format is actually an array format and computing the array format parameters main/texstore: Split texture storage into three functions: texstore_depth_stencil, texstore_compressed, and texstore_rgba main/texstore: Use _mesa_swizzle_and_convert when possible main/format_pack: Fix a wrong datatype in pack_ubyte_R8G8_UNORM main/texstore: Add a general texture upload path for rgba floating-point and normalized textures mesa/texstore: Add a generic rgba integer texture upload path src/mesa/Makefile.am | 10 + src/mesa/Makefile.sources|1 + src/mesa/main/.gitignore |1 + src/mesa/main/format_info.py | 192 +++ src/mesa/main/format_pack.c | 104 +- src/mesa/main/format_parser.py | 406 + src/mesa/main/format_unpack.c| 69 +- src/mesa/main/format_unpack.h|3 - src/mesa/main/format_utils.c | 715 + src/mesa/main/format_utils.h | 90 ++ src/mesa/main/formats.c | 1786 + src/mesa/main/formats.csv| 282 src/mesa/main/formats.h | 29 + src/mesa/main/texcompress_etc.c | 20 +- src/mesa/main/texcompress_s3tc.c | 26 +- src/mesa/main/texstore.c | 3172 +++--- 16 files changed, 2412 insertions(+), 4494 deletions(-) create mode 100644 src/mesa/main/format_info.py create mode 100755 src/mesa/main/format_parser.py create mode 100644 src/mesa/main/format_utils.c create mode 100644 src/mesa/main/format_utils.h create mode 100644 src/mesa/main/formats.csv -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/12] main/format_pack: Fix a wrong datatype in pack_ubyte_R8G8_UNORM
Before it was only storing one of the color components due to truncation. With this patch it now properly stores all of them. Signed-off-by: Jason Ekstrand --- src/mesa/main/format_pack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/main/format_pack.c b/src/mesa/main/format_pack.c index fb3feb5..4b52405 100644 --- a/src/mesa/main/format_pack.c +++ b/src/mesa/main/format_pack.c @@ -856,7 +856,7 @@ pack_float_R_UNORM8(const GLfloat src[4], void *dst) static void pack_ubyte_R8G8_UNORM(const GLubyte src[4], void *dst) { - GLubyte *d = ((GLubyte *) dst); + GLushort *d = ((GLushort *) dst); *d = PACK_COLOR_88(src[GCOMP], src[RCOMP]); } -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/12] main/format_utils: Add a general format conversion function
Most format conversion operations required by GL can be performed by converting one channel at a time, shuffling the channels around, and optionally filling missing channels with zeros and ones. This adds a function to do just that in a general, yet efficient, way. Signed-off-by: Jason Ekstrand --- src/mesa/main/format_utils.c | 566 +++ src/mesa/main/format_utils.h | 18 ++ 2 files changed, 584 insertions(+) diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c index 241c158..0cb3eae 100644 --- a/src/mesa/main/format_utils.c +++ b/src/mesa/main/format_utils.c @@ -54,3 +54,569 @@ _mesa_srgb_ubyte_to_linear_float(uint8_t cl) return lut[cl]; } + +static bool +swizzle_convert_try_memcpy(void *dst, GLenum dst_type, int num_dst_channels, + const void *src, GLenum src_type, int num_src_channels, + const uint8_t swizzle[4], bool normalized, int count) +{ + int i; + + if (src_type != dst_type) + return false; + if (num_src_channels != num_dst_channels) + return false; + + for (i = 0; i < num_dst_channels; ++i) + if (swizzle[i] != i && swizzle[i] != MESA_FORMAT_SWIZZLE_NONE) + return false; + + memcpy(dst, src, count * num_src_channels * _mesa_sizeof_type(src_type)); + + return true; +} + +/* Note: This loop is carefully crafted for performance. Be careful when + * changing it and run some benchmarks to ensure no performance regressions + * if you do. 
+ */ +#define SWIZZLE_CONVERT_LOOP(DST_TYPE, SRC_TYPE, CONV) \ + do { \ + const SRC_TYPE *typed_src = void_src; \ + DST_TYPE *typed_dst = void_dst;\ + DST_TYPE tmp[7]; \ + tmp[4] = 0;\ + tmp[5] = one; \ + for (s = 0; s < count; ++s) { \ + for (j = 0; j < num_src_channels; ++j) {\ +SRC_TYPE src = typed_src[j]; \ +tmp[j] = CONV; \ + } \ + \ + typed_dst[0] = tmp[swizzle_x]; \ + if (num_dst_channels > 1) { \ +typed_dst[1] = tmp[swizzle_y]; \ +if (num_dst_channels > 2) { \ + typed_dst[2] = tmp[swizzle_z];\ + if (num_dst_channels > 3) { \ + typed_dst[3] = tmp[swizzle_w]; \ + } \ +}\ + } \ + typed_src += num_src_channels; \ + typed_dst += num_dst_channels; \ + } \ + } while (0); + +/** + * Convert between array-based color formats. + * + * Most format conversion operations required by GL can be performed by + * converting one channel at a time, shuffling the channels around, and + * optionally filling missing channels with zeros and ones. This function + * does just that in a general, yet efficient, way. + * + * Most of the parameters are self-explanitory. The swizzle parameter is + * an array of 4 numbers (see _mesa_get_format_swizzle) that describes + * where each channel in the destination should come from in the source. + * + * Under most circumstances, the source and destination images must be + * different as no care is taken not to clobber one with the other. + * However, if they have the same number of bits per pixel, it is safe to + * do an in-place conversion. 
+ */ +void +_mesa_swizzle_and_convert(void *void_dst, GLenum dst_type, int num_dst_channels, + const void *void_src, GLenum src_type, int num_src_channels, + const uint8_t swizzle[4], bool normalized, int count) +{ + int s, j; + register uint8_t swizzle_x, swizzle_y, swizzle_z, swizzle_w; + + if (swizzle_convert_try_memcpy(void_dst, dst_type, num_dst_channels, + void_src, src_type, num_src_channels, + swizzle, normalized, count)) + return; + + swizzle_x = swizzle[0]; + swizzle_y = swizzle[1]; + swizzle_z = swizzle[2]; + swizzle_w = swizzle[3]; + + switch (dst_type) { + case GL_FLOAT: + { + const float one = 1.0f; + switch (src_type) { + case GL_FLOAT: + SWIZZLE_CONVERT_LOOP(float, float, src) + break; + case GL_HALF_FLOAT: + SWIZZLE_CONVERT_LOOP(float, uint16_t, _mesa_half_to_float(src)) + break; + case GL_UNSIGNED_BYTE: + if (normalized) { +SWIZZLE
[Mesa-dev] [PATCH 08/12] main/texstore: Split texture storage into three functions: texstore_depth_stencil, texstore_compressed, and texstore_rgba
Signed-off-by: Jason Ekstrand --- src/mesa/main/texstore.c | 171 +++ 1 file changed, 100 insertions(+), 71 deletions(-) diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index d363f9f..e1f2284 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -3540,35 +3540,95 @@ _mesa_texstore_abgr2101010(TEXSTORE_PARAMS) return GL_TRUE; } + static GLboolean -_mesa_texstore_null(TEXSTORE_PARAMS) +texstore_depth_stencil(TEXSTORE_PARAMS) { - (void) ctx; (void) dims; - (void) baseInternalFormat; - (void) dstFormat; - (void) dstRowStride; (void) dstSlices, - (void) srcWidth; (void) srcHeight; (void) srcDepth; - (void) srcFormat; (void) srcType; - (void) srcAddr; - (void) srcPacking; - - /* should never happen */ - _mesa_problem(NULL, "_mesa_texstore_null() is called"); - return GL_FALSE; + static StoreTexImageFunc table[MESA_FORMAT_COUNT]; + static GLboolean initialized = GL_FALSE; + + if (!initialized) { + memset(table, 0, sizeof table); + + table[MESA_FORMAT_S8_UINT_Z24_UNORM] = _mesa_texstore_z24_s8; + table[MESA_FORMAT_Z24_UNORM_S8_UINT] = _mesa_texstore_s8_z24; + table[MESA_FORMAT_Z_UNORM16] = _mesa_texstore_z16; + table[MESA_FORMAT_Z24_UNORM_X8_UINT] = _mesa_texstore_x8_z24; + table[MESA_FORMAT_X8_UINT_Z24_UNORM] = _mesa_texstore_z24_x8; + table[MESA_FORMAT_Z_UNORM32] = _mesa_texstore_z32; + table[MESA_FORMAT_S_UINT8] = _mesa_texstore_s8; + table[MESA_FORMAT_Z_FLOAT32] = _mesa_texstore_z32; + table[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = _mesa_texstore_z32f_x24s8; + + initialized = GL_TRUE; + } + + ASSERT(table[dstFormat]); + return table[dstFormat](ctx, dims, baseInternalFormat, + dstFormat, dstRowStride, dstSlices, + srcWidth, srcHeight, srcDepth, + srcFormat, srcType, srcAddr, srcPacking); } +static GLboolean +texstore_compressed(TEXSTORE_PARAMS) +{ + static StoreTexImageFunc table[MESA_FORMAT_COUNT]; + static GLboolean initialized = GL_FALSE; + + if (!initialized) { + memset(table, 0, sizeof table); -/** - * Return the StoreTexImageFunc 
pointer to store an image in the given format. - */ -static StoreTexImageFunc -_mesa_get_texstore_func(mesa_format format) + table[MESA_FORMAT_SRGB_DXT1] = _mesa_texstore_rgb_dxt1; + table[MESA_FORMAT_SRGBA_DXT1] = _mesa_texstore_rgba_dxt1; + table[MESA_FORMAT_SRGBA_DXT3] = _mesa_texstore_rgba_dxt3; + table[MESA_FORMAT_SRGBA_DXT5] = _mesa_texstore_rgba_dxt5; + table[MESA_FORMAT_RGB_FXT1] = _mesa_texstore_rgb_fxt1; + table[MESA_FORMAT_RGBA_FXT1] = _mesa_texstore_rgba_fxt1; + table[MESA_FORMAT_RGB_DXT1] = _mesa_texstore_rgb_dxt1; + table[MESA_FORMAT_RGBA_DXT1] = _mesa_texstore_rgba_dxt1; + table[MESA_FORMAT_RGBA_DXT3] = _mesa_texstore_rgba_dxt3; + table[MESA_FORMAT_RGBA_DXT5] = _mesa_texstore_rgba_dxt5; + table[MESA_FORMAT_R_RGTC1_UNORM] = _mesa_texstore_red_rgtc1; + table[MESA_FORMAT_R_RGTC1_SNORM] = _mesa_texstore_signed_red_rgtc1; + table[MESA_FORMAT_RG_RGTC2_UNORM] = _mesa_texstore_rg_rgtc2; + table[MESA_FORMAT_RG_RGTC2_SNORM] = _mesa_texstore_signed_rg_rgtc2; + table[MESA_FORMAT_L_LATC1_UNORM] = _mesa_texstore_red_rgtc1; + table[MESA_FORMAT_L_LATC1_SNORM] = _mesa_texstore_signed_red_rgtc1; + table[MESA_FORMAT_LA_LATC2_UNORM] = _mesa_texstore_rg_rgtc2; + table[MESA_FORMAT_LA_LATC2_SNORM] = _mesa_texstore_signed_rg_rgtc2; + table[MESA_FORMAT_ETC1_RGB8] = _mesa_texstore_etc1_rgb8; + table[MESA_FORMAT_ETC2_RGB8] = _mesa_texstore_etc2_rgb8; + table[MESA_FORMAT_ETC2_SRGB8] = _mesa_texstore_etc2_srgb8; + table[MESA_FORMAT_ETC2_RGBA8_EAC] = _mesa_texstore_etc2_rgba8_eac; + table[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = _mesa_texstore_etc2_srgb8_alpha8_eac; + table[MESA_FORMAT_ETC2_R11_EAC] = _mesa_texstore_etc2_r11_eac; + table[MESA_FORMAT_ETC2_RG11_EAC] = _mesa_texstore_etc2_rg11_eac; + table[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = _mesa_texstore_etc2_signed_r11_eac; + table[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = _mesa_texstore_etc2_signed_rg11_eac; + table[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] = + _mesa_texstore_etc2_rgb8_punchthrough_alpha1; + 
table[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] = + _mesa_texstore_etc2_srgb8_punchthrough_alpha1; + + initialized = GL_TRUE; + } + + ASSERT(table[dstFormat]); + return table[dstFormat](ctx, dims, baseInternalFormat, + dstFormat, dstRowStride, dstSlices, + srcWidth, srcHeight, srcDepth, + srcFormat, srcType, srcAddr, srcPacking); +} + +static GLboolean +texstore_rgba(TEXSTORE_PARAMS) { static StoreTexImageFunc table[MESA_FORMAT_COUNT]; static GLboolean initialized = GL_FALSE; if (!initialized) { - table[MESA_FORMAT_
[Mesa-dev] [PATCH 07/12] main/format_utils: Add a function for determining if a format is actually an array format and computing the array format parameters
This is a direct helper function for using _mesa_swizzle_and_convert. Signed-off-by: Jason Ekstrand --- src/mesa/main/format_utils.c | 93 src/mesa/main/format_utils.h | 4 ++ 2 files changed, 97 insertions(+) diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c index 0cb3eae..b9c7a54 100644 --- a/src/mesa/main/format_utils.c +++ b/src/mesa/main/format_utils.c @@ -55,6 +55,99 @@ _mesa_srgb_ubyte_to_linear_float(uint8_t cl) return lut[cl]; } +static const uint8_t map_identity[7] = { 0, 1, 2, 3, 4, 5, 6 }; +static const uint8_t map_3210[7] = { 3, 2, 1, 0, 4, 5, 6 }; +static const uint8_t map_1032[7] = { 1, 0, 3, 2, 4, 5, 6 }; + +/** + * A helper function for figuring out if a (possibly packed) format is + * actually an array format and how to work with it. If the format cannot + * be used as an array format, this function returns false. + */ +bool +_mesa_format_to_array(mesa_format format, GLenum *type, int *num_components, + uint8_t swizzle[4], bool *normalized) +{ + int i; + GLuint format_components; + uint8_t packed_swizzle[4]; + const uint8_t *endian; + + if (_mesa_is_format_compressed(format)) + return false; + + *normalized = !_mesa_is_format_integer(format); + + _mesa_format_to_type_and_comps(format, type, &format_components); + switch (_mesa_get_format_layout(format)) { + case MESA_FORMAT_LAYOUT_ARRAY: + *num_components = format_components; + _mesa_get_format_swizzle(format, swizzle); + return true; + case MESA_FORMAT_LAYOUT_PACKED: + switch (*type) { + case GL_UNSIGNED_BYTE: + case GL_BYTE: + if (_mesa_get_format_max_bits(format) != 8) +return false; + *num_components = _mesa_get_format_bytes(format); + switch (*num_components) { + case 1: +endian = map_identity; +break; + case 2: +endian = _mesa_little_endian() ? map_identity : map_1032; +break; + case 4: +endian = _mesa_little_endian() ? 
map_identity : map_3210; +break; + default: +assert(!"Invalid number of components"); + } + break; + case GL_UNSIGNED_SHORT: + case GL_SHORT: + case GL_HALF_FLOAT: + if (_mesa_get_format_max_bits(format) != 16) +return false; + *num_components = _mesa_get_format_bytes(format) / 2; + switch (*num_components) { + case 1: +endian = map_identity; +break; + case 2: +endian = _mesa_little_endian() ? map_identity : map_1032; +break; + default: +assert(!"Invalid number of components"); + } + break; + case GL_UNSIGNED_INT: + case GL_INT: + case GL_FLOAT: + /* This isn't packed. At least not really. */ + assert(format_components == 1); + if (_mesa_get_format_max_bits(format) != 32) +return false; + *num_components = format_components; + endian = map_identity; + break; + default: + return false; + } + + _mesa_get_format_swizzle(format, packed_swizzle); + + for (i = 0; i < 4; ++i) + swizzle[i] = endian[packed_swizzle[i]]; + + return true; + case MESA_FORMAT_LAYOUT_OTHER: + default: + return false; + } +} + static bool swizzle_convert_try_memcpy(void *dst, GLenum dst_type, int num_dst_channels, const void *src, GLenum src_type, int num_src_channels, diff --git a/src/mesa/main/format_utils.h b/src/mesa/main/format_utils.h index c5dab7b..990c3f2 100644 --- a/src/mesa/main/format_utils.h +++ b/src/mesa/main/format_utils.h @@ -78,6 +78,10 @@ _mesa_srgb_to_linear(float cs) float _mesa_srgb_ubyte_to_linear_float(uint8_t cl); +bool +_mesa_format_to_array(mesa_format, GLenum *type, int *num_components, + uint8_t swizzle[4], bool *normalized); + void _mesa_swizzle_and_convert(void *dst, GLenum dst_type, int num_dst_channels, const void *src, GLenum src_type, int num_src_channels, -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/12] main: Gather some common format conversion functions into a single format_utils file
This initial commit puts all of the RGB <-> sRGB conversion functions in one place. Signed-off-by: Jason Ekstrand --- src/mesa/Makefile.sources| 1 + src/mesa/main/format_pack.c | 102 ++- src/mesa/main/format_unpack.c| 69 -- src/mesa/main/format_unpack.h| 3 -- src/mesa/main/format_utils.c | 56 + src/mesa/main/format_utils.h | 68 ++ src/mesa/main/texcompress_etc.c | 20 src/mesa/main/texcompress_s3tc.c | 26 +- 8 files changed, 204 insertions(+), 141 deletions(-) create mode 100644 src/mesa/main/format_utils.c create mode 100644 src/mesa/main/format_utils.h diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index f4904fb..a261113 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -48,6 +48,7 @@ MAIN_FILES = \ $(SRCDIR)main/formats.c \ $(SRCDIR)main/format_pack.c \ $(SRCDIR)main/format_unpack.c \ + $(SRCDIR)main/format_utils.c \ $(SRCDIR)main/framebuffer.c \ $(SRCDIR)main/get.c \ $(SRCDIR)main/genmipmap.c \ diff --git a/src/mesa/main/format_pack.c b/src/mesa/main/format_pack.c index 6b28592..fb3feb5 100644 --- a/src/mesa/main/format_pack.c +++ b/src/mesa/main/format_pack.c @@ -38,6 +38,7 @@ #include "colormac.h" #include "format_pack.h" +#include "format_utils.h" #include "macros.h" #include "../../gallium/auxiliary/util/u_format_rgb9e5.h" #include "../../gallium/auxiliary/util/u_format_r11g11b10f.h" @@ -58,39 +59,6 @@ typedef void (*pack_float_rgba_row_func)(GLuint n, const GLfloat src[][4], void *dst); - -static inline GLfloat -linear_to_srgb(GLfloat cl) -{ - if (cl < 0.0f) - return 0.0f; - else if (cl < 0.0031308f) - return 12.92f * cl; - else if (cl < 1.0f) - return 1.055f * powf(cl, 0.41666f) - 0.055f; - else - return 1.0f; -} - - -static inline GLubyte -linear_float_to_srgb_ubyte(GLfloat cl) -{ - GLubyte res = FLOAT_TO_UBYTE(linear_to_srgb(cl)); - return res; -} - - -static inline GLubyte -linear_ubyte_to_srgb_ubyte(GLubyte cl) -{ - GLubyte res = FLOAT_TO_UBYTE(linear_to_srgb(cl / 255.0f)); - return res; -} - - - - /* * 
MESA_FORMAT_A8B8G8R8_UNORM */ @@ -1043,18 +1011,18 @@ static void pack_ubyte_BGR_SRGB8(const GLubyte src[4], void *dst) { GLubyte *d = ((GLubyte *) dst); - d[2] = linear_ubyte_to_srgb_ubyte(src[RCOMP]); - d[1] = linear_ubyte_to_srgb_ubyte(src[GCOMP]); - d[0] = linear_ubyte_to_srgb_ubyte(src[BCOMP]); + d[2] = _mesa_linear_ubyte_to_srgb_ubyte(src[RCOMP]); + d[1] = _mesa_linear_ubyte_to_srgb_ubyte(src[GCOMP]); + d[0] = _mesa_linear_ubyte_to_srgb_ubyte(src[BCOMP]); } static void pack_float_BGR_SRGB8(const GLfloat src[4], void *dst) { GLubyte *d = ((GLubyte *) dst); - d[2] = linear_float_to_srgb_ubyte(src[RCOMP]); - d[1] = linear_float_to_srgb_ubyte(src[GCOMP]); - d[0] = linear_float_to_srgb_ubyte(src[BCOMP]); + d[2] = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[RCOMP])); + d[1] = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[GCOMP])); + d[0] = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[BCOMP])); } @@ -1064,9 +1032,9 @@ static void pack_ubyte_A8B8G8R8_SRGB(const GLubyte src[4], void *dst) { GLuint *d = ((GLuint *) dst); - GLubyte r = linear_ubyte_to_srgb_ubyte(src[RCOMP]); - GLubyte g = linear_ubyte_to_srgb_ubyte(src[GCOMP]); - GLubyte b = linear_ubyte_to_srgb_ubyte(src[BCOMP]); + GLubyte r = _mesa_linear_ubyte_to_srgb_ubyte(src[RCOMP]); + GLubyte g = _mesa_linear_ubyte_to_srgb_ubyte(src[GCOMP]); + GLubyte b = _mesa_linear_ubyte_to_srgb_ubyte(src[BCOMP]); *d = PACK_COLOR_(r, g, b, src[ACOMP]); } @@ -1075,9 +1043,9 @@ pack_float_A8B8G8R8_SRGB(const GLfloat src[4], void *dst) { GLuint *d = ((GLuint *) dst); GLubyte r, g, b, a; - r = linear_float_to_srgb_ubyte(src[RCOMP]); - g = linear_float_to_srgb_ubyte(src[GCOMP]); - b = linear_float_to_srgb_ubyte(src[BCOMP]); + r = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[RCOMP])); + g = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[GCOMP])); + b = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[BCOMP])); UNCLAMPED_FLOAT_TO_UBYTE(a, src[ACOMP]); *d = PACK_COLOR_(r, g, b, a); } @@ -1089,9 +1057,9 @@ static void pack_ubyte_B8G8R8A8_SRGB(const GLubyte src[4], void 
*dst) { GLuint *d = ((GLuint *) dst); - GLubyte r = linear_ubyte_to_srgb_ubyte(src[RCOMP]); - GLubyte g = linear_ubyte_to_srgb_ubyte(src[GCOMP]); - GLubyte b = linear_ubyte_to_srgb_ubyte(src[BCOMP]); + GLubyte r = _mesa_linear_ubyte_to_srgb_ubyte(src[RCOMP]); + GLubyte g = _mesa_linear_ubyte_to_srgb_ubyte(src[GCOMP]); + GLubyte b = _mesa_linear_ubyte_to_srgb_ubyte(src[BCOMP]); *d = PACK_COLOR_(src[ACOMP], r, g, b); } @@ -1100,9 +1068,9 @@ pack_float_B8G8R8A8_SRGB(const GLfloat src[4], void *dst) { GLuint *d = ((GLuint *) dst); GLubyt
[Mesa-dev] [PATCH 12/12] mesa/texstore: Add a generic rgba integer texture upload path
Again, we delete a lot of functions that aren't really doing anything interesting anymore. Signed-off-by: Jason Ekstrand --- src/mesa/main/texstore.c | 545 ++- 1 file changed, 66 insertions(+), 479 deletions(-) diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index 31317cb..ae6b286 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -1194,423 +1194,6 @@ _mesa_texstore_s8(TEXSTORE_PARAMS) } -/* non-normalized, signed int8 */ -static GLboolean -_mesa_texstore_rgba_int8(TEXSTORE_PARAMS) -{ - GLenum baseFormat = _mesa_get_format_base_format(dstFormat); - GLint components = _mesa_components_in_format(baseFormat); - - /* this forces alpha to 1 in make_temp_uint_image */ - if (dstFormat == MESA_FORMAT_RGBX_SINT8) { - baseFormat = GL_RGBA; - components = 4; - } - - ASSERT(dstFormat == MESA_FORMAT_R_SINT8 || - dstFormat == MESA_FORMAT_RG_SINT8 || - dstFormat == MESA_FORMAT_RGB_SINT8 || - dstFormat == MESA_FORMAT_RGBA_SINT8 || - dstFormat == MESA_FORMAT_A_SINT8 || - dstFormat == MESA_FORMAT_I_SINT8 || - dstFormat == MESA_FORMAT_L_SINT8 || - dstFormat == MESA_FORMAT_LA_SINT8 || - dstFormat == MESA_FORMAT_RGBX_SINT8); - ASSERT(baseInternalFormat == GL_RGBA || - baseInternalFormat == GL_RGB || - baseInternalFormat == GL_RG || - baseInternalFormat == GL_RED || - baseInternalFormat == GL_ALPHA || - baseInternalFormat == GL_LUMINANCE || - baseInternalFormat == GL_LUMINANCE_ALPHA || - baseInternalFormat == GL_INTENSITY); - ASSERT(_mesa_get_format_bytes(dstFormat) == components * sizeof(GLbyte)); - - { - /* general path */ - const GLuint *tempImage = make_temp_uint_image(ctx, dims, -baseInternalFormat, -baseFormat, -srcWidth, srcHeight, srcDepth, -srcFormat, srcType, -srcAddr, -srcPacking); - const GLuint *src = tempImage; - GLint img, row; - GLboolean is_unsigned = _mesa_is_type_unsigned(srcType); - if (!tempImage) - return GL_FALSE; - for (img = 0; img < srcDepth; img++) { - GLubyte *dstRow = dstSlices[img]; - for (row = 0; row < srcHeight; 
row++) { -GLbyte *dstTexel = (GLbyte *) dstRow; -GLint i; -if (is_unsigned) { - for (i = 0; i < srcWidth * components; i++) { - dstTexel[i] = (GLbyte) MIN2(src[i], 0x7f); - } -} else { - for (i = 0; i < srcWidth * components; i++) { - dstTexel[i] = (GLbyte) CLAMP((GLint) src[i], -0x80, 0x7f); - } -} -dstRow += dstRowStride; -src += srcWidth * components; - } - } - - free((void *) tempImage); - } - return GL_TRUE; -} - - -/* non-normalized, signed int16 */ -static GLboolean -_mesa_texstore_rgba_int16(TEXSTORE_PARAMS) -{ - GLenum baseFormat = _mesa_get_format_base_format(dstFormat); - GLint components = _mesa_components_in_format(baseFormat); - - /* this forces alpha to 1 in make_temp_uint_image */ - if (dstFormat == MESA_FORMAT_RGBX_SINT16) { - baseFormat = GL_RGBA; - components = 4; - } - - ASSERT(dstFormat == MESA_FORMAT_R_SINT16 || - dstFormat == MESA_FORMAT_RG_SINT16 || - dstFormat == MESA_FORMAT_RGB_SINT16 || - dstFormat == MESA_FORMAT_RGBA_SINT16 || - dstFormat == MESA_FORMAT_A_SINT16 || - dstFormat == MESA_FORMAT_L_SINT16 || - dstFormat == MESA_FORMAT_I_SINT16 || - dstFormat == MESA_FORMAT_LA_SINT16 || - dstFormat == MESA_FORMAT_RGBX_SINT16); - ASSERT(baseInternalFormat == GL_RGBA || - baseInternalFormat == GL_RGB || - baseInternalFormat == GL_RG || - baseInternalFormat == GL_RED || - baseInternalFormat == GL_ALPHA || - baseInternalFormat == GL_LUMINANCE || - baseInternalFormat == GL_LUMINANCE_ALPHA || - baseInternalFormat == GL_INTENSITY); - ASSERT(_mesa_get_format_bytes(dstFormat) == components * sizeof(GLshort)); - - { - /* general path */ - const GLuint *tempImage = make_temp_uint_image(ctx, dims, -baseInternalFormat, -baseFormat, -srcWidth, srcHeight, srcDepth, -srcFormat, srcType, -srcAddr, -
[Mesa-dev] [PATCH 09/12] main/texstore: Use _mesa_swizzle_and_convert when possible
This should be both faster and more accurate than our general slow-path of converting everything to float. Signed-off-by: Jason Ekstrand --- src/mesa/main/texstore.c | 179 +++ 1 file changed, 164 insertions(+), 15 deletions(-) diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c index e1f2284..13fb3a8 100644 --- a/src/mesa/main/texstore.c +++ b/src/mesa/main/texstore.c @@ -55,6 +55,7 @@ #include "bufferobj.h" #include "colormac.h" #include "format_pack.h" +#include "format_utils.h" #include "image.h" #include "macros.h" #include "mipmap.h" @@ -233,21 +234,44 @@ static int get_map_idx(GLenum value) { switch (value) { - case GL_LUMINANCE: return IDX_LUMINANCE; - case GL_ALPHA: return IDX_ALPHA; - case GL_INTENSITY: return IDX_INTENSITY; - case GL_LUMINANCE_ALPHA: return IDX_LUMINANCE_ALPHA; - case GL_RGB: return IDX_RGB; - case GL_RGBA: return IDX_RGBA; - case GL_RED: return IDX_RED; - case GL_GREEN: return IDX_GREEN; - case GL_BLUE: return IDX_BLUE; - case GL_BGR: return IDX_BGR; - case GL_BGRA: return IDX_BGRA; - case GL_ABGR_EXT: return IDX_ABGR; - case GL_RG: return IDX_RG; + case GL_LUMINANCE: + case GL_LUMINANCE_INTEGER_EXT: + return IDX_LUMINANCE; + case GL_ALPHA: + case GL_ALPHA_INTEGER: + return IDX_ALPHA; + case GL_INTENSITY: + return IDX_INTENSITY; + case GL_LUMINANCE_ALPHA: + case GL_LUMINANCE_ALPHA_INTEGER_EXT: + return IDX_LUMINANCE_ALPHA; + case GL_RGB: + case GL_RGB_INTEGER: + return IDX_RGB; + case GL_RGBA: + case GL_RGBA_INTEGER: + return IDX_RGBA; + case GL_RED: + case GL_RED_INTEGER: + return IDX_RED; + case GL_GREEN: + return IDX_GREEN; + case GL_BLUE: + return IDX_BLUE; + case GL_BGR: + case GL_BGR_INTEGER: + return IDX_BGR; + case GL_BGRA: + case GL_BGRA_INTEGER: + return IDX_BGRA; + case GL_ABGR_EXT: + return IDX_ABGR; + case GL_RG: + case GL_RG_INTEGER: + return IDX_RG; default: - _mesa_problem(NULL, "Unexpected inFormat"); + _mesa_problem(NULL, "Unexpected inFormat %s", +_mesa_lookup_enum_by_nr(value)); return 0; } } @@ -789,6 
+813,7 @@ swizzle_copy(GLubyte *dst, GLuint dstComponents, const GLubyte *src, static const GLubyte map_identity[6] = { 0, 1, 2, 3, ZERO, ONE }; static const GLubyte map_3210[6] = { 3, 2, 1, 0, ZERO, ONE }; +static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE }; /** @@ -826,6 +851,12 @@ byteswap_mapping( GLboolean swapBytes, switch (srcType) { case GL_BYTE: case GL_UNSIGNED_BYTE: + case GL_SHORT: + case GL_UNSIGNED_SHORT: + case GL_INT: + case GL_UNSIGNED_INT: + case GL_FLOAT: + case GL_HALF_FLOAT: return map_identity; case GL_UNSIGNED_INT_8_8_8_8: case GL_UNSIGNED_INT_8_8_8_8_REV: @@ -3621,6 +3652,117 @@ texstore_compressed(TEXSTORE_PARAMS) srcFormat, srcType, srcAddr, srcPacking); } +static void +invert_swizzle(uint8_t dst[4], const uint8_t src[4]) +{ + int i, j; + + dst[0] = MESA_FORMAT_SWIZZLE_NONE; + dst[1] = MESA_FORMAT_SWIZZLE_NONE; + dst[2] = MESA_FORMAT_SWIZZLE_NONE; + dst[3] = MESA_FORMAT_SWIZZLE_NONE; + + for (i = 0; i < 4; ++i) + for (j = 0; j < 4; ++j) + if (src[j] == i && dst[i] == MESA_FORMAT_SWIZZLE_NONE) +dst[i] = j; +} + +static GLboolean +texstore_swizzle(TEXSTORE_PARAMS) +{ + const GLint srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth, + srcFormat, srcType); + const GLint srcImageStride = _mesa_image_image_stride(srcPacking, + srcWidth, srcHeight, srcFormat, srcType); + const GLubyte *srcImage = (const GLubyte *) _mesa_image_address(dims, +srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0); + const int src_components = _mesa_components_in_format(srcFormat); + + GLubyte swizzle[4], rgba2base[6], base2src[6], rgba2dst[4], dst2rgba[4]; + const GLubyte *swap; + GLenum dst_type; + int dst_components; + bool is_array, normalized, need_swap; + GLint i, img, row; + const GLubyte *src_row; + GLubyte *dst_row; + + is_array = _mesa_format_to_array(dstFormat, &dst_type, &dst_components, +rgba2dst, &normalized); + + if (!is_array) + return GL_FALSE; + + switch (srcType) { + case GL_FLOAT: + case GL_UNSIGNED_BYTE: + case 
GL_BYTE: + case GL_UNSIGNED_SHORT: + case GL_SHORT: + case GL_UNSIGNED_INT: + case GL_INT: + /* If wa have to swap bytes in a multi-byte datatype, that means + * we're not doing an array conversion anymore */ + if (srcPacking->SwapBytes) + return GL_FALSE; + need_swap = false; + break; + case GL_UNSIGNED_INT_8_8_8_8: + need_swap = srcPacking->SwapBytes; + if (_mesa_little_endian()) + need_swap = !need_
[Mesa-dev] [PATCH] i965: Add missing persample_shading field to brw_wm_debug_recompile.
Otherwise, the performance warning for shader recompiles will just say "something else". Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_wm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index d716e6f..d5a28dc 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -275,6 +275,8 @@ brw_wm_debug_recompile(struct brw_context *brw, old_key->stats_wm, key->stats_wm); found |= key_debug(brw, "flat shading", old_key->flat_shade, key->flat_shade); + found |= key_debug(brw, "per-sample shading", + old_key->persample_shading, key->persample_shading); found |= key_debug(brw, "number of color buffers", old_key->nr_color_regions, key->nr_color_regions); found |= key_debug(brw, "MRT alpha test or alpha-to-coverage", -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Fix z_offset computation in intel_miptree_unmap_depthstencil()
Reviewed-by: Jordan Justen On Wed, Jul 16, 2014 at 3:32 PM, Anuj Phogat wrote: > The bug is triggered by using glTexSubImage2d() with GL_DEPTH_STENCIL > as base internal format and non-zero x, y offsets. Currently x, y > offsets are ignored while updating the texture image. > > Fixes Khronos GLES3 CTS tests: > npot_tex_sub_image_2d > npot_tex_sub_image_3d > npot_pbo_tex_sub_image_2d > npot_pbo_tex_sub_image_2d > > Cc: > Signed-off-by: Anuj Phogat > --- > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 ++-- > 1 file changed, 2 insertions(+), 2 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index 2ab0faa..b36ffc7 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -2129,9 +2129,9 @@ intel_miptree_unmap_depthstencil(struct brw_context > *brw, > x + s_image_x + map->x, > y + s_image_y + map->y, > brw->has_swizzling); > - ptrdiff_t z_offset = ((y + z_image_y) * > + ptrdiff_t z_offset = ((y + z_image_y + map->y) * >(z_mt->pitch / 4) + > - (x + z_image_x)); > + (x + z_image_x + map->x)); > > if (map_z32f_x24s8) { >z_map[z_offset] = packed_map[(y * map->w + x) * 2 + 0]; > -- > 1.9.3 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] radeon/llvm: enable unsafe math for graphics shaders
On Thu, Jul 17, 2014 at 06:44:25PM +0200, Grigori Goronzy wrote: > Accuracy of some operations was recently improved in the R600 backend, > at the cost of slower code. This is required for compute shaders, > but not for graphics shaders. Add unsafe-fp-math hint to make LLVM > generate faster but possibly less accurate code. > > Piglit didn't indicate any regressions. Both patches are: Reviewed-by: Tom Stellard > --- > src/gallium/drivers/radeon/radeon_llvm_emit.c | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c > b/src/gallium/drivers/radeon/radeon_llvm_emit.c > index 1b17dd4..171ccaa 100644 > --- a/src/gallium/drivers/radeon/radeon_llvm_emit.c > +++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c > @@ -26,6 +26,7 @@ > #include "radeon_llvm_emit.h" > #include "radeon_elf_util.h" > #include "util/u_memory.h" > +#include "pipe/p_shader_tokens.h" > > #include > #include > @@ -50,6 +51,10 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type) >sprintf(Str, "%1d", type); > >LLVMAddTargetDependentFunctionAttr(F, "ShaderType", Str); > + > + if (type != TGSI_PROCESSOR_COMPUTE) { > +LLVMAddTargetDependentFunctionAttr(F, "unsafe-fp-math", "true"); > + } > } > > static void init_r600_target() { > -- > 1.8.3.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Release-candidate branch for upcoming 10.2.4
Radeonsi was indeed broken with those fixes (only CubeShadow samplers). The attached patch fixes it. Please add it to 10.2. Thanks. Marek On Tue, Jul 15, 2014 at 6:49 AM, Carl Worth wrote: > Hi folks, > > I've pushed out an update to the 10.2 branch and I need some specific > testing in the next three days. > > I've tested the branch on Intel (Haswell) as well as both swrast and > Gallium softpipe and found no piglit regressions compared to the 10.2.3 > release. > > The branch includes a few patches to nouveau and radeonsi which I have > not been able to test. If someone will test one of these drivers with > piglit and let me know that all looks good, I'll be happy to include the > patches in the release. Otherwise, I'll drop any untested patches before > making the final release on Friday. > > Also, there's still time in the next three days for someone to nominate > further driver-specific changes. I'll just need positive piglit test > results for any such patches, (on top of the branch as it stands now), > before I'll accept them. > > Thanks, all. > > -Carl > > -- > carl.d.wo...@intel.com > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > From e7467178faeab5c6bd5e6c4952fbce74ea3ff9e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 17 Jul 2014 22:24:50 +0200 Subject: [PATCH] radeonsi: add support for TXB2 This is needed by latest fixes for samplerCubeShadow with bias. Otherwise, a crash occurs. 
--- src/gallium/drivers/radeonsi/si_shader.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 09c99eb..397b6ee 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -1539,6 +1539,8 @@ static void tex_fetch_args( /* Pack LOD bias value */ if (opcode == TGSI_OPCODE_TXB) address[count++] = coords[3]; + if (opcode == TGSI_OPCODE_TXB2) + address[count++] = lp_build_emit_fetch(bld_base, inst, 1, 0); /* Pack depth comparison value */ switch (target) { @@ -2497,6 +2499,7 @@ int si_pipe_shader_create( bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action; bld_base->op_actions[TGSI_OPCODE_TXB] = txb_action; + bld_base->op_actions[TGSI_OPCODE_TXB2] = txb_action; #if HAVE_LLVM >= 0x0304 bld_base->op_actions[TGSI_OPCODE_TXD] = txd_action; #endif -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Add missing persample_shading field to brw_wm_debug_recompile.
Reviewed-by: Jordan Justen On Thu, Jul 17, 2014 at 11:18 AM, Kenneth Graunke wrote: > Otherwise, the performance warning for shader recompiles will just say > "something else". > > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_wm.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_wm.c > b/src/mesa/drivers/dri/i965/brw_wm.c > index d716e6f..d5a28dc 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm.c > +++ b/src/mesa/drivers/dri/i965/brw_wm.c > @@ -275,6 +275,8 @@ brw_wm_debug_recompile(struct brw_context *brw, >old_key->stats_wm, key->stats_wm); > found |= key_debug(brw, "flat shading", >old_key->flat_shade, key->flat_shade); > + found |= key_debug(brw, "per-sample shading", > + old_key->persample_shading, key->persample_shading); > found |= key_debug(brw, "number of color buffers", >old_key->nr_color_regions, key->nr_color_regions); > found |= key_debug(brw, "MRT alpha test or alpha-to-coverage", > -- > 2.0.0 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i915: Fix up intelInitScreen2 for DRI3
On Thursday, July 03, 2014 10:13:53 PM Adel Gadllah wrote: > Commit 442442026eb updated both i915 and i965 for DRI3 support, > but one check in intelInitScreen2 was missed for i915 causing crashes > when trying to use i915 with DRI3. > > So fix that up. > > Reported-by: Igor Gnatenko > Tested-by: František Zatloukal > Reference: https://bugzilla.redhat.com/show_bug.cgi?id=1115323 > Cc: "10.2" > Signed-off-by: Adel Gadllah > --- > src/mesa/drivers/dri/i915/intel_screen.c | 3 ++- > 1 file changed, 2 insertions(+), 1 deletion(-) > > diff --git a/src/mesa/drivers/dri/i915/intel_screen.c b/src/mesa/drivers/dri/i915/intel_screen.c > index 9b4e490..4c9726c 100644 > --- a/src/mesa/drivers/dri/i915/intel_screen.c > +++ b/src/mesa/drivers/dri/i915/intel_screen.c > @@ -1152,7 +1152,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) > { > struct intel_screen *intelScreen; > > - if (psp->dri2.loader->base.version <= 2 || > + if (psp->image.loader) { > + } else if (psp->dri2.loader->base.version <= 2 || > psp->dri2.loader->getBuffersWithFormat == NULL) { >fprintf(stderr, > "\nERROR! DRI2 loader with getBuffersWithFormat() " > Pushed, thanks. I didn't actually test it, but it does look like the image loader support is there (in commit 442442026eb), and this really was just missed. signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [Mesa-stable] Release-candidate branch for upcoming 10.2.4
Marek Olšák writes: > Radeonsi was indeed broken with those fixes (only CubeShadow > samplers). The attached patch fixes it. Please add it to 10.2. Thanks. Thanks, Marek! This is exactly the kind of breakage I was worried about, and exactly the kind of test reporting I was hoping for. So thanks, again. I'm glad that the new plan with a window for testing is proving useful. I've pushed out an update to the 10.2 branch with this fix. -Carl pgpAA2lGvIT30.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] Add support for RGBA8 and RGBX8 textures in intel_texsubimage_tiled_memcpy
Signed-off-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/intel_tex_subimage.c | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c index 04cbc4c..4043eb7 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -585,6 +585,16 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, } else if (format == GL_RGBA) { mem_copy = rgba8_copy; } + } else if ((texImage->TexFormat == MESA_FORMAT_R8G8B8A8_UNORM) || + (texImage->TexFormat == MESA_FORMAT_R8G8B8X8_UNORM)) { + cpp = 4; + if (format == GL_BGRA) { + /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can + * use the same function. */ + mem_copy = rgba8_copy; + } else if (format == GL_RGBA) { + mem_copy = memcpy; + } } if (!mem_copy) return false; -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] i965: Improve debug output in intelTexImage and intelTexSubimage
Signed-off-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/intel_tex_image.c| 4 +++- src/mesa/drivers/dri/i965/intel_tex_subimage.c | 6 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index de0546a..029d59b 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -173,8 +173,10 @@ intelTexImage(struct gl_context * ctx, { bool ok; - DBG("%s target %s level %d %dx%dx%d\n", __FUNCTION__, + DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", + __FUNCTION__, _mesa_get_format_name(texImage->TexFormat), _mesa_lookup_enum_by_nr(texImage->TexObject->Target), + _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), texImage->Level, texImage->Width, texImage->Height, texImage->Depth); ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage, diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c index 03e69ae..04cbc4c 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -667,6 +667,12 @@ intelTexSubImage(struct gl_context * ctx, { bool ok; + DBG("%s mesa_format %s target %s format %s type %s level %d %dx%dx%d\n", + __FUNCTION__, _mesa_get_format_name(texImage->TexFormat), + _mesa_lookup_enum_by_nr(texImage->TexObject->Target), + _mesa_lookup_enum_by_nr(format), _mesa_lookup_enum_by_nr(type), + texImage->Level, texImage->Width, texImage->Height, texImage->Depth); + ok = intel_texsubimage_tiled_memcpy(ctx, dims, texImage, xoffset, yoffset, zoffset, width, height, depth, -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Improve debug output in intelTexImage and intelTexSubimage
These two are Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/7] i965: Replace cfg instances with calls to calculate_cfg().
Avoids regenerating it unnecessarily. Every program in shader-db improved, none by an amount less than a 1/3 reduction. One Dota2 shader decreased from 62 -> 24. cfg calculations: 429492 -> 193197 (-55.02%) --- .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 20 +++- .../dri/i965/brw_fs_peephole_predicated_break.cpp| 6 +++--- src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp| 6 +++--- src/mesa/drivers/dri/i965/brw_vec4.cpp | 6 +++--- src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 6 ++ 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 10e19d8..8a54969 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -594,31 +594,33 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, bool fs_visitor::opt_copy_propagate() { + calculate_cfg(); + bool progress = false; void *copy_prop_ctx = ralloc_context(NULL); - cfg_t cfg(&instructions); - exec_list *out_acp[cfg.num_blocks]; - for (int i = 0; i < cfg.num_blocks; i++) + exec_list *out_acp[cfg->num_blocks]; + + for (int i = 0; i < cfg->num_blocks; i++) out_acp[i] = new exec_list [ACP_HASH_SIZE]; /* First, walk through each block doing local copy propagation and getting * the set of copies available at the end of the block. */ - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; progress = opt_copy_propagate_local(copy_prop_ctx, block, out_acp[b]) || progress; } /* Do dataflow analysis for those available copies. */ - fs_copy_prop_dataflow dataflow(copy_prop_ctx, &cfg, out_acp); + fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, out_acp); /* Next, re-run local copy propagation, this time with the set of copies * provided by the dataflow analysis available at the start of a block. 
*/ - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; exec_list in_acp[ACP_HASH_SIZE]; for (int i = 0; i < dataflow.num_acp; i++) { @@ -631,7 +633,7 @@ fs_visitor::opt_copy_propagate() progress = opt_copy_propagate_local(copy_prop_ctx, block, in_acp) || progress; } - for (int i = 0; i < cfg.num_blocks; i++) + for (int i = 0; i < cfg->num_blocks; i++) delete [] out_acp[i]; ralloc_free(copy_prop_ctx); diff --git a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp index bb0a2ac..3ba0b26 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_peephole_predicated_break.cpp @@ -45,10 +45,10 @@ fs_visitor::opt_peephole_predicated_break() { bool progress = false; - cfg_t cfg(&instructions); + calculate_cfg(); - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; /* BREAK and CONTINUE instructions, by definition, can only be found at * the ends of basic blocks. diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index db0be19..cf47cb5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -127,10 +127,10 @@ fs_visitor::opt_peephole_sel() { bool progress = false; - cfg_t cfg(&instructions); + calculate_cfg(); - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; /* IF instructions, by definition, can only be found at the ends of * basic blocks. 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 9ea0b14..49a4e9b 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -748,13 +748,13 @@ vec4_visitor::opt_set_dependency_control() vec4_instruction *last_mrf_write[BRW_MAX_GRF]; uint8_t mrf_channels_written[BRW_MAX_GRF]; - cfg_t cfg(&instructions); + calculate_cfg(); assert(prog_data->total_grf || !"Must be called after register allocation"); - for (int i = 0; i < cfg.num_blocks; i++) { - bblock_t *bblock = cfg.blocks[i]; + for (int i = 0; i < cfg->num_blocks; i++) { + bblock_t *bblock = cfg->blocks[i]; vec4_instruction *inst; memset(last_grf_write, 0, sizeof(last_grf_write)); diff
[Mesa-dev] [PATCH 7/7] i965: Pass a cfg pointer to generate_{code, assembly}.
--- src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 4 +++- src/mesa/drivers/dri/i965/brw_fs.cpp | 10 ++ src/mesa/drivers/dri/i965/brw_fs.h| 12 ++-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp| 22 +- src/mesa/drivers/dri/i965/brw_vec4.cpp| 6 -- src/mesa/drivers/dri/i965/brw_vec4.h | 8 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 12 src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 10 +- src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 22 +- src/mesa/drivers/dri/i965/gen8_vec4_generator.cpp | 12 src/mesa/drivers/dri/i965/intel_asm_annotation.c | 2 +- src/mesa/drivers/dri/i965/intel_asm_annotation.h | 2 +- 12 files changed, 56 insertions(+), 66 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index c1676a9..8fa2e0e 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -24,6 +24,7 @@ #include "glsl/ralloc.h" #include "brw_blorp_blit_eu.h" #include "brw_blorp.h" +#include "brw_cfg.h" brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw, bool debug_flag) @@ -43,7 +44,8 @@ brw_blorp_eu_emitter::~brw_blorp_eu_emitter() const unsigned * brw_blorp_eu_emitter::get_program(unsigned *program_size) { - return generator.generate_assembly(NULL, &insts, program_size); + cfg_t cfg(&insts); + return generator.generate_assembly(NULL, &cfg, program_size); } /** diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 56a0183..3ce909b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3202,6 +3202,8 @@ fs_visitor::run() */ assert(sanity_param_count == fp->Base.Parameters->NumParameters); + calculate_cfg(); + return !failed; } @@ -3245,7 +3247,7 @@ brw_wm_fs_emit(struct brw_context *brw, return NULL; } - exec_list *simd16_instructions = NULL; + cfg_t *simd16_cfg = NULL; fs_visitor v2(brw, mem_ctx, key, prog_data, prog, fp, 16); if (brw->gen 
>= 5 && likely(!(INTEL_DEBUG & DEBUG_NO16))) { if (!v.simd16_unsupported) { @@ -3255,7 +3257,7 @@ brw_wm_fs_emit(struct brw_context *brw, perf_debug("SIMD16 shader failed to compile, falling back to " "SIMD8 at a 10-20%% performance cost: %s", v2.fail_msg); } else { -simd16_instructions = &v2.instructions; +simd16_cfg = v2.cfg; } } else { perf_debug("SIMD16 shader unsupported, falling back to " @@ -3266,12 +3268,12 @@ brw_wm_fs_emit(struct brw_context *brw, const unsigned *assembly = NULL; if (brw->gen >= 8) { gen8_fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src); - assembly = g.generate_assembly(&v.instructions, simd16_instructions, + assembly = g.generate_assembly(v.cfg, simd16_cfg, final_assembly_size); } else { fs_generator g(brw, mem_ctx, key, prog_data, prog, fp, v.do_dual_src, v.runtime_check_aads_emit, INTEL_DEBUG & DEBUG_WM); - assembly = g.generate_assembly(&v.instructions, simd16_instructions, + assembly = g.generate_assembly(v.cfg, simd16_cfg, final_assembly_size); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 9ba3f38..009a6d5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -578,12 +578,12 @@ public: bool debug_flag); ~fs_generator(); - const unsigned *generate_assembly(exec_list *simd8_instructions, - exec_list *simd16_instructions, + const unsigned *generate_assembly(const cfg_t *simd8_cfg, + const cfg_t *simd16_cfg, unsigned *assembly_size); private: - void generate_code(exec_list *instructions); + void generate_code(const cfg_t *cfg); void fire_fb_write(fs_inst *inst, GLuint base_reg, struct brw_reg implied_header, @@ -706,12 +706,12 @@ public: bool dual_source_output); ~gen8_fs_generator(); - const unsigned *generate_assembly(exec_list *simd8_instructions, - exec_list *simd16_instructions, + const unsigned *generate_assembly(const cfg_t *simd8_cfg, + const cfg_t *simd16_cfg, unsigned *assembly_size); private: - void 
generate_code(exec_list *instructions); + void generate_code(const cfg_t *cfg); void generate_fb_write(fs_inst *inst);
[Mesa-dev] [PATCH 0/7] i965: Preserve the CFG (first pass)
We'd like to avoid regenerating the control flow graph for every pass that uses it. This series adds a cfg pointer to the backend visitor class that we use to save the CFG across optimization passes. It's invalidated and recreated by invalidate/calculate_cfg and these functions are called by the similarly named live intervals functions. Just by doing this, we reduce the number of times we calculate the CFG in a shader-db run by 55%. Ultimately I'd like the CFG to be a fundamental data structure in our backend, where each basic block contains its own list of instructions rather than basic blocks containing pointers into a large list. My WIP branch is getting a bit big, so here's a digestible chunk that's ready for master. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/7] i965/vec4: Use foreach_inst_in_block a couple more places.
--- src/mesa/drivers/dri/i965/brw_vec4.cpp | 5 + src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 5 + 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 49a4e9b..045e1c5 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -755,14 +755,11 @@ vec4_visitor::opt_set_dependency_control() for (int i = 0; i < cfg->num_blocks; i++) { bblock_t *bblock = cfg->blocks[i]; - vec4_instruction *inst; memset(last_grf_write, 0, sizeof(last_grf_write)); memset(last_mrf_write, 0, sizeof(last_mrf_write)); - for (inst = (vec4_instruction *)bblock->start; - inst != (vec4_instruction *)bblock->end->next; - inst = (vec4_instruction *)inst->next) { + foreach_inst_in_block (vec4_instruction, inst, bblock) { /* If we read from a register that we were doing dependency control * on, don't do dependency control across the read. */ diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp index 86360d2..29d2e02 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp @@ -132,10 +132,7 @@ vec4_visitor::opt_cse_local(bblock_t *block) void *cse_ctx = ralloc_context(NULL); int ip = block->start_ip; - for (vec4_instruction *inst = (vec4_instruction *)block->start; -inst != block->end->next; -inst = (vec4_instruction *) inst->next) { - + foreach_inst_in_block (vec4_instruction, inst, block) { /* Skip some cases. */ if (is_expression(inst) && !inst->predicate && inst->mlen == 0 && (inst->dst.file != HW_REG || inst->dst.is_null())) -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/7] i965/cfg: Add a foreach_block_and_inst macro.
Will let us abstract how the instructions are stored. --- src/mesa/drivers/dri/i965/brw_cfg.h | 4 1 file changed, 4 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h index cdbadde..01fcc1b 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.h +++ b/src/mesa/drivers/dri/i965/brw_cfg.h @@ -103,6 +103,10 @@ struct cfg_t { int num_blocks; }; +#define foreach_block_and_inst(__block, __type, __inst, __cfg) \ + foreach_block (__block, __cfg) \ + foreach_inst_in_block (__type, __inst, __block) + #define foreach_inst_in_block(__type, __inst, __block) \ for (__type *__inst = (__type *)__block->start; \ __inst != __block->end->next; \ -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 6/7] i965: Add and use foreach_block macro.
Use this as an opportunity to rename 'block_num' to 'num'. block->num is clear, and block->block_num has always been redundant. --- src/mesa/drivers/dri/i965/brw_cfg.cpp | 17 ++--- src/mesa/drivers/dri/i965/brw_cfg.h| 5 +- .../drivers/dri/i965/brw_dead_control_flow.cpp | 3 +- .../drivers/dri/i965/brw_fs_copy_propagation.cpp | 89 ++ src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 4 +- .../dri/i965/brw_fs_dead_code_eliminate.cpp| 5 +- .../drivers/dri/i965/brw_fs_live_variables.cpp | 50 ++-- .../dri/i965/brw_fs_peephole_predicated_break.cpp | 9 +-- .../dri/i965/brw_fs_saturate_propagation.cpp | 5 +- src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 4 +- src/mesa/drivers/dri/i965/brw_vec4.cpp | 6 +- src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 4 +- .../drivers/dri/i965/brw_vec4_live_variables.cpp | 50 ++-- src/mesa/drivers/dri/i965/intel_asm_annotation.c | 8 +- 14 files changed, 116 insertions(+), 143 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp index 4a5c912..d806b83 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp @@ -51,7 +51,7 @@ link(void *mem_ctx, bblock_t *block) } bblock_t::bblock_t() : - start_ip(0), end_ip(0), block_num(0) + start_ip(0), end_ip(0), num(0) { start = NULL; end = NULL; @@ -284,7 +284,7 @@ cfg_t::set_next_block(bblock_t **cur, bblock_t *block, int ip) } block->start_ip = ip; - block->block_num = num_blocks++; + block->num = num_blocks++; block_list.push_tail(&block->link); *cur = block; } @@ -295,7 +295,7 @@ cfg_t::make_block_array() blocks = ralloc_array(mem_ctx, bblock_t *, num_blocks); int i = 0; - foreach_list_typed(bblock_t, block, link, &block_list) { + foreach_block (block, this) { blocks[i++] = block; } assert(i == num_blocks); @@ -304,19 +304,18 @@ cfg_t::make_block_array() void cfg_t::dump(backend_visitor *v) { - for (int b = 0; b < this->num_blocks; b++) { -bblock_t *block = this->blocks[b]; - fprintf(stderr, "START B%d", b); + 
foreach_block (block, this) { + fprintf(stderr, "START B%d", block->num); foreach_list_typed(bblock_link, link, link, &block->parents) { fprintf(stderr, " <-B%d", - link->block->block_num); + link->block->num); } fprintf(stderr, "\n"); block->dump(v); - fprintf(stderr, "END B%d", b); + fprintf(stderr, "END B%d", block->num); foreach_list_typed(bblock_link, link, link, &block->children) { fprintf(stderr, " ->B%d", - link->block->block_num); + link->block->num); } fprintf(stderr, "\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h index 324df6c..f7203e2 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.h +++ b/src/mesa/drivers/dri/i965/brw_cfg.h @@ -71,7 +71,7 @@ struct bblock_t { struct exec_list parents; struct exec_list children; - int block_num; + int num; /* If the current basic block ends in an IF, ELSE, or ENDIF instruction, * these pointers will hold the locations of the other associated control @@ -109,6 +109,9 @@ struct cfg_t { foreach_block (__block, __cfg) \ foreach_inst_in_block (__type, __inst, __block) +#define foreach_block(__block, __cfg) \ + foreach_list_typed (bblock_t, __block, link, &(__cfg)->block_list) + #define foreach_inst_in_block(__type, __inst, __block) \ for (__type *__inst = (__type *)__block->start; \ __inst != __block->end->next; \ diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp index 14c6898..1591991 100644 --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp @@ -41,8 +41,7 @@ dead_control_flow_eliminate(backend_visitor *v) v->calculate_cfg(); - for (int b = 0; b < v->cfg->num_blocks; b++) { - bblock_t *block = v->cfg->blocks[b]; + foreach_block (block, v->cfg) { bool found = false; /* ENDIF instructions, by definition, can only be found at the start of diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index 8a54969..52d3328 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -104,9 +104,9 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg, bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); num_acp = 0; - for (int b = 0; b < cfg->num_blocks; b++) { + foreach_block (block, cfg) {
[Mesa-dev] [PATCH 1/7] i965: Add cfg to backend_visitor.
--- src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp | 6 +++--- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_cse.cpp| 7 +++ .../drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 8 +++- src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp | 12 +--- .../drivers/dri/i965/brw_fs_saturate_propagation.cpp| 8 +++- src/mesa/drivers/dri/i965/brw_shader.cpp| 17 - src/mesa/drivers/dri/i965/brw_shader.h | 5 + src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp | 16 +--- 9 files changed, 48 insertions(+), 33 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp index 63a3e5b..14c6898 100644 --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp @@ -39,10 +39,10 @@ dead_control_flow_eliminate(backend_visitor *v) { bool progress = false; - cfg_t cfg(&v->instructions); + v->calculate_cfg(); - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < v->cfg->num_blocks; b++) { + bblock_t *block = v->cfg->blocks[b]; bool found = false; /* ENDIF instructions, by definition, can only be found at the start of diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 9c76bd2..9ba3f38 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -330,7 +330,7 @@ public: void assign_constant_locations(); void demote_pull_constants(); void invalidate_live_intervals(); - void calculate_live_intervals(const cfg_t *cfg = NULL); + void calculate_live_intervals(); void calculate_register_pressure(); bool opt_algebraic(); bool opt_cse(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index d435d84..63d87f9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -315,11 +315,10 @@ fs_visitor::opt_cse() { bool progress = false; 
- cfg_t cfg(&instructions); - calculate_live_intervals(&cfg); + calculate_live_intervals(); - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; progress = opt_cse_local(block) || progress; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp index d41a42c..c00ec1b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp @@ -39,15 +39,13 @@ fs_visitor::dead_code_eliminate() { bool progress = false; - cfg_t cfg(&instructions); - - calculate_live_intervals(&cfg); + calculate_live_intervals(); int num_vars = live_intervals->num_vars; BITSET_WORD *live = ralloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars)); - for (int b = 0; b < cfg.num_blocks; b++) { - bblock_t *block = cfg.blocks[b]; + for (int b = 0; b < cfg->num_blocks; b++) { + bblock_t *block = cfg->blocks[b]; memcpy(live, live_intervals->bd[b].liveout, sizeof(BITSET_WORD) * BITSET_WORDS(num_vars)); diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index 585dc3d..57f3ce4 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -295,6 +295,8 @@ fs_visitor::invalidate_live_intervals() { ralloc_free(live_intervals); live_intervals = NULL; + + invalidate_cfg(); } /** @@ -304,7 +306,7 @@ fs_visitor::invalidate_live_intervals() * information about whole VGRFs. 
*/ void -fs_visitor::calculate_live_intervals(const cfg_t *cfg) +fs_visitor::calculate_live_intervals() { if (this->live_intervals) return; @@ -320,12 +322,8 @@ fs_visitor::calculate_live_intervals(const cfg_t *cfg) virtual_grf_end[i] = -1; } - if (cfg) { - this->live_intervals = new(mem_ctx) fs_live_variables(this, cfg); - } else { - cfg_t cfg(&instructions); - this->live_intervals = new(mem_ctx) fs_live_variables(this, &cfg); - } + calculate_cfg(); + this->live_intervals = new(mem_ctx) fs_live_variables(this, cfg); /* Merge the per-component live ranges to whole VGRF live ranges. */ for (int i = 0; i < live_intervals->num_vars; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp index 1287adb..0e04d3f 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_saturate_propagation.cpp @@ -93,12 +93,10 @
[Mesa-dev] [PATCH 5/7] i965/cfg: Embed link in bblock_t for main block list.
The next patch adds a foreach_block (block, cfg) macro, which works better if it provides a direct bblock_t pointer, rather than a bblock_link pointer that you have to use to find the actual block. --- src/mesa/drivers/dri/i965/brw_cfg.cpp | 10 +- src/mesa/drivers/dri/i965/brw_cfg.h | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp index 07111f5..4a5c912 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp @@ -67,8 +67,8 @@ bblock_t::bblock_t() : void bblock_t::add_successor(void *mem_ctx, bblock_t *successor) { - successor->parents.push_tail(link(mem_ctx, this)); - children.push_tail(link(mem_ctx, successor)); + successor->parents.push_tail(::link(mem_ctx, this)); + children.push_tail(::link(mem_ctx, successor)); } void @@ -285,7 +285,7 @@ cfg_t::set_next_block(bblock_t **cur, bblock_t *block, int ip) block->start_ip = ip; block->block_num = num_blocks++; - block_list.push_tail(link(mem_ctx, block)); + block_list.push_tail(&block->link); *cur = block; } @@ -295,8 +295,8 @@ cfg_t::make_block_array() blocks = ralloc_array(mem_ctx, bblock_t *, num_blocks); int i = 0; - foreach_list_typed(bblock_link, link, link, &block_list) { - blocks[i++] = link->block; + foreach_list_typed(bblock_t, block, link, &block_list) { + blocks[i++] = block; } assert(i == num_blocks); } diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h b/src/mesa/drivers/dri/i965/brw_cfg.h index 01fcc1b..324df6c 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.h +++ b/src/mesa/drivers/dri/i965/brw_cfg.h @@ -61,6 +61,8 @@ struct bblock_t { void dump(backend_visitor *v); #endif + struct exec_node link; + struct backend_instruction *start; struct backend_instruction *end; -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/6] Enable Pixman for Mesa
Juha-Pekka, The patch I just sent to the list here: http://lists.freedesktop.org/archives/mesa-dev/2014-July/063501.html Should also help with chrome performance. We have had a fast-path inside the intel driver for some time to do BGRA/RGBA -> BGRA conversions. For some reason, chrome seems to have switched to using RGBA textures (they used to use BGRA). The above patch enables the fastpath for converting to RGBA as well as BGRA. Because this fastpath is in the driver itself and handles tiling directly, it's much faster than anything that goes through the paths in main/texstore.c. It would be interesting to note what that does to power consumption. --Jason Ekstrand On Wed, Jun 25, 2014 at 5:38 AM, Juha-Pekka Heikkila < juhapekka.heikk...@gmail.com> wrote: > This is my old set which enable using Pixman on Mesa for some texture > conversion fast paths. As is this passes Piglit quick set on my IVB. > > For general case this does not offer essentially better fps or such but > these were recently tested on Chromebook with HSW inside where it showed > promising drop on Watt drain. With/without Pixman Mesa was compiled > with march=corei7 > > Number here indicate % change in Watts, for comparison was used averages > of multiple runs. While Watt drain reduced performance remained +/- on > the same level. 
> > Javascript - Octane v20.457% > Page Rendering - Page Loading-0.280% > HTML5 Games - JSGameBench-2.275% > HTML5 Games - AngryBirds -4.483% > Canvas 2D - CanvasEarth -4.003% > Canvas2D - FishTank 250 fish -3.176% > WebGL - EarthScreen -2.959% > Browser Performance - RoboHornet -5.833% > WebGL - Aquarium 50 fish -4.448% > WebGL - Aquarium 100 fish-4.094% > > /Juha-Pekka > > Juha-Pekka Heikkila (6): > mesa: Add Pixman library to configure.ac > mesa: Add Pixman usage to texstore.c > mesa: Pixman texture conversion rgb to abgr > mesa: Pixman texture conversion argb to abgr > mesa: Pixman texture conversion argb to rgb > mesa: Pixman texture conversion rgb to rgb565 and rgb565_rev > > configure.ac | 32 +++ > src/mesa/main/texstore.c | 105 > +-- > 2 files changed, 134 insertions(+), 3 deletions(-) > > -- > 1.8.1.2 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965/disasm: Don't disassemble the URB complete field on Broadwell.
It doesn't exist, so attempting to read it will trigger generation assertions in the brw_inst API. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_disasm.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index d7362c1..f66865f 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -1410,8 +1410,10 @@ brw_disassemble_inst(FILE *file, struct brw_context *brw, brw_inst *inst, err |= control(file, "urb used", urb_used, brw_inst_urb_used(brw, inst), &space); } -err |= control(file, "urb complete", urb_complete, - brw_inst_urb_complete(brw, inst), &space); +if (brw->gen < 8) { + err |= control(file, "urb complete", urb_complete, + brw_inst_urb_complete(brw, inst), &space); +} break; case BRW_SFID_THREAD_SPAWNER: break; -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 79949] [DRI3] GTK+ Programs Not Updating Correctly
https://bugs.freedesktop.org/show_bug.cgi?id=79949 Mau changed: What|Removed |Added CC||mav...@gmail.com -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 79949] [DRI3] GTK+ Programs Not Updating Correctly
https://bugs.freedesktop.org/show_bug.cgi?id=79949 --- Comment #7 from Axel Davy --- It could be solved by this commit: http://cgit.freedesktop.org/mesa/mesa/commit/?h=10.2&id=3ca21195939477fa1f9b113ce4f6da36a8480321 Please test mesa git or mesa 10.2.3 -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/2] Add support for RGBA8 and RGBX8 textures in intel_texsubimage_tiled_memcpy
On Thursday, July 17, 2014 02:41:31 PM Jason Ekstrand wrote: > Signed-off-by: Jason Ekstrand > --- > src/mesa/drivers/dri/i965/intel_tex_subimage.c | 10 ++ > 1 file changed, 10 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c > b/src/mesa/drivers/dri/i965/intel_tex_subimage.c > index 04cbc4c..4043eb7 100644 > --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c > +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c > @@ -585,6 +585,16 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, >} else if (format == GL_RGBA) { > mem_copy = rgba8_copy; >} > + } else if ((texImage->TexFormat == MESA_FORMAT_R8G8B8A8_UNORM) || > + (texImage->TexFormat == MESA_FORMAT_R8G8B8X8_UNORM)) { > + cpp = 4; > + if (format == GL_BGRA) { > + /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can > + * use the same function. */ */ goes on a separate line. Patch 2 is: Reviewed-by: Kenneth Graunke (patch 1 looks fine too but I didn't check it very thoroughly) > + mem_copy = rgba8_copy; > + } else if (format == GL_RGBA) { > + mem_copy = memcpy; > + } > } > if (!mem_copy) >return false; > signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] i965: Disable hex offset printing in disassembly.
Printing the hex offsets makes it basically impossible to diff assembly: if you add even a single instruction, the entire shader shows up as a difference. So, every time I want to compare assembly, I have to strip this out. The hex offsets might be useful when debugging compaction, or when inspecting the program cache buffer. Since it's occasionally useful, but uncommon, this patch disables it by default, but makes it easy to re-enable it temporarily when the need arises. Signed-off-by: Kenneth Graunke Cc: Matt Turner --- src/mesa/drivers/dri/i965/brw_eu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.c b/src/mesa/drivers/dri/i965/brw_eu.c index f4c7495..a400b44 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.c +++ b/src/mesa/drivers/dri/i965/brw_eu.c @@ -241,7 +241,8 @@ brw_disassemble(struct brw_context *brw, brw_inst *insn = assembly + offset; brw_inst uncompacted; bool compacted = brw_inst_cmpt_control(brw, insn); - fprintf(out, "0x%08x: ", offset); + if (0) + fprintf(out, "0x%08x: ", offset); if (compacted) { brw_compact_inst *compacted = (void *)insn; -- 2.0.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/23] glsl/glcpp: Stop using a lexer start condition (<SKIP>) for token skipping.
On Thu, Jun 26, 2014 at 3:19 PM, Carl Worth wrote: > Here, "skipping" refers to the lexer not emitting any tokens for portions of > the file within an #if condition (or similar) that evaluates to false. > > Previously, the lexer had a special <SKIP> start condition used to control > this skipping. This start condition was not handled like a normal start > condition. Instead, there was a particularly ugly block of code set to be > included at the top of the generated lexing loop that would change from > <INITIAL> to <SKIP> or from <SKIP> to <INITIAL> depending on various pieces of > parser state, (such as parser->skip_state and parser->lexing_directive). > > Not only was that an ugly approach, but the <SKIP> start condition was > complicating several glcpp bug fixes I attempted recently that want to use > start conditions for other purposes, (such as a new <HASH> start condition). > > The recently added RETURN_TOKEN macro gives us a convenient way to implement > skipping without using a lexer start condition. Now, at the top of the > generated lexer, we examine all the necessary parser state and set a new > parser->skipping bit. Then, in RETURN_TOKEN, we examine parser->skipping to > determine whether to actually emit the token or not. > > Besides this, there are only a couple of other places where we need to examine > the skipping bit (other than when returning a token): > > * To avoid emitting an error for #error if skipped. > > * To avoid entering the <DEFINE> start condition for a #define that is > skipped. > > With all of this in place in the present commit, there are hopefully no > behavioral changes with this patch, ("make check" still passes all of the > glcpp tests at least). 
> --- > src/glsl/glcpp/glcpp-lex.l | 160 > ++- > src/glsl/glcpp/glcpp-parse.y | 1 + > src/glsl/glcpp/glcpp.h | 1 + > 3 files changed, 99 insertions(+), 63 deletions(-) > > diff --git a/src/glsl/glcpp/glcpp-lex.l b/src/glsl/glcpp/glcpp-lex.l > index 37fcc84..cb06bb8 100644 > --- a/src/glsl/glcpp/glcpp-lex.l > +++ b/src/glsl/glcpp/glcpp-lex.l > @@ -61,19 +61,52 @@ void glcpp_set_column (int column_no , yyscan_t > yyscanner); > yylloc->source = 0; \ > } while(0) > > -#define RETURN_TOKEN(token)\ > +/* It's ugly to have macros that have return statements inside of > + * them, but flex-based lexer generation is all built around the > + * return statement. > + * > + * To mitigate the ugliness, we defer as much of the logic as possible > + * to an actual function, not a macro (see > + * glcpplex_update_state_per_token) and we make the word RETURN > + * prominent in all of the macros which may return. > + * > + * The most-commonly-used macro is RETURN_TOKEN which will perform all > + * necessary state updates based on the provided token,, then > + * conditionally return the token. It will not return a token if the > + * parser is currently skipping tokens, (such as within #if > + * 0...#else). > + * > + * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that > + * makes the token returning unconditional. This is needed for things > + * like #if and the tokens of its condition, (since these must be > + * evaluated by the parser even when otherwise skipping). > + * > + * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top > + * of RETURN_TOKEN that performs a string copy of yytext before the > + * return. > + */ > +#define RETURN_TOKEN_NEVER_SKIP(token) \ > do {\ > if (token == NEWLINE) \ > parser->last_token_was_newline = 1; \ > else\ > parser->last_token_was_newline = 0; \ > return (token); \ > + } while (0) > + > +#define RETURN_TOKEN(token)\ > + do {\ > + if (! 
parser->skipping) { \ > + RETURN_TOKEN_NEVER_SKIP(token); \ It looks like parser->last_token_was_newline will not be updated while skipping. Should we update that during the skipping even though we're not returning tokens? > + } \ > } while(0) > > -#define RETURN_STRING_TOKEN(token) \ > - do {\ > - yylval->str = ralloc_strdup (yyextra, yytext); \ > - RETURN_TOKEN (token); \ > +#define RETURN_STRING_TOKEN(token) \ > + do {\ > + if (! parser->skipping)
[Mesa-dev] [PATCH] i965: Don't print WE_normal in disassembly.
Dropping this helps most lines fit in an 80 column terminal. The absence of WE_normal also helps call attention to WE_all, where something unusual is going on. Signed-off-by: Kenneth Graunke Cc: Matt Turner --- src/mesa/drivers/dri/i965/brw_disasm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index f66865f..c6f981a 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -213,7 +213,7 @@ static const char *const accwr[2] = { }; static const char *const wectrl[2] = { - [0] = "WE_normal", + [0] = "", [1] = "WE_all" }; -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Don't print WE_normal in disassembly.
Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Disable hex offset printing in disassembly.
Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965/disasm: Don't disassemble the URB complete field on Broadwell.
Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] glsl/glcpp: A bunch of pre-processor cleanups
Made it ~25% through. :) I'll be busy for a bit, but I'll continue looking at the rest later. 01/23 glsl/glcpp: Emit proper error for #define with a non-identifier Reviewed-by: Jordan Justen 02/23 glsl/glcpp: Add support for comments between #define and macro identifier Reviewed-by: Jordan Justen 03/23 glsl/glcpp: Remove some un-needed calls to NEWLINE_CATCHUP * Reference 6005e9cb in comment? Reviewed-by: Jordan Justen 04/23 glsl/glcpp: Add testing for EOF sans newline (and fix for <DEFINE>, <COMMENT>) Reviewed-by: Jordan Justen 05/23 glsl/glcpp: Drop extra, final newline from most output * In the "\n {" section, you set "parser->last_token_was_newline = 1;" Doesn't "RETURN_TOKEN (NEWLINE);" do this as well? Reviewed-by: Jordan Justen 06/23 glsl/glcpp: Abstract a bit of common code for returning string tokens Reviewed-by: Jordan Justen On Thu, Jun 26, 2014 at 3:19 PM, Carl Worth wrote: > Here's my latest series of patches to improve conformance of glcpp, (the glsl > preprocessor in mesa). > > Most of these changes are fixes that only a test-suite author could love. Most > fix nit-picky tests that do things that no sane application would actually > do. They're all reasonable things to do, but few are likely to impact many > real applications. > > The entire series (as well as some earlier patches already reviewed) can be > found on the glcpp-fixup branch of my mesa tree: > > git://people.freedesktop.org/~cworth/mesa > > Here's a run-down of what the changes are in this series: > > Patch 01: Give an error for "#define 123" or similar non-identifier > > Not a useful thing to do, of course, but an error we need. > > Patch 02: Support comment here: "#define /* Ha! */ FOO" > > Patches 03-12: Many cleanups/rewriting while working on the next patch > > Patch 13: Support comment here: "# /* Tricky! */ define FOO" > > Comments appearing in these places are not likely, but are clearly > valid according to the language specification.
There was a bunch of > work necessary to make this fix easy, (and even with all the > preliminary work, the final patch was longer than I wanted). > > I am happy that the lexer state at the end of this cleanup is much > simpler and easier to read than it was before. > > Patch 14: Emit internal error for unrecognized character > > This is to make un-subtle all classes of subtle bugs where the default > flex rule was simply printing unrecognized characters to stdout and > dropping them from the GLSL source. > > This is not actually in glcpp but in the lexer for the main glsl > compiler. > > Patch 15: Emit error for bogus extra characters after #extension > > This is an example of a fix for one of those subtle bugs from the flex > default-rule. This is a patch from Ken that was sent some time ago. > > Patches 16-17: Trivial fixups (renaming of token identifiers and new comment) > > Patch 18: Emit an error for duplicate macro parameter, eg "#define FOO(a, a)" > > Patch 19: Emit error if "++" or "--" appear in preprocessor condition > > Patches 20-21: Two new tests for bugs that I wrote (and fixed) while working >on some of the above. > > Patch 22: Emit internal error for unrecognized character > > This is just like patch 14, but for the lexer in glcpp itself. > > Patch 23: Treat '\r' as equivalent to '\n' > > The '\r' character was previously hitting the default lex, "print and > throw away" rule so was being entirely ignored. With patch 22, '\r' > would instead generate an internal error. Fix this by making '\r' > equivalent to '\n'. > > I'd like to be even more spec-compliant for '\r', but I think this is > OK for now. I'd also like to add some more-exhaustive testing for > '\r', (such as running all of glcpp-test on the test cases with '\n' > changed to "\r\n").
> ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] gbm: Log at least one dlerror() when we fail to open any drivers.
We don't want to log every single error (such as all the ones where the file wasn't even present in our list of search paths), but if you didn't find any driver, then seeing at least one error is useful (since the common case as a developer is a single DEFAULT_DRIVER_DIR or GBM_DRIVERS_PATH entry). --- src/gbm/backends/dri/gbm_dri.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index f421a03..4a19011 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -245,6 +245,7 @@ dri_load_driver(struct gbm_dri_device *dri) if (dri->driver == NULL) { fprintf(stderr, "gbm: failed to open any driver (search paths %s)\n", search_paths); + fprintf(stderr, "gbm: Last dlopen error: %s\n", dlerror()); return -1; } -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] gallium: Add a uif() helper function to complement fui()
I found myself often wanting this when I'm printing out a uint32_t mapping of some GPU data, and I want to put in an interpretation of that value as a float. --- src/gallium/auxiliary/util/u_math.h | 8 1 file changed, 8 insertions(+) diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index b9ed197..68ca3f2 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -616,6 +616,14 @@ fui( float f ) return fi.ui; } +static INLINE float +uif(uint32_t ui) +{ +union fi fi; +fi.ui = ui; +return fi.f; +} + /** * Convert ubyte to float in [0, 1]. -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/3] gbm: Fix a debug log message
--- src/gbm/backends/dri/gbm_dri.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c index 347bc99..f421a03 100644 --- a/src/gbm/backends/dri/gbm_dri.c +++ b/src/gbm/backends/dri/gbm_dri.c @@ -243,7 +243,7 @@ dri_load_driver(struct gbm_dri_device *dri) } if (dri->driver == NULL) { - fprintf(stderr, "gbm: failed to open any driver (search paths %s)", + fprintf(stderr, "gbm: failed to open any driver (search paths %s)\n", search_paths); return -1; } -- 2.0.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] common-code patches before vc4 driver
Here are the patches I have for common code in my vc4 driver tree. I think they should be obvious enough. I'm curious what people feel about merging vc4. I've got a series at this point that's clean enough in my opinion (copyrights fixed up, and I think a tolerable mix of "giant code dump for starting out the project" and "mostly sensible incremental work from there".) Should I just go ahead whenever? Does anybody want to engage in a review process with me, or should I just go ahead on my own, like Rob's doing with freedreno? Of course, the driver code I've written so far isn't using an actual stable kernel ABI -- I still need to handle little things like validating shaders and uniforms for security (and to relocate sampler configuration parameters), and asynchronous execution of command lists. So anything I land would be getting ABI-breaking reworks later on. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCHi v3] r600g: Implement GL_ARB_texture_gather
Pushed. Thanks. Marek On Wed, Jul 16, 2014 at 4:31 PM, Glenn Kennard wrote: > Only supported on evergreen and later. Currently limited > to single component textures as the hardware GATHER4 > instruction ignores texture swizzles. > > Piglit quick run passes on radeon 6670 with all > applicable textureGather tests, no regressions. > > Signed-off-by: Glenn Kennard > --- > Changes from v2: > Remove accidental disabling of unrelated caps that snuck in. > Oddly enough not caught by comparing piglit "quick" runs. > Changes from v1: > Removed PIPE_CAP_TEXTURE_GATHER_SM5 cap > > docs/GL3.txt | 2 +- > docs/relnotes/10.3.html| 2 +- > src/gallium/drivers/r600/r600_pipe.c | 2 +- > src/gallium/drivers/r600/r600_shader.c | 47 > +- > 4 files changed, 44 insertions(+), 9 deletions(-) > > diff --git a/docs/GL3.txt b/docs/GL3.txt > index a2f438b..20e57b0 100644 > --- a/docs/GL3.txt > +++ b/docs/GL3.txt > @@ -118,7 +118,7 @@ GL 4.0: >GL_ARB_tessellation_shader started (Fabian) >GL_ARB_texture_buffer_object_rgb32 DONE (i965, nvc0, > r600, radeonsi, softpipe) >GL_ARB_texture_cube_map_arrayDONE (i965, nv50, > nvc0, r600, radeonsi, softpipe) > - GL_ARB_texture_gatherDONE (i965, nv50, > nvc0, radeonsi) > + GL_ARB_texture_gatherDONE (i965, nv50, > nvc0, radeonsi, r600) >GL_ARB_texture_query_lod DONE (i965, nv50, > nvc0, radeonsi) >GL_ARB_transform_feedback2 DONE (i965, nv50, > nvc0, r600, radeonsi) >GL_ARB_transform_feedback3 DONE (i965, nv50, > nvc0, r600, radeonsi) > diff --git a/docs/relnotes/10.3.html b/docs/relnotes/10.3.html > index 2e718fc..1c0fab6 100644 > --- a/docs/relnotes/10.3.html > +++ b/docs/relnotes/10.3.html > @@ -49,7 +49,7 @@ Note: some of the new features are only available with > certain drivers. 
> GL_ARB_sample_shading on radeonsi > GL_ARB_stencil_texturing on nv50, nvc0, r600, and radeonsi > GL_ARB_texture_cube_map_array on radeonsi > -GL_ARB_texture_gather on radeonsi > +GL_ARB_texture_gather on radeonsi, r600 > GL_ARB_texture_query_levels on nv50, nvc0, llvmpipe, r600, radeonsi, > softpipe > GL_ARB_texture_query_lod on radeonsi > GL_ARB_viewport_array on nvc0 > diff --git a/src/gallium/drivers/r600/r600_pipe.c > b/src/gallium/drivers/r600/r600_pipe.c > index ca6399f..5bf9c00 100644 > --- a/src/gallium/drivers/r600/r600_pipe.c > +++ b/src/gallium/drivers/r600/r600_pipe.c > @@ -303,6 +303,7 @@ static int r600_get_param(struct pipe_screen* pscreen, > enum pipe_cap param) > case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE: > case PIPE_CAP_CUBE_MAP_ARRAY: > case PIPE_CAP_TGSI_VS_LAYER_VIEWPORT: > + case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > return family >= CHIP_CEDAR ? 1 : 0; > > /* Unsupported features. */ > @@ -312,7 +313,6 @@ static int r600_get_param(struct pipe_screen* pscreen, > enum pipe_cap param) > case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: > case PIPE_CAP_VERTEX_COLOR_CLAMPED: > case PIPE_CAP_USER_VERTEX_BUFFERS: > - case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: > case PIPE_CAP_TEXTURE_GATHER_SM5: > case PIPE_CAP_TEXTURE_QUERY_LOD: > case PIPE_CAP_SAMPLE_SHADING: > diff --git a/src/gallium/drivers/r600/r600_shader.c > b/src/gallium/drivers/r600/r600_shader.c > index 6952e3c..db928f3 100644 > --- a/src/gallium/drivers/r600/r600_shader.c > +++ b/src/gallium/drivers/r600/r600_shader.c > @@ -4477,7 +4477,8 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) > > if (inst->Instruction.Opcode == TGSI_OPCODE_TEX2 || > inst->Instruction.Opcode == TGSI_OPCODE_TXB2 || > - inst->Instruction.Opcode == TGSI_OPCODE_TXL2) > + inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || > + inst->Instruction.Opcode == TGSI_OPCODE_TG4) > sampler_src_reg = 2; > > src_gpr = tgsi_tex_get_src_gpr(ctx, 0); > @@ -5079,6 +5080,13 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) > case 
FETCH_OP_SAMPLE_G: > opcode = FETCH_OP_SAMPLE_C_G; > break; > + /* Texture gather variants */ > + case FETCH_OP_GATHER4: > + tex.op = FETCH_OP_GATHER4_C; > + break; > + case FETCH_OP_GATHER4_O: > + tex.op = FETCH_OP_GATHER4_C_O; > + break; > } > } > > @@ -5089,9 +5097,21 @@ static int tgsi_tex(struct r600_shader_ctx *ctx) > tex.resource_id = tex.sampler_id + R600_MAX_CONST_BUFFERS; > tex.src_gpr = src_gpr; > tex.dst_gpr = ctx->file_offset[inst->Dst[0].Register.File] + > inst->Dst[0].Register.Index; > -
Re: [Mesa-dev] [PATCH 2/2] Add support for RGBA8 and RGBX8 textures in intel_texsubimage_tiled_memcpy
Both patches are Reviewed-by: Chad Versace ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/3] gallium: Add a uif() helper function to complement fui()
On 18.07.2014 08:56, Eric Anholt wrote: > I found myself often wanting this when I'm printing out a uint32_t mapping > of some GPU data, and I want to put in an interpretation of that value as > a float. > --- > src/gallium/auxiliary/util/u_math.h | 8 > 1 file changed, 8 insertions(+) > > diff --git a/src/gallium/auxiliary/util/u_math.h > b/src/gallium/auxiliary/util/u_math.h > index b9ed197..68ca3f2 100644 > --- a/src/gallium/auxiliary/util/u_math.h > +++ b/src/gallium/auxiliary/util/u_math.h > @@ -616,6 +616,14 @@ fui( float f ) > return fi.ui; > } > > +static INLINE float > +uif(uint32_t ui) > +{ > +union fi fi; > +fi.ui = ui; > +return fi.f; > +} > + > > /** > * Convert ubyte to float in [0, 1]. > Reviewed-by: Michel Dänzer The GBM changes look good to me as well. -- Earthling Michel Dänzer| http://www.amd.com Libre software enthusiast |Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] mesa: Correctly use glDrawBuffers for multiple buffers and glDrawBuffer for one buffer.
Hi Kenneth, Ian, Could you look through my patch? Regards, Pavel -Original Message- From: Popov, Pavel E Sent: Thursday, July 17, 2014 10:21 PM To: mesa-dev@lists.freedesktop.org Cc: Popov, Pavel E Subject: [PATCH] mesa: Correctly use glDrawBuffers for multiple buffers and glDrawBuffer for one buffer. According to the spec (OpenGL 4.0 specification, pages 254-255), different bits are set for one buffer and for multiple buffers. For glDrawBuffer we may have up to four bits set, but for glDrawBuffers we can only have one bit set. _mesa_drawbuffers is called with ctx->Const.MaxDrawBuffers and NULL arguments when _mesa_update_framebuffer or _mesa_update_draw_buffers is called. In this situation the glDrawBuffers code path is used for any number of buffers, even for one. But the glDrawBuffer code path has to be used for one buffer instead of the glDrawBuffers one. Piglit test 'gl30basic' fails with an assertion on debug Mesa and passes on release Mesa: 'main/buffers.c:520: _mesa_drawbuffers: Assertion `__builtin_popcount(destMask[buf]) == 1' failed.' Some other tests are probably affected as well. Signed-off-by: Pavel Popov --- src/mesa/main/buffers.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c index b13a7af..a640360 100644 --- a/src/mesa/main/buffers.c +++ b/src/mesa/main/buffers.c @@ -480,6 +480,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, struct gl_framebuffer *fb = ctx->DrawBuffer; GLbitfield mask[MAX_DRAW_BUFFERS]; GLuint buf; + GLuint m = n; if (!destMask) { /* compute destMask values now */ @@ -489,15 +490,17 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const GLenum *buffers, mask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]); ASSERT(mask[output] != BAD_MASK); mask[output] &= supportedMask; + if (mask[output] == 0) +m--; } destMask = mask; } /* -* If n==1, destMask[0] may have up to four bits set. +* If m==1, destMask[0] may have up to four bits set. 
* Otherwise, destMask[x] can only have one bit set. */ - if (n == 1) { + if (m == 1) { GLuint count = 0, destMask0 = destMask[0]; while (destMask0) { GLint bufIndex = ffs(destMask0) - 1; -- 1.9.1 Closed Joint Stock Company Intel A/O Registered legal address: Krylatsky Hills Business Park, 17 Krylatskaya Str., Bldg 4, Moscow 121614, Russian Federation This e-mail and any attachments may contain confidential material for the sole use of the intended recipient(s). Any review or distribution by others is strictly prohibited. If you are not the intended recipient, please contact the sender and delete all copies. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 36414] Screen mess "white smoke" when running vdrift
https://bugs.freedesktop.org/show_bug.cgi?id=36414 meng changed: What|Removed |Added Status|RESOLVED|VERIFIED -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 36414] Screen mess "white smoke" when running vdrift
https://bugs.freedesktop.org/show_bug.cgi?id=36414 --- Comment #12 from meng --- Since the issue is a bug in the application, I have marked it as verified. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/5] [RFC] r600g/compute: Adding support for defragmenting compute_memory_pool
On Wed, Jul 16, 2014 at 11:12:42PM +0200, Bruno Jiménez wrote: > Hi, > > This series finally adds support for defragmenting the pool for > OpenCL buffers in the r600g driver. It is mostly a rewrite of > the series that I wrote some months ago. > > For defragmenting the pool I have thought of two different > possibilities: > > - Creating a new pool and moving every item there in the correct > position. This has the advantage of being very simple to > implement and that it allows the pool to be grown at the > same time. But it has a couple of problems, namely that it > has a high memory peak usage (sum of current pool + new pool) > and that in the case of having a pool not very fragmented you > have to copy every item to its new place. > - Using the same pool by moving the items in it. This has the > advantage of using less memory (sum of current pool + biggest > item in it) and that it is easier to handle the case of > only having a few elements out of place. The disadvantages > are that it doesn't allow growing the pool at the same time > and that it may involve twice the number of item-copies in > the worst case. > > I have chosen to implement the second option, but if you think > that the first one is better I can rewrite the series for it. > (^_^) > > The worst case I have mentioned is this: Imagine that you have > a series of items in which the first is, at least, 1 'unit' > smaller than the rest. You now free this item and create a new > one with the same size [why would anyone do this? I don't know] > For now, the defragmenter code is so dumb that it will move > every item to the front of the pool without trying first to > put this new item in the available space. > > Hopefully situations like this won't be very common. > > If you want me to explain any detail about any of the patches > just ask. And as said, if you prefer the first version of the > defragmenter, just ask. 
[In fact, after having written this, > I may add it for the case grow+defrag] > > Also, no regressions found in piglit. > > Thanks in advance! > Bruno > > Bruno Jiménez (5): > r600g/compute: Add a function for moving items in the pool > r600g/compute: Add a function for defragmenting the pool > r600g/compute: Defrag the pool if it's necesary > r600g/compute: Quick exit if there's nothing to add to the pool > r600g/compute: Remove unneeded code from compute_memory_promote_item > > src/gallium/drivers/r600/compute_memory_pool.c | 196 > ++--- > src/gallium/drivers/r600/compute_memory_pool.h | 13 +- > 2 files changed, 156 insertions(+), 53 deletions(-) Hi, I took a brief look at these patches and they look pretty good. I will look at them again tomorrow and then commit if I don't see any issues. -Tom > > -- > 2.0.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 79949] [DRI3] GTK+ Programs Not Updating Correctly
https://bugs.freedesktop.org/show_bug.cgi?id=79949 --- Comment #8 from Joseph Booker --- Thanks. It doesn't seem to address this. With xf86-video-intel 2.99.912, the same problem shows with mesa 10.2.3 and mesa master branch (on commit f14d217f5c72651b9f24a83b11ace16837db4603). If it helps, the screen flickering between old/new content occurs when I switch tabs to a tab already loaded, or when I alternate page up/down (so a section of the page recently loaded is reshown). Also, it stops (and doesn't seem to start) when I scroll with my touchpad or use the arrow keys. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT
On 17.07.2014 19:09, Christian König wrote: > Am 17.07.2014 12:01, schrieb Michel Dänzer: >> In order to try and improve X(Shm)PutImage performance with glamor, I >> implemented support for write-combined CPU mappings of BOs in GTT. >> >> This did provide a nice speedup, but to my surprise, using VRAM instead >> of write-combined GTT turned out to be even faster in general on my >> Kaveri machine, both for the internal GPU and for discrete GPUs. >> >> However, I've kept the changes from GTT to VRAM separated, in case this >> turns out to be a loss on other setups. >> >> Kernel patches: >> >> [PATCH 1/5] drm/radeon: Remove radeon_gart_restore() >> [PATCH 2/5] drm/radeon: Pass GART page flags to >> [PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in >> [PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and > > Those four are Reviewed-by: Christian König Thanks! >> [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI > > I'm still not very keen with this change since I still don't understand > the reason why it's faster than with GTT. Definitely needs more testing > on a wider range of systems. Sure. If anyone wants to give this patch a spin and see if they can measure any performance difference, good or bad, that would be interesting. > Maybe limit it to APUs for now? But IIRC, CPU writes to VRAM vs. write-combined GTT are actually an even bigger win with dedicated GPUs than with the Kaveri built-in GPU on my system. I suspect it may depend on the bandwidth available for PCIe vs. system memory though. -- Earthling Michel Dänzer| http://www.amd.com Libre software enthusiast |Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings
On 17.07.2014 21:00, Marek Olšák wrote: > On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer wrote: >> From: Michel Dänzer >> >> This is hopefully safe: The kernel makes sure writes to these mappings >> finish before the GPU might start reading from them, and the GPU caches >> are invalidated at the start of a command stream. >> > The resource flags actually tell you what you can do. If the COHERENT > flag is set, the mapping must be cached. Why is that required? As I explain above, we should satisfy the requirements of the ARB_buffer_storage extension AFAICT. As pointed out by you and Grigori in other posts, I should probably just drop the special treatment of persistent mappings though, so the placement and flags are derived from the buffer usage. -- Earthling Michel Dänzer| http://www.amd.com Libre software enthusiast |Mesa and X developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965: Set minimum point size to 1.0 for non-sprite non-aa points
Hi Ian, Looks like this patch is not reasonable for Mesa which now concentrates on OpenGL 3.3 Core. Am I right? I prepared this patch to pass Piglit test 'spec_OpenGL_2.0_vs-point_size-zero' which was created for earlier versions of OpenGL. Also I found these analogous patches for 'svga', 'r300g' and 'r600g' implementations from Marek: 2012-01-30 r600g: set minimum point size to 1.0 for non-sprite non-aa points Marek Olšák 2012-01-30 r300g: set minimum point size to 1.0 for non-sprite non-aa points Marek Olšák 2012-01-30 svga: set POINTSIZEMIN to 1.0 for non-sprite non-aa points Marek Olšák Regards, Pavel -Original Message- From: Popov, Pavel E Sent: Wednesday, June 04, 2014 7:34 PM To: mesa-dev@lists.freedesktop.org Cc: Popov, Pavel E Subject: [PATCH] i965: Set minimum point size to 1.0 for non-sprite non-aa points Both point size states were covered (glPointSize function and gl_PointSize value). Piglit test 'spec_OpenGL_2.0_vs-point_size-zero' which uses gl_PointSize value in a shader passes. Modification of this test which uses glPointSize function also passes. Signed-off-by: Pavel Popov --- src/mesa/drivers/dri/i965/gen6_clip_state.c | 3 ++- src/mesa/drivers/dri/i965/gen6_sf_state.c | 6 -- src/mesa/drivers/dri/i965/gen7_sf_state.c | 6 -- src/mesa/drivers/dri/i965/gen8_sf_state.c | 6 -- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index 0ba190e..cf11331 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -128,7 +128,8 @@ upload_clip_state(struct brw_context *brw) GEN6_CLIP_MODE_NORMAL | GEN6_CLIP_XY_TEST | dw2); - OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | + /* Use min size 1.0 if antialiasing and point sprites are disabled (OpenGL 3.0 and earlier) */ + OUT_BATCH(U_FIXED(!(ctx->Point.SmoothFlag || ctx->Point.PointSprite) + ? 
1.0 : 0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | (fb->MaxNumLayers > 0 ? 0 : GEN6_CLIP_FORCE_ZERO_RTAINDEX) | ((ctx->Const.MaxViewports - 1) & GEN6_CLIP_MAX_VP_INDEX_MASK)); diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c b/src/mesa/drivers/dri/i965/gen6_sf_state.c index ec14be4..b8d66ab 100644 --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c @@ -346,8 +346,10 @@ upload_sf_state(struct brw_context *brw) /* Clamp to ARB_point_parameters user limits */ point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); - /* Clamp to the hardware limits and convert to fixed point */ - dw4 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); + /* Clamp to the hardware limits and convert to fixed point. +* Use min size 1.0 if antialiasing and point sprites are disabled (OpenGL 3.0 and earlier). +*/ + dw4 |= U_FIXED(CLAMP(point_size, !(ctx->Point.SmoothFlag || + ctx->Point.PointSprite) ? 1.0 : 0.125, 255.875), 3); /* * Window coordinates in an FBO are inverted, which means point diff --git a/src/mesa/drivers/dri/i965/gen7_sf_state.c b/src/mesa/drivers/dri/i965/gen7_sf_state.c index 7fe1435..a192dcb 100644 --- a/src/mesa/drivers/dri/i965/gen7_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sf_state.c @@ -220,8 +220,10 @@ upload_sf_state(struct brw_context *brw) /* Clamp to ARB_point_parameters user limits */ point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); - /* Clamp to the hardware limits and convert to fixed point */ - dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); + /* Clamp to the hardware limits and convert to fixed point. +* Use min size 1.0 if antialiasing and point sprites are disabled (OpenGL 3.0 and earlier). +*/ + dw3 |= U_FIXED(CLAMP(point_size, !(ctx->Point.SmoothFlag || + ctx->Point.PointSprite) ? 
1.0 : 0.125, 255.875), 3); /* _NEW_LIGHT */ if (ctx->Light.ProvokingVertex != GL_FIRST_VERTEX_CONVENTION) { diff --git a/src/mesa/drivers/dri/i965/gen8_sf_state.c b/src/mesa/drivers/dri/i965/gen8_sf_state.c index 0a69e70..d130a00 100644 --- a/src/mesa/drivers/dri/i965/gen8_sf_state.c +++ b/src/mesa/drivers/dri/i965/gen8_sf_state.c @@ -132,8 +132,10 @@ upload_sf(struct brw_context *brw) /* Clamp to ARB_point_parameters user limits */ point_size = CLAMP(ctx->Point.Size, ctx->Point.MinSize, ctx->Point.MaxSize); - /* Clamp to the hardware limits and convert to fixed point */ - dw3 |= U_FIXED(CLAMP(point_size, 0.125, 255.875), 3); + /* Clamp to the hardware limits and convert to fixed point. +* Use min size 1.0 if antialiasing and point sprites are disabled (OpenGL 3.0 and earlier). +*/ + dw3 |= U_FIXED(CLAMP(point_size, !(ctx->Point.SmoothFlag || + ctx->Point.PointSpr