Re: [Mesa-dev] [PATCH 5/5] i965 gen7: add support for layered color renderbuffers
On Sat, May 18, 2013 at 10:11 AM, Jordan Justen wrote: > Rather than pointing the surface_state directly at a single > sub-image of the texture for rendering, we now point the > surface_state at the top level of the texture, and configure > the surface_state as needed based on this. > > We now also need to stop setting the FORCE_ZERO_RTAINDEX bit > in the clip date so render target array values other than zero > will be used. > > Signed-off-by: Jordan Justen > --- > src/mesa/drivers/dri/i965/brw_defines.h |2 + > src/mesa/drivers/dri/i965/gen7_clip_state.c |3 +- > src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 63 > +++-- > 3 files changed, 48 insertions(+), 20 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_defines.h > b/src/mesa/drivers/dri/i965/brw_defines.h > index fedd78c..d61151f 100644 > --- a/src/mesa/drivers/dri/i965/brw_defines.h > +++ b/src/mesa/drivers/dri/i965/brw_defines.h > @@ -539,6 +539,8 @@ > #define GEN7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3) > #define GEN7_SURFACE_MSFMT_MSS (0 << 6) > #define GEN7_SURFACE_MSFMT_DEPTH_STENCIL(1 << 6) > +#define GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 18 > +#define GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT 7 > > /* Surface state DW5 */ > #define BRW_SURFACE_X_OFFSET_SHIFT 25 > diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c > b/src/mesa/drivers/dri/i965/gen7_clip_state.c > index 29a5ed5..1256f32 100644 > --- a/src/mesa/drivers/dri/i965/gen7_clip_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c > @@ -107,8 +107,7 @@ upload_clip_state(struct brw_context *brw) > GEN6_CLIP_XY_TEST | > dw2); > OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | > - U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | > - GEN6_CLIP_FORCE_ZERO_RTAINDEX); > + U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT); > ADVANCE_BATCH(); > } > > diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c > b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c > index 6c01545..5f15eff 100644 > 
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c > @@ -23,6 +23,7 @@ > #include "main/mtypes.h" > #include "main/blend.h" > #include "main/samplerobj.h" > +#include "main/texformat.h" > #include "program/prog_parameter.h" > > #include "intel_mipmap_tree.h" > @@ -529,12 +530,13 @@ gen7_update_renderbuffer_surface(struct brw_context > *brw, > struct gl_context *ctx = &intel->ctx; > struct intel_renderbuffer *irb = intel_renderbuffer(rb); > struct intel_region *region = irb->mt->region; > - uint32_t tile_x, tile_y; > uint32_t format; > /* _NEW_BUFFERS */ > gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); > - > - assert(!layered); > + uint32_t surftype; > + bool is_array = false; > + int depth = rb->Depth > 0 ? rb->Depth - 1 : 0; > + int min_array_element = 0; > > uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, > 8 * 4, 32, &brw->wm.surf_offset[unit]); > @@ -550,7 +552,23 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, > __FUNCTION__, _mesa_get_format_name(rb_format)); > } > > - surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | > + if (rb->TexImage) { > + surftype = translate_tex_target(rb->TexImage->TexObject->Target); > + is_array = _mesa_tex_target_is_array(rb->TexImage->TexObject->Target); > + if (rb->TexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP_ARRAY) { > + assert(rb->Depth > 0); > + surftype = BRW_SURFACE_2D; > + depth = (6 * (depth + 1)) - 1; > + } else if (rb->TexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) { > + surftype = BRW_SURFACE_2D; > + depth = 5; > + is_array = true; > + } > + } else { > + surftype = BRW_SURFACE_2D; > + } > + > + surf[0] = surftype << BRW_SURFACE_TYPE_SHIFT | > format << BRW_SURFACE_FORMAT_SHIFT | > (irb->mt->array_spacing_lod0 ? 
GEN7_SURFACE_ARYSPC_LOD0 >: GEN7_SURFACE_ARYSPC_FULL) | > @@ -561,24 +579,33 @@ gen7_update_renderbuffer_surface(struct brw_context > *brw, > if (irb->mt->align_w == 8) >surf[0] |= GEN7_SURFACE_HALIGN_8; > > - /* reloc */ > - surf[1] = intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y) + > - region->bo->offset; /* reloc */ > + if (is_array) { > + surf[0] |= GEN7_SURFACE_IS_ARRAY; > + } > + > + if (!layered) { > + if (irb->mt->num_samples > 1) { > + min_array_element = irb->mt_layer / irb->mt->num_samples; > + } else { > + min_array_element = irb->mt_layer; > + } > + } > + > + surf[1] = region->bo->offset; > > assert(brw->has_surface_tile_offset)
Re: [Mesa-dev] [PATCH 2/2] r600g/compute: Use common transfer_{map, unmap} functions for global resources
On Sat, May 18, 2013 at 2:17 AM, Tom Stellard wrote: > From: Tom Stellard > > --- > src/gallium/drivers/r600/evergreen_compute.c | 68 > ++-- > 1 file changed, 24 insertions(+), 44 deletions(-) > > diff --git a/src/gallium/drivers/r600/evergreen_compute.c > b/src/gallium/drivers/r600/evergreen_compute.c > index 4d490c4..6d26b8e 100644 > --- a/src/gallium/drivers/r600/evergreen_compute.c > +++ b/src/gallium/drivers/r600/evergreen_compute.c > @@ -903,67 +903,47 @@ void *r600_compute_global_transfer_map( > { > struct r600_context *rctx = (struct r600_context*)ctx_; > struct compute_memory_pool *pool = rctx->screen->global_pool; > - struct pipe_transfer *transfer = > util_slab_alloc(&rctx->pool_transfers); > struct r600_resource_global* buffer = > (struct r600_resource_global*)resource; > - uint32_t* map; > > - compute_memory_finalize_pending(pool, ctx_); > - > - assert(resource->target == PIPE_BUFFER); > - > - COMPUTE_DBG(rctx->screen, "* r600_compute_global_get_transfer()\n" > + COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n" > "level = %u, usage = %u, box(x = %u, y = %u, z = %u " > "width = %u, height = %u, depth = %u)\n", level, > usage, > box->x, box->y, box->z, box->width, box->height, > box->depth); > + COMPUTE_DBG(rctx->screen, "Buffer: %u (buffer offset in global > memory) " > + "+ %u (box.x)\n", buffer->chunk->start_in_dw, box->x); > > - transfer->resource = resource; > - transfer->level = level; > - transfer->usage = usage; > - transfer->box = *box; > - transfer->stride = 0; > - transfer->layer_stride = 0; > - > - assert(transfer->resource->target == PIPE_BUFFER); > - assert(transfer->resource->bind & PIPE_BIND_GLOBAL); > - assert(transfer->box.x >= 0); > - assert(transfer->box.y == 0); > - assert(transfer->box.z == 0); > > - ///TODO: do it better, mapping is not possible if the pool is too big > - > - COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"); > - > - if (!(map = r600_buffer_mmap_sync_with_rings(rctx, > 
buffer->chunk->pool->bo, transfer->usage))) { > - util_slab_free(&rctx->pool_transfers, transfer); > - return NULL; > - } > + compute_memory_finalize_pending(pool, ctx_); > > - *ptransfer = transfer; > + assert(resource->target == PIPE_BUFFER); > + assert(resource->bind & PIPE_BIND_GLOBAL); > + assert(box->x >= 0); > + assert(box->y == 0); > + assert(box->z == 0); > > - COMPUTE_DBG(rctx->screen, "Buffer: %p + %u (buffer offset in global > memory) " > - "+ %u (box.x)\n", map, buffer->chunk->start_in_dw, > transfer->box.x); > - return ((char*)(map + buffer->chunk->start_in_dw)) + transfer->box.x; > + ///TODO: do it better, mapping is not possible if the pool is too big > + return pipe_buffer_map_range(ctx_, (struct > pipe_resource*)buffer->chunk->pool->bo, > + box->x + (buffer->chunk->start_in_dw * 4), > + box->width, usage, ptransfer); > } > > void r600_compute_global_transfer_unmap( > struct pipe_context *ctx_, > struct pipe_transfer* transfer) > { > - struct r600_context *ctx = NULL; > - struct r600_resource_global* buffer = NULL; > - > - assert(transfer->resource->target == PIPE_BUFFER); > - assert(transfer->resource->bind & PIPE_BIND_GLOBAL); > - > - ctx = (struct r600_context *)ctx_; > - buffer = (struct r600_resource_global*)transfer->resource; > - > - COMPUTE_DBG(ctx->screen, "* r600_compute_global_transfer_unmap()\n"); > - > - ctx->ws->buffer_unmap(buffer->chunk->pool->bo->cs_buf); > - util_slab_free(&ctx->pool_transfers, transfer); > + /* struct r600_resource_global are not real resources, they just map > +* to an offset within the compute memory pool. The function > +* r600_compute_global_transfer_map() maps the memory pool > +* resource rather than the struct r600_resource_global passed to > +* it as an argument and then initalizes ptransfer->resource with > +* the memory pool resource (via pipe_buffer_map_range). 
> +* When transfer_unamp is called it uses the memory pool's *unmap For the series: Reviewed-by: Marek Olšák Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64730] [llvmpipe] piglit array-texture regression
https://bugs.freedesktop.org/show_bug.cgi?id=64730 Roland Scheidegger changed: What|Removed |Added Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #1 from Roland Scheidegger --- The workaround was bogus since it didn't only prevent the wrong first_layer rebase (which wasn't an issue for opengl) but had the effect of not assigning depth correctly (so was always one, effectively reducing all array textures to non-array ones). This is however already fixed by 87978518e996d02e055174d7152fff150fe3cd13. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64730] New: [llvmpipe] piglit array-texture regression
https://bugs.freedesktop.org/show_bug.cgi?id=64730 Priority: medium Bug ID: 64730 Keywords: regression CC: jfons...@vmware.com Assignee: mesa-dev@lists.freedesktop.org Summary: [llvmpipe] piglit array-texture regression Severity: normal Classification: Unclassified OS: Linux (All) Reporter: v...@freedesktop.org Hardware: x86-64 (AMD64) Status: NEW Version: git Component: Other Product: Mesa mesa: 46ea8041074df79561f9771e2ecf198f2cbd088f (master) $ ./bin/array-texture -auto Probe at (150,50) Expected: 0.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 1 Probe at (250,50) Expected: 0.00 0.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 2 Probe at (350,50) Expected: 0.00 0.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 3 Probe at (450,50) Expected: 0.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 4 Probe at (550,50) Expected: 1.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 5 Probe at (650,50) Expected: 1.00 1.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 6 Probe at (150,150) Expected: 0.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 1 Probe at (250,150) Expected: 0.00 0.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 2 Probe at (350,150) Expected: 0.00 0.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 3 Probe at (450,150) Expected: 0.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 4 Probe at (550,150) Expected: 1.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 5 Probe at (650,150) Expected: 1.00 1.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 2D image/slice 6 Probe at (150,250) Expected: 0.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 1 Probe at (250,250) Expected: 0.00 0.00 1.00 Observed: 1.00 0.00 0.00 
array-texture: failed for 1D image/slice 2 Probe at (350,250) Expected: 0.00 0.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 3 Probe at (450,250) Expected: 0.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 4 Probe at (550,250) Expected: 1.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 5 Probe at (650,250) Expected: 1.00 1.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 6 Probe at (150,350) Expected: 0.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 1 Probe at (250,350) Expected: 0.00 0.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 2 Probe at (350,350) Expected: 0.00 0.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 3 Probe at (450,350) Expected: 0.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 4 Probe at (550,350) Expected: 1.00 1.00 0.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 5 Probe at (650,350) Expected: 1.00 1.00 1.00 Observed: 1.00 0.00 0.00 array-texture: failed for 1D image/slice 6 PIGLIT: {'result': 'fail' } 4f518e173847e8538bb4f0f9216e3f6417853d7a is the first bad commit commit 4f518e173847e8538bb4f0f9216e3f6417853d7a Author: José Fonseca Date: Thu May 16 15:13:51 2013 +0100 llvmpipe: Temporary workaround to prevent segfault on array textures. :04 04 98a1ae9d47b556629ed257353cd5c93f6d660517 3a99f61bbde526d04c60d10629f5801e05e8d235 Msrc bisect run success -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64649] Anomaly 2 (Steam) exits with GLX_EXT_swap_control not supported, unable to set vertical sync
https://bugs.freedesktop.org/show_bug.cgi?id=64649 --- Comment #7 from romula...@gmail.com --- (In reply to comment #4) > Does it look like a graceful exit or a segfault? No segfault, so a graceful exit. -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64649] Anomaly 2 (Steam) exits with GLX_EXT_swap_control not supported, unable to set vertical sync
https://bugs.freedesktop.org/show_bug.cgi?id=64649 --- Comment #6 from romula...@gmail.com --- Created attachment 79488 --> https://bugs.freedesktop.org/attachment.cgi?id=79488&action=edit full steam logfile -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64649] Anomaly 2 (Steam) exits with GLX_EXT_swap_control not supported, unable to set vertical sync
https://bugs.freedesktop.org/show_bug.cgi?id=64649 --- Comment #5 from romula...@gmail.com --- 01:00.0 VGA compatible controller: Advanced Micro Devices [AMD] nee ATI RV770 [Radeon HD 4870] -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] mesa/texformat: add _mesa_tex_target_is_array function
Signed-off-by: Jordan Justen --- src/mesa/main/texformat.c | 13 + src/mesa/main/texformat.h |2 ++ 2 files changed, 15 insertions(+) diff --git a/src/mesa/main/texformat.c b/src/mesa/main/texformat.c index ed40b7e..a7df868 100644 --- a/src/mesa/main/texformat.c +++ b/src/mesa/main/texformat.c @@ -929,3 +929,16 @@ _mesa_choose_tex_format(struct gl_context *ctx, GLenum target, return MESA_FORMAT_NONE; } +GLboolean +_mesa_tex_target_is_array(GLenum target) +{ + switch (target) { + case GL_TEXTURE_1D_ARRAY_EXT: + case GL_TEXTURE_2D_ARRAY_EXT: + case GL_TEXTURE_2D_MULTISAMPLE_ARRAY: + case GL_TEXTURE_CUBE_MAP_ARRAY: + return GL_TRUE; + default: + return GL_FALSE; + } +} diff --git a/src/mesa/main/texformat.h b/src/mesa/main/texformat.h index efe2699..d6ff541 100644 --- a/src/mesa/main/texformat.h +++ b/src/mesa/main/texformat.h @@ -36,5 +36,7 @@ extern gl_format _mesa_choose_tex_format(struct gl_context *ctx, GLenum target, GLint internalFormat, GLenum format, GLenum type); +extern GLboolean +_mesa_tex_target_is_array(GLenum target); #endif -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] i965 gen7: add support for layered color renderbuffers
Rather than pointing the surface_state directly at a single sub-image of the texture for rendering, we now point the surface_state at the top level of the texture, and configure the surface_state as needed based on this. We now also need to stop setting the FORCE_ZERO_RTAINDEX bit in the clip state so render target array values other than zero will be used. Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_defines.h |2 + src/mesa/drivers/dri/i965/gen7_clip_state.c |3 +- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 63 +++-- 3 files changed, 48 insertions(+), 20 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index fedd78c..d61151f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -539,6 +539,8 @@ #define GEN7_SURFACE_MULTISAMPLECOUNT_8 (3 << 3) #define GEN7_SURFACE_MSFMT_MSS (0 << 6) #define GEN7_SURFACE_MSFMT_DEPTH_STENCIL(1 << 6) +#define GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT 18 +#define GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT 7 /* Surface state DW5 */ #define BRW_SURFACE_X_OFFSET_SHIFT 25 diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c b/src/mesa/drivers/dri/i965/gen7_clip_state.c index 29a5ed5..1256f32 100644 --- a/src/mesa/drivers/dri/i965/gen7_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c @@ -107,8 +107,7 @@ upload_clip_state(struct brw_context *brw) GEN6_CLIP_XY_TEST | dw2); OUT_BATCH(U_FIXED(0.125, 3) << GEN6_CLIP_MIN_POINT_WIDTH_SHIFT | - U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT | - GEN6_CLIP_FORCE_ZERO_RTAINDEX); + U_FIXED(255.875, 3) << GEN6_CLIP_MAX_POINT_WIDTH_SHIFT); ADVANCE_BATCH(); } diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 6c01545..5f15eff 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -23,6 +23,7 @@ #include "main/mtypes.h" 
#include "main/blend.h" #include "main/samplerobj.h" +#include "main/texformat.h" #include "program/prog_parameter.h" #include "intel_mipmap_tree.h" @@ -529,12 +530,13 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, struct gl_context *ctx = &intel->ctx; struct intel_renderbuffer *irb = intel_renderbuffer(rb); struct intel_region *region = irb->mt->region; - uint32_t tile_x, tile_y; uint32_t format; /* _NEW_BUFFERS */ gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); - - assert(!layered); + uint32_t surftype; + bool is_array = false; + int depth = rb->Depth > 0 ? rb->Depth - 1 : 0; + int min_array_element = 0; uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, &brw->wm.surf_offset[unit]); @@ -550,7 +552,23 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, __FUNCTION__, _mesa_get_format_name(rb_format)); } - surf[0] = BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT | + if (rb->TexImage) { + surftype = translate_tex_target(rb->TexImage->TexObject->Target); + is_array = _mesa_tex_target_is_array(rb->TexImage->TexObject->Target); + if (rb->TexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP_ARRAY) { + assert(rb->Depth > 0); + surftype = BRW_SURFACE_2D; + depth = (6 * (depth + 1)) - 1; + } else if (rb->TexImage->TexObject->Target == GL_TEXTURE_CUBE_MAP) { + surftype = BRW_SURFACE_2D; + depth = 5; + is_array = true; + } + } else { + surftype = BRW_SURFACE_2D; + } + + surf[0] = surftype << BRW_SURFACE_TYPE_SHIFT | format << BRW_SURFACE_FORMAT_SHIFT | (irb->mt->array_spacing_lod0 ? 
GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL) | @@ -561,24 +579,33 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, if (irb->mt->align_w == 8) surf[0] |= GEN7_SURFACE_HALIGN_8; - /* reloc */ - surf[1] = intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y) + - region->bo->offset; /* reloc */ + if (is_array) { + surf[0] |= GEN7_SURFACE_IS_ARRAY; + } + + if (!layered) { + if (irb->mt->num_samples > 1) { + min_array_element = irb->mt_layer / irb->mt->num_samples; + } else { + min_array_element = irb->mt_layer; + } + } + + surf[1] = region->bo->offset; assert(brw->has_surface_tile_offset); - /* Note that the low bits of these fields are missing, so -* there's the possibility of getting in trouble. -*/ - assert(tile_x % 4 == 0); - assert(tile_y % 2 == 0); - surf[5] = SET_FIELD(tile_x / 4, BRW_SURFACE_X_OFFSET) | - SET_FIELD(tile_y / 2, BRW_SURFACE_
[Mesa-dev] [PATCH 2/5] intel_fbo: set gl_renderbuffer Depth field
Set the renderbuffer's Depth field to match the texture's Depth when rendering to a texture. Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/intel/intel_fbo.c |9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index a8a7ab3..243c00a 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -482,14 +482,17 @@ intel_framebuffer_renderbuffer(struct gl_context * ctx, static bool intel_renderbuffer_update_wrapper(struct intel_context *intel, struct intel_renderbuffer *irb, - struct gl_texture_image *image, - uint32_t layer) + struct gl_texture_image *image, + uint32_t layer, + bool layered) { struct gl_renderbuffer *rb = &irb->Base.Base; struct intel_texture_image *intel_image = intel_texture_image(image); struct intel_mipmap_tree *mt = intel_image->mt; int level = image->Level; + rb->Depth = image->Depth; + rb->AllocStorage = intel_nop_alloc_storage; intel_miptree_check_level_layer(mt, level, layer); @@ -598,7 +601,7 @@ intel_render_texture(struct gl_context * ctx, intel_miptree_check_level_layer(mt, att->TextureLevel, layer); - if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer)) { + if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer, att->Layered)) { _swrast_render_texture(ctx, fb, att); return; } -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] intel: add layered parameter to update_renderbuffer_surface
Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c |6 +- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |3 +++ src/mesa/drivers/dri/intel/intel_context.h|1 + 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index bbe8579..efc15e9 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1315,6 +1315,7 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit) static void brw_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, + bool layered, unsigned int unit) { struct intel_context *intel = &brw->intel; @@ -1328,6 +1329,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw, /* _NEW_BUFFERS */ gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); + assert(!layered); + if (rb->TexImage && !brw->has_surface_tile_offset) { intel_renderbuffer_tile_offsets(irb, &tile_x, &tile_y); @@ -1424,7 +1427,8 @@ brw_update_renderbuffer_surfaces(struct brw_context *brw) if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) { for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) { if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) { - intel->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], i); + intel->vtbl.update_renderbuffer_surface(brw, ctx->DrawBuffer->_ColorDrawBuffers[i], + ctx->DrawBuffer->Layered, i); } else { intel->vtbl.update_null_renderbuffer_surface(brw, i); } diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 435f9dc..6c01545 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -522,6 +522,7 @@ gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit) static void 
gen7_update_renderbuffer_surface(struct brw_context *brw, struct gl_renderbuffer *rb, +bool layered, unsigned int unit) { struct intel_context *intel = &brw->intel; @@ -533,6 +534,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, /* _NEW_BUFFERS */ gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb)); + assert(!layered); + uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 8 * 4, 32, &brw->wm.surf_offset[unit]); memset(surf, 0, 8 * 4); diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h index c0f07ff..5420e76 100644 --- a/src/mesa/drivers/dri/intel/intel_context.h +++ b/src/mesa/drivers/dri/intel/intel_context.h @@ -196,6 +196,7 @@ struct intel_context unsigned surf_index); void (*update_renderbuffer_surface)(struct brw_context *brw, struct gl_renderbuffer *rb, + bool layered, unsigned unit); void (*update_null_renderbuffer_surface)(struct brw_context *brw, unsigned unit); -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] intel: print image depth in debug message
Signed-off-by: Jordan Justen --- src/mesa/drivers/dri/intel/intel_fbo.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_fbo.c b/src/mesa/drivers/dri/intel/intel_fbo.c index 69f8629..a8a7ab3 100644 --- a/src/mesa/drivers/dri/intel/intel_fbo.c +++ b/src/mesa/drivers/dri/intel/intel_fbo.c @@ -603,9 +603,9 @@ intel_render_texture(struct gl_context * ctx, return; } - DBG("Begin render %s texture tex=%u w=%d h=%d refcount=%d\n", + DBG("Begin render %s texture tex=%u w=%d h=%d d=%d refcount=%d\n", _mesa_get_format_name(image->TexFormat), - att->Texture->Name, image->Width, image->Height, + att->Texture->Name, image->Width, image->Height, image->Depth, rb->RefCount); /* update drawing region, etc */ -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 0/5] Gen7 Layered Color Renderbuffer support
git://people.freedesktop.org/~jljusten/mesa ivb-layered-color-renderbuffer-v1 This series updates gen7 to allow layered color render buffers. With these changes we can support the AMD_vertex_shader_layer extension for color renderbuffers. Once depth is also supported, then we can actually enable the AMD_vertex_shader_layer extension. Layered rendering is also required for geometry shader support. Jordan Justen (5): intel: print image depth in debug message intel_fbo: set gl_renderbuffer Depth field intel: add layered parameter to update_renderbuffer_surface mesa/texformat: add _mesa_tex_target_is_array function i965 gen7: add support for layered color renderbuffers src/mesa/drivers/dri/i965/brw_defines.h |2 + src/mesa/drivers/dri/i965/brw_wm_surface_state.c |6 +- src/mesa/drivers/dri/i965/gen7_clip_state.c |3 +- src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 62 +++-- src/mesa/drivers/dri/intel/intel_context.h|1 + src/mesa/drivers/dri/intel/intel_fbo.c| 13 +++-- src/mesa/main/texformat.c | 13 + src/mesa/main/texformat.h |2 + 8 files changed, 78 insertions(+), 24 deletions(-) -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] llvmpipe: fix stencil issues
From: Roland Scheidegger Two (somewhat related) issues: 1) We did mask checks between depth/stencil testing and depth/stencil write. This meant that if the depth/stencil test killed off all fragments we never actually wrote the new stencil value. This issue affected all early/late test/write combinations. 2) We actually did early depth/stencil test and late depth/stencil write even when the shader could kill the fragment (alpha test or discard). Since it matters for the new stencil value whether the fragment is killed by depth/stencil test or by the shader (in which case it will not reach the depth/stencil test), this simply cannot work. So fix these issues by moving the mask check after depth/stencil write (only for early write; it would work for late write too, but it is probably not worth the mask check there) and disabling early depth test when it can't work correctly. This addresses https://bugs.freedesktop.org/show_bug.cgi?id=41787 though replaying the trace it still looks somewhat wrong to me, so maybe more bugs... Verified this fixes affected piglit tests (glean stencil2 and some from hiz group) if the simple_shader optimization in generate_fs_loop() is forced to false (otherwise we skip mask checks hence don't hit issue 1 - I don't think there's anything in piglit which would exhibit issue 2). 
--- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 31 ++- src/gallium/drivers/llvmpipe/lp_state_fs.c | 20 - 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c index 2376ca7..afc2d9d 100644 --- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c +++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c @@ -1096,23 +1096,18 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, stencil_shift, ""); /* Finally, merge the z/stencil values */ - if ((depth->enabled && depth->writemask) || - (stencil[0].enabled && (stencil[0].writemask || - (stencil[1].enabled && stencil[1].writemask { - - if (format_desc->block.bits <= 32) { - if (have_z && have_s) -*z_value = LLVMBuildOr(builder, z_dst, stencil_vals, ""); - else if (have_z) -*z_value = z_dst; - else -*z_value = stencil_vals; - *s_value = *z_value; - } - else { + if (format_desc->block.bits <= 32) { + if (have_z && have_s) + *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, ""); + else if (have_z) *z_value = z_dst; - *s_value = stencil_vals; - } + else + *z_value = stencil_vals; + *s_value = *z_value; + } + else { + *z_value = z_dst; + *s_value = stencil_vals; } if (s_pass_mask) @@ -1120,9 +1115,5 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm, if (depth->enabled && stencil[0].enabled) lp_build_mask_update(mask, z_pass); - - if (do_branch) - lp_build_mask_check(mask); - } diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index 1dfc75a..ae63615 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -266,13 +266,20 @@ generate_fs_loop(struct gallivm_state *gallivm, assert(zs_format_desc); if (!shader->info.base.writes_z) { - if (key->alpha.enabled || shader->info.base.uses_kill) + if (key->alpha.enabled || shader->info.base.uses_kill) { /* With alpha test and kill, can do the depth test early * and hopefully 
eliminate some quads. But need to do a * special deferred depth write once the final mask value - * is known. + * is known. This only works though if there's either no + * stencil test or the stencil value isn't written. */ -depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE; +if (key->stencil[0].enabled && (key->stencil[0].writemask || +(key->stencil[1].enabled && + key->stencil[1].writemask))) + depth_mode = LATE_DEPTH_TEST | LATE_DEPTH_WRITE; +else + depth_mode = EARLY_DEPTH_TEST | LATE_DEPTH_WRITE; + } else depth_mode = EARLY_DEPTH_TEST | EARLY_DEPTH_WRITE; } @@ -281,9 +288,9 @@ generate_fs_loop(struct gallivm_state *gallivm, } if (!(key->depth.enabled && key->depth.writemask) && - !((key->stencil[0].enabled && (key->stencil[0].writemask || + !(key->stencil[0].enabled && (key->stencil[0].writemask || (key->stencil[1].enabled && - key->stencil[1].writemask) + key->stencil[1].writ
[Mesa-dev] [PATCH 2/2] r600g/compute: Use common transfer_{map, unmap} functions for global resources
From: Tom Stellard --- src/gallium/drivers/r600/evergreen_compute.c | 68 ++-- 1 file changed, 24 insertions(+), 44 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 4d490c4..6d26b8e 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -903,67 +903,47 @@ void *r600_compute_global_transfer_map( { struct r600_context *rctx = (struct r600_context*)ctx_; struct compute_memory_pool *pool = rctx->screen->global_pool; - struct pipe_transfer *transfer = util_slab_alloc(&rctx->pool_transfers); struct r600_resource_global* buffer = (struct r600_resource_global*)resource; - uint32_t* map; - compute_memory_finalize_pending(pool, ctx_); - - assert(resource->target == PIPE_BUFFER); - - COMPUTE_DBG(rctx->screen, "* r600_compute_global_get_transfer()\n" + COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n" "level = %u, usage = %u, box(x = %u, y = %u, z = %u " "width = %u, height = %u, depth = %u)\n", level, usage, box->x, box->y, box->z, box->width, box->height, box->depth); + COMPUTE_DBG(rctx->screen, "Buffer: %u (buffer offset in global memory) " + "+ %u (box.x)\n", buffer->chunk->start_in_dw, box->x); - transfer->resource = resource; - transfer->level = level; - transfer->usage = usage; - transfer->box = *box; - transfer->stride = 0; - transfer->layer_stride = 0; - - assert(transfer->resource->target == PIPE_BUFFER); - assert(transfer->resource->bind & PIPE_BIND_GLOBAL); - assert(transfer->box.x >= 0); - assert(transfer->box.y == 0); - assert(transfer->box.z == 0); - ///TODO: do it better, mapping is not possible if the pool is too big - - COMPUTE_DBG(rctx->screen, "* r600_compute_global_transfer_map()\n"); - - if (!(map = r600_buffer_mmap_sync_with_rings(rctx, buffer->chunk->pool->bo, transfer->usage))) { - util_slab_free(&rctx->pool_transfers, transfer); - return NULL; - } + compute_memory_finalize_pending(pool, ctx_); - *ptransfer 
= transfer; + assert(resource->target == PIPE_BUFFER); + assert(resource->bind & PIPE_BIND_GLOBAL); + assert(box->x >= 0); + assert(box->y == 0); + assert(box->z == 0); - COMPUTE_DBG(rctx->screen, "Buffer: %p + %u (buffer offset in global memory) " - "+ %u (box.x)\n", map, buffer->chunk->start_in_dw, transfer->box.x); - return ((char*)(map + buffer->chunk->start_in_dw)) + transfer->box.x; + ///TODO: do it better, mapping is not possible if the pool is too big + return pipe_buffer_map_range(ctx_, (struct pipe_resource*)buffer->chunk->pool->bo, + box->x + (buffer->chunk->start_in_dw * 4), + box->width, usage, ptransfer); } void r600_compute_global_transfer_unmap( struct pipe_context *ctx_, struct pipe_transfer* transfer) { - struct r600_context *ctx = NULL; - struct r600_resource_global* buffer = NULL; - - assert(transfer->resource->target == PIPE_BUFFER); - assert(transfer->resource->bind & PIPE_BIND_GLOBAL); - - ctx = (struct r600_context *)ctx_; - buffer = (struct r600_resource_global*)transfer->resource; - - COMPUTE_DBG(ctx->screen, "* r600_compute_global_transfer_unmap()\n"); - - ctx->ws->buffer_unmap(buffer->chunk->pool->bo->cs_buf); - util_slab_free(&ctx->pool_transfers, transfer); + /* struct r600_resource_global are not real resources, they just map +* to an offset within the compute memory pool. The function +* r600_compute_global_transfer_map() maps the memory pool +* resource rather than the struct r600_resource_global passed to +* it as an argument and then initalizes ptransfer->resource with +* the memory pool resource (via pipe_buffer_map_range). +* When transfer_unamp is called it uses the memory pool's +* vtable which calls r600_buffer_transfer_map() rather than +* this function. +*/ + assert (!"This function should not be called"); } void r600_compute_global_transfer_flush_region( -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] r600g/compute: Use common transfer_{map, unmap} functions for kernel inputs
From: Tom Stellard --- src/gallium/drivers/r600/evergreen_compute.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_compute.c b/src/gallium/drivers/r600/evergreen_compute.c index 5f67759..4d490c4 100644 --- a/src/gallium/drivers/r600/evergreen_compute.c +++ b/src/gallium/drivers/r600/evergreen_compute.c @@ -271,6 +271,8 @@ void evergreen_compute_upload_input( uint32_t * global_size_start; uint32_t * local_size_start; uint32_t * kernel_parameters_start; + struct pipe_box box; + struct pipe_transfer *transfer = NULL; if (shader->input_size == 0) { return; @@ -278,11 +280,16 @@ void evergreen_compute_upload_input( if (!shader->kernel_param) { /* Add space for the grid dimensions */ - shader->kernel_param = r600_compute_buffer_alloc_vram( - ctx->screen, input_size); + shader->kernel_param = (struct r600_resource *) + pipe_buffer_create(ctx_->screen, PIPE_BIND_CUSTOM, + PIPE_USAGE_IMMUTABLE, input_size); } - num_work_groups_start = r600_buffer_mmap_sync_with_rings(ctx, shader->kernel_param, PIPE_TRANSFER_WRITE); + u_box_1d(0, input_size, &box); + num_work_groups_start = ctx_->transfer_map(ctx_, + (struct pipe_resource*)shader->kernel_param, + 0, PIPE_TRANSFER_WRITE | PIPE_TRANSFER_DISCARD_RANGE, + &box, &transfer); global_size_start = num_work_groups_start + (3 * (sizeof(uint) /4)); local_size_start = global_size_start + (3 * (sizeof(uint)) / 4); kernel_parameters_start = local_size_start + (3 * (sizeof(uint)) / 4); @@ -306,7 +313,7 @@ void evergreen_compute_upload_input( ((unsigned*)num_work_groups_start)[i]); } - ctx->ws->buffer_unmap(shader->kernel_param->cs_buf); + ctx_->transfer_unmap(ctx_, transfer); /* ID=0 is reserved for the parameters */ evergreen_cs_set_constant_buffer(ctx, 0, 0, input_size, -- 1.8.1.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] scons: Use LLVM shared library if found.
On Fri, May 17, 2013 at 2:31 PM, Jose Fonseca wrote: > > > - Original Message - >> On Fri, May 17, 2013 at 7:44 AM, Jose Fonseca wrote: >> > Vinson, >> > >> > Why is this necessary? >> > >> > (I'd prefer that LLVM is statically linked by default. ) >> > >> > Jose >> >> The SCons build fails on systems that only provide a LLVM shared >> library. 'llvm-config --libs' always enumerates the libraries instead >> of providing the shared library on such systems. >> >> $ cat /etc/redhat-release >> Fedora release 18 (Spherical Cow) >> $ llvm-config --libs >> -lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker -lLLVMArchive >> -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo -lLLVMVectorize >> -lLLVMBitWriter -lLLVMTableGen -lLLVMHexagonCodeGen -lLLVMHexagonDesc >> -lLLVMHexagonAsmPrinter -lLLVMHexagonInfo -lLLVMNVPTXCodeGen >> -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter >> -lLLVMMBlazeDisassembler -lLLVMMBlazeAsmParser -lLLVMMBlazeCodeGen >> -lLLVMMBlazeDesc -lLLVMMBlazeInfo -lLLVMMBlazeAsmPrinter >> -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMMSP430CodeGen >> -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter >> -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo >> -lLLVMCellSPUCodeGen -lLLVMCellSPUDesc -lLLVMCellSPUInfo >> -lLLVMMipsDisassembler -lLLVMMipsAsmParser -lLLVMMipsCodeGen >> -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter >> -lLLVMARMDisassembler -lLLVMARMAsmParser -lLLVMARMCodeGen >> -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMPowerPCCodeGen >> -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter >> -lLLVMSparcCodeGen -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMX86AsmParser >> -lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMX86Desc -lLLVMX86Info >> -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMR600CodeGen >> -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMR600Desc -lLLVMR600Info >> -lLLVMR600AsmPrinter -lLLVMMCDisassembler -lLLVMMCParser >> -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine >> -lLLVMTransformUtils 
-lLLVMipa -lLLVMAnalysis -lLLVMMCJIT >> -lLLVMRuntimeDyld -lLLVMExecutionEngine -lLLVMTarget -lLLVMMC >> -lLLVMObject -lLLVMCore -lLLVMSupport >> $ ls `llvm-config --libdir` >> BugpointPasses.so libclang.so libLLVM-3.2svn.so libLTO.so >> libprofile_rt.so LLVMgold.so > > Then Fedora 18's llvm-config is busted, as `llvm-config --libs` should return > libLLVM-3.2svn.so > I'm using upstream llvm git master, and the shared library isn't listed in llvm-config here either. ~/src/llvm$ llvm-config --libs -lLLVMR600CodeGen -lLLVMR600Desc -lLLVMR600Info -lLLVMR600AsmPrinter -lLLVMTableGen -lLLVMDebugInfo -lLLVMOption -lLLVMX86Disassembler -lLLVMX86AsmParser -lLLVMX86CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMX86Desc -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMIRReader -lLLVMAsmParser -lLLVMMCDisassembler -lLLVMMCParser -lLLVMInstrumentation -lLLVMArchive -lLLVMBitReader -lLLVMInterpreter -lLLVMipo -lLLVMVectorize -lLLVMLinker -lLLVMBitWriter -lLLVMMCJIT -lLLVMJIT -lLLVMCodeGen -lLLVMObjCARCOpts -lLLVMScalarOpts -lLLVMInstCombine -lLLVMTransformUtils -lLLVMipa -lLLVMAnalysis -lLLVMRuntimeDyld -lLLVMExecutionEngine -lLLVMTarget -lLLVMMC -lLLVMObject -lLLVMCore -lLLVMSupport ~/src/llvm$ which llvm-config /usr/local/bin/llvm-config ~/src/llvm$ ls /usr/local/lib/libLLVM* /usr/local/lib/libLLVM-3.4svn.so /usr/local/lib/libLLVMAnalysis.a /usr/local/lib/libLLVMArchive.a /usr/local/lib/libLLVMAsmParser.a I'm guessing that 'llvm-config --libs' is only listing static libraries --Aaron > So I believe this issue should be filled against Fedora, not worked around > here. Honestly, only shipping LLVM in a .so is already a bad idea, but > breaking llvm-config is even worse -- what the point of scripts like > llvm-config if their output can't be trusted? > > BTW, configure.ac doesn't have this hack. Does it fail the same way too? 
> > Jose > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] Gen6+ hardware contexts & query object improvements
Kenneth Graunke writes: > Hello! > > This patch series bumps the kernel requirement to 3.6 for Gen6+, > meaning that we actually get to rely on hardware context support. > That's a little painful, but even Debian ships 3.8 now, and this > isn't going to make it into an actual release for several more > months. > > It then splits our query code into Gen4-5 and Gen6+ versions. The new > Gen6+ version is a lot simpler since hardware contexts guarantee that > our statistics registers don't get polluted with data from other > programs running on the system. It should be more efficient, which > may help games like Minecraft (though I haven't measured). > > Finally, it implements the GL_PRIMITIVES_GENERATED and > GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries via hardware counters > rather than manually counting on the CPU. This paves the way for > geometry shader support (which can output multiple primitives, breaking > our CPU-side tracking), and should allow us to enable hardware primitive > restart in a few more cases once a few more things are tidied. > > The next step is to eliminate the use of SOL reset and save/restore the > transform feedback offsets directly. Then we can turn on hardware > primitive restart more aggressively and implement a few more transform > feedback extensions. Uncommented patches, or obvious changes to the commented patches are: Reviewed-by: Eric Anholt pgpBjWfUsoRLd.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 08/10] i965: Handle rasterizer discard in the clipper rather than SOL on Gen7.
Kenneth Graunke writes: > In order to implement the GL_PRIMITIVES_GENERATED query in a sane > fashion on our hardware, we can't discard primitives until the clipper. > The patch after next explains the rationale. > > By setting the clipper to REJECT_ALL mode, all primitives get thrown away, > so rendering is still appropriately disabled. > > This may negatively impact performance in the rasterizer discard case, > but it's unclear how much and this hasn't been observed to be a > bottleneck in any application we've looked at. The clipper is the very > next stage in the pipeline, so I don't think it will be terrible. I'm tempted to drop a perf_debug() in this and the next patch to remind us, if some important app starts doing discard, that we could use the previous rasterizer discard support outside of a GL_PRIMITIVES_GENERATED query. pgpm9IL5zea2x.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 03/10] i965: Delete Gen7+ check for Kernel 3.3 now that we require 3.6+.
Kenneth Graunke writes: > It's just not necessary. I'd squash this with the previous, and lower-case "Kernel" -- it seems to be the convention when it's not part of some other proper noun. pgpObc4KqS0OC.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 07/10] i965: Disable clipper statistics when meta operations are in progress.
Kenneth Graunke writes: > We don't currently use the clipper statistics, but we'll soon use > CL_INVOCATIONS_COUNT to implement the GL_PRIMITIVES_GENERATED query. > The number of primitives generated is not supposed to be altered during > operations such as glGenerateMipmap. > > Prevents spec/EXT_transform_feedback/generatemipmap prims_generated > from breaking when we start using pipeline statistics registers to > implement the GL_PRIMITIVES_GENERATED query in a few commits. Missing state flagging -- I guess we should check for new _mesa_meta_in_progress() at the start of brw_state_upload, and flag a BRW_NEW_META_IN_PROGRESS for it. pgpzsXQ4J5i8d.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Add cases for ir_binop_vector_extract that assert.
Eric Anholt writes: > Kenneth Graunke writes: > >> do_vec_index_to_swizzle() should remove any vector extract operations >> with a constant index. It's unconditionally called from >> do_common_optimization(). >> >> do_vec_index_to_cond_assign() should remove the rest, and it is >> unconditionally called from brw_link_shader(). This means that we >> should never see ir_binop_vector_extract in the backend. >> >> Silences compiler warnings. > > I found warnings also in brw_fs_visitor.cpp for the same, plus > vector_insert, and was about to send that patch out. I'm confused how > you aren't seeing those. Err, didn't see patch 2. Still, brw_fs_visitor.cpp. pgpb66QzZhLT7.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] A simple GLES2 shader application performance slower with h/w renderer
> vblank_mode was broken for a long time in EGL, but current 9.0 and 9.1 > have it fixed. Not sure what version you're on. I am using version 8.0.5. I have been unable to build 9.1 for the softpipe renderer using the same options that I was using for building 8.0.5. I have posted separately on this mailing list about that, but I have not received any response so far. It would be great if you could suggest something. Let me know and I can forward you the thread where I had posted about this issue. Thanks for pointing out that it has been fixed in the latest revision. Regards, Divick ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Add cases for ir_binop_vector_extract that assert.
Kenneth Graunke writes: > do_vec_index_to_swizzle() should remove any vector extract operations > with a constant index. It's unconditionally called from > do_common_optimization(). > > do_vec_index_to_cond_assign() should remove the rest, and it is > unconditionally called from brw_link_shader(). This means that we > should never see ir_binop_vector_extract in the backend. > > Silences compiler warnings. I found warnings also in brw_fs_visitor.cpp for the same, plus vector_insert, and was about to send that patch out. I'm confused how you aren't seeing those. pgpnmhn9FSfoy.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] scons: Use LLVM shared library if found.
- Original Message - > On Fri, May 17, 2013 at 7:44 AM, Jose Fonseca wrote: > > Vinson, > > > > Why is this necessary? > > > > (I'd prefer that LLVM is statically linked by default. ) > > > > Jose > > The SCons build fails on systems that only provide a LLVM shared > library. 'llvm-config --libs' always enumerates the libraries instead > of providing the shared library on such systems. > > $ cat /etc/redhat-release > Fedora release 18 (Spherical Cow) > $ llvm-config --libs > -lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker -lLLVMArchive > -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo -lLLVMVectorize > -lLLVMBitWriter -lLLVMTableGen -lLLVMHexagonCodeGen -lLLVMHexagonDesc > -lLLVMHexagonAsmPrinter -lLLVMHexagonInfo -lLLVMNVPTXCodeGen > -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter > -lLLVMMBlazeDisassembler -lLLVMMBlazeAsmParser -lLLVMMBlazeCodeGen > -lLLVMMBlazeDesc -lLLVMMBlazeInfo -lLLVMMBlazeAsmPrinter > -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMMSP430CodeGen > -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter > -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo > -lLLVMCellSPUCodeGen -lLLVMCellSPUDesc -lLLVMCellSPUInfo > -lLLVMMipsDisassembler -lLLVMMipsAsmParser -lLLVMMipsCodeGen > -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter > -lLLVMARMDisassembler -lLLVMARMAsmParser -lLLVMARMCodeGen > -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMPowerPCCodeGen > -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter > -lLLVMSparcCodeGen -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMX86AsmParser > -lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMX86Desc -lLLVMX86Info > -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMR600CodeGen > -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMR600Desc -lLLVMR600Info > -lLLVMR600AsmPrinter -lLLVMMCDisassembler -lLLVMMCParser > -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine > -lLLVMTransformUtils -lLLVMipa -lLLVMAnalysis -lLLVMMCJIT > -lLLVMRuntimeDyld -lLLVMExecutionEngine -lLLVMTarget -lLLVMMC > 
-lLLVMObject -lLLVMCore -lLLVMSupport > $ ls `llvm-config --libdir` > BugpointPasses.so libclang.so libLLVM-3.2svn.so libLTO.so > libprofile_rt.so LLVMgold.so Then Fedora 18's llvm-config is busted, as `llvm-config --libs` should return libLLVM-3.2svn.so. So I believe this issue should be filed against Fedora, not worked around here. Honestly, only shipping LLVM in a .so is already a bad idea, but breaking llvm-config is even worse -- what's the point of scripts like llvm-config if their output can't be trusted? BTW, configure.ac doesn't have this hack. Does it fail the same way too? Jose ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] scons: Use LLVM shared library if found.
On Fri, May 17, 2013 at 7:44 AM, Jose Fonseca wrote: > Vinson, > > Why is this necessary? > > (I'd prefer that LLVM is statically linked by default. ) > > Jose The SCons build fails on systems that only provide a LLVM shared library. 'llvm-config --libs' always enumerates the libraries instead of providing the shared library on such systems. $ cat /etc/redhat-release Fedora release 18 (Spherical Cow) $ llvm-config --libs -lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker -lLLVMArchive -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo -lLLVMVectorize -lLLVMBitWriter -lLLVMTableGen -lLLVMHexagonCodeGen -lLLVMHexagonDesc -lLLVMHexagonAsmPrinter -lLLVMHexagonInfo -lLLVMNVPTXCodeGen -lLLVMNVPTXDesc -lLLVMNVPTXInfo -lLLVMNVPTXAsmPrinter -lLLVMMBlazeDisassembler -lLLVMMBlazeAsmParser -lLLVMMBlazeCodeGen -lLLVMMBlazeDesc -lLLVMMBlazeInfo -lLLVMMBlazeAsmPrinter -lLLVMCppBackendCodeGen -lLLVMCppBackendInfo -lLLVMMSP430CodeGen -lLLVMMSP430Desc -lLLVMMSP430Info -lLLVMMSP430AsmPrinter -lLLVMXCoreCodeGen -lLLVMXCoreDesc -lLLVMXCoreInfo -lLLVMCellSPUCodeGen -lLLVMCellSPUDesc -lLLVMCellSPUInfo -lLLVMMipsDisassembler -lLLVMMipsAsmParser -lLLVMMipsCodeGen -lLLVMMipsDesc -lLLVMMipsInfo -lLLVMMipsAsmPrinter -lLLVMARMDisassembler -lLLVMARMAsmParser -lLLVMARMCodeGen -lLLVMARMDesc -lLLVMARMInfo -lLLVMARMAsmPrinter -lLLVMPowerPCCodeGen -lLLVMPowerPCDesc -lLLVMPowerPCInfo -lLLVMPowerPCAsmPrinter -lLLVMSparcCodeGen -lLLVMSparcDesc -lLLVMSparcInfo -lLLVMX86AsmParser -lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMX86Desc -lLLVMX86Info -lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMR600CodeGen -lLLVMSelectionDAG -lLLVMAsmPrinter -lLLVMR600Desc -lLLVMR600Info -lLLVMR600AsmPrinter -lLLVMMCDisassembler -lLLVMMCParser -lLLVMInterpreter -lLLVMCodeGen -lLLVMScalarOpts -lLLVMInstCombine -lLLVMTransformUtils -lLLVMipa -lLLVMAnalysis -lLLVMMCJIT -lLLVMRuntimeDyld -lLLVMExecutionEngine -lLLVMTarget -lLLVMMC -lLLVMObject -lLLVMCore -lLLVMSupport $ ls `llvm-config --libdir` BugpointPasses.so libclang.so 
libLLVM-3.2svn.so libLTO.so libprofile_rt.so LLVMgold.so > > - Original Message - >> This patch fixes SCons builds on Fedora 18. >> >> Signed-off-by: Vinson Lee >> --- >> scons/llvm.py | 10 +- >> 1 file changed, 9 insertions(+), 1 deletion(-) >> >> diff --git a/scons/llvm.py b/scons/llvm.py >> index 7cd609c..432ece6 100644 >> --- a/scons/llvm.py >> +++ b/scons/llvm.py >> @@ -198,7 +198,15 @@ def generate(env): >> if llvm_version >= distutils.version.LooseVersion('3.2'): >> env.Append(CXXFLAGS = ('-fno-rtti',)) >> >> -env.ParseConfig('llvm-config --libs ' + ' '.join(components)) >> +llvm_shared_library = os.path.join( >> +env.backtick('llvm-config --libdir').strip(), >> +'libLLVM-%s%s' % (llvm_version, env['SHLIBSUFFIX']) >> +) >> +if os.path.exists(llvm_shared_library): >> +env.Append(LIBS = ['LLVM-%s' % llvm_version]) >> +else: >> +env.ParseConfig('llvm-config --libs ' + ' >> '.join(components)) >> + >> env.ParseConfig('llvm-config --ldflags') >> except OSError: >> print 'scons: llvm-config version %s failed' % llvm_version >> -- >> 1.8.2.1 >> >> ___ >> mesa-dev mailing list >> mesa-dev@lists.freedesktop.org >> http://lists.freedesktop.org/mailman/listinfo/mesa-dev >> ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] A simple GLES2 shader application performance slower with h/w renderer
Divick Kishore writes: >>> By default we sync to vblank, which for you is 60. The software >>> rasterizer lacks this feature. > > I meant that even with h/w rasterizer I get fps = 60 with vblank=0 set. > > >> The weird thing is that he said he ran it with vblank_mode=0. Makes me >> wonder if we have a bug in our handling of that with EGL still. > > I can attach the sample application along if it helps to reproduce the issue. vblank_mode was broken for a long time in EGL, but current 9.0 and 9.1 have it fixed. Not sure what version you're on. pgpQiKeBNnTPX.pgp Description: PGP signature ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 1/2] i965: Add cases for ir_binop_vector_extract that assert.
On Fri, May 17, 2013 at 10:43 AM, Kenneth Graunke wrote: > do_vec_index_to_swizzle() should remove any vector extract operations > with a constant index. It's unconditionally called from > do_common_optimization(). > > do_vec_index_to_cond_assign() should remove the rest, and it is > unconditionally called from brw_link_shader(). This means that we > should never see ir_binop_vector_extract in the backend. > > Silences compiler warnings. > > Cc: Ian Romanick > Cc: Paul Berry > Signed-off-by: Kenneth Graunke > --- > src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 1 + > src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 > 2 files changed, 5 insertions(+) > > diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp > b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp > index 0f3d4ab..ea714ec 100644 > --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp > +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp > @@ -402,6 +402,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment > *ir) > case ir_unop_unpack_unorm_2x16: > case ir_unop_unpack_unorm_4x8: > case ir_unop_unpack_half_2x16: > + case ir_binop_vector_extract: > case ir_quadop_bitfield_insert: > case ir_quadop_vector: >assert(!"should have been lowered"); > diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > index f14529a..f7dd333 100644 > --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp > @@ -1671,6 +1671,10 @@ vec4_visitor::visit(ir_expression *ir) >break; > } > > + case ir_binop_vector_extract: > + assert(!"should have been lowered by vec_index_to_cond_assign"); > + break; > + > case ir_triop_lrp: >op[0] = fix_3src_operand(op[0]); >op[1] = fix_3src_operand(op[1]); > -- > 1.8.2.3 Both are Reviewed-by: Matt Turner ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] i965: Add cases for ir_binop_vector_extract that assert.
do_vec_index_to_swizzle() should remove any vector extract operations with a constant index. It's unconditionally called from do_common_optimization(). do_vec_index_to_cond_assign() should remove the rest, and it is unconditionally called from brw_link_shader(). This means that we should never see ir_binop_vector_extract in the backend. Silences compiler warnings. Cc: Ian Romanick Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 1 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 2 files changed, 5 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 0f3d4ab..ea714ec 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -402,6 +402,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_unpack_unorm_2x16: case ir_unop_unpack_unorm_4x8: case ir_unop_unpack_half_2x16: + case ir_binop_vector_extract: case ir_quadop_bitfield_insert: case ir_quadop_vector: assert(!"should have been lowered"); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f14529a..f7dd333 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1671,6 +1671,10 @@ vec4_visitor::visit(ir_expression *ir) break; } + case ir_binop_vector_extract: + assert(!"should have been lowered by vec_index_to_cond_assign"); + break; + case ir_triop_lrp: op[0] = fix_3src_operand(op[0]); op[1] = fix_3src_operand(op[1]); -- 1.8.2.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] i965: Add cases for ir_triop_vector_insert that assert.
brw_link_shader() unconditionally calls lower_vector_insert() with true as the second parameter. This means that both constant and variable indexed expressions will get lowered, so we should never see this in the backend. Cc: Ian Romanick Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp | 1 + src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 2 files changed, 5 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index ea714ec..4afae24 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -403,6 +403,7 @@ ir_channel_expressions_visitor::visit_leave(ir_assignment *ir) case ir_unop_unpack_unorm_4x8: case ir_unop_unpack_half_2x16: case ir_binop_vector_extract: + case ir_triop_vector_insert: case ir_quadop_bitfield_insert: case ir_quadop_vector: assert(!"should have been lowered"); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index f7dd333..b2f8f00 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -1702,6 +1702,10 @@ vec4_visitor::visit(ir_expression *ir) emit(BFE(result_dst, op[2], op[1], op[0])); break; + case ir_triop_vector_insert: + assert(!"should have been lowered by lower_vector_insert"); + break; + case ir_quadop_bitfield_insert: assert(!"not reached: should be handled by " "bitfield_insert_to_bfm_bfi\n"); -- 1.8.2.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] mesa: declare UniformBufferBindings as an array with a static size
Some Gallium drivers were crashing, because the array was not large enough. v2: clamp the per-shader maximum in st/mesa, then sum them all up NOTE: This is a candidate for the stable branches. --- src/mesa/main/bufferobj.c | 10 ++ src/mesa/main/config.h |3 +++ src/mesa/main/mtypes.h |3 ++- src/mesa/state_tracker/st_extensions.c |7 ++- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/mesa/main/bufferobj.c b/src/mesa/main/bufferobj.c index 1566cb4..ffb67b9 100644 --- a/src/mesa/main/bufferobj.c +++ b/src/mesa/main/bufferobj.c @@ -619,13 +619,10 @@ _mesa_init_buffer_objects( struct gl_context *ctx ) _mesa_reference_buffer_object(ctx, &ctx->CopyWriteBuffer, ctx->Shared->NullBufferObj); - ctx->UniformBufferBindings = calloc(ctx->Const.MaxUniformBufferBindings, - sizeof(*ctx->UniformBufferBindings)); - _mesa_reference_buffer_object(ctx, &ctx->UniformBuffer, ctx->Shared->NullBufferObj); - for (i = 0; i < ctx->Const.MaxUniformBufferBindings; i++) { + for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) { _mesa_reference_buffer_object(ctx, &ctx->UniformBufferBindings[i].BufferObject, ctx->Shared->NullBufferObj); @@ -647,14 +644,11 @@ _mesa_free_buffer_objects( struct gl_context *ctx ) _mesa_reference_buffer_object(ctx, &ctx->UniformBuffer, NULL); - for (i = 0; i < ctx->Const.MaxUniformBufferBindings; i++) { + for (i = 0; i < MAX_COMBINED_UNIFORM_BUFFERS; i++) { _mesa_reference_buffer_object(ctx, &ctx->UniformBufferBindings[i].BufferObject, NULL); } - - free(ctx->UniformBufferBindings); - ctx->UniformBufferBindings = NULL; } static bool diff --git a/src/mesa/main/config.h b/src/mesa/main/config.h index ea87b75..4a4fdc9 100644 --- a/src/mesa/main/config.h +++ b/src/mesa/main/config.h @@ -168,6 +168,9 @@ /*@{*/ #define MAX_PROGRAM_LOCAL_PARAMS 4096 #define MAX_UNIFORMS 4096 +#define MAX_UNIFORM_BUFFERS15 /* + 1 default uniform buffer */ +/* 6 is for vertex, hull, domain, geometry, fragment, and compute shader. 
*/ +#define MAX_COMBINED_UNIFORM_BUFFERS (MAX_UNIFORM_BUFFERS * 6) /*@}*/ /** diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 817633c..96c06ae 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -3527,7 +3527,8 @@ struct gl_context * associated with uniform blocks by glUniformBlockBinding()'s state in the * shader program. */ - struct gl_uniform_buffer_binding *UniformBufferBindings; + struct gl_uniform_buffer_binding + UniformBufferBindings[MAX_COMBINED_UNIFORM_BUFFERS]; /*@}*/ diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index 982e652..46acc87 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -197,6 +197,7 @@ void st_init_limits(struct st_context *st) screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_CONST_BUFFERS); if (pc->MaxUniformBlocks) pc->MaxUniformBlocks -= 1; /* The first one is for ordinary uniforms. */ + pc->MaxUniformBlocks = _min(pc->MaxUniformBlocks, MAX_UNIFORM_BUFFERS); pc->MaxCombinedUniformComponents = (pc->MaxUniformComponents + c->MaxUniformBlockSize / 4 * @@ -279,15 +280,11 @@ void st_init_limits(struct st_context *st) st->ctx->Extensions.ARB_uniform_buffer_object = GL_TRUE; c->UniformBufferOffsetAlignment = screen->get_param(screen, PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT); - /* FIXME: _mesa_init_buffer_objects() already has been, and - * ctx->UniformBufferBindings allocated, so unfortunately we can't just - * change MaxUniformBufferBindings a posteriori. */ -#if 0 c->MaxCombinedUniformBlocks = c->MaxUniformBufferBindings = c->VertexProgram.MaxUniformBlocks + c->GeometryProgram.MaxUniformBlocks + c->FragmentProgram.MaxUniformBlocks; -#endif + assert(c->MaxCombinedUniformBlocks <= MAX_COMBINED_UNIFORM_BUFFERS); } } -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 10/10] i965: Implement transform feedback query support in hardware on Gen6+.
Now that we have hardware contexts and can use MI_STORE_REGISTER_MEM, we can use the GPU's pipeline statistics counters rather than going out of our way to count primitives in software. Aside from being simpler, this also paves the way for Geometry Shaders, which can output an arbitrary number of primitives on the GPU. The GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN query is easy: it corresponds to the SO_NUM_PRIMS_WRITTEN/SO_NUM_PRIMS_WRITTEN0_IVB counters. The GL_PRIMITIVES_GENERATED query is trickier. Gen provides several statistics registers which /almost/ match the semantics required: - IA_PRIMITIVES_COUNT The number of primitives fetched by the VF or IA (input assembler). This undercounts when GS is enabled, as it can output many primitives. - GS_PRIMITIVES_COUNT The number of primitives output by the GS. Unfortunately, this doesn't increment unless the GS unit is actually enabled, and it usually isn't. - SO_PRIM_STORAGE_NEEDED*_IVB The amount of space needed to write primitives output by transform feedback. These naturally only work when transform feedback is on. We'd also have to add the counters for all four streams. - CL_INVOCATION_COUNT The number of primitives processed by the clipper. This doesn't work if the GS or SOL throw away primitives for rasterizer discard. However, it does increment even if the clipper is in REJECT_ALL mode. Dynamically switching between counters would be painfully complicated, especially since GS, rasterizer discard, and transform feedback can all be switched on and off repeatedly during a single query. The most usable counter is CL_INVOCATION_COUNT. The previous two patches reworked rasterizer discard support so that all primitives hit the clipper, making this work. 
Cc: Eric Anholt Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen6_queryobj.c | 105 +++--- 1 file changed, 66 insertions(+), 39 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index 28af8d7..a032227 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -94,6 +94,57 @@ write_depth_count(struct intel_context *intel, drm_intel_bo *query_bo, int idx) ADVANCE_BATCH(); } +/* + * Write an arbitrary 64-bit register to a buffer via MI_STORE_REGISTER_MEM. + * + * Only TIMESTAMP and PS_DEPTH_COUNT have special PIPE_CONTROL support; other + * counters have to be read via the generic MI_STORE_REGISTER_MEM. This + * function also performs a pipeline flush for proper synchronization. + */ +static void +write_reg(struct intel_context *intel, + drm_intel_bo *query_bo, uint32_t reg, int idx) +{ + assert(intel->gen >= 6); + + intel_batchbuffer_emit_mi_flush(intel); + + /* MI_STORE_REGISTER_MEM only stores a single 32-bit value, so to +* read a full 64-bit register, we need to do two of them. 
+*/ + BEGIN_BATCH(3); + OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); + OUT_BATCH(reg); + OUT_RELOC(query_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + idx * sizeof(uint64_t)); + ADVANCE_BATCH(); + + BEGIN_BATCH(3); + OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); + OUT_BATCH(reg + sizeof(uint32_t)); + OUT_RELOC(query_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, + sizeof(uint32_t) + idx * sizeof(uint64_t)); + ADVANCE_BATCH(); +} + +static void +write_primitives_generated(struct intel_context *intel, + drm_intel_bo *query_bo, int idx) +{ + write_reg(intel, query_bo, CL_INVOCATION_COUNT, idx); +} + +static void +write_xfb_primitives_written(struct intel_context *intel, + drm_intel_bo *query_bo, int idx) +{ + if (intel->gen >= 7) { + write_reg(intel, query_bo, SO_NUM_PRIMS_WRITTEN0_IVB, idx); + } else { + write_reg(intel, query_bo, SO_NUM_PRIMS_WRITTEN, idx); + } +} + /** * Wait on the query object's BO and calculate the final result. */ @@ -152,21 +203,20 @@ gen6_queryobj_get_results(struct gl_context *ctx, query->Base.Result &= (1ull << 36) - 1; break; - case GL_SAMPLES_PASSED_ARB: - query->Base.Result += results[1] - results[0]; - break; - case GL_ANY_SAMPLES_PASSED: case GL_ANY_SAMPLES_PASSED_CONSERVATIVE: - query->Base.Result = results[0] != results[1]; + if (results[0] != results[1]) + query->Base.Result = true; break; + case GL_SAMPLES_PASSED_ARB: case GL_PRIMITIVES_GENERATED: case GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN: - /* We don't actually query the hardware for this value, so query->bo - * should always be NULL and execution should never reach here. + /* We need to use += rather than = here since some BLT-based operations + * may have added additional samples to our occlusion query value. + * It shouldn't matter for geometry queries, but is harmless. */ -
[Mesa-dev] [PATCH 09/10] i965: Handle rasterizer discard in the clipper rather than GS on Gen6.
This has more of a negative impact than the previous patch, as on Gen6 passing primitives through to the clipper means we actually have to make the GS thread write them to the URB. I don't see another good solution though, and rasterizer discard is not the most common of cases, so hopefully it won't be too terrible. Cc: Eric Anholt Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_gs.c | 9 + src/mesa/drivers/dri/i965/brw_gs_emit.c | 30 - src/mesa/drivers/dri/i965/gen6_clip_state.c | 6 +- 3 files changed, 6 insertions(+), 39 deletions(-) This patch prevents breakage in patch 10. See patch 10 for the rationale. diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index a432b76..f354dd9 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -214,12 +214,6 @@ static void populate_key( struct brw_context *brw, swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset]; } } - /* On Gen6, GS is also used for rasterizer discard. */ - /* BRW_NEW_RASTERIZER_DISCARD */ - if (ctx->RasterDiscard) { - key->need_gs_prog = true; - key->rasterizer_discard = true; - } } else { /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP * into simpler primitives. @@ -259,8 +253,7 @@ const struct brw_tracked_state brw_gs_prog = { .dirty = { .mesa = (_NEW_LIGHT), .brw = (BRW_NEW_PRIMITIVE | -BRW_NEW_TRANSFORM_FEEDBACK | -BRW_NEW_RASTERIZER_DISCARD), +BRW_NEW_TRANSFORM_FEEDBACK), .cache = CACHE_NEW_VS_PROG }, .emit = brw_upload_gs_prog diff --git a/src/mesa/drivers/dri/i965/brw_gs_emit.c b/src/mesa/drivers/dri/i965/brw_gs_emit.c index 87ff9f0..cbfc6aa 100644 --- a/src/mesa/drivers/dri/i965/brw_gs_emit.c +++ b/src/mesa/drivers/dri/i965/brw_gs_emit.c @@ -201,28 +201,6 @@ static void brw_gs_emit_vue(struct brw_gs_compile *c, } /** - * De-allocate the URB entry that was previously allocated to this thread - * (without writing any vertex data to it), and terminate the thread. 
This is - * used to implement RASTERIZER_DISCARD functionality. - */ -static void brw_gs_terminate(struct brw_gs_compile *c) -{ - struct brw_compile *p = &c->func; - brw_urb_WRITE(p, - retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), /* dest */ - 0, /* msg_reg_nr */ - c->reg.header, /* src0 */ - false, /* allocate */ - false, /* used */ - 1, /* msg_length */ - 0, /* response_length */ - true, /* eot */ - true, /* writes_complete */ - 0, /* offset */ - BRW_URB_SWIZZLE_NONE); -} - -/** * Send an FF_SYNC message to ensure that all previously spawned GS threads * have finished sending primitives down the pipeline, and to allocate a URB * entry for the first output vertex. Only needed when intel->needs_ff_sync @@ -484,14 +462,6 @@ gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key, brw_gs_ff_sync(c, 1); - /* If RASTERIZER_DISCARD is enabled, we have nothing further to do, so -* release the URB that was just allocated, and terminate the thread. -*/ - if (key->rasterizer_discard) { - brw_gs_terminate(c); - return; - } - brw_gs_overwrite_header_dw2_from_r0(c); switch (num_verts) { case 1: diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index 1811a3f..b5e22dc 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -77,6 +77,10 @@ upload_clip_state(struct brw_context *brw) dw2 |= GEN6_CLIP_GB_TEST; } + /* BRW_NEW_RASTERIZER_DISCARD */ + if (ctx->RasterDiscard) + dw2 |= GEN6_CLIP_MODE_REJECT_ALL; + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); OUT_BATCH(dw1); @@ -94,7 +98,7 @@ upload_clip_state(struct brw_context *brw) const struct brw_tracked_state gen6_clip_state = { .dirty = { .mesa = _NEW_TRANSFORM | _NEW_LIGHT | _NEW_BUFFERS, - .brw = (BRW_NEW_CONTEXT), + .brw = BRW_NEW_CONTEXT | BRW_NEW_RASTERIZER_DISCARD, .cache = CACHE_NEW_WM_PROG }, .emit = upload_clip_state, -- 1.8.2.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org 
http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 08/10] i965: Handle rasterizer discard in the clipper rather than SOL on Gen7.
In order to implement the GL_PRIMITIVES_GENERATED query in a sane fashion on our hardware, we can't discard primitives until the clipper. The patch after next explains the rationale. By setting the clipper to REJECT_ALL mode, all primitives get thrown away, so rendering is still appropriately disabled. This may negatively impact performance in the rasterizer discard case, but it's unclear how much and this hasn't been observed to be a bottleneck in any application we've looked at. The clipper is the very next stage in the pipeline, so I don't think it will be terrible. Cc: Eric Anholt Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen7_clip_state.c | 6 +- src/mesa/drivers/dri/i965/gen7_sol_state.c | 7 +-- 2 files changed, 6 insertions(+), 7 deletions(-) This patch prevents breakage in patch 10. See patch 10 for the rationale. diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c b/src/mesa/drivers/dri/i965/gen7_clip_state.c index 2aa8c7f..68c08a3 100644 --- a/src/mesa/drivers/dri/i965/gen7_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c @@ -102,6 +102,10 @@ upload_clip_state(struct brw_context *brw) dw2 |= GEN6_CLIP_GB_TEST; } + /* BRW_NEW_RASTERIZER_DISCARD */ + if (ctx->RasterDiscard) + dw2 |= GEN6_CLIP_MODE_REJECT_ALL; + BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); OUT_BATCH(dw1); @@ -122,7 +126,7 @@ const struct brw_tracked_state gen7_clip_state = { _NEW_POLYGON | _NEW_LIGHT | _NEW_TRANSFORM), - .brw = BRW_NEW_CONTEXT, + .brw = BRW_NEW_CONTEXT | BRW_NEW_RASTERIZER_DISCARD, .cache = CACHE_NEW_WM_PROG }, .emit = upload_clip_state, diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c index f570336..1e484dc 100644 --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -195,10 +195,6 @@ upload_3dstate_streamout(struct brw_context *brw, bool active, uint32_t dw1 = 0, dw2 = 0; int i; - /* BRW_NEW_RASTERIZER_DISCARD 
*/ - if (ctx->RasterDiscard) - dw1 |= SO_RENDERING_DISABLE; - if (active) { int urb_entry_read_offset = 0; int urb_entry_read_length = (vue_map->num_slots + 1) / 2 - @@ -267,8 +263,7 @@ const struct brw_tracked_state gen7_sol_state = { .brw = (BRW_NEW_BATCH | BRW_NEW_VERTEX_PROGRAM | BRW_NEW_VUE_MAP_GEOM_OUT | -BRW_NEW_TRANSFORM_FEEDBACK | -BRW_NEW_RASTERIZER_DISCARD) +BRW_NEW_TRANSFORM_FEEDBACK) }, .emit = upload_sol_state, }; -- 1.8.2.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 07/10] i965: Disable clipper statistics when meta operations are in progress.
We don't currently use the clipper statistics, but we'll soon use CL_INVOCATION_COUNT to implement the GL_PRIMITIVES_GENERATED query. The number of primitives generated is not supposed to be altered during operations such as glGenerateMipmap. Prevents spec/EXT_transform_feedback/generatemipmap prims_generated from breaking when we start using pipeline statistics registers to implement the GL_PRIMITIVES_GENERATED query in a few commits. Cc: Eric Anholt Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen6_clip_state.c | 4 +++- src/mesa/drivers/dri/i965/gen7_clip_state.c | 6 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_clip_state.c b/src/mesa/drivers/dri/i965/gen6_clip_state.c index 51ffbae..1811a3f 100644 --- a/src/mesa/drivers/dri/i965/gen6_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen6_clip_state.c @@ -30,6 +30,7 @@ #include "brw_defines.h" #include "brw_util.h" #include "intel_batchbuffer.h" +#include "drivers/common/meta.h" #include "main/fbobject.h" static void @@ -37,6 +38,7 @@ upload_clip_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; + uint32_t dw1 = _mesa_meta_in_progress(ctx) ? 
0 : GEN6_CLIP_STATISTICS_ENABLE; uint32_t dw2 = 0; /* _NEW_BUFFERS */ @@ -77,7 +79,7 @@ upload_clip_state(struct brw_context *brw) BEGIN_BATCH(4); OUT_BATCH(_3DSTATE_CLIP << 16 | (4 - 2)); - OUT_BATCH(GEN6_CLIP_STATISTICS_ENABLE); + OUT_BATCH(dw1); OUT_BATCH(GEN6_CLIP_ENABLE | GEN6_CLIP_API_OGL | GEN6_CLIP_MODE_NORMAL | diff --git a/src/mesa/drivers/dri/i965/gen7_clip_state.c b/src/mesa/drivers/dri/i965/gen7_clip_state.c index 29a5ed5..2aa8c7f 100644 --- a/src/mesa/drivers/dri/i965/gen7_clip_state.c +++ b/src/mesa/drivers/dri/i965/gen7_clip_state.c @@ -26,6 +26,7 @@ #include "brw_defines.h" #include "brw_util.h" #include "intel_batchbuffer.h" +#include "drivers/common/meta.h" #include "main/fbobject.h" static void @@ -33,12 +34,15 @@ upload_clip_state(struct brw_context *brw) { struct intel_context *intel = &brw->intel; struct gl_context *ctx = &intel->ctx; - uint32_t dw1 = GEN6_CLIP_STATISTICS_ENABLE, dw2 = 0; + uint32_t dw1 = 0, dw2 = 0; /* _NEW_BUFFERS */ struct gl_framebuffer *fb = ctx->DrawBuffer; bool render_to_fbo = _mesa_is_user_fbo(fb); + if (!_mesa_meta_in_progress(ctx)) + dw1 |= GEN6_CLIP_STATISTICS_ENABLE; + /* CACHE_NEW_WM_PROG */ if (brw->wm.prog_data->barycentric_interp_modes & BRW_WM_NONPERSPECTIVE_BARYCENTRIC_BITS) { -- 1.8.2.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 06/10] i965: Add #defines for the pipeline statistics counter registers.
These come from the Ivybridge PRM, Volume 1, Part 3. Cc: Eric Anholt Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_reg.h | 13 + 1 file changed, 13 insertions(+) diff --git a/src/mesa/drivers/dri/intel/intel_reg.h b/src/mesa/drivers/dri/intel/intel_reg.h index acbbcfb..1cfb464 100644 --- a/src/mesa/drivers/dri/intel/intel_reg.h +++ b/src/mesa/drivers/dri/intel/intel_reg.h @@ -266,6 +266,19 @@ #define FENCE_XMAJOR 1 #define FENCE_YMAJOR 2 +/* Pipeline Statistics Counter Registers */ +#define IA_VERTICES_COUNT 0x2310 +#define IA_PRIMITIVES_COUNT 0x2318 +#define VS_INVOCATION_COUNT 0x2320 +#define HS_INVOCATION_COUNT 0x2300 +#define DS_INVOCATION_COUNT 0x2308 +#define GS_INVOCATION_COUNT 0x2328 +#define GS_PRIMITIVES_COUNT 0x2330 +#define CL_INVOCATION_COUNT 0x2338 +#define CL_PRIMITIVES_COUNT 0x2340 +#define PS_INVOCATION_COUNT 0x2348 +#define PS_DEPTH_COUNT 0x2350 + #define SO_NUM_PRIM_STORAGE_NEEDED 0x2280 #define SO_PRIM_STORAGE_NEEDED0_IVB0x5240 #define SO_PRIM_STORAGE_NEEDED1_IVB0x5248 -- 1.8.2.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 05/10] i965: Rely on hardware contexts for query objects on Gen6+.
Hardware contexts greatly simplify the query object code. The pipeline statistics counters get saved and restored with the context, which means that we don't need to worry about other workloads polluting them. This means that we can simply write a single pair of values (one at BeginQuery and one at EndQuery) rather than a series of pairs. This also means we don't need to worry about the BO getting full. We also don't need to delay BO allocation and starting snapshot until the first draw. The generation split here is a little off: technically, Ironlake can also support hardware contexts. However, the kernel currently doesn't, and even if it were to do so someday, we'd need to wait a while before bumping the kernel requirement to take advantage of it. Cc: Eric Anholt Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 1 + src/mesa/drivers/dri/i965/brw_context.c| 2 + src/mesa/drivers/dri/i965/brw_context.h| 3 + src/mesa/drivers/dri/i965/brw_queryobj.c | 83 ++- src/mesa/drivers/dri/i965/gen6_queryobj.c | 354 + 5 files changed, 383 insertions(+), 60 deletions(-) create mode 100644 src/mesa/drivers/dri/i965/gen6_queryobj.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index a0ffd3a..d67a5a4 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -104,6 +104,7 @@ i965_FILES = \ gen6_depthstencil.c \ gen6_gs_state.c \ gen6_multisample_state.c \ + gen6_queryobj.c \ gen6_sampler_state.c \ gen6_scissor_state.c \ gen6_sf_state.c \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 2f5fedb..beade5c 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -88,6 +88,8 @@ static void brwInitDriverFunctions(struct intel_screen *screen, brwInitFragProgFuncs( functions ); brw_init_queryobj_functions(functions); + if (screen->gen >= 6) + 
gen6_reinit_queryobj_functions(functions); functions->QuerySamplesForFormat = brw_query_samples_for_format; functions->BeginTransformFeedback = brw_begin_transform_feedback; diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 9baf57b..9ef6aca 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -1164,6 +1164,9 @@ void brw_init_queryobj_functions(struct dd_function_table *functions); void brw_emit_query_begin(struct brw_context *brw); void brw_emit_query_end(struct brw_context *brw); +/** gen6_queryobj.c */ +void gen6_reinit_queryobj_functions(struct dd_function_table *functions); + /*== * brw_state_dump.c */ diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 40f926b..1c1e0b4 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -94,40 +94,21 @@ write_timestamp(struct intel_context *intel, drm_intel_bo *query_bo, int idx) static void write_depth_count(struct intel_context *intel, drm_intel_bo *query_bo, int idx) { - if (intel->gen >= 6) { - /* Emit Sandybridge workaround flush: */ - if (intel->gen == 6) - intel_emit_post_sync_nonzero_flush(intel); - - BEGIN_BATCH(5); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2)); - OUT_BATCH(PIPE_CONTROL_DEPTH_STALL | -PIPE_CONTROL_WRITE_DEPTH_COUNT); - OUT_RELOC(query_bo, -I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, -PIPE_CONTROL_GLOBAL_GTT_WRITE | -(idx * sizeof(uint64_t))); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } else { - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) | -PIPE_CONTROL_DEPTH_STALL | -PIPE_CONTROL_WRITE_DEPTH_COUNT); - /* This object could be mapped cacheable, but we don't have an exposed - * mechanism to support that. Since it's going uncached, tell GEM that - * we're writing to it. The usual clflush should be all that's required - * to pick up the results. 
- */ - OUT_RELOC(query_bo, -I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, -PIPE_CONTROL_GLOBAL_GTT_WRITE | -(idx * sizeof(uint64_t))); - OUT_BATCH(0); - OUT_BATCH(0); - ADVANCE_BATCH(); - } + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2) | + PIPE_CONTROL_DEPTH_STALL | PIPE_CONTROL_WRITE_DEPTH_COUNT); + /* This object could be mapped cacheable, but we don't have an exposed +* mechanism to support that. Since it's going uncached, tell GEM that +* we're writing to it. The
[Mesa-dev] [PATCH 04/10] i965: Disable pixel statistics in BLORP.
BLORP is used for operations like glClear, glCopyTexImage, and glBlitFramebuffer which aren't supposed to contribute fragments toward occlusion queries. This prevents Piglit tests from breaking in the next commit. Cc: Eric Anholt Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen6_blorp.cpp | 1 - src/mesa/drivers/dri/i965/gen7_blorp.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index 0ed5bee..c7bb815 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -750,7 +750,6 @@ gen6_blorp_emit_wm_config(struct brw_context *brw, assert(0); break; } - dw4 |= GEN6_WM_STATISTICS_ENABLE; dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0; dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5; dw5 |= (brw->max_wm_threads - 1) << GEN6_WM_MAX_THREADS_SHIFT; diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index f55805c..f83c7f2 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -516,7 +516,6 @@ gen7_blorp_emit_wm_config(struct brw_context *brw, assert(0); break; } - dw1 |= GEN7_WM_STATISTICS_ENABLE; dw1 |= GEN7_WM_LINE_AA_WIDTH_1_0; dw1 |= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5; dw1 |= 0 << GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT; /* No interp */ -- 1.8.2.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 02/10] i965: Require hardware contexts (and thus Kernel 3.6) on Gen6+.
Hardware contexts are necessary to reasonably support OpenGL 3.2. In particular, we currently maintain software counters for transform feedback buffer offsets and counters, which relies on knowing the number of primitives generated. Geometry shaders violate that assumption. At the time of writing, Debian has moved to Kernel 3.8, which means most people probably have a newer kernel by now. It's also worth noting that this patch won't land until Mesa 10 which is currently targeted for September. By that point, even more people will have a newer kernel. This patch leaves the code for flagging BRW_NEW_CONTEXT on new batchbuffers if hw_ctx == NULL since that still occurs pre-Gen6. Cc: Eric Anholt Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.c | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 69b7e4d..2f5fedb 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -143,6 +143,23 @@ brwCreateContext(int api, return false; } + if (intel->gen >= 6) { + /* Create a new hardware context. Using a hardware context means that + * our GPU state will be saved/restored on context switch, allowing us + * to assume that the GPU is in the same state we left it in. + * + * This is required for transform feedback buffer offsets, query objects, + * and also allows us to reduce how much state we have to emit. 
+ */ + intel->hw_ctx = drm_intel_gem_context_create(intel->bufmgr); + + if (!intel->hw_ctx) { + fprintf(stderr, "Gen6+ requires Kernel 3.6 or later.\n"); + ralloc_free(brw); + return false; + } + } + brw_init_surface_formats(brw); /* Initialize swrast, tnl driver tables: */ @@ -374,7 +391,6 @@ brwCreateContext(int api, brw->prim_restart.in_progress = false; brw->prim_restart.enable_cut_index = false; - intel->hw_ctx = drm_intel_gem_context_create(intel->bufmgr); brw_init_state( brw ); -- 1.8.2.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 03/10] i965: Delete Gen7+ check for Kernel 3.3 now that we require 3.6+.
It's just not necessary. Cc: Eric Anholt Cc: Paul Berry Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_screen.c | 7 --- 1 file changed, 7 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index f3dc908..e058c7e 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -1300,13 +1300,6 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->gen = 2; } - if (intelScreen->gen == 7 && - !intel_get_boolean(intelScreen->driScrnPriv, - I915_PARAM_HAS_GEN7_SOL_RESET)) { - fprintf(stderr, "i965 requires Kernel 3.3 or later.\n"); - return false; - } - intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6; intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7; -- 1.8.2.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 01/10] i965: Bump kernel requirement to 3.3 on Ivybridge.
Kernel 3.3 introduced the SOL reset execbuf parameter, needed for GL 3.0 on Ivybridge. Bumping the requirement will give an obvious error message rather than simply reporting GL 2.1. Signed-off-by: Kenneth Graunke --- src/mesa/drivers/dri/intel/intel_extensions.c | 5 + src/mesa/drivers/dri/intel/intel_screen.c | 26 +++--- src/mesa/drivers/dri/intel/intel_screen.h | 2 -- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/mesa/drivers/dri/intel/intel_extensions.c b/src/mesa/drivers/dri/intel/intel_extensions.c index 8d8e325..5cb2fa3 100644 --- a/src/mesa/drivers/dri/intel/intel_extensions.c +++ b/src/mesa/drivers/dri/intel/intel_extensions.c @@ -91,12 +91,9 @@ intelInitExtensions(struct gl_context *ctx) ctx->Const.GLSLVersion = 120; _mesa_override_glsl_version(ctx); - if (intel->gen == 6 || - (intel->gen == 7 && intel->intelScreen->kernel_has_gen7_sol_reset)) - ctx->Extensions.EXT_transform_feedback = true; - if (intel->gen >= 6) { ctx->Extensions.EXT_framebuffer_multisample = true; + ctx->Extensions.EXT_transform_feedback = true; ctx->Extensions.ARB_blend_func_extended = !driQueryOptionb(&intel->optionCache, "disable_blend_func_extended"); ctx->Extensions.ARB_draw_buffers_blend = true; ctx->Extensions.ARB_ES3_compatibility = true; diff --git a/src/mesa/drivers/dri/intel/intel_screen.c b/src/mesa/drivers/dri/intel/intel_screen.c index ad1b351..f3dc908 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.c +++ b/src/mesa/drivers/dri/intel/intel_screen.c @@ -1195,17 +1195,10 @@ set_max_gl_versions(struct intel_screen *screen) switch (screen->gen) { case 7: - if (screen->kernel_has_gen7_sol_reset) { - screen->max_gl_core_version = 31; - screen->max_gl_compat_version = 30; - screen->max_gl_es1_version = 11; - screen->max_gl_es2_version = 30; - } else { - screen->max_gl_core_version = 0; - screen->max_gl_compat_version = 21; - screen->max_gl_es1_version = 11; - screen->max_gl_es2_version = 20; - } + screen->max_gl_core_version = 31; + 
screen->max_gl_compat_version = 30; + screen->max_gl_es1_version = 11; + screen->max_gl_es2_version = 30; break; case 6: screen->max_gl_core_version = 31; @@ -1293,10 +1286,6 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->deviceID = drm_intel_bufmgr_gem_get_devid(intelScreen->bufmgr); - intelScreen->kernel_has_gen7_sol_reset = - intel_get_boolean(intelScreen->driScrnPriv, - I915_PARAM_HAS_GEN7_SOL_RESET); - if (IS_GEN7(intelScreen->deviceID)) { intelScreen->gen = 7; } else if (IS_GEN6(intelScreen->deviceID)) { @@ -1311,6 +1300,13 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp) intelScreen->gen = 2; } + if (intelScreen->gen == 7 && + !intel_get_boolean(intelScreen->driScrnPriv, + I915_PARAM_HAS_GEN7_SOL_RESET)) { + fprintf(stderr, "i965 requires Kernel 3.3 or later.\n"); + return false; + } + intelScreen->hw_has_separate_stencil = intelScreen->gen >= 6; intelScreen->hw_must_use_separate_stencil = intelScreen->gen >= 7; diff --git a/src/mesa/drivers/dri/intel/intel_screen.h b/src/mesa/drivers/dri/intel/intel_screen.h index 7da9895..4833937 100644 --- a/src/mesa/drivers/dri/intel/intel_screen.h +++ b/src/mesa/drivers/dri/intel/intel_screen.h @@ -60,8 +60,6 @@ struct intel_screen bool hw_has_separate_stencil; bool hw_must_use_separate_stencil; - bool kernel_has_gen7_sol_reset; - bool hw_has_llc; bool hw_has_swizzling; -- 1.8.2.3 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] Gen6+ hardware contexts & query object improvements
Hello! This patch series bumps the kernel requirement to 3.6 for Gen6+, meaning that we actually get to rely on hardware context support. That's a little painful, but even Debian ships 3.8 now, and this isn't going to make it into an actual release for several more months. It then splits our query code into Gen4-5 and Gen6+ versions. The new Gen6+ version is a lot simpler since hardware contexts guarantee that our statistics registers don't get polluted with data from other programs running on the system. It should be more efficient, which may help games like Minecraft (though I haven't measured). Finally, it implements the GL_PRIMITIVES_GENERATED and GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN queries via hardware counters rather than manually counting on the CPU. This paves the way for geometry shader support (which can output multiple primitives, breaking our CPU-side tracking), and should allow us to enable hardware primitive restart in a few more cases once a few more things are tidied. The next step is to eliminate the use of SOL reset and save/restore the transform feedback offsets directly. Then we can turn on hardware primitive restart more aggressively and implement a few more transform feedback extensions. Thanks in advance for the review. --Ken ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] gallivm: Eliminate 8.8 fixed point intermediates from AoS sampling path.
From: José Fonseca This change was meant as a stepping stone to use the PMADDUBSW SSSE3 instruction, but actually this refactoring by itself yields a 10% speedup on texture intensive shaders (e.g., Google Earth's ocean water w/o S3TC on an Ivy Bridge machine), while yielding exactly the same results, whereas PMADDUBSW only gave an extra 5%, at the expense of 2 bits of precision in the interpolation. I believe that the speedup of this change comes from the reduced register pressure (as 8.8 fixed point intermediates take twice the space of 8bit unorm). Also, not dealing with 8.8 simplifies lp_bld_sample_aos.c code substantially -- it's no longer necessary to have code duplicated for low and high register halves. Note about lp_build_sample_mipmap(): the path for num_quads > 1 is never executed (as it is faster on AVX to split the 256bit wide texture computation into two 128bit chunks, in order to leverage integer opcodes). This path might be useful in the future, so in order to verify my change did not break that path, I had to apply this change: @@ -1662,11 +1662,11 @@ lp_build_sample_soa(struct gallivm_state *gallivm, /* * we only try 8-wide sampling with soa as it appears to * be a loss with aos with AVX (but it should work). * (It should be faster if we'd support avx2) */ - if (num_quads == 1 || !use_aos) { + if (/* num_quads == 1 || ! */ use_aos) { if (num_quads > 1) { if (mip_filter == PIPE_TEX_MIPFILTER_NONE) { LLVMValueRef index0 = lp_build_const_int32(gallivm, 0); /* and then run texfilt mesademo: LP_NATIVE_VECTOR_WIDTH=256 ./texfilt Ran whole piglit without regressions. 
--- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 60 ++-- src/gallium/auxiliary/gallivm/lp_bld_arit.h | 28 +- src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 321 - src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c | 15 +- src/gallium/drivers/llvmpipe/lp_bld_blend.c |4 +- 5 files changed, 186 insertions(+), 242 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 8f8410c..3291ec4 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -974,7 +974,7 @@ lp_build_lerp_simple(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef v0, LLVMValueRef v1, - bool normalized) + unsigned flags) { unsigned half_width = bld->type.width/2; LLVMBuilderRef builder = bld->gallivm->builder; @@ -987,14 +987,17 @@ lp_build_lerp_simple(struct lp_build_context *bld, delta = lp_build_sub(bld, v1, v0); - if (normalized) { + if (flags & LP_BLD_LERP_WIDE_NORMALIZED) { if (!bld->type.sign) { - /* - * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the - * most-significant-bit to the lowest-significant-bit, so that - * later we can just divide by 2**n instead of 2**n - 1. - */ - x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1)); + if (!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)) { +/* + * Scale x from [0, 2**n - 1] to [0, 2**n] by adding the + * most-significant-bit to the lowest-significant-bit, so that + * later we can just divide by 2**n instead of 2**n - 1. + */ + +x = lp_build_add(bld, x, lp_build_shr_imm(bld, x, half_width - 1)); + } /* (x * delta) >> n */ res = lp_build_mul(bld, x, delta); @@ -1005,15 +1008,18 @@ lp_build_lerp_simple(struct lp_build_context *bld, * use the 2**n - 1 divison approximation in lp_build_mul_norm * instead. 
*/ + assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)); res = lp_build_mul_norm(bld->gallivm, bld->type, x, delta); } } else { + assert(!(flags & LP_BLD_LERP_PRESCALED_WEIGHTS)); res = lp_build_mul(bld, x, delta); } res = lp_build_add(bld, v0, res); - if ((normalized && !bld->type.sign) || bld->type.fixed) { + if (((flags & LP_BLD_LERP_WIDE_NORMALIZED) && !bld->type.sign) || + bld->type.fixed) { /* We need to mask out the high order bits when lerping 8bit normalized colors stored on 16bits */ /* XXX: This step is necessary for lerping 8bit colors stored on 16bits, * but it will be wrong for true fixed point use cases. Basically we need @@ -1033,7 +1039,8 @@ LLVMValueRef lp_build_lerp(struct lp_build_context *bld, LLVMValueRef x, LLVMValueRef v0, - LLVMValueRef v1) + LLVMValueRef v1, + unsigned flags) { const struct lp_type type = bld->type; LLVMValueRef res; @@ -1042,6 +1049,8 @@ lp_build_lerp(struct
[Mesa-dev] [PATCH 1/2] gallivm: Add and use lp_build_lerp_3d.
From: José Fonseca --- src/gallium/auxiliary/gallivm/lp_bld_arit.c | 20 src/gallium/auxiliary/gallivm/lp_bld_arit.h | 15 ++ src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c | 51 ++--- 3 files changed, 60 insertions(+), 26 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c index 524a8e7..8f8410c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c @@ -1095,6 +1095,26 @@ lp_build_lerp_2d(struct lp_build_context *bld, } +LLVMValueRef +lp_build_lerp_3d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef z, + LLVMValueRef v000, + LLVMValueRef v001, + LLVMValueRef v010, + LLVMValueRef v011, + LLVMValueRef v100, + LLVMValueRef v101, + LLVMValueRef v110, + LLVMValueRef v111) +{ + LLVMValueRef v0 = lp_build_lerp_2d(bld, x, y, v000, v001, v010, v011); + LLVMValueRef v1 = lp_build_lerp_2d(bld, x, y, v100, v101, v110, v111); + return lp_build_lerp(bld, z, v0, v1); +} + + /** * Generate min(a, b) * Do checks for special cases. 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h index 60b9907..45886d5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h @@ -106,6 +106,21 @@ lp_build_lerp_2d(struct lp_build_context *bld, LLVMValueRef v11); LLVMValueRef +lp_build_lerp_3d(struct lp_build_context *bld, + LLVMValueRef x, + LLVMValueRef y, + LLVMValueRef z, + LLVMValueRef v000, + LLVMValueRef v001, + LLVMValueRef v010, + LLVMValueRef v011, + LLVMValueRef v100, + LLVMValueRef v101, + LLVMValueRef v110, + LLVMValueRef v111); + + +LLVMValueRef lp_build_min(struct lp_build_context *bld, LLVMValueRef a, LLVMValueRef b); diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c index 16d5718..9eaca02 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c @@ -982,8 +982,7 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, s_fpart_hi, neighbors_hi[0][0][0], neighbors_hi[0][0][1]); - } - else { + } else if (dims == 2) { /* 2-D lerp */ packed_lo = lp_build_lerp_2d(&h16, s_fpart_lo, t_fpart_lo, @@ -998,30 +997,30 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld, neighbors_hi[0][0][1], neighbors_hi[0][1][0], neighbors_hi[0][1][1]); - - if (dims >= 3) { -LLVMValueRef packed_lo2, packed_hi2; - -/* lerp in the second z slice */ -packed_lo2 = lp_build_lerp_2d(&h16, - s_fpart_lo, t_fpart_lo, - neighbors_lo[1][0][0], - neighbors_lo[1][0][1], - neighbors_lo[1][1][0], - neighbors_lo[1][1][1]); - -packed_hi2 = lp_build_lerp_2d(&h16, - s_fpart_hi, t_fpart_hi, - neighbors_hi[1][0][0], - neighbors_hi[1][0][1], - neighbors_hi[1][1][0], - neighbors_hi[1][1][1]); -/* interp between two z slices */ -packed_lo = lp_build_lerp(&h16, r_fpart_lo, - packed_lo, packed_lo2); -packed_hi = lp_build_lerp(&h16, r_fpart_hi, - packed_hi, packed_hi2); - } + } else { 
+ /* 3-D lerp */ + assert(dims == 3); + packed_lo = lp_build_lerp_3d(&h16, + s_fpart_lo, t_fpart_lo, r_fpart_lo, + neighbors_lo[0][0][0], + neighbors_lo[0][0][1], + neighbors_lo[0][1][0], + neighbors_lo[0][1][1], + neighbors_lo[1][0][0], + neighbors_lo[1][0][1], +
Re: [Mesa-dev] [PATCH] llvmpipe: get rid of unused tiled/linear logic
Thanks for doing this Roland. - Original Message - > From: Roland Scheidegger > > We do rendering to linear color buffers for quite some time, and since > switching to linear depth buffers all the tiled/linear logic was unused. > So get rid of (most) of it - there's still some LAYOUT_NONE things and > late allocation of resources which probably could be simplified. Yeah, we should remove llvmpipe_resource::layout too. But maybe in a follow on change. Jose > --- > src/gallium/drivers/llvmpipe/Makefile.am |3 +- > src/gallium/drivers/llvmpipe/SConscript |3 +- > src/gallium/drivers/llvmpipe/lp_rast_priv.h |4 +- > src/gallium/drivers/llvmpipe/lp_texture.c| 388 > +++--- > src/gallium/drivers/llvmpipe/lp_texture.h| 10 - > src/gallium/drivers/llvmpipe/lp_tile_image.c | 294 --- > src/gallium/drivers/llvmpipe/lp_tile_image.h | 61 > 7 files changed, 50 insertions(+), 713 deletions(-) > delete mode 100644 src/gallium/drivers/llvmpipe/lp_tile_image.c > delete mode 100644 src/gallium/drivers/llvmpipe/lp_tile_image.h > > diff --git a/src/gallium/drivers/llvmpipe/Makefile.am > b/src/gallium/drivers/llvmpipe/Makefile.am > index f1ba5d1..9059053 100644 > --- a/src/gallium/drivers/llvmpipe/Makefile.am > +++ b/src/gallium/drivers/llvmpipe/Makefile.am > @@ -72,8 +72,7 @@ libllvmpipe_la_SOURCES = \ > lp_state_vs.c \ > lp_surface.c \ > lp_tex_sample.c \ > - lp_texture.c \ > - lp_tile_image.c > + lp_texture.c > > libllvmpipe_la_LDFLAGS = $(LLVM_LDFLAGS) > > diff --git a/src/gallium/drivers/llvmpipe/SConscript > b/src/gallium/drivers/llvmpipe/SConscript > index a81cf23..22314c2 100644 > --- a/src/gallium/drivers/llvmpipe/SConscript > +++ b/src/gallium/drivers/llvmpipe/SConscript > @@ -52,8 +52,7 @@ llvmpipe = env.ConvenienceLibrary( > 'lp_state_vs.c', > 'lp_surface.c', > 'lp_tex_sample.c', > - 'lp_texture.c', > - 'lp_tile_image.c', > + 'lp_texture.c' > ]) > > env.Alias('llvmpipe', llvmpipe) > diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h > 
b/src/gallium/drivers/llvmpipe/lp_rast_priv.h > index 7d01da1..85febff 100644 > --- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h > +++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h > @@ -36,10 +36,12 @@ > #include "lp_scene.h" > #include "lp_state.h" > #include "lp_texture.h" > -#include "lp_tile_image.h" > #include "lp_limits.h" > > > +#define TILE_VECTOR_HEIGHT 4 > +#define TILE_VECTOR_WIDTH 4 > + > /* If we crash in a jitted function, we can examine jit_line and jit_state > * to get some info. This is not thread-safe, however. > */ > diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c > b/src/gallium/drivers/llvmpipe/lp_texture.c > index 0804619..d10a4ce 100644 > --- a/src/gallium/drivers/llvmpipe/lp_texture.c > +++ b/src/gallium/drivers/llvmpipe/lp_texture.c > @@ -46,7 +46,6 @@ > #include "lp_context.h" > #include "lp_flush.h" > #include "lp_screen.h" > -#include "lp_tile_image.h" > #include "lp_texture.h" > #include "lp_setup.h" > #include "lp_state.h" > @@ -334,11 +333,6 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, >struct sw_winsys *winsys = screen->winsys; >winsys->displaytarget_destroy(winsys, lpr->dt); > > - if (lpr->tiled_img.data) { > - align_free(lpr->tiled_img.data); > - lpr->tiled_img.data = NULL; > - } > - >FREE(lpr->layout[0]); > } > else if (llvmpipe_resource_is_texture(pt)) { > @@ -351,12 +345,6 @@ llvmpipe_resource_destroy(struct pipe_screen *pscreen, > lpr->linear_img.data = NULL; >} > > - /* free tiled image data */ > - if (lpr->tiled_img.data) { > - align_free(lpr->tiled_img.data); > - lpr->tiled_img.data = NULL; > - } > - >/* free layout flag arrays */ >for (level = 0; level < Elements(lpr->layout); level++) { > FREE(lpr->layout[level]); > @@ -398,7 +386,6 @@ llvmpipe_resource_map(struct pipe_resource *resource, >tex_usage == LP_TEX_USAGE_WRITE_ALL); > > assert(layout == LP_TEX_LAYOUT_NONE || > - layout == LP_TEX_LAYOUT_TILED || >layout == LP_TEX_LAYOUT_LINEAR); > > if (lpr->dt) { > @@ -850,27 +837,10 @@ static unsigned > 
tex_image_face_size(const struct llvmpipe_resource *lpr, unsigned level, > enum lp_texture_layout layout) > { > - const unsigned width = u_minify(lpr->base.width0, level); > - const unsigned height = u_minify(lpr->base.height0, level); > - > - assert(layout == LP_TEX_LAYOUT_TILED || > - layout == LP_TEX_LAYOUT_LINEAR); > + assert(layout == LP_TEX_LAYOUT_LINEAR); > > - if (layout == LP_TEX_LAYOUT_TILED) { > - /* for tiled layout, force a 32bpp format */ > - const enum pipe_format format = PIPE_FORMAT_B8G8R8A8_UNORM; > - const unsigned block_size =
Re: [Mesa-dev] [PATCH 03/13] gallium: Introduce 32-bit bytewise format names
- Original Message - > From: Richard Sandiford > > RGBA has R at byte 0 and A at byte 3, regardless of platform > endianness. Maybe I'm missing something, but this naming convention seems to me the exact opposite of what was decided [1], which is: - R at byte 0, ..., and A at byte 3, regardless of platform endianness would be called "R8G8B8A8" - R at bit 0, ..., A at bit 24, encoded as integers that match the platform endianness would be called "RGBA" which would be consistent with (as in a superset of) D3D10 format naming. I'm afraid I must insist on this, as I don't want D3D10 formats to change in any way. It shouldn't be hard to do this -- you can easily craft a script that swaps these using statements like: git ls-files | xargs sed -i -e 's@foo@boo@' But other than this naming convention issue, the actual implementation looks quite nice. Jose [1] http://lists.freedesktop.org/archives/mesa-dev/2013-February/034378.html , from "Actually, on second thought ..." > > Reviewed-by: Adam Jackson > --- > src/gallium/include/pipe/p_format.h | 38 > + > 1 file changed, 30 insertions(+), 8 deletions(-) > > diff --git a/src/gallium/include/pipe/p_format.h > b/src/gallium/include/pipe/p_format.h > index 098b25b..1289983 100644 > --- a/src/gallium/include/pipe/p_format.h > +++ b/src/gallium/include/pipe/p_format.h > @@ -33,6 +33,7 @@ > extern "C" { > #endif > > +#include "p_config.h" > > enum pipe_type { > PIPE_TYPE_UNORM = 0, > @@ -53,10 +54,10 @@ enum pipe_type { > > enum pipe_format { > PIPE_FORMAT_NONE= 0, > - PIPE_FORMAT_B8G8R8A8_UNORM = 1, > - PIPE_FORMAT_B8G8R8X8_UNORM = 2, > - PIPE_FORMAT_A8R8G8B8_UNORM = 3, > - PIPE_FORMAT_X8R8G8B8_UNORM = 4, > + PIPE_FORMAT_ARGB_UNORM = 1, > + PIPE_FORMAT_XRGB_UNORM = 2, > + PIPE_FORMAT_BGRA_UNORM = 3, > + PIPE_FORMAT_BGRX_UNORM = 4, > PIPE_FORMAT_B5G5R5A1_UNORM = 5, > PIPE_FORMAT_B4G4R4A4_UNORM = 6, > PIPE_FORMAT_B5G6R5_UNORM= 7, > @@ -119,8 +120,8 @@ enum pipe_format { > PIPE_FORMAT_R8_UNORM= 64, > PIPE_FORMAT_R8G8_UNORM = 65, > 
PIPE_FORMAT_R8G8B8_UNORM= 66, > - PIPE_FORMAT_R8G8B8A8_UNORM = 67, > - PIPE_FORMAT_X8B8G8R8_UNORM = 68, > + PIPE_FORMAT_ABGR_UNORM = 67, > + PIPE_FORMAT_RGBX_UNORM = 68, > PIPE_FORMAT_R8_USCALED = 69, > PIPE_FORMAT_R8G8_USCALED= 70, > PIPE_FORMAT_R8G8B8_USCALED = 71, > @@ -180,7 +181,7 @@ enum pipe_format { > PIPE_FORMAT_R5SG5SB6U_NORM = 120, > > /* TODO: re-order these */ > - PIPE_FORMAT_A8B8G8R8_UNORM = 121, > + PIPE_FORMAT_RGBA_UNORM = 121, > PIPE_FORMAT_B5G5R5X1_UNORM = 122, > PIPE_FORMAT_R10G10B10A2_USCALED = 123, > PIPE_FORMAT_R11G11B10_FLOAT = 124, > @@ -193,7 +194,7 @@ enum pipe_format { > PIPE_FORMAT_B10G10R10A2_UNORM = 131, > PIPE_FORMAT_R10SG10SB10SA2U_NORM= 132, > PIPE_FORMAT_R8G8Bx_SNORM= 133, > - PIPE_FORMAT_R8G8B8X8_UNORM = 134, > + PIPE_FORMAT_XBGR_UNORM = 134, > PIPE_FORMAT_B4G4R4X4_UNORM = 135, > > /* some stencil samplers formats */ > @@ -343,6 +344,27 @@ enum pipe_format { > PIPE_FORMAT_COUNT > }; > > +#if defined(PIPE_ARCH_LITTLE_ENDIAN) > +#define PIPE_FORMAT_R8G8B8A8_UNORM PIPE_FORMAT_ABGR_UNORM > +#define PIPE_FORMAT_R8G8B8X8_UNORM PIPE_FORMAT_XBGR_UNORM > +#define PIPE_FORMAT_B8G8R8X8_UNORM PIPE_FORMAT_XRGB_UNORM > +#define PIPE_FORMAT_B8G8R8A8_UNORM PIPE_FORMAT_ARGB_UNORM > +#define PIPE_FORMAT_B8G8R8X8_UNORM PIPE_FORMAT_XRGB_UNORM > +#define PIPE_FORMAT_A8R8G8B8_UNORM PIPE_FORMAT_BGRA_UNORM > +#define PIPE_FORMAT_X8R8G8B8_UNORM PIPE_FORMAT_BGRX_UNORM > +#define PIPE_FORMAT_A8B8G8R8_UNORM PIPE_FORMAT_RGBA_UNORM > +#define PIPE_FORMAT_X8B8G8R8_UNORM PIPE_FORMAT_RGBX_UNORM > +#elif defined(PIPE_ARCH_BIG_ENDIAN) > +#define PIPE_FORMAT_R8G8B8A8_UNORM PIPE_FORMAT_RGBA_UNORM > +#define PIPE_FORMAT_R8G8B8X8_UNORM PIPE_FORMAT_RGBX_UNORM > +#define PIPE_FORMAT_B8G8R8A8_UNORM PIPE_FORMAT_BGRA_UNORM > +#define PIPE_FORMAT_B8G8R8X8_UNORM PIPE_FORMAT_BGRX_UNORM > +#define PIPE_FORMAT_A8R8G8B8_UNORM PIPE_FORMAT_ARGB_UNORM > +#define PIPE_FORMAT_X8R8G8B8_UNORM PIPE_FORMAT_XRGB_UNORM > +#define PIPE_FORMAT_A8B8G8R8_UNORM PIPE_FORMAT_ABGR_UNORM > +#define 
PIPE_FORMAT_X8B8G8R8_UNORM PIPE_FORMAT_XBGR_UNORM > +#endif > + > enum pipe_video_chroma_format > { > PIPE_VIDEO_CHROMA_FORMAT_420, > -- > 1.8.2.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
Re: [Mesa-dev] [PATCH 1/5] radeonsi: increase array size for shader inputs and outputs
Reviewed-by: Marek Olšák Marek On Fri, May 17, 2013 at 11:27 AM, Michel Dänzer wrote: > From: Marek Olšák > > and add assertions to prevent buffer overflow. This fixes corruption > of the si_shader struct. > > NOTE: This is a candidate for the 9.1 branch. > > [ Cherry-pick of r600g commit da33f9b919039442e9ab51f9b1d1c83a73607133 ] > > Signed-off-by: Michel Dänzer > --- > src/gallium/drivers/radeonsi/radeonsi_shader.c | 2 ++ > src/gallium/drivers/radeonsi/radeonsi_shader.h | 4 ++-- > 2 files changed, 4 insertions(+), 2 deletions(-) > > diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c > b/src/gallium/drivers/radeonsi/radeonsi_shader.c > index f942436..bea2895 100644 > --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c > +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c > @@ -589,6 +589,7 @@ static void si_llvm_emit_epilogue(struct > lp_build_tgsi_context * bld_base) > switch (d->Declaration.File) { > case TGSI_FILE_INPUT: > i = shader->ninput++; > + assert(i < Elements(shader->input)); > shader->input[i].name = d->Semantic.Name; > shader->input[i].sid = d->Semantic.Index; > shader->input[i].interpolate = d->Interp.Interpolate; > @@ -597,6 +598,7 @@ static void si_llvm_emit_epilogue(struct > lp_build_tgsi_context * bld_base) > > case TGSI_FILE_OUTPUT: > i = shader->noutput++; > + assert(i < Elements(shader->output)); > shader->output[i].name = d->Semantic.Name; > shader->output[i].sid = d->Semantic.Index; > shader->output[i].interpolate = d->Interp.Interpolate; > diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h > b/src/gallium/drivers/radeonsi/radeonsi_shader.h > index 1552dc2..0fbd601 100644 > --- a/src/gallium/drivers/radeonsi/radeonsi_shader.h > +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h > @@ -101,10 +101,10 @@ struct si_pipe_shader_selector { > > struct si_shader { > unsignedninput; > - struct si_shader_io input[32]; > + struct si_shader_io input[40]; > > unsignednoutput; > - struct si_shader_io output[32]; > + struct 
si_shader_io output[40]; > > unsignedninterp; > booluses_kill; > -- > 1.8.3.rc1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] st/mesa: Remove unused variable 'texImage'.
_mesa_get_attachment_teximage has no side effects so looks good to me. Jose - Original Message - > All uses of 'texImage' were removed in commit > 77a405dba7f70f8a47655e90774a5ecf5c88a6ed. > > Fixes "Unused pointer value" defect reported by Coverity. > > Signed-off-by: Vinson Lee > --- > src/mesa/state_tracker/st_cb_fbo.c | 4 > 1 file changed, 4 deletions(-) > > diff --git a/src/mesa/state_tracker/st_cb_fbo.c > b/src/mesa/state_tracker/st_cb_fbo.c > index aa245d3..457cec1 100644 > --- a/src/mesa/state_tracker/st_cb_fbo.c > +++ b/src/mesa/state_tracker/st_cb_fbo.c > @@ -394,7 +394,6 @@ st_render_texture(struct gl_context *ctx, > struct st_renderbuffer *strb = st_renderbuffer(rb); > struct pipe_resource *pt; > struct st_texture_object *stObj; > - const struct gl_texture_image *texImage; > struct pipe_surface surf_tmpl; > > if (!st_finalize_texture(ctx, pipe, att->Texture)) > @@ -403,9 +402,6 @@ st_render_texture(struct gl_context *ctx, > pt = st_get_texobj_resource(att->Texture); > assert(pt); > > - /* get pointer to texture image we're rendeing to */ > - texImage = _mesa_get_attachment_teximage(att); > - > /* get the texture for the texture object */ > stObj = st_texture_object(att->Texture); > > -- > 1.8.2.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] scons: Use LLVM shared library if found.
Vinson, Why is this necessary? (I'd prefer that LLVM is statically linked by default. ) Jose - Original Message - > This patch fixes SCons builds on Fedora 18. > > Signed-off-by: Vinson Lee > --- > scons/llvm.py | 10 +- > 1 file changed, 9 insertions(+), 1 deletion(-) > > diff --git a/scons/llvm.py b/scons/llvm.py > index 7cd609c..432ece6 100644 > --- a/scons/llvm.py > +++ b/scons/llvm.py > @@ -198,7 +198,15 @@ def generate(env): > if llvm_version >= distutils.version.LooseVersion('3.2'): > env.Append(CXXFLAGS = ('-fno-rtti',)) > > -env.ParseConfig('llvm-config --libs ' + ' '.join(components)) > +llvm_shared_library = os.path.join( > +env.backtick('llvm-config --libdir').strip(), > +'libLLVM-%s%s' % (llvm_version, env['SHLIBSUFFIX']) > +) > +if os.path.exists(llvm_shared_library): > +env.Append(LIBS = ['LLVM-%s' % llvm_version]) > +else: > +env.ParseConfig('llvm-config --libs ' + ' > '.join(components)) > + > env.ParseConfig('llvm-config --ldflags') > except OSError: > print 'scons: llvm-config version %s failed' % llvm_version > -- > 1.8.2.1 > > ___ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev > ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] llvmpipe: Remove x/y from cmd_bin
- Original Message - > Am 16.05.2013 21:44, schrieb Adam Jackson: > > These were mostly just a waste of memory and cache pressure, and were > > really only used for debugging. > > > > This change reduces instruction count (as measured by callgrind's Ir > > event) of gnome-shell-perf-tool on Ivybridge by 3.5% ± 0.015% (n=20). > > > > Signed-off-by: Adam Jackson > > --- > > src/gallium/drivers/llvmpipe/lp_rast.c | 37 > > +++- > > src/gallium/drivers/llvmpipe/lp_rast_debug.c | 19 +++--- > > src/gallium/drivers/llvmpipe/lp_rast_priv.h | 2 +- > > src/gallium/drivers/llvmpipe/lp_scene.c | 4 ++- > > src/gallium/drivers/llvmpipe/lp_scene.h | 4 +-- > > src/gallium/drivers/llvmpipe/lp_setup.c | 11 + > > 6 files changed, 30 insertions(+), 47 deletions(-) > > > > diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c > > b/src/gallium/drivers/llvmpipe/lp_rast.c > > index a557db4..3dc00ef 100644 > > --- a/src/gallium/drivers/llvmpipe/lp_rast.c > > +++ b/src/gallium/drivers/llvmpipe/lp_rast.c > > @@ -87,13 +87,14 @@ lp_rast_end( struct lp_rasterizer *rast ) > > */ > > static void > > lp_rast_tile_begin(struct lp_rasterizer_task *task, > > - const struct cmd_bin *bin) > > + const struct cmd_bin *bin, > > + int x, int y) > > { > > - LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, bin->x, bin->y); > > + LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, x, y); > > > > task->bin = bin; > > - task->x = bin->x * TILE_SIZE; > > - task->y = bin->y * TILE_SIZE; > > + task->x = x * TILE_SIZE; > > + task->y = y * TILE_SIZE; > > > > /* reset pointers to color and depth tile(s) */ > > memset(task->color_tiles, 0, sizeof(task->color_tiles)); > > @@ -551,13 +552,14 @@ static lp_rast_cmd_func dispatch[LP_RAST_OP_MAX] = > > > > static void > > do_rasterize_bin(struct lp_rasterizer_task *task, > > - const struct cmd_bin *bin) > > + const struct cmd_bin *bin, > > + int x, int y) > > { > > const struct cmd_block *block; > > unsigned k; > > > > if (0) > > - lp_debug_bin(bin); > > + lp_debug_bin(bin, x, y); 
> > > > for (block = bin->head; block; block = block->next) { > >for (k = 0; k < block->count; k++) { > > @@ -576,11 +578,11 @@ do_rasterize_bin(struct lp_rasterizer_task *task, > > */ > > static void > > rasterize_bin(struct lp_rasterizer_task *task, > > - const struct cmd_bin *bin ) > > + const struct cmd_bin *bin, int x, int y ) > > { > > - lp_rast_tile_begin( task, bin ); > > + lp_rast_tile_begin( task, bin, x, y ); > > > > - do_rasterize_bin(task, bin); > > + do_rasterize_bin(task, bin, x, y); > > > > lp_rast_tile_end(task); > > > > @@ -622,27 +624,16 @@ rasterize_scene(struct lp_rasterizer_task *task, > > > > if (!task->rast->no_rast && !scene->discard) { > >/* loop over scene bins, rasterize each */ > > -#if 0 > > - { > > - unsigned i, j; > > - for (i = 0; i < scene->tiles_x; i++) { > > -for (j = 0; j < scene->tiles_y; j++) { > > - struct cmd_bin *bin = lp_scene_get_bin(scene, i, j); > > - rasterize_bin(task, bin, i, j); > > -} > > - } > > - } > > -#else > >{ > > struct cmd_bin *bin; > > + int i, j; > > > > assert(scene); > > - while ((bin = lp_scene_bin_iter_next(scene))) { > > + while ((bin = lp_scene_bin_iter_next(scene, &i, &j))) { > > if (!is_empty_bin( bin )) > > - rasterize_bin(task, bin); > > + rasterize_bin(task, bin, i, j); > > } > >} > > -#endif > > } > > > > > > diff --git a/src/gallium/drivers/llvmpipe/lp_rast_debug.c > > b/src/gallium/drivers/llvmpipe/lp_rast_debug.c > > index 4008251..3bc75aa 100644 > > --- a/src/gallium/drivers/llvmpipe/lp_rast_debug.c > > +++ b/src/gallium/drivers/llvmpipe/lp_rast_debug.c > > @@ -90,13 +90,13 @@ is_blend( const struct lp_rast_state *state, > > > > > > static void > > -debug_bin( const struct cmd_bin *bin ) > > +debug_bin( const struct cmd_bin *bin, int x, int y ) > > { > > const struct lp_rast_state *state = NULL; > > const struct cmd_block *head = bin->head; > > int i, j = 0; > > > > - debug_printf("bin %d,%d:\n", bin->x, bin->y); > > + debug_printf("bin %d,%d:\n", x, y); > > > > while (head) { > >for (i = 
0; i < head->count; i++, j++) { > > @@ -231,13 +231,14 @@ debug_triangle(int tilex, int tiley, > > static void > > do_debug_bin( struct tile *tile, > >const struct cmd_bin *bin, > > + int x, int y, > >boolean print_cmds) > > { > > unsigned k, j = 0; > > const struct cmd_block *block; > > > > - int tx = bin->x * TILE_SIZE; > > - int ty = bin->y * TI
Re: [Mesa-dev] R600/SI Patches: A few cleanups for compute
On Mit, 2013-05-15 at 14:26 -0700, Tom Stellard wrote: > > The attached patches add some new patterns and instructions for SI and > are a prerequisite for more invasive compute shader changes that I'm > working on. > > Please Review. The SI changes are Reviewed-by: Michel Dänzer -- Earthling Michel Dänzer | http://www.amd.com Libre software enthusiast | Debian, X and DRI developer ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH] scons: Don't force stabs debug format for Mingw.
From: José Fonseca - recent gdb handles DWARF fine (tested both with version 7.1.90.20100730 from mingw-w64 project, and 7.5-1 from mingw project) - http://people.freedesktop.org/~jrfonseca/bfdhelp/ was updated to handle DWARF - it requires ugly hacks to prevent compilation failures - it prevents proper backtraces when stabs/dwarf is mixed (which is inevitable, given that the MinGW C runtime is pre-built with dwarf) For example, without this change I get: (gdb) bt #0 _wassert (_Message=0xf925060 L"Num < NumOperands && \"Invalid child # of SDNode!\"", _File=0xf60b488 L"llvm/include/llvm/CodeGen/SelectionDAGNodes.h", _Line=534) at ../../../../mingw-w64-crt/misc/wassert.c:51 #1 0x0368996b in _assert (_Message=0x39d7ee4 "Num < NumOperands && \"Invalid child # of SDNode!\"", _File=0x39d7e94 "llvm/include/llvm/CodeGen/SelectionDAGNodes.h", _Line=534) at ../../../../mingw-w64-crt/misc/wassert.c:44 #2 0x0004 in ?? () #3 0x0004 in ?? () #4 0x0f60b488 in ?? () #5 0x in ?? () While with this change I get: (gdb) bt #0 _wassert (_Message=0xfb982e8 L"Num < NumOperands && \"Invalid child # of SDNode!\"", _File=0xefbcb40 L"llvm/include/llvm/CodeGen/SelectionDAGNodes.h", _Line=534) at ../../../../mingw-w64-crt/misc/wassert.c:51 #1 0x039c996b in _assert (_Message=0x3d17f24 "Num < NumOperands && \"Invalid child # of SDNode!\"", _File=0x3d17ed4 "llvm/include/llvm/CodeGen/SelectionDAGNodes.h", _Line=534) at ../../../../mingw-w64-crt/misc/wassert.c:44 #2 0x033111cc in getOperand (Num=4, this=) at llvm/include/llvm/CodeGen/SelectionDAGNodes.h:534 #3 getOperand (i=4, this=) at llvm/include/llvm/CodeGen/SelectionDAGNodes.h:779 #4 llvm::SelectionDAG::getNode (this=0xf00cb08, Opcode=79, DL=..., VT=..., N1=..., N2=...) at llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp:2859 #5 0x03377b20 in llvm::SelectionDAGBuilder::visitExtractElement (this=0xfb45028, I=...) at llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp:2803 [...] 
--- scons/crossmingw.py | 42 -- src/gallium/auxiliary/SConscript |4 src/mapi/glapi/SConscript|5 - 3 files changed, 51 deletions(-) diff --git a/scons/crossmingw.py b/scons/crossmingw.py index 23c56c0..1287e0e 100644 --- a/scons/crossmingw.py +++ b/scons/crossmingw.py @@ -130,40 +130,6 @@ SCons.Tool.SourceFileScanner.add_scanner('.rc', SCons.Defaults.CScan) -def compile_without_gstabs(env, sources, c_file): -'''This is a hack used to compile some source files without the --gstabs option. - -It seems that some versions of mingw32's gcc (4.4.2 at least) die -when compiling large files with the -gstabs option. -gstabs is -related to debug symbols and can be omitted from the effected -files. - -This function compiles the given c_file without -gstabs, removes -the c_file from the sources list, then appends the new .o file to -sources. Then return the new sources list. -''' - -# Modify CCFLAGS to not have -gstabs option: -env2 = env.Clone() -flags = str(env2['CCFLAGS']) -flags = flags.replace("-gstabs", "") -env2['CCFLAGS'] = SCons.Util.CLVar(flags) - -# Build the special-case files: -obj_file = env2.SharedObject(c_file) - -# Replace ".cpp" or ".c" with ".o" -o_file = c_file.replace(".cpp", ".o") -o_file = o_file.replace(".c", ".o") - -# Replace the .c files with the specially-compiled .o file -sources.remove(c_file) -sources.append(o_file) - -return sources - - def generate(env): mingw_prefix = find(env) @@ -221,13 +187,5 @@ def generate(env): env['LIBPREFIXES']= [ 'lib', '' ] env['LIBSUFFIXES']= [ '.a', '.lib' ] -# MinGW x86 port of gdb does not handle well dwarf debug info which is the -# default in recent gcc versions. The x64 port gdb from mingw-w64 seems to -# handle it fine though, so stick with the default there. 
-if env['machine'] != 'x86_64': -env.AppendUnique(CCFLAGS = ['-gstabs']) - -env.AddMethod(compile_without_gstabs, 'compile_without_gstabs') - def exists(env): return find(env) diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index bfd5ec3..31dfed3 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -51,10 +51,6 @@ if env['llvm']: 'GALLIVM_CPP_SOURCES' ]) -if env['toolchain'] == 'crossmingw': -# compile lp_bld_misc.cpp without -gstabs option -source = env.compile_without_gstabs(source, "gallivm/lp_bld_misc.cpp") - gallium = env.ConvenienceLibrary( target = 'gallium', source = source, diff --git a/src/mapi/glapi/SConscript b/src/mapi/glapi/SConscript index ac11148..c4ac080 100644 --- a/src/mapi/glapi/SConscript +++ b/src/mapi/glapi/SConscript @@ -95,11 +95,6 @@ if (env['gcc'] or env['clang']) and
[Mesa-dev] [Bug 64668] Clipping is performed incorrectly when using shaders with intel/nouveau/radeon drivers.
https://bugs.freedesktop.org/show_bug.cgi?id=64668 --- Comment #13 from edg...@yandex.ru --- > As far as I've been able to tell from experimenting with the nVidia > proprietary driver, its behaviour in this corner case is to not clip at all. You're absolutely correct, it doesn't clip. I see that the OpenGL specification is garbage. :) In this case I'm mostly interested in portability and compatibility; since it doesn't clip on either Windows or Linux with the proprietary drivers, it seems logical to implement the same behaviour in the much less widespread open implementation, Mesa, and the open drivers... -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [Bug 64649] Anomaly 2 (Steam) exits with GLX_EXT_swap_control not supported, unable to set vertical sync
https://bugs.freedesktop.org/show_bug.cgi?id=64649 --- Comment #4 from bartosz.brzos...@11bitstudios.com --- The swap control extension is not required by the game to function. The exit must be caused by something else. What exactly happens? Does it look like graceful exit or a segfault? What kind of GPU are you using? -- You are receiving this mail because: You are the assignee for the bug. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] radeonsi: increase array size for shader inputs and outputs
From: Marek Olšák and add assertions to prevent buffer overflow. This fixes corruption of the si_shader struct. NOTE: This is a candidate for the 9.1 branch. [ Cherry-pick of r600g commit da33f9b919039442e9ab51f9b1d1c83a73607133 ] Signed-off-by: Michel Dänzer --- src/gallium/drivers/radeonsi/radeonsi_shader.c | 2 ++ src/gallium/drivers/radeonsi/radeonsi_shader.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index f942436..bea2895 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -589,6 +589,7 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) switch (d->Declaration.File) { case TGSI_FILE_INPUT: i = shader->ninput++; + assert(i < Elements(shader->input)); shader->input[i].name = d->Semantic.Name; shader->input[i].sid = d->Semantic.Index; shader->input[i].interpolate = d->Interp.Interpolate; @@ -597,6 +598,7 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) case TGSI_FILE_OUTPUT: i = shader->noutput++; + assert(i < Elements(shader->output)); shader->output[i].name = d->Semantic.Name; shader->output[i].sid = d->Semantic.Index; shader->output[i].interpolate = d->Interp.Interpolate; diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h index 1552dc2..0fbd601 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.h +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h @@ -101,10 +101,10 @@ struct si_pipe_shader_selector { struct si_shader { unsignedninput; - struct si_shader_io input[32]; + struct si_shader_io input[40]; unsignednoutput; - struct si_shader_io output[32]; + struct si_shader_io output[40]; unsignedninterp; booluses_kill; -- 1.8.3.rc1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/5] radeonsi: Initial support for multiple constant buffers
From: Michel Dänzer Just enough to support an additional internal constant buffer for the user clip planes. NOTE: This is a candidate for the 9.1 branch. Signed-off-by: Michel Dänzer --- src/gallium/drivers/radeonsi/r600_buffer.c | 30 --- src/gallium/drivers/radeonsi/radeonsi_pipe.h | 8 +++ src/gallium/drivers/radeonsi/si_state.c | 81 +--- src/gallium/drivers/radeonsi/si_state_draw.c | 73 + 4 files changed, 119 insertions(+), 73 deletions(-) diff --git a/src/gallium/drivers/radeonsi/r600_buffer.c b/src/gallium/drivers/radeonsi/r600_buffer.c index 0c33c1e..cdf9988 100644 --- a/src/gallium/drivers/radeonsi/r600_buffer.c +++ b/src/gallium/drivers/radeonsi/r600_buffer.c @@ -24,7 +24,6 @@ * Jerome Glisse * Corbin Simpson */ -#include #include "pipe/p_screen.h" #include "util/u_format.h" @@ -169,32 +168,3 @@ void r600_upload_index_buffer(struct r600_context *rctx, u_upload_data(rctx->uploader, 0, count * ib->index_size, ib->user_buffer, &ib->offset, &ib->buffer); } - -void r600_upload_const_buffer(struct r600_context *rctx, struct si_resource **rbuffer, - const uint8_t *ptr, unsigned size, - uint32_t *const_offset) -{ - *rbuffer = NULL; - - if (R600_BIG_ENDIAN) { - uint32_t *tmpPtr; - unsigned i; - - if (!(tmpPtr = malloc(size))) { - R600_ERR("Failed to allocate BE swap buffer.\n"); - return; - } - - for (i = 0; i < size / 4; ++i) { - tmpPtr[i] = bswap_32(((uint32_t *)ptr)[i]); - } - - u_upload_data(rctx->uploader, 0, size, tmpPtr, const_offset, - (struct pipe_resource**)rbuffer); - - free(tmpPtr); - } else { - u_upload_data(rctx->uploader, 0, size, ptr, const_offset, - (struct pipe_resource**)rbuffer); - } -} diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index c5b33f7..e50088f 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -124,6 +124,13 @@ struct r600_fence_block { #define R600_CONSTANT_ARRAY_SIZE 256 #define R600_RESOURCE_ARRAY_SIZE 160 +struct 
r600_constbuf_state +{ + struct pipe_constant_buffer cb[2]; + uint32_t enabled_mask; + uint32_t dirty_mask; +}; + struct r600_context { struct pipe_context context; struct blitter_context *blitter; @@ -152,6 +159,7 @@ struct r600_context { /* shader information */ unsigned sprite_coord_enable; unsigned export_16bpc; + struct r600_constbuf_state constbuf_state[PIPE_SHADER_TYPES]; struct r600_textures_info vs_samplers; struct r600_textures_info ps_samplers; struct si_resource *border_color_table; diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 6d072ef..de86b1e 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -24,12 +24,15 @@ * Christian König */ +#include <byteswap.h> + #include "util/u_memory.h" #include "util/u_framebuffer.h" #include "util/u_blitter.h" #include "util/u_helpers.h" #include "util/u_math.h" #include "util/u_pack_color.h" +#include "util/u_upload_mgr.h" #include "util/u_format_s3tc.h" #include "tgsi/tgsi_parse.h" #include "radeonsi_pipe.h" @@ -2492,64 +2495,56 @@ static void si_delete_sampler_state(struct pipe_context *ctx, void *state) * Constants */ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, - struct pipe_constant_buffer *cb) + struct pipe_constant_buffer *input) { struct r600_context *rctx = (struct r600_context *)ctx; - struct si_resource *rbuffer = cb ? si_resource(cb->buffer) : NULL; - struct si_pm4_state *pm4; - uint32_t offset; - uint64_t va; + struct r600_constbuf_state *state = &rctx->constbuf_state[shader]; + struct pipe_constant_buffer *cb; + const uint8_t *ptr; /* Note that the state tracker can unbind constant buffers by * passing NULL here. 
*/ - if (cb == NULL || (!cb->buffer && !cb->user_buffer)) + if (unlikely(!input || (!input->buffer && !input->user_buffer))) { + state->enabled_mask &= ~(1 << index); + state->dirty_mask &= ~(1 << index); + pipe_resource_reference(&state->cb[index].buffer, NULL); return; + } - pm4 = CALLOC_STRUCT(si_pm4_state);
[Mesa-dev] [PATCH 2/5] radeonsi: Fix handling of TGSI_SEMANTIC_PSIZE
From: Michel Dänzer Two more little piglits. NOTE: This is a candidate for the 9.1 branch. Signed-off-by: Michel Dänzer --- src/gallium/drivers/radeonsi/radeonsi_pipe.h | 1 - src/gallium/drivers/radeonsi/radeonsi_shader.c | 4 +++- src/gallium/drivers/radeonsi/radeonsi_shader.h | 2 ++ src/gallium/drivers/radeonsi/si_state.c | 4 ---- src/gallium/drivers/radeonsi/si_state_draw.c | 15 --- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index 388f6df..c5b33f7 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -140,7 +140,6 @@ struct r600_context { unsigned pa_sc_line_stipple; unsigned pa_su_sc_mode_cntl; unsigned pa_cl_clip_cntl; - unsigned pa_cl_vs_out_cntl; /* for saving when using blitter */ struct pipe_stencil_ref stencil_ref; struct si_pipe_shader_selector *ps_shader; diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index bea2895..e6ed545 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -612,7 +612,9 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) /* Select the correct target */ switch(d->Semantic.Name) { case TGSI_SEMANTIC_PSIZE: - target = V_008DFC_SQ_EXP_POS; + shader->vs_out_misc_write = 1; + shader->vs_out_point_size = 1; + target = V_008DFC_SQ_EXP_POS + 1; break; case TGSI_SEMANTIC_POSITION: if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) { diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.h b/src/gallium/drivers/radeonsi/radeonsi_shader.h index 0fbd601..667f2c3 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.h +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.h @@ -110,6 +110,8 @@ struct si_shader { bool uses_kill; bool uses_instanceid; bool fs_write_all; + bool vs_out_misc_write; + bool vs_out_point_size; unsigned nr_cbufs; }; diff 
--git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index ed95b1d..6d072ef 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -412,9 +412,6 @@ static void *si_create_rs_state(struct pipe_context *ctx, S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip) | S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip) | S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); - rs->pa_cl_vs_out_cntl = - S_02881C_USE_VTX_POINT_SIZE(state->point_size_per_vertex) | - S_02881C_VS_OUT_MISC_VEC_ENA(state->point_size_per_vertex); clip_rule = state->scissor ? 0xAAAA : 0xFFFF; @@ -485,7 +482,6 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) rctx->pa_sc_line_stipple = rs->pa_sc_line_stipple; rctx->pa_su_sc_mode_cntl = rs->pa_su_sc_mode_cntl; rctx->pa_cl_clip_cntl = rs->pa_cl_clip_cntl; - rctx->pa_cl_vs_out_cntl = rs->pa_cl_vs_out_cntl; si_pm4_bind_state(rctx, rasterizer, rs); si_update_fb_rs_state(rctx); diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 8d16907..a9ecc64 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -55,8 +55,13 @@ static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *s * takes care of adding a dummy export. */ for (nparams = 0, i = 0 ; i < shader->shader.noutput; i++) { - if (shader->shader.output[i].name != TGSI_SEMANTIC_POSITION) + switch (shader->shader.output[i].name) { + case TGSI_SEMANTIC_POSITION: + case TGSI_SEMANTIC_PSIZE: + break; + default: nparams++; + } } if (nparams < 1) nparams = 1; @@ -66,7 +71,9 @@ static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *s si_pm4_set_reg(pm4, R_02870C_SPI_SHADER_POS_FORMAT, S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | - S_02870C_POS1_EXPORT_FORMAT(V_02870C_SPI_SHADER_NONE) | + S_02870C_POS1_EXPORT_
[Mesa-dev] [PATCH 4/5] radeonsi: Handle TGSI_SEMANTIC_CLIPVERTEX
From: Michel Dänzer 17 more little piglits. NOTE: This is a candidate for the 9.1 branch. Signed-off-by: Michel Dänzer --- src/gallium/drivers/radeonsi/radeonsi_pipe.h | 1 - src/gallium/drivers/radeonsi/radeonsi_shader.c | 62 ++ src/gallium/drivers/radeonsi/radeonsi_shader.h | 1 + src/gallium/drivers/radeonsi/si_state.c| 10 - src/gallium/drivers/radeonsi/si_state.h| 1 + src/gallium/drivers/radeonsi/si_state_draw.c | 19 +--- 6 files changed, 86 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index e50088f..3274049 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -146,7 +146,6 @@ struct r600_context { struct pipe_framebuffer_state framebuffer; unsignedpa_sc_line_stipple; unsignedpa_su_sc_mode_cntl; - unsignedpa_cl_clip_cntl; /* for saving when using blitter */ struct pipe_stencil_ref stencil_ref; struct si_pipe_shader_selector *ps_shader; diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index e6ed545..484f7ec 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -554,6 +554,64 @@ static void si_alpha_test(struct lp_build_tgsi_context *bld_base, } } +static void si_llvm_emit_clipvertex(struct lp_build_tgsi_context * bld_base, + unsigned index) +{ + struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); + struct lp_build_context *base = &bld_base->base; + struct lp_build_context *uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld; + LLVMValueRef args[9]; + unsigned reg_index; + unsigned chan; + unsigned const_chan; + LLVMValueRef out_elts[4]; + LLVMValueRef base_elt; + LLVMValueRef ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_CONST); + LLVMValueRef const_resource = build_indexed_load(si_shader_ctx, ptr, uint->one); + + for (chan = 0; chan < TGSI_NUM_CHANNELS; 
chan++) { + LLVMValueRef out_ptr = si_shader_ctx->radeon_bld.soa.outputs[index][chan]; + out_elts[chan] = LLVMBuildLoad(base->gallivm->builder, out_ptr, ""); + } + + for (reg_index = 0; reg_index < 2; reg_index ++) { + args[5] = + args[6] = + args[7] = + args[8] = lp_build_const_float(base->gallivm, 0.0f); + + /* Compute dot products of position and user clip plane vectors */ + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + for (const_chan = 0; const_chan < TGSI_NUM_CHANNELS; const_chan++) { + args[0] = const_resource; + args[1] = lp_build_const_int32(base->gallivm, + ((reg_index * 4 + chan) * 4 + + const_chan) * 4); + base_elt = build_intrinsic(base->gallivm->builder, + "llvm.SI.load.const", + base->elem_type, + args, 2, + LLVMReadNoneAttribute | LLVMNoUnwindAttribute); + args[5 + chan] = + lp_build_add(base, args[5 + chan], +lp_build_mul(base, base_elt, + out_elts[const_chan])); + } + } + + args[0] = lp_build_const_int32(base->gallivm, 0xf); + args[1] = uint->zero; + args[2] = uint->zero; + args[3] = lp_build_const_int32(base->gallivm, + V_008DFC_SQ_EXP_POS + 2 + reg_index); + args[4] = uint->zero; + lp_build_intrinsic(base->gallivm->builder, + "llvm.SI.export", + LLVMVoidTypeInContext(base->gallivm->context), + args, 9); + } +} + /* XXX: This is partially implemented for VS only at this point. It is not complete */ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) { @@ -642,6 +700,10 @@ static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base) color_count++; }
[Mesa-dev] [PATCH 5/5] radeonsi: Fix user clip planes
From: Michel Dänzer 4 more little piglits. NOTE: This is a candidate for the 9.1 branch. Signed-off-by: Michel Dänzer --- src/gallium/drivers/radeonsi/si_state_draw.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 4380d2c..ae571a4 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -316,10 +316,9 @@ static bool si_update_draw_info_state(struct r600_context *rctx, (rctx->queued.named.rasterizer->clip_plane_enable & vs->clip_dist_write)); si_pm4_set_reg(pm4, R_028810_PA_CL_CLIP_CNTL, - rctx->queued.named.rasterizer->pa_cl_clip_cntl - /*| (rctx->vs_shader->shader.clip_dist_write || - rctx->vs_shader->shader.vs_prohibit_ucps ? - 0 : rctx->rasterizer->clip_plane_enable & 0x3F)*/); + rctx->queued.named.rasterizer->pa_cl_clip_cntl | + (vs->clip_dist_write ? 0 : + rctx->queued.named.rasterizer->clip_plane_enable & 0x3F)); si_pm4_set_state(rctx, draw_info, pm4); return true; -- 1.8.3.rc1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev