Mesa (master): i965: Use intel_bufferobj_buffer() wrapper in image surface state setup.
Module: Mesa Branch: master Commit: 936cd3c87a212c28fe89a5c059fc4febd8b52ab7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=936cd3c87a212c28fe89a5c059fc4febd8b52ab7 Author: Francisco Jerez Date: Fri Mar 16 14:28:59 2018 -0700 i965: Use intel_bufferobj_buffer() wrapper in image surface state setup. Instead of directly using intel_obj->buffer. Among other things intel_bufferobj_buffer() will update intel_buffer_object::gpu_active_start/end, which are used by glBufferSubData() to decide which path to take. Fixes a failure in the Piglit ARB_shader_image_load_store-host-mem-barrier Buffer Update/WaW tests, which could be reproduced with a non-standard glGetTexSubImage implementation (see bug report). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105351 Reported-by: Nanley Chery Cc: mesa-sta...@lists.freedesktop.org Reviewed-by: Nanley Chery --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 39e898243d..73cae9ef7c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1520,14 +1520,16 @@ update_image_surface(struct brw_context *brw, const unsigned format = get_image_format(brw, u->_ActualFormat, access); if (obj->Target == GL_TEXTURE_BUFFER) { - struct intel_buffer_object *intel_obj = -intel_buffer_object(obj->BufferObject); const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 : _mesa_get_format_bytes(u->_ActualFormat)); const unsigned buffer_size = buffer_texture_range_size(brw, obj); + struct brw_bo *const bo = !obj->BufferObject ? 
NULL : +intel_bufferobj_buffer(brw, intel_buffer_object(obj->BufferObject), + obj->BufferOffset, buffer_size, + access != GL_READ_ONLY); brw_emit_buffer_surface_state( -brw, surf_offset, intel_obj->buffer, obj->BufferOffset, +brw, surf_offset, bo, obj->BufferOffset, format, buffer_size, texel_size, access != GL_READ_ONLY ? RELOC_WRITE : 0); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Handle non-zero texture buffer offsets in buffer object range calculation.
Module: Mesa Branch: master Commit: e989acb03ba802737f762627dd16ac1d0b9f0d13 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e989acb03ba802737f762627dd16ac1d0b9f0d13 Author: Francisco Jerez Date: Fri Mar 16 14:35:10 2018 -0700 i965: Handle non-zero texture buffer offsets in buffer object range calculation. Otherwise the specified surface state will allow the GPU to access memory up to BufferOffset bytes past the end of the buffer. Found by inspection. v2: Protect against out-of-range BufferOffset (Nanley). Cc: mesa-sta...@lists.freedesktop.org Reviewed-by: Nanley Chery --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index af629a17bf..39e898243d 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -647,6 +647,7 @@ buffer_texture_range_size(struct brw_context *brw, const unsigned texel_size = _mesa_get_format_bytes(obj->_BufferObjectFormat); const unsigned buffer_size = (!obj->BufferObject ? 0 : obj->BufferObject->Size); + const unsigned buffer_offset = MIN2(buffer_size, obj->BufferOffset); /* The ARB_texture_buffer_specification says: * @@ -664,7 +665,8 @@ buffer_texture_range_size(struct brw_context *brw, * so that when ISL divides by stride to obtain the number of texels, that * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE. */ - return MIN3((unsigned)obj->BufferSize, buffer_size, + return MIN3((unsigned)obj->BufferSize, + buffer_size - buffer_offset, brw->ctx.Const.MaxTextureBufferSize * texel_size); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Move buffer texture size calculation into a common helper function.
Module: Mesa Branch: master Commit: 156d2c6e621d836c4d45c636b87669e1de3d4464 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=156d2c6e621d836c4d45c636b87669e1de3d4464 Author: Francisco Jerez Date: Fri Mar 16 13:06:26 2018 -0700 i965: Move buffer texture size calculation into a common helper function. The buffer texture size calculations (should be easy enough, right?) are repeated in three different places, each of them subtly broken in a different way. E.g. the image load/store path was never fixed to clamp to MaxTextureBufferSize, and none of them are taking into account the buffer offset correctly. It's easier to fix it all in one place. Cc: mesa-sta...@lists.freedesktop.org Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106481 Reviewed-by: Nanley Chery --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 55 ++-- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 67438b0f7e..af629a17bf 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -639,26 +639,14 @@ brw_emit_buffer_surface_state(struct brw_context *brw, .mocs = brw_get_bo_mocs(devinfo, bo)); } -void -brw_update_buffer_texture_surface(struct gl_context *ctx, - unsigned unit, - uint32_t *surf_offset) +static unsigned +buffer_texture_range_size(struct brw_context *brw, + struct gl_texture_object *obj) { - struct brw_context *brw = brw_context(ctx); - struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; - struct intel_buffer_object *intel_obj = - intel_buffer_object(tObj->BufferObject); - uint32_t size = tObj->BufferSize; - struct brw_bo *bo = NULL; - mesa_format format = tObj->_BufferObjectFormat; - const enum isl_format isl_format = brw_isl_format_for_mesa_format(format); - int texel_size = _mesa_get_format_bytes(format); - - if (intel_obj) { - size = MIN2(size, intel_obj->Base.Size); - bo = 
intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size, - false); - } + assert(obj->Target == GL_TEXTURE_BUFFER); + const unsigned texel_size = _mesa_get_format_bytes(obj->_BufferObjectFormat); + const unsigned buffer_size = (!obj->BufferObject ? 0 : + obj->BufferObject->Size); /* The ARB_texture_buffer_specification says: * @@ -676,7 +664,28 @@ brw_update_buffer_texture_surface(struct gl_context *ctx, * so that when ISL divides by stride to obtain the number of texels, that * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE. */ - size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size); + return MIN3((unsigned)obj->BufferSize, buffer_size, + brw->ctx.Const.MaxTextureBufferSize * texel_size); +} + +void +brw_update_buffer_texture_surface(struct gl_context *ctx, + unsigned unit, + uint32_t *surf_offset) +{ + struct brw_context *brw = brw_context(ctx); + struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current; + struct intel_buffer_object *intel_obj = + intel_buffer_object(tObj->BufferObject); + const unsigned size = buffer_texture_range_size(brw, tObj); + struct brw_bo *bo = NULL; + mesa_format format = tObj->_BufferObjectFormat; + const enum isl_format isl_format = brw_isl_format_for_mesa_format(format); + int texel_size = _mesa_get_format_bytes(format); + + if (intel_obj) + bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size, + false); if (isl_format == ISL_FORMAT_UNSUPPORTED) { _mesa_problem(NULL, "bad format %s for texture buffer\n", @@ -1477,8 +1486,7 @@ update_buffer_image_param(struct brw_context *brw, unsigned surface_idx, struct brw_image_param *param) { - struct gl_buffer_object *obj = u->TexObj->BufferObject; - const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size); + const unsigned size = buffer_texture_range_size(brw, u->TexObj); update_default_image_param(brw, u, surface_idx, param); param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat); @@ -1514,10 +1522,11 @@ 
update_image_surface(struct brw_context *brw, intel_buffer_object(obj->BufferObject); const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 : _mesa_get_format_bytes(u->_ActualFormat)); + const unsigned buffer_size = buffer_texture_range_size(brw, obj); brw_emit_buffer_surface_state(
Mesa (master): Revert "mesa: simplify _mesa_is_image_unit_valid for buffers"
Module: Mesa Branch: master Commit: 5a6814780322988a7adee525899bca8a83907ab7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5a6814780322988a7adee525899bca8a83907ab7 Author: Francisco Jerez Date: Fri Mar 16 13:43:27 2018 -0700 Revert "mesa: simplify _mesa_is_image_unit_valid for buffers" This reverts commit c0ed52f6146c7e24e1275451773bd47c1eda3145. It was preventing the image format validation from being done on buffer textures, which is required to ensure that the application doesn't attempt to bind a buffer texture with an internal format incompatible with the image unit format (e.g. of different texel size), which is not allowed by the spec (it's not allowed for *any* texture target, whether or not there is spec wording restricting this behavior specifically for buffer textures) and will cause the driver to calculate texel bounds incorrectly and potentially crash instead of the expected behavior. Cc: mesa-sta...@lists.freedesktop.org Reviewed-by: Marek Olšák Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106465 Reviewed-by: Nanley Chery --- src/mesa/main/shaderimage.c | 25 - 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c index feff8ccd91..31ac852d37 100644 --- a/src/mesa/main/shaderimage.c +++ b/src/mesa/main/shaderimage.c @@ -478,13 +478,6 @@ _mesa_is_image_unit_valid(struct gl_context *ctx, struct gl_image_unit *u) if (!t) return GL_FALSE; - /* The GL 4.5 Core spec doesn't say anything about buffers. In practice, -* the image buffer format is always compatible with the underlying -* buffer storage. -*/ - if (t->Target == GL_TEXTURE_BUFFER) - return GL_TRUE; - if (!t->_BaseComplete && !t->_MipmapComplete) _mesa_test_texobj_completeness(ctx, t); @@ -498,14 +491,20 @@ _mesa_is_image_unit_valid(struct gl_context *ctx, struct gl_image_unit *u) u->_Layer >= _mesa_get_texture_layers(t, u->Level)) return GL_FALSE; - struct gl_texture_image *img = (t->Target == GL_TEXTURE_CUBE_MAP ? 
- t->Image[u->_Layer][u->Level] : - t->Image[0][u->Level]); + if (t->Target == GL_TEXTURE_BUFFER) { + tex_format = _mesa_get_shader_image_format(t->BufferObjectFormat); - if (!img || img->Border || img->NumSamples > ctx->Const.MaxImageSamples) - return GL_FALSE; + } else { + struct gl_texture_image *img = (t->Target == GL_TEXTURE_CUBE_MAP ? + t->Image[u->_Layer][u->Level] : + t->Image[0][u->Level]); + + if (!img || img->Border || img->NumSamples > ctx->Const.MaxImageSamples) + return GL_FALSE; + + tex_format = _mesa_get_shader_image_format(img->InternalFormat); + } - tex_format = _mesa_get_shader_image_format(img->InternalFormat); if (!tex_format) return GL_FALSE; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/compiler: Memory fence commit must always be enabled for gen10+
Module: Mesa Branch: master Commit: 56dc9f9f49638e0769d6bc696ff7f5dafccec9fc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=56dc9f9f49638e0769d6bc696ff7f5dafccec9fc Author: Anuj Phogat Date: Tue Feb 6 17:09:09 2018 -0800 intel/compiler: Memory fence commit must always be enabled for gen10+ Commit bit in the message descriptor (Bit 13) must always be set to true in CNL+ for memory fence messages. It also fixes a piglit GPU hang on cnl+ in simulation environment. Piglit test: arb_shader_image_load_store-shader-mem-barrier See HSD ES # 1404612949 Signed-off-by: Anuj Phogat Cc: mesa-sta...@lists.freedesktop.org Reviewed-by: Francisco Jerez --- src/intel/compiler/brw_eu_emit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 44abede16b..f8102e014e 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3287,7 +3287,9 @@ brw_memory_fence(struct brw_codegen *p, struct brw_reg dst) { const struct gen_device_info *devinfo = p->devinfo; - const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell; + const bool commit_enable = + devinfo->gen >= 10 || /* HSD ES # 1404612949 */ + (devinfo->gen == 7 && !devinfo->is_haswell); struct brw_inst *insn; brw_push_insn_state(p); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/fs: Handle surface opcode sample masks via predication.
Module: Mesa Branch: master Commit: c063e88909e630bb4605037eb0fc072f40f8c2a2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c063e88909e630bb4605037eb0fc072f40f8c2a2 Author: Francisco Jerez Date: Tue Dec 12 12:05:04 2017 -0800 intel/fs: Handle surface opcode sample masks via predication. The main motivation is to enable HDC surface opcodes on ICL which no longer allows the sample mask to be provided in a message header, but this is enabled all the way back to IVB when possible because it decreases the instruction count of some shaders using HDC messages significantly, e.g. one of the SynMark2 CSDof compute shaders decreases instruction count by about 40% due to the removal of header setup boilerplate which in turn makes a number of send message payloads more easily CSE-able. Shader-db results on SKL: total instructions in shared programs: 15325319 -> 15314384 (-0.07%) instructions in affected programs: 311532 -> 300597 (-3.51%) helped: 491 HURT: 1 Shader-db results on BDW where the optimization needs to be disabled in some cases due to hardware restrictions: total instructions in shared programs: 15604794 -> 15598028 (-0.04%) instructions in affected programs: 220863 -> 214097 (-3.06%) helped: 351 HURT: 0 The FPS of SynMark2 CSDof improves by 5.09% ±0.36% (n=10) on my SKL laptop with this change. According to Eero this improves performance of the same test by 9% on BYT and by 7-8% on BXT J4205 and on SKL GT2 desktop. 
Reviewed-by: Kenneth Graunke Tested-By: Eero Tamminen --- src/intel/compiler/brw_fs.cpp | 43 ++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index c255a3b23b..b1e1d98f6e 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -4460,6 +4460,8 @@ static void lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, const fs_reg &sample_mask) { + const gen_device_info *devinfo = bld.shader->devinfo; + /* Get the logical send arguments. */ const fs_reg &addr = inst->src[0]; const fs_reg &src = inst->src[1]; @@ -4470,7 +4472,20 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, /* Calculate the total number of components of the payload. */ const unsigned addr_sz = inst->components_read(0); const unsigned src_sz = inst->components_read(1); - const unsigned header_sz = (sample_mask.file == BAD_FILE ? 0 : 1); + /* From the BDW PRM Volume 7, page 147: +* +* "For the Data Cache Data Port*, the header must be present for the +* following message types: [...] Typed read/write/atomics" +* +* Earlier generations have a similar wording. Because of this restriction +* we don't attempt to implement sample masks via predication for such +* messages prior to Gen9, since we have to provide a header anyway. On +* Gen11+ the header has been removed so we can only use predication. +*/ + const unsigned header_sz = devinfo->gen < 9 && + (op == SHADER_OPCODE_TYPED_SURFACE_READ || + op == SHADER_OPCODE_TYPED_SURFACE_WRITE || + op == SHADER_OPCODE_TYPED_ATOMIC) ? 1 : 0; const unsigned sz = header_sz + addr_sz + src_sz; /* Allocate space for the payload. */ @@ -4490,6 +4505,32 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op, bld.LOAD_PAYLOAD(payload, components, sz, header_sz); + /* Predicate the instruction on the sample mask if no header is +* provided. 
+*/ + if (!header_sz && sample_mask.file != BAD_FILE && + sample_mask.file != IMM) { + const fs_builder ubld = bld.group(1, 0).exec_all(); + if (inst->predicate) { + assert(inst->predicate == BRW_PREDICATE_NORMAL); + assert(!inst->predicate_inverse); + assert(inst->flag_subreg < 2); + /* Combine the sample mask with the existing predicate by using a + * vertical predication mode. + */ + inst->predicate = BRW_PREDICATE_ALIGN1_ALLV; + ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2), + sample_mask.type), + sample_mask); + } else { + inst->flag_subreg = 2; + inst->predicate = BRW_PREDICATE_NORMAL; + inst->predicate_inverse = false; + ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type), + sample_mask); + } + } + /* Update the original instruction. */ inst->opcode = op; inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/ir: Allow representing additional flag subregisters in the IR.
Module: Mesa Branch: master Commit: cc0fc8b8ac608b036d260007a689eeeb8e815031 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cc0fc8b8ac608b036d260007a689eeeb8e815031 Author: Francisco Jerez Date: Tue Dec 12 12:05:02 2017 -0800 intel/ir: Allow representing additional flag subregisters in the IR. This allows representing conditional mods and predicates on f1.0-f1.1 at the IR level by adding an extra bit to the flag_subreg backend_instruction field. Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_fs.cpp| 12 +++- src/intel/compiler/brw_fs_generator.cpp | 4 ++-- src/intel/compiler/brw_reg.h | 7 +++ src/intel/compiler/brw_schedule_instructions.cpp | 2 +- src/intel/compiler/brw_shader.h | 4 ++-- src/intel/compiler/brw_vec4.cpp | 7 --- src/intel/compiler/brw_vec4_generator.cpp| 2 +- 7 files changed, 24 insertions(+), 14 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 0d7988dae4..16b6a06c69 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -5488,9 +5488,10 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fs_inst *inst = (fs_inst *)be_inst; if (inst->predicate) { - fprintf(file, "(%cf0.%d) ", - inst->predicate_inverse ? '-' : '+', - inst->flag_subreg); + fprintf(file, "(%cf%d.%d) ", + inst->predicate_inverse ? 
'-' : '+', + inst->flag_subreg / 2, + inst->flag_subreg % 2); } fprintf(file, "%s", brw_instruction_name(devinfo, inst->opcode)); @@ -5502,7 +5503,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) (devinfo->gen < 5 || (inst->opcode != BRW_OPCODE_SEL && inst->opcode != BRW_OPCODE_IF && inst->opcode != BRW_OPCODE_WHILE))) { - fprintf(file, ".f0.%d", inst->flag_subreg); + fprintf(file, ".f%d.%d", inst->flag_subreg / 2, + inst->flag_subreg % 2); } } fprintf(file, "(%d) ", inst->exec_size); @@ -5888,7 +5890,7 @@ fs_visitor::calculate_register_pressure() bool fs_visitor::opt_drop_redundant_mov_to_flags() { - bool flag_mov_found[2] = {false}; + bool flag_mov_found[4] = {false}; bool progress = false; /* Instructions removed by this pass can only be added if this were true */ diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index a5a821a13b..557b098c20 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1508,7 +1508,7 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, void fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst) { - struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg); + struct brw_reg flags = brw_flag_subreg(inst->flag_subreg); struct brw_reg dispatch_mask; if (devinfo->gen >= 6) @@ -1764,7 +1764,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_predicate_control(p, inst->predicate); brw_set_default_predicate_inverse(p, inst->predicate_inverse); - brw_set_default_flag_reg(p, 0, inst->flag_subreg); + brw_set_default_flag_reg(p, inst->flag_subreg / 2, inst->flag_subreg % 2); brw_set_default_saturate(p, inst->saturate); brw_set_default_mask_control(p, inst->force_writemask_all); brw_set_default_acc_write_control(p, inst->writes_accumulator); diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h index 
17d5b97bf3..c41408104f 100644 --- a/src/intel/compiler/brw_reg.h +++ b/src/intel/compiler/brw_reg.h @@ -842,6 +842,13 @@ brw_flag_reg(int reg, int subreg) BRW_ARF_FLAG + reg, subreg); } +static inline struct brw_reg +brw_flag_subreg(unsigned subreg) +{ + return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, + BRW_ARF_FLAG + subreg / 2, subreg % 2); +} + /** * Return the mask register present in Gen4-5, or the related register present * in Gen7.5 and later hardware referred to as "channel enable" register in diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 692f712532..0e793de4dd 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -974,7 +974,7 @@ fs_instruction_scheduler::calculate_deps() */ schedule_node *last_grf_write[grf_count * 16]; schedule_node *last_mrf_write[BRW_MA
Mesa (master): intel/ir: Allow arbitrary scratch flag registers for SHADER_OPCODE_FIND_LIVE_CHANNEL.
Module: Mesa Branch: master Commit: 6edb332b44b2570abac8fea2123050ea0f84e1e6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6edb332b44b2570abac8fea2123050ea0f84e1e6 Author: Francisco Jerez Date: Thu Feb 22 12:49:01 2018 -0800 intel/ir: Allow arbitrary scratch flag registers for SHADER_OPCODE_FIND_LIVE_CHANNEL. This shouldn't cause any functional change at this point; it changes SHADER_OPCODE_FIND_LIVE_CHANNEL to use the flag register specified at the IR level instead of the hard-coded f1.0, now that it can be represented in backend_instruction::flag_subreg. This will be necessary for scheduling to behave correctly once more things start making use of f1.0. Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_eu_emit.c| 5 +++-- src/intel/compiler/brw_fs.cpp | 3 ++- src/intel/compiler/brw_fs_builder.h | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index a96fe43556..14b1c592b6 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -3399,7 +3399,9 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, */ inst = brw_FBL(p, vec1(dst), exec_mask); } else { - const struct brw_reg flag = brw_flag_reg(1, 0); + const struct brw_reg flag = brw_flag_reg( +brw_inst_flag_reg_nr(devinfo, p->current), +brw_inst_flag_subreg_nr(devinfo, p->current)); brw_set_default_exec_size(p, BRW_EXECUTE_1); brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0)); @@ -3418,7 +3420,6 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE); brw_inst_set_group(devinfo, inst, lower_size * i + 8 * qtr_control); brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z); -brw_inst_set_flag_reg_nr(devinfo, inst, 1); brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1); } diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 
16b6a06c69..c255a3b23b 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -931,7 +931,8 @@ fs_inst::flags_written() const if ((conditional_mod && (opcode != BRW_OPCODE_SEL && opcode != BRW_OPCODE_IF && opcode != BRW_OPCODE_WHILE)) || - opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) { + opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS || + opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL) { return flag_mask(this); } else { return flag_mask(dst, size_written); diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h index 874272b7af..b157e33c39 100644 --- a/src/intel/compiler/brw_fs_builder.h +++ b/src/intel/compiler/brw_fs_builder.h @@ -406,7 +406,7 @@ namespace brw { const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); const dst_reg dst = vgrf(src.type); - ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); + ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index)->flag_subreg = 2; ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0)); return src_reg(component(dst, 0)); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/l3: Don't allocate SLM partition on ICL+.
Module: Mesa Branch: master Commit: 9ec3362e0ba293f20d08493753edeb29d13baadf URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9ec3362e0ba293f20d08493753edeb29d13baadf Author: Francisco Jerez Date: Tue Dec 12 12:05:00 2017 -0800 intel/l3: Don't allocate SLM partition on ICL+. SLM has a chunk of special-purpose memory separate from L3 on ICL+, we shouldn't allocate a partition for it on L3 anymore. Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- src/intel/common/gen_l3_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/common/gen_l3_config.c b/src/intel/common/gen_l3_config.c index aff13c06ec..7d58ad8d7c 100644 --- a/src/intel/common/gen_l3_config.c +++ b/src/intel/common/gen_l3_config.c @@ -232,7 +232,7 @@ gen_get_default_l3_weights(const struct gen_device_info *devinfo, { struct gen_l3_weights w = {{ 0 }}; - w.w[GEN_L3P_SLM] = needs_slm; + w.w[GEN_L3P_SLM] = devinfo->gen < 11 && needs_slm; w.w[GEN_L3P_URB] = 1.0; if (devinfo->gen >= 8) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): Revert "i965/fs: Predicate byte scattered writes if needed"
Module: Mesa Branch: master Commit: 4b4838b1ae46a0ce9fed88f275cc01167302cf24 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4b4838b1ae46a0ce9fed88f275cc01167302cf24 Author: Francisco Jerez Date: Sat Feb 24 16:05:21 2018 -0800 Revert "i965/fs: Predicate byte scattered writes if needed" This reverts commit a4031bdfa927fb4c3c5d0bdadc70634f3c1a5eac. It's redundant with the sample mask predication done at this point by the common logical send lowering infrastructure, and rather buggy because it wasn't applying the correct sample mask in shaders using discard, since the dispatch mask returned by FS_OPCODE_MOV_DISPATCH_TO_FLAGS doesn't reflect samples discarded by the shader, so it could have led to data corruption in fragment shader invocations that execute discard based on a non-dynamically uniform condition. Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_fs_nir.cpp | 15 +-- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index 47247875e8..554d61d71a 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -4207,25 +4207,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr * to rely on byte scattered in order to write 16-bit elements. * The byte_scattered_write message needs that every written 16-bit * type to be aligned 32-bits (stride=2). - * Additionally, while on Untyped Surface messages the - * bits of the execution mask are ANDed with the corresponding - * bits of the Pixel/Sample Mask, that is not the case for byte - * scattered writes. That is needed to avoid ssbo stores writing - * on helper invocations. So when that can affect, we load the - * sample mask, and predicate the send message. 
*/ -brw_predicate pred = BRW_PREDICATE_NONE; - -if (stage == MESA_SHADER_FRAGMENT) { - bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS); - pred = BRW_PREDICATE_NORMAL; -} - emit_byte_scattered_write(bld, surf_index, offset_reg, write_src, 1 /* dims */, 1, bit_size, - pred); + BRW_PREDICATE_NONE); } else { assert(num_components * type_size <= 16); assert((num_components * type_size) % 4 == 0); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/eu: Plumb header present bit to codegen helpers for HDC messages.
Module: Mesa Branch: master Commit: e7c9adca5726a8c96de20ae7c5f21a30061db392 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e7c9adca5726a8c96de20ae7c5f21a30061db392 Author: Francisco Jerez Date: Tue Dec 12 12:05:03 2017 -0800 intel/eu: Plumb header present bit to codegen helpers for HDC messages. This makes sure that the header-present bit of the message descriptor is in sync with the IR instruction fields, which gives the optimizer more control to avoid the overhead of setting up a message header when it's possible to do so. Reviewed-by: Jordan Justen Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_eu.h | 18 -- src/intel/compiler/brw_eu_emit.c | 30 ++ src/intel/compiler/brw_fs_generator.cpp | 20 ++-- src/intel/compiler/brw_vec4_generator.cpp | 11 ++- 4 files changed, 50 insertions(+), 29 deletions(-) diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h index 2d0f56f793..a5f28d8fc6 100644 --- a/src/intel/compiler/brw_eu.h +++ b/src/intel/compiler/brw_eu.h @@ -444,7 +444,8 @@ brw_untyped_atomic(struct brw_codegen *p, struct brw_reg surface, unsigned atomic_op, unsigned msg_length, - bool response_expected); + bool response_expected, + bool header_present); void brw_untyped_surface_read(struct brw_codegen *p, @@ -459,7 +460,8 @@ brw_untyped_surface_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned num_channels); + unsigned num_channels, + bool header_present); void brw_typed_atomic(struct brw_codegen *p, @@ -468,7 +470,8 @@ brw_typed_atomic(struct brw_codegen *p, struct brw_reg surface, unsigned atomic_op, unsigned msg_length, - bool response_expected); + bool response_expected, + bool header_present); void brw_typed_surface_read(struct brw_codegen *p, @@ -476,14 +479,16 @@ brw_typed_surface_read(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned num_channels); + unsigned num_channels, + bool header_present); void 
brw_typed_surface_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, -unsigned num_channels); +unsigned num_channels, +bool header_present); void brw_byte_scattered_read(struct brw_codegen *p, @@ -498,7 +503,8 @@ brw_byte_scattered_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned bit_size); + unsigned bit_size, + bool header_present); void brw_memory_fence(struct brw_codegen *p, diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index 14b1c592b6..44abede16b 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -2883,7 +2883,8 @@ brw_untyped_atomic(struct brw_codegen *p, struct brw_reg surface, unsigned atomic_op, unsigned msg_length, - bool response_expected) + bool response_expected, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ? @@ -2901,7 +2902,7 @@ brw_untyped_atomic(struct brw_codegen *p, p, sfid, brw_writemask(dst, mask), payload, surface, msg_length, brw_surface_payload_size(p, response_expected, devinfo->gen >= 8 || devinfo->is_haswell, true), - align1); + header_present); brw_set_dp_untyped_atomic_message( p, insn, atomic_op, response_expected); @@ -2984,7 +2985,8 @@ brw_untyped_surface_write(struct brw_codegen *p, struct brw_reg payload, struct brw_reg surface, unsigned msg_length, - unsigned num_channels) + unsigned num_channels, + bool header_present) { const struct gen_device_info *devinfo = p->devinfo; const unsigned sfid = (devinfo->gen >= 8 || devin
Mesa (master): intel/ir: Fix invalid type aliasing with undefined behavior in test_eu_compact.
Module: Mesa Branch: master Commit: cb309d27c52e9a6dbddb82a0f6eb75a6f263 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb309d27c52e9a6dbddb82a0f6eb75a6f263 Author: Francisco Jerez Date: Fri Jan 26 11:48:02 2018 -0800 intel/ir: Fix invalid type aliasing with undefined behavior in test_eu_compact. test_fuzz_compact_instruction() was attempting to modify the uint64_t data array of a brw_inst through a pointer to uint32_t, which has undefined behavior. This was causing the test_eu_compact unit test to fail mysteriously for me on GCC 7 with some additional harmless-looking changes I had applied to my tree, which happened to affect the order instructions are emitted by GCC causing the bit twiddling to be done after the clear_pad_bits() call which is supposed to overwrite the same data through a pointer of different type, leading to data corruption. A similar failure has been reported by Vinson Lee on the master branch built with GCC 8. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105052 Tested-by: Vinson Lee Reviewed-by: Matt Turner --- src/intel/compiler/test_eu_compact.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/test_eu_compact.cpp b/src/intel/compiler/test_eu_compact.cpp index 1532e3b984..f6924abd36 100644 --- a/src/intel/compiler/test_eu_compact.cpp +++ b/src/intel/compiler/test_eu_compact.cpp @@ -149,13 +149,13 @@ test_fuzz_compact_instruction(struct brw_codegen *p, brw_inst src) for (int bit1 = 0; bit1 < 128; bit1++) { brw_inst instr = src; -uint32_t *bits = (uint32_t *)&instr; +uint64_t *bits = instr.data; if (skip_bit(p->devinfo, &src, bit1)) continue; -bits[bit0 / 32] ^= (1 << (bit0 & 31)); -bits[bit1 / 32] ^= (1 << (bit1 & 31)); +bits[bit0 / 64] ^= (1ull << (bit0 & 63)); +bits[bit1 / 64] ^= (1ull << (bit1 & 63)); clear_pad_bits(p->devinfo, &instr); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): util/bitset: Make C++ wrapper trivially constructible.
Module: Mesa Branch: master Commit: 69b4a9d21d00e1f72b52e818cc059ee1642f263e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=69b4a9d21d00e1f72b52e818cc059ee1642f263e Author: Francisco Jerez Date: Sat Feb 24 18:37:34 2018 -0800 util/bitset: Make C++ wrapper trivially constructible. In order to fix a build failure on compilers not implementing unrestricted unions, which is a C++11 feature. v2: Provide signed integer comparison and assignment operators instead of BITSET_WORD ones to avoid spurious ambiguity warnings on comparisons with a signed integer literal. Fixes: ba79a90fb52e1e81fb "glsl: Switch ast_type_qualifier to a 128-bit bitset." Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105238 Tested-by: Roland Scheidegger Tested-By: George Kyriazis Reviewed-by: Roland Scheidegger --- src/compiler/glsl/ast.h | 2 -- src/compiler/glsl/glsl_parser.yy | 1 - src/util/bitset.h| 37 - 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h index e5e4b572ff..a1ec0d566f 100644 --- a/src/compiler/glsl/ast.h +++ b/src/compiler/glsl/ast.h @@ -477,8 +477,6 @@ struct ast_type_qualifier { DECLARE_BITSET_T(bitset_t, 128); union flags { - flags() : i(0) {} - struct { unsigned invariant:1; unsigned precise:1; diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy index f1986ed0a8..e5ea41d4df 100644 --- a/src/compiler/glsl/glsl_parser.yy +++ b/src/compiler/glsl/glsl_parser.yy @@ -96,7 +96,6 @@ static bool match_layout_qualifier(const char *s1, const char *s2, %parse-param {struct _mesa_glsl_parse_state *state} %union { - YYSTYPE() {} int n; int64_t n64; float real; diff --git a/src/util/bitset.h b/src/util/bitset.h index 7bb5f3c83c..b4c2152023 100644 --- a/src/util/bitset.h +++ b/src/util/bitset.h @@ -142,23 +142,6 @@ __bitset_next_set(unsigned i, BITSET_WORD *tmp, * it as, and N is the number of bits in the bitset. 
*/ #define DECLARE_BITSET_T(T, N) struct T { \ - /* XXX - Replace this with an implicitly-defined \ - * constructor when support for C++11 defaulted \ - * constructors can be assumed (available on GCC 4.4 and \ - * later) in order to make the object trivially \ - * constructible like a fundamental integer type for \ - * convenience. \ - */ \ - T() \ - { \ - } \ -\ - T(BITSET_WORD x) \ - { \ - for (unsigned i = 0; i < BITSET_WORDS(N); i++, x = 0) \ -words[i] = x; \ - } \ -\ EXPLICIT_CONVERSION \ operator bool() const \ { \ @@ -168,6 +151,13 @@ __bitset_next_set(unsigned i, BITSET_WORD *tmp, return false; \ } \ \ + T & \ + operator=(int x) \ + { \ + const T c = {{ (BITSET_WORD)x }}; \ + return *this = c; \ + } \ +\ friend bool \ operator==(const T &b, const T &c)\ { \ @@ -180,6 +170,19 @@ __bitset_next_set(unsigned i, BITSET_WORD *tmp, return !(b == c); \ } \ \ + friend bool \ + operator==(const T &b, int x) \ + { \ + const T c = {{ (BITSET_WORD)x }};
Mesa (master): util/bitset: Add C++ wrapper for static-size bitsets.
Module: Mesa Branch: master Commit: bdbc2ffa4219b39e47a27decbc603d445286d92d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bdbc2ffa4219b39e47a27decbc603d445286d92d Author: Francisco Jerez Date: Mon Feb 12 14:09:24 2018 -0800 util/bitset: Add C++ wrapper for static-size bitsets. Reviewed-by: Plamena Manolova --- src/util/bitset.h | 114 ++ 1 file changed, 114 insertions(+) diff --git a/src/util/bitset.h b/src/util/bitset.h index 2404ce7f63..7bb5f3c83c 100644 --- a/src/util/bitset.h +++ b/src/util/bitset.h @@ -132,4 +132,118 @@ __bitset_next_set(unsigned i, BITSET_WORD *tmp, for (__tmp = *(__set), __i = 0; \ (__i = __bitset_next_set(__i, &__tmp, __set, __size)) < __size;) +#ifdef __cplusplus + +/** + * Simple C++ wrapper of a bitset type of static size, with value semantics + * and basic bitwise arithmetic operators. The operators defined below are + * expected to have the same semantics as the same operator applied to other + * fundamental integer types. T is the name of the struct to instantiate + * it as, and N is the number of bits in the bitset. + */ +#define DECLARE_BITSET_T(T, N) struct T { \ + /* XXX - Replace this with an implicitly-defined \ + * constructor when support for C++11 defaulted \ + * constructors can be assumed (available on GCC 4.4 and \ + * later) in order to make the object trivially \ + * constructible like a fundamental integer type for \ + * convenience. 
\ + */ \ + T() \ + { \ + } \ +\ + T(BITSET_WORD x) \ + { \ + for (unsigned i = 0; i < BITSET_WORDS(N); i++, x = 0) \ +words[i] = x; \ + } \ +\ + EXPLICIT_CONVERSION \ + operator bool() const \ + { \ + for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ +if (words[i]) \ + return true; \ + return false; \ + } \ +\ + friend bool \ + operator==(const T &b, const T &c)\ + { \ + return BITSET_EQUAL(b.words, c.words); \ + } \ +\ + friend bool \ + operator!=(const T &b, const T &c)\ + { \ + return !(b == c); \ + } \ +\ + friend T \ + operator~(const T &b) \ + { \ + T c; \ + for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ +c.words[i] = ~b.words[i]; \ + return c; \ + } \ +\ + T & \ + operator|=(const T &b)\ + { \ + for (unsigned i = 0; i < BITSET_WORDS(N); i++) \ +words[i] |= b.words[i]; \ + return *this; \ + } \ +\ + friend T
Mesa (master): glsl: Switch ast_type_qualifier to a 128-bit bitset.
Module: Mesa Branch: master Commit: ba79a90fb52e1e81fbfb38113e85a56b13497c50 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ba79a90fb52e1e81fbfb38113e85a56b13497c50 Author: Francisco Jerez Date: Mon Feb 12 14:18:15 2018 -0800 glsl: Switch ast_type_qualifier to a 128-bit bitset. This should end the drought of bits in the ast_type_qualifier object. The bitset_t type works pretty much as a drop-in replacement for the current uint64_t bitset. The only catch is that the bitset_t type as defined in the previous commit doesn't have a trivial constructor (because it has a user-defined constructor), so it cannot be used as a union member without providing a user-defined constructor for the union (which causes it in turn to be non-trivially constructible). This annoyance could be easily addressed in C++11 by declaring the default constructor of bitset_t to be the implicitly defined one -- IMO one more reason to drop support for GCC 4.2-4.3. The other minor change was required because glsl_parser_extras.cpp was hard-coding the type of bitset temporaries as uint64_t, which (unlike what would have been the case if the uint64_t had been replaced with e.g. an __int128) would otherwise have caused a build failure, because the boolean conversion operator of bitset_t is marked explicit (if C++11 is available), so the bitset won't be silently truncated down to 1 bit in order to use it to initialize the uint64_t temporaries (yikes). 
Reviewed-by: Plamena Manolova --- src/compiler/glsl/ast.h | 8 ++-- src/compiler/glsl/glsl_parser.yy | 1 + src/compiler/glsl/glsl_parser_extras.cpp | 4 ++-- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h index eee2248281..2a38a4b1f7 100644 --- a/src/compiler/glsl/ast.h +++ b/src/compiler/glsl/ast.h @@ -28,6 +28,7 @@ #include "list.h" #include "glsl_parser_extras.h" #include "compiler/glsl_types.h" +#include "util/bitset.h" struct _mesa_glsl_parse_state; @@ -473,8 +474,11 @@ enum { struct ast_type_qualifier { DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier); + DECLARE_BITSET_T(bitset_t, 128); + + union flags { + flags() : i(0) {} - union { struct { unsigned invariant:1; unsigned precise:1; @@ -636,7 +640,7 @@ struct ast_type_qualifier { q; /** \brief Set of flags, accessed as a bitmask. */ - uint64_t i; + bitset_t i; } flags; /** Precision of the type (highp/medium/lowp). */ diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy index 19147c7a3e..4faf9602a0 100644 --- a/src/compiler/glsl/glsl_parser.yy +++ b/src/compiler/glsl/glsl_parser.yy @@ -96,6 +96,7 @@ static bool match_layout_qualifier(const char *s1, const char *s2, %parse-param {struct _mesa_glsl_parse_state *state} %union { + YYSTYPE() {} int n; int64_t n64; float real; diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 81d74e92ce..106417c5c3 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -1011,7 +1011,7 @@ _mesa_ast_process_interface_block(YYLTYPE *locp, "an instance name are not allowed"); } - uint64_t interface_type_mask; + ast_type_qualifier::bitset_t interface_type_mask; struct ast_type_qualifier temp_type_qualifier; /* Get a bitmask containing only the in/out/uniform/buffer @@ -1030,7 +1030,7 @@ _mesa_ast_process_interface_block(YYLTYPE *locp, * production rule guarantees that only one bit will be set 
(and * it will be in/out/uniform). */ - uint64_t block_interface_qualifier = q.flags.i; + ast_type_qualifier::bitset_t block_interface_qualifier = q.flags.i; block->default_layout.flags.i |= block_interface_qualifier; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: Expose EXT_shader_framebuffer_fetch(_non_coherent) on desktop and embedded GL.
Module: Mesa Branch: master Commit: 51562ea7a0678b8067f438f17a3d5fbe5280a997 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=51562ea7a0678b8067f438f17a3d5fbe5280a997 Author: Francisco Jerez Date: Fri Feb 23 18:35:59 2018 -0800 mesa: Expose EXT_shader_framebuffer_fetch(_non_coherent) on desktop and embedded GL. Reviewed-by: Plamena Manolova --- docs/relnotes/18.1.0.html| 2 ++ src/mesa/main/extensions_table.h | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html index 8dd2550ced..1d5201717f 100644 --- a/docs/relnotes/18.1.0.html +++ b/docs/relnotes/18.1.0.html @@ -48,6 +48,8 @@ Note: some of the new features are only available with certain drivers. GL_ARB_bindless_texture on nvc0/maxwell+ GL_EXT_semaphore on radeonsi GL_EXT_semaphore_fd on radeonsi +GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was already supported) +GL_EXT_shader_framebuffer_fetch_non_coherent on i965 Disk shader cache support for i965 enabled by default diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h index 6be16c4407..492f7c3d20 100644 --- a/src/mesa/main/extensions_table.h +++ b/src/mesa/main/extensions_table.h @@ -252,7 +252,8 @@ EXT(EXT_semaphore , EXT_semaphore EXT(EXT_semaphore_fd, EXT_semaphore_fd , GLL, GLC, x , ES2, 2017) EXT(EXT_separate_shader_objects , dummy_true , x , x , x , ES2, 2013) EXT(EXT_separate_specular_color , dummy_true , GLL, x , x , x , 1997) -EXT(EXT_shader_framebuffer_fetch, EXT_shader_framebuffer_fetch , x , x , x , ES2, 2013) +EXT(EXT_shader_framebuffer_fetch, EXT_shader_framebuffer_fetch , GLL, GLC, x , ES2, 2013) +EXT(EXT_shader_framebuffer_fetch_non_coherent, EXT_shader_framebuffer_fetch_non_coherent, GLL, GLC, x, ES2, 2018) EXT(EXT_shader_integer_mix , EXT_shader_integer_mix , GLL, GLC, x , 30, 2013) EXT(EXT_shader_io_blocks, dummy_true , x , x , x , 31, 2014) EXT(EXT_shader_samples_identical, EXT_shader_samples_identical , GLL, GLC, x , 31, 
2015) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Replace MESA_shader_framebuffer_fetch extension flags with EXT ones.
Module: Mesa Branch: master Commit: 6ebefb0fd5065bde02611172928a7cdeb9d32726 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6ebefb0fd5065bde02611172928a7cdeb9d32726 Author: Francisco Jerez Date: Mon Feb 12 14:54:27 2018 -0800 glsl: Replace MESA_shader_framebuffer_fetch extension flags with EXT ones. Reviewed-by: Plamena Manolova --- src/compiler/glsl/glsl_parser_extras.cpp | 1 + src/compiler/glsl/glsl_parser_extras.h | 9 +++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 106417c5c3..275c4d7571 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -707,6 +707,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT_AEP(EXT_primitive_bounding_box), EXT(EXT_separate_shader_objects), EXT(EXT_shader_framebuffer_fetch), + EXT(EXT_shader_framebuffer_fetch_non_coherent), EXT(EXT_shader_integer_mix), EXT_AEP(EXT_shader_io_blocks), EXT(EXT_shader_samples_identical), diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h index f88cb78347..66bd1a3db6 100644 --- a/src/compiler/glsl/glsl_parser_extras.h +++ b/src/compiler/glsl/glsl_parser_extras.h @@ -317,8 +317,7 @@ struct _mesa_glsl_parse_state { bool has_framebuffer_fetch() const { return EXT_shader_framebuffer_fetch_enable || - MESA_shader_framebuffer_fetch_enable || - MESA_shader_framebuffer_fetch_non_coherent_enable; + EXT_shader_framebuffer_fetch_non_coherent_enable; } bool has_texture_cube_map_array() const @@ -782,6 +781,8 @@ struct _mesa_glsl_parse_state { bool EXT_separate_shader_objects_warn; bool EXT_shader_framebuffer_fetch_enable; bool EXT_shader_framebuffer_fetch_warn; + bool EXT_shader_framebuffer_fetch_non_coherent_enable; + bool EXT_shader_framebuffer_fetch_non_coherent_warn; bool EXT_shader_integer_mix_enable; bool EXT_shader_integer_mix_warn; bool EXT_shader_io_blocks_enable; @@ -800,10 
+801,6 @@ struct _mesa_glsl_parse_state { bool EXT_texture_cube_map_array_warn; bool INTEL_conservative_rasterization_enable; bool INTEL_conservative_rasterization_warn; - bool MESA_shader_framebuffer_fetch_enable; - bool MESA_shader_framebuffer_fetch_warn; - bool MESA_shader_framebuffer_fetch_non_coherent_enable; - bool MESA_shader_framebuffer_fetch_non_coherent_warn; bool MESA_shader_integer_functions_enable; bool MESA_shader_integer_functions_warn; bool NV_image_formats_enable; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Specify framebuffer fetch coherency mode in lower_blend_equation_advanced().
Module: Mesa Branch: master Commit: 537bb1da98c34eafbed714d468c56fc0af543e49 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=537bb1da98c34eafbed714d468c56fc0af543e49 Author: Francisco Jerez Date: Wed Feb 14 11:53:49 2018 -0800 glsl: Specify framebuffer fetch coherency mode in lower_blend_equation_advanced(). This requires passing an extra argument to the lowering pass because the KHR_blend_equation_advanced specification doesn't seem to define any mechanism for the implementation to determine at compile-time whether coherent blending can ever be used (not even an "#extension KHR_blend_equation_advanced_coherent" directive seems to be required in the shader source AFAICT). In the long run we'll probably want to do state-dependent recompiles based on the value of ctx->Color.BlendCoherent, but right now there would be no benefit from that because the only driver that supports coherent framebuffer fetch is i965 on SKL+ hardware, which are unable to support the non-coherent path for the moment because of texture layout issues, so framebuffer fetch coherency is always enabled for them. 
Reviewed-by: Plamena Manolova --- src/compiler/glsl/ir_optimization.h | 2 +- src/compiler/glsl/lower_blend_equation_advanced.cpp | 3 ++- src/mesa/drivers/dri/i965/brw_link.cpp | 3 ++- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 3 ++- 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h index 2b8c195151..81049a479e 100644 --- a/src/compiler/glsl/ir_optimization.h +++ b/src/compiler/glsl/ir_optimization.h @@ -166,7 +166,7 @@ bool lower_tess_level(gl_linked_shader *shader); bool lower_vertex_id(gl_linked_shader *shader); bool lower_cs_derived(gl_linked_shader *shader); -bool lower_blend_equation_advanced(gl_linked_shader *shader); +bool lower_blend_equation_advanced(gl_linked_shader *shader, bool coherent); bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state); void propagate_invariance(exec_list *instructions); diff --git a/src/compiler/glsl/lower_blend_equation_advanced.cpp b/src/compiler/glsl/lower_blend_equation_advanced.cpp index c6db58142c..b05a2e0f0b 100644 --- a/src/compiler/glsl/lower_blend_equation_advanced.cpp +++ b/src/compiler/glsl/lower_blend_equation_advanced.cpp @@ -462,7 +462,7 @@ get_main(gl_linked_shader *sh) } bool -lower_blend_equation_advanced(struct gl_linked_shader *sh) +lower_blend_equation_advanced(struct gl_linked_shader *sh, bool coherent) { if (sh->Program->sh.fs.BlendSupport == 0) return false; @@ -480,6 +480,7 @@ lower_blend_equation_advanced(struct gl_linked_shader *sh) fb->data.location = FRAG_RESULT_DATA0; fb->data.read_only = 1; fb->data.fb_fetch_output = 1; + fb->data.memory_coherent = coherent; fb->data.how_declared = ir_var_hidden; ir_variable *mode = new(mem_ctx) ir_variable(glsl_type::uint_type, diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index f0598f591a..b08b56a935 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ 
-99,7 +99,8 @@ process_glsl_ir(struct brw_context *brw, ralloc_adopt(mem_ctx, shader->ir); - lower_blend_equation_advanced(shader); + lower_blend_equation_advanced( + shader, ctx->Extensions.KHR_blend_equation_advanced_coherent); /* lower_packing_builtins() inserts arithmetic instructions, so it * must precede lower_instructions(). diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index 7fef93949e..ccf4dabcc9 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -7056,7 +7056,8 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) do_mat_op_to_vec(ir); if (stage == MESA_SHADER_FRAGMENT) - lower_blend_equation_advanced(shader); + lower_blend_equation_advanced( +shader, ctx->Extensions.KHR_blend_equation_advanced_coherent); lower_instructions(ir, MOD_TO_FLOOR | ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glapi: Update XML for last revision of EXT_shader_framebuffer_fetch.
Module: Mesa Branch: master Commit: e4124f9bc119ae22e34daea6f44bd3ddec454ec7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e4124f9bc119ae22e34daea6f44bd3ddec454ec7 Author: Francisco Jerez Date: Mon Feb 12 14:46:39 2018 -0800 glapi: Update XML for last revision of EXT_shader_framebuffer_fetch. Desktop GL is now supported, and there is an additional entry-point for EXT_shader_framebuffer_fetch_non_coherent. Reviewed-by: Plamena Manolova --- src/mapi/glapi/gen/es_EXT.xml | 5 - src/mapi/glapi/gen/gl_API.xml | 6 ++ 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml index e5104259b6..a53fcd1e8a 100644 --- a/src/mapi/glapi/gen/es_EXT.xml +++ b/src/mapi/glapi/gen/es_EXT.xml @@ -842,11 +842,6 @@ - - - - - diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index d13a3bfd83..38c1921047 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -12886,6 +12886,12 @@ http://www.w3.org/2001/XInclude"/> + + + + + + ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: Rename MESA_shader_framebuffer_fetch gl_extensions bits to EXT.
Module: Mesa Branch: master Commit: 6a8ec78c2ab12d75f16e4a2f95e9be014dae021e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6a8ec78c2ab12d75f16e4a2f95e9be014dae021e Author: Francisco Jerez Date: Mon Feb 12 14:31:32 2018 -0800 mesa: Rename MESA_shader_framebuffer_fetch gl_extensions bits to EXT. The changes I had originally planned for the MESA_shader_framebuffer_fetch extension have been merged into the EXT spec, so there's no point in keeping the MESA_shader_framebuffer_fetch extension enables. Reviewed-by: Plamena Manolova --- src/mesa/drivers/dri/i965/brw_draw.c | 2 +- src/mesa/drivers/dri/i965/brw_program.c | 2 +- src/mesa/drivers/dri/i965/brw_wm.c | 4 ++-- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +- src/mesa/drivers/dri/i965/intel_extensions.c | 4 ++-- src/mesa/main/barrier.c | 2 +- src/mesa/main/extensions_table.h | 2 +- src/mesa/main/get.c | 2 +- src/mesa/main/get_hash_params.py | 7 +++ src/mesa/main/mtypes.h | 4 ++-- 10 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c index 50cf8b12c7..299e7f929e 100644 --- a/src/mesa/drivers/dri/i965/brw_draw.c +++ b/src/mesa/drivers/dri/i965/brw_draw.c @@ -513,7 +513,7 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw, } /* Resolve color buffers for non-coherent framebuffer fetch. 
*/ - if (!ctx->Extensions.MESA_shader_framebuffer_fetch && + if (!ctx->Extensions.EXT_shader_framebuffer_fetch && ctx->FragmentProgram._Current && ctx->FragmentProgram._Current->info.outputs_read) { const struct gl_framebuffer *fb = ctx->DrawBuffer; diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 684890e8ba..527f003977 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -318,7 +318,7 @@ brw_framebuffer_fetch_barrier(struct gl_context *ctx) struct brw_context *brw = brw_context(ctx); const struct gen_device_info *devinfo = &brw->screen->devinfo; - if (!ctx->Extensions.MESA_shader_framebuffer_fetch) { + if (!ctx->Extensions.EXT_shader_framebuffer_fetch) { if (devinfo->gen >= 6) { brw_emit_pipe_control_flush(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH | diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index cfc2d47a67..68d4ab88d7 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -573,7 +573,7 @@ brw_wm_populate_key(struct brw_context *brw, struct brw_wm_prog_key *key) key->program_string_id = fp->id; /* Whether reads from the framebuffer should behave coherently. */ - key->coherent_fb_fetch = ctx->Extensions.MESA_shader_framebuffer_fetch; + key->coherent_fb_fetch = ctx->Extensions.EXT_shader_framebuffer_fetch; } void @@ -645,7 +645,7 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_program *prog) key.program_string_id = bfp->id; /* Whether reads from the framebuffer should behave coherently. 
*/ - key.coherent_fb_fetch = ctx->Extensions.MESA_shader_framebuffer_fetch; + key.coherent_fb_fetch = ctx->Extensions.EXT_shader_framebuffer_fetch; uint32_t old_prog_offset = brw->wm.base.prog_offset; struct brw_stage_prog_data *old_prog_data = brw->wm.base.prog_data; diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 32d9e2c70f..23bf5a266c 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -1032,7 +1032,7 @@ update_renderbuffer_read_surfaces(struct brw_context *brw) brw_wm_prog_data(brw->wm.base.prog_data); if (wm_prog_data->has_render_target_reads && - !ctx->Extensions.MESA_shader_framebuffer_fetch) { + !ctx->Extensions.EXT_shader_framebuffer_fetch) { /* _NEW_BUFFERS */ const struct gl_framebuffer *fb = ctx->DrawBuffer; diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 5a6b12e52a..127371c5b8 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -153,7 +153,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.MESA_shader_integer_functions = ctx->Const.GLSLVersion >= 130; if (devinfo->is_g4x || devinfo->gen >= 5) { - ctx->Extensions.MESA_shader_framebuffer_fetch_non_coherent = true; + ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent = true; ctx->Extensions.KHR_blend_equation_advanced =
Mesa (master): glsl: Add support for the framebuffer fetch layout(noncoherent) qualifier.
Module: Mesa Branch: master Commit: ef9e3f63ca369e3549b4f17b39934dc4b3cbbb05 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef9e3f63ca369e3549b4f17b39934dc4b3cbbb05 Author: Francisco Jerez Date: Mon Feb 12 15:54:33 2018 -0800 glsl: Add support for the framebuffer fetch layout(noncoherent) qualifier. This allows the application to request framebuffer fetch coherency with per-fragment output granularity. Coherent framebuffer fetch outputs (which is the default if no qualifier is present for compatibility with older versions of the EXT_shader_framebuffer_fetch extension) will have ir_variable_data::memory_coherent set to true. Reviewed-by: Plamena Manolova --- src/compiler/glsl/ast.h | 5 src/compiler/glsl/ast_to_hir.cpp| 45 + src/compiler/glsl/ast_type.cpp | 6 +++-- src/compiler/glsl/builtin_variables.cpp | 1 + src/compiler/glsl/glsl_parser.yy| 6 + 5 files changed, 61 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h index 2a38a4b1f7..e5e4b572ff 100644 --- a/src/compiler/glsl/ast.h +++ b/src/compiler/glsl/ast.h @@ -635,6 +635,11 @@ struct ast_type_qualifier { unsigned bound_sampler:1; unsigned bound_image:1; /** \} */ + + /** \name Layout qualifiers for GL_EXT_shader_framebuffer_fetch_non_coherent */ + /** \{ */ + unsigned non_coherent:1; + /** \} */ } /** \brief Set of flags, accessed by name. */ q; diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 966450ca78..5acbaa321a 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -2008,6 +2008,20 @@ ast_expression::do_hir(exec_list *instructions, _mesa_glsl_warning(&loc, state, "`%s' used uninitialized", this->primary_expression.identifier); } + + /* From the EXT_shader_framebuffer_fetch spec: + * + * "Unless the GL_EXT_shader_framebuffer_fetch extension has been + *enabled in addition, it's an error to use gl_LastFragData if it + *hasn't been explicitly redeclared with layout(noncoherent)." 
+ */ + if (var->data.fb_fetch_output && var->data.memory_coherent && + !state->EXT_shader_framebuffer_fetch_enable) { +_mesa_glsl_error(&loc, state, + "invalid use of framebuffer fetch output not " + "qualified with layout(noncoherent)"); + } + } else { _mesa_glsl_error(& loc, state, "`%s' undeclared", this->primary_expression.identifier); @@ -4002,6 +4016,33 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, var->data.fb_fetch_output = (strcmp(var->name, "gl_LastFragData") == 0); } + if (var->data.fb_fetch_output) { + var->data.memory_coherent = !qual->flags.q.non_coherent; + + /* From the EXT_shader_framebuffer_fetch spec: + * + * "It is an error to declare an inout fragment output not qualified + *with layout(noncoherent) if the GL_EXT_shader_framebuffer_fetch + *extension hasn't been enabled." + */ + if (var->data.memory_coherent && + !state->EXT_shader_framebuffer_fetch_enable) + _mesa_glsl_error(loc, state, + "invalid declaration of framebuffer fetch output not " + "qualified with layout(noncoherent)"); + + } else { + /* From the EXT_shader_framebuffer_fetch spec: + * + * "Fragment outputs declared inout may specify the following layout + *qualifier: [...] noncoherent" + */ + if (qual->flags.q.non_coherent) + _mesa_glsl_error(loc, state, + "invalid layout(noncoherent) qualifier not part of " + "framebuffer fetch output declaration"); + } + if (!is_parameter && is_varying_var(var, state->stage)) { /* User-defined ins/outs are not permitted in compute shaders. */ if (state->stage == MESA_SHADER_COMPUTE) { @@ -4268,8 +4309,12 @@ get_variable_being_redeclared(ir_variable **var_ptr, YYLTYPE loc, * "By default, gl_LastFragData is declared with the mediump precision *qualifier. This can be changed by redeclaring the corresponding *variables with the desired precision qualifier." + * + * "Fragment shaders may specify the following layout qualifier only for + *redeclaring the built-in gl_LastFragData array
Mesa (master): glsl: Silence warnings when reading from a framebuffer fetch output.
Module: Mesa Branch: master Commit: c6c64d4d6a134231cbdbe09e3c6c87adb811ac7d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c6c64d4d6a134231cbdbe09e3c6c87adb811ac7d Author: Francisco Jerez Date: Mon Feb 12 15:55:13 2018 -0800 glsl: Silence warnings when reading from a framebuffer fetch output. Framebuffer fetch outputs are implicitly initialized upon entry to the fragment shader. Reviewed-by: Plamena Manolova --- src/compiler/glsl/ast_to_hir.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 5acbaa321a..badfbe6816 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -4017,6 +4017,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, } if (var->data.fb_fetch_output) { + var->data.assigned = true; var->data.memory_coherent = !qual->flags.q.non_coherent; /* From the EXT_shader_framebuffer_fetch spec: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): util: Add EXPLICIT_CONVERSION macro.
Module: Mesa Branch: master Commit: 8d1f1ce4124c1e0dbfc5f3d0578fbee6e24140c8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8d1f1ce4124c1e0dbfc5f3d0578fbee6e24140c8 Author: Francisco Jerez Date: Mon Feb 12 16:32:20 2018 -0800 util: Add EXPLICIT_CONVERSION macro. This can be used to specify that a C++ conversion operator is not meant to be used for implicit conversions, which can lead to unintended loss of information in some cases. Implemented as a macro in order to keep old GCC versions happy. Reviewed-by: Plamena Manolova --- src/util/macros.h | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/util/macros.h b/src/util/macros.h index e3c785af50..6d3df90408 100644 --- a/src/util/macros.h +++ b/src/util/macros.h @@ -285,4 +285,14 @@ do { \ #define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C)) #define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C)) +/** + * Macro for declaring an explicit conversion operator. Defaults to an + * implicit conversion if C++11 is not supported. + */ +#if __cplusplus >= 201103L +#define EXPLICIT_CONVERSION explicit +#elif defined(__cplusplus) +#define EXPLICIT_CONVERSION +#endif + #endif /* UTIL_MACROS_H */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Fix KHR_blend_equation_advanced with some render targets.
Module: Mesa Branch: master Commit: 27c829da28ab3cfac0195d02ffb13afa8fe0e23d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=27c829da28ab3cfac0195d02ffb13afa8fe0e23d Author: Francisco Jerez Date: Tue Feb 13 14:16:03 2018 -0800 i965: Fix KHR_blend_equation_advanced with some render targets. This reverts two bogus and seemingly useless changes from the commits referenced below, which broke KHR_blend_equation_advanced (and EXT_shader_framebuffer_fetch_non_coherent which wasn't exposed yet) for any kind of render target surface that would cause the get_isl_surf() call in brw_emit_surface_state() to do anything useful (notice how the result of get_isl_surf() is completely ignored by the caller right now), as was the case while using those extensions with 1D array or 3D framebuffers in particular. Fixes: f5859b45b1686e8116380d87 "i965/miptree: Switch remaining surfaces to isl" Fixes: bf24c3539e4b6989512968ca "i965/miptree: Clean-up unused" Cc: mesa-sta...@lists.freedesktop.org Reviewed-by: Plamena Manolova --- src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index 0b6016427b..32d9e2c70f 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -89,6 +89,8 @@ get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt, const enum isl_dim_layout dim_layout = get_isl_dim_layout(devinfo, mt->surf.tiling, target); + surf->dim = get_isl_surf_dim(target); + if (surf->dim_layout == dim_layout) return; @@ -184,7 +186,7 @@ brw_emit_surface_state(struct brw_context *brw, brw->isl_dev.ss.align, surf_offset); - isl_surf_fill_state(&brw->isl_dev, state, .surf = &mt->surf, .view = &view, + isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view, .address = brw_state_reloc(&brw->batch, *surf_offset + brw->isl_dev.ss.addr_offset, 
mt->bo, offset, reloc_flags), ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: Rename dd_function_table::BlendBarrier to match latest EXT spec.
Module: Mesa Branch: master Commit: d0bef79f12aca8d3db323cc49881100be16905fb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d0bef79f12aca8d3db323cc49881100be16905fb Author: Francisco Jerez Date: Mon Feb 12 14:23:25 2018 -0800 mesa: Rename dd_function_table::BlendBarrier to match latest EXT spec. This GL entry point was renamed to glFramebufferFetchBarrier() in the EXT extension on request from Khronos members. Update the Mesa codebase to match the latest spec. Reviewed-by: Plamena Manolova --- src/mesa/drivers/dri/i965/brw_program.c | 4 ++-- src/mesa/main/barrier.c | 2 +- src/mesa/main/dd.h| 6 +++--- src/mesa/state_tracker/st_cb_texturebarrier.c | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index a513499516..684890e8ba 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -313,7 +313,7 @@ brw_memory_barrier(struct gl_context *ctx, GLbitfield barriers) } static void -brw_blend_barrier(struct gl_context *ctx) +brw_framebuffer_fetch_barrier(struct gl_context *ctx) { struct brw_context *brw = brw_context(ctx); const struct gen_device_info *devinfo = &brw->screen->devinfo; @@ -443,7 +443,7 @@ void brwInitFragProgFuncs( struct dd_function_table *functions ) functions->LinkShader = brw_link_shader; functions->MemoryBarrier = brw_memory_barrier; - functions->BlendBarrier = brw_blend_barrier; + functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier; } struct shader_times { diff --git a/src/mesa/main/barrier.c b/src/mesa/main/barrier.c index 5284f28dc0..2c8194e6eb 100644 --- a/src/mesa/main/barrier.c +++ b/src/mesa/main/barrier.c @@ -134,5 +134,5 @@ _mesa_BlendBarrier(void) return; } - ctx->Driver.BlendBarrier(ctx); + ctx->Driver.FramebufferFetchBarrier(ctx); } diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h index 7a39f939c9..3e6a0418a2 100644 --- a/src/mesa/main/dd.h +++ b/src/mesa/main/dd.h @@ 
-963,15 +963,15 @@ struct dd_function_table { /** @} */ /** -* GL_MESA_shader_framebuffer_fetch_non_coherent rendering barrier. +* GL_EXT_shader_framebuffer_fetch_non_coherent rendering barrier. * * On return from this function any framebuffer contents written by * previous draw commands are guaranteed to be visible from subsequent * fragment shader invocations using the -* MESA_shader_framebuffer_fetch_non_coherent interface. +* EXT_shader_framebuffer_fetch_non_coherent interface. */ /** @{ */ - void (*BlendBarrier)(struct gl_context *ctx); + void (*FramebufferFetchBarrier)(struct gl_context *ctx); /** @} */ /** diff --git a/src/mesa/state_tracker/st_cb_texturebarrier.c b/src/mesa/state_tracker/st_cb_texturebarrier.c index 29cd37c16c..2bff03b484 100644 --- a/src/mesa/state_tracker/st_cb_texturebarrier.c +++ b/src/mesa/state_tracker/st_cb_texturebarrier.c @@ -55,10 +55,10 @@ st_TextureBarrier(struct gl_context *ctx) /** - * Called via ctx->Driver.BlendBarrier() + * Called via ctx->Driver.FramebufferFetchBarrier() */ static void -st_BlendBarrier(struct gl_context *ctx) +st_FramebufferFetchBarrier(struct gl_context *ctx) { struct pipe_context *pipe = st_context(ctx)->pipe; @@ -130,6 +130,6 @@ st_MemoryBarrier(struct gl_context *ctx, GLbitfield barriers) void st_init_texture_barrier_functions(struct dd_function_table *functions) { functions->TextureBarrier = st_TextureBarrier; - functions->BlendBarrier = st_BlendBarrier; + functions->FramebufferFetchBarrier = st_FramebufferFetchBarrier; functions->MemoryBarrier = st_MemoryBarrier; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Initialize ir_variable_data::fb_fetch_output earlier for GL(ES) 2.
Module: Mesa Branch: master Commit: 1bc01db95fb5162f01a2c4a9b2473dd7a5eddcd8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1bc01db95fb5162f01a2c4a9b2473dd7a5eddcd8 Author: Francisco Jerez Date: Mon Feb 12 15:24:39 2018 -0800 glsl: Initialize ir_variable_data::fb_fetch_output earlier for GL(ES) 2. At the same point where it is initialized on GL(ES) 3.0+ so we can implement some common layout qualifier handling in a future commit. Until now the fb_fetch_output flag would be inherited from the original implicit gl_LastFragData declaration at a later point in the AST to GLSL IR translation. Reviewed-by: Plamena Manolova --- src/compiler/glsl/ast_to_hir.cpp | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index 41e74815f3..966450ca78 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -3994,8 +3994,13 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, else if (qual->flags.q.shared_storage) var->data.mode = ir_var_shader_shared; - var->data.fb_fetch_output = state->stage == MESA_SHADER_FRAGMENT && - qual->flags.q.in && qual->flags.q.out; + if (!is_parameter && state->has_framebuffer_fetch() && + state->stage == MESA_SHADER_FRAGMENT) { + if (state->is_version(130, 300)) + var->data.fb_fetch_output = qual->flags.q.in && qual->flags.q.out; + else + var->data.fb_fetch_output = (strcmp(var->name, "gl_LastFragData") == 0); + } if (!is_parameter && is_varying_var(var, state->stage)) { /* User-defined ins/outs are not permitted in compute shaders. */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Allow layout token for EXT_shader_framebuffer_fetch_non_coherent.
Module: Mesa Branch: master Commit: 0aeec504b484cb37b856dd574974d739f35e968b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0aeec504b484cb37b856dd574974d739f35e968b Author: Francisco Jerez Date: Mon Feb 12 15:26:45 2018 -0800 glsl: Allow layout token for EXT_shader_framebuffer_fetch_non_coherent. EXT_shader_framebuffer_fetch_non_coherent requires layout qualifiers even on GL(ES) 2. Reviewed-by: Plamena Manolova --- src/compiler/glsl/glsl_lexer.ll | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll index ed7a80a2bb..b7cf10018d 100644 --- a/src/compiler/glsl/glsl_lexer.ll +++ b/src/compiler/glsl/glsl_lexer.ll @@ -502,7 +502,8 @@ layout { || yyextra->ARB_fragment_coord_conventions_enable || yyextra->ARB_shading_language_420pack_enable || yyextra->ARB_compute_shader_enable - || yyextra->ARB_tessellation_shader_enable) { + || yyextra->ARB_tessellation_shader_enable + || yyextra->EXT_shader_framebuffer_fetch_non_coherent_enable) { return LAYOUT_TOK; } else { return classify_identifier(yyextra, yytext, yyleng, yylval); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: Implement glFramebufferFetchBarrierEXT entry point.
Module: Mesa Branch: master Commit: 378e918e2891b2712b64c4ad1ef92bfc539a13e7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=378e918e2891b2712b64c4ad1ef92bfc539a13e7 Author: Francisco Jerez Date: Mon Feb 12 14:48:20 2018 -0800 mesa: Implement glFramebufferFetchBarrierEXT entry point. Reviewed-by: Plamena Manolova --- src/mesa/main/barrier.c | 17 +++-- src/mesa/main/barrier.h | 3 +++ src/mesa/main/tests/dispatch_sanity.cpp | 6 ++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/mesa/main/barrier.c b/src/mesa/main/barrier.c index 0f0b0a210d..2be30220e4 100644 --- a/src/mesa/main/barrier.c +++ b/src/mesa/main/barrier.c @@ -127,8 +127,7 @@ _mesa_BlendBarrier(void) { GET_CURRENT_CONTEXT(ctx); - if (!ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent && - !ctx->Extensions.KHR_blend_equation_advanced) { + if (!ctx->Extensions.KHR_blend_equation_advanced) { _mesa_error(ctx, GL_INVALID_OPERATION, "glBlendBarrier(not supported)"); return; @@ -136,3 +135,17 @@ _mesa_BlendBarrier(void) ctx->Driver.FramebufferFetchBarrier(ctx); } + +void GLAPIENTRY +_mesa_FramebufferFetchBarrierEXT(void) +{ + GET_CURRENT_CONTEXT(ctx); + + if (!ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent) { + _mesa_error(ctx, GL_INVALID_OPERATION, + "glFramebufferFetchBarrierEXT(not supported)"); + return; + } + + ctx->Driver.FramebufferFetchBarrier(ctx); +} diff --git a/src/mesa/main/barrier.h b/src/mesa/main/barrier.h index 53ecf863f0..acc15c6779 100644 --- a/src/mesa/main/barrier.h +++ b/src/mesa/main/barrier.h @@ -53,4 +53,7 @@ _mesa_MemoryBarrierByRegion(GLbitfield barriers); void GLAPIENTRY _mesa_BlendBarrier(void); +void GLAPIENTRY +_mesa_FramebufferFetchBarrierEXT(void); + #endif /* BARRIER_H */ diff --git a/src/mesa/main/tests/dispatch_sanity.cpp b/src/mesa/main/tests/dispatch_sanity.cpp index d697343627..83a4b04654 100644 --- a/src/mesa/main/tests/dispatch_sanity.cpp +++ b/src/mesa/main/tests/dispatch_sanity.cpp @@ -1023,6 +1023,9 @@ const struct function 
common_desktop_functions_possible[] = { /* GL_ARB_gl_spirv */ { "glSpecializeShaderARB", 45, -1 }, + /* GL_EXT_shader_framebuffer_fetch_non_coherent */ + { "glFramebufferFetchBarrierEXT", 20, -1 }, + { NULL, 0, -1 } }; @@ -2446,6 +2449,9 @@ const struct function gles2_functions_possible[] = { { "glGetQueryObjectui64vEXT", 20, -1 }, { "glQueryCounterEXT", 20, -1 }, + /* GL_EXT_shader_framebuffer_fetch_non_coherent */ + { "glFramebufferFetchBarrierEXT", 20, -1 }, + { NULL, 0, -1 } }; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/fs: Optimize and simplify the copy propagation dataflow logic.
Module: Mesa Branch: master Commit: 11674dad8acef294bc920e7f02ef45185420fbce URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=11674dad8acef294bc920e7f02ef45185420fbce Author: Francisco Jerez Date: Mon Dec 18 15:22:04 2017 -0800 intel/fs: Optimize and simplify the copy propagation dataflow logic. Previously the dataflow propagation algorithm would calculate the ACP live-in and -out sets in a two-pass fixed-point algorithm. The first pass would update the live-out sets of all basic blocks of the program based on their live-in sets, while the second pass would update the live-in sets based on the live-out sets. This is incredibly inefficient in the typical case where the CFG of the program is approximately acyclic, because it can take up to 2*n passes for an ACP entry introduced at the top of the program to reach the bottom (where n is the number of basic blocks in the program), until which point the algorithm won't be able to reach a fixed point. The same effect can be achieved in a single pass by computing the live-in and -out sets in lock-step, because that makes sure that processing of any basic block will pick up the updated live-out sets of the lexically preceding blocks. This gives the dataflow propagation algorithm effectively O(n) run-time instead of O(n^2) in the acyclic case. The time spent in dataflow propagation is reduced by 30x in the GLES31.functional.ssbo.layout.random.all_shared_buffer.5 dEQP test-case on my CHV system (the improvement is likely to be of the same order of magnitude on other platforms). This more than reverses an apparent run-time regression in this test-case from my previous copy-propagation undefined-value handling patch, which was ultimately caused by the additional work introduced in that commit to account for undefined values being multiplied by a huge quadratic factor. 
According to Chad this test was failing on CHV due to a 30s time-out imposed by the Android CTS (this was the case regardless of my undefined-value handling patch, even though my patch substantially exacerbated the issue). On my CHV system this patch reduces the overall run-time of the test by approximately 12x, getting us to around 13s, well below the time-out. v2: Initialize live-out set to the universal set to avoid rather pessimistic dataflow estimation in shaders with cycles (Addresses performance regression reported by Eero in GpuTest Piano). Performance numbers given above still apply. No shader-db changes with respect to master. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104271 Reported-by: Chad Versace Reviewed-by: Ian Romanick --- src/intel/compiler/brw_fs_copy_propagation.cpp | 35 -- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index af5635eace..92cc0a8de5 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -186,8 +186,7 @@ fs_copy_prop_dataflow::setup_initial_values() /* Populate the initial values for the livein and liveout sets. For the * block at the start of the program, livein = 0 and liveout = copy. -* For the others, set liveout to 0 (the empty set) and livein to ~0 -* (the universal set). +* For the others, set liveout and livein to ~0 (the universal set). */ foreach_block (block, cfg) { if (block->parents.is_empty()) { @@ -197,7 +196,7 @@ fs_copy_prop_dataflow::setup_initial_values() } } else { for (int i = 0; i < bitset_words; i++) { -bd[block->num].liveout[i] = 0u; +bd[block->num].liveout[i] = ~0u; bd[block->num].livein[i] = ~0u; } } @@ -228,34 +227,17 @@ fs_copy_prop_dataflow::run() do { progress = false; - /* Update liveout for all blocks. 
*/ foreach_block (block, cfg) { if (block->parents.is_empty()) continue; for (int i = 0; i < bitset_words; i++) { const BITSET_WORD old_liveout = bd[block->num].liveout[i]; - -bd[block->num].liveout[i] = - bd[block->num].copy[i] | (bd[block->num].livein[i] & - ~bd[block->num].kill[i]); - -if (old_liveout != bd[block->num].liveout[i]) - progress = true; - } - } - - /* Update livein for all blocks. If a copy is live out of all parent - * blocks, it's live coming in to this block. - */ - foreach_block (block, cfg) { - if (block->parents.is_empty()) -continue; - - for (int i = 0; i < bitset_words; i++) { -const BITSET_WORD old_livein = bd[block->num].livein[i]; BITSET_WORD livein_from_any_block = 0; +/* Update livein for this block. If a co
Mesa (master): i965/gen6-7/sol: Keep independent counters for the current and previous begin/end block.
Module: Mesa Branch: master Commit: f476b3f6e7b9f61c5bd93cf463005fd88aacaeba URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f476b3f6e7b9f61c5bd93cf463005fd88aacaeba Author: Francisco Jerez Date: Thu Nov 16 14:27:41 2017 -0800 i965/gen6-7/sol: Keep independent counters for the current and previous begin/end block. This allows us to aggregate the primitive counts of a completed transform feedback begin/end block lazily, which in the most typical case (where glDrawTransformFeedback is not used) will allow us to avoid aggregating the primitive counters on the CPU altogether, preventing a stall on previous rendering during glBeginTransformFeedback(), which dramatically improves performance of applications that rely heavily on transform feedback. Improves performance of SynMark2 OglGSCloth by 65.52% ±0.25% (data gathered on VLV). Tested-By: Eero Tamminen Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h| 9 --- src/mesa/drivers/dri/i965/gen6_sol.c | 39 +- src/mesa/drivers/dri/i965/gen7_sol_state.c | 15 ++-- 3 files changed, 36 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 950ede05fc..8d8ab71093 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -579,6 +579,12 @@ struct brw_transform_feedback_object { struct brw_transform_feedback_counter counter; /** +* Count of primitives generated during the previous transform feedback +* operation. Used to implement DrawTransformFeedback(). +*/ + struct brw_transform_feedback_counter previous_counter; + + /** * Number of vertices written between last Begin/EndTransformFeedback(). * * Used to implement DrawTransformFeedback(). 
@@ -1519,9 +1525,6 @@ brw_resume_transform_feedback(struct gl_context *ctx, void brw_save_primitives_written_counters(struct brw_context *brw, struct brw_transform_feedback_object *obj); -void -brw_compute_xfb_vertices_written(struct brw_context *brw, - struct brw_transform_feedback_object *obj); GLsizei brw_get_transform_feedback_vertex_count(struct gl_context *ctx, struct gl_transform_feedback_object *obj, diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index a909339e16..b1baf01bcd 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -289,6 +289,8 @@ brw_save_primitives_written_counters(struct brw_context *brw, /* Check if there's enough space for a new pair of four values. */ if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >= 4096) { aggregate_transform_feedback_counter(brw, obj->prim_count_bo, + &obj->previous_counter); + aggregate_transform_feedback_counter(brw, obj->prim_count_bo, &obj->counter); } @@ -316,6 +318,7 @@ brw_save_primitives_written_counters(struct brw_context *brw, static void compute_vertices_written_so_far(struct brw_context *brw, struct brw_transform_feedback_object *obj, +struct brw_transform_feedback_counter *counter, uint64_t *vertices_written) { const struct gl_context *ctx = &brw->ctx; @@ -336,25 +339,26 @@ compute_vertices_written_so_far(struct brw_context *brw, } /* Get the number of primitives generated. */ - aggregate_transform_feedback_counter(brw, obj->prim_count_bo, &obj->counter); + aggregate_transform_feedback_counter(brw, obj->prim_count_bo, counter); for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) { - vertices_written[i] = vertices_per_prim * obj->counter.accum[i]; + vertices_written[i] = vertices_per_prim * counter->accum[i]; } } /** - * Compute the number of vertices written by this transform feedback operation. + * Compute the number of vertices written by the last transform feedback + * begin/end block. 
*/ -void -brw_compute_xfb_vertices_written(struct brw_context *brw, - struct brw_transform_feedback_object *obj) +static void +compute_xfb_vertices_written(struct brw_context *brw, + struct brw_transform_feedback_object *obj) { if (obj->vertices_written_valid || !obj->base.EndedAnytime) return; - compute_vertices_written_so_far(brw, obj, obj->vertices_written); - + compute_vertices_written_so_far(brw, obj, &obj->previous_counter, + obj->vertices_written); obj->vertices_written_valid = true; } @@ -376,7 +380,7 @@ brw_get_transform_feedback_vertex_
Mesa (master): i965/gen6-7/sol: Restructure primitive counter into a separate type.
Module: Mesa Branch: master Commit: b0c8d61281d5e09cd216e1ff3f2c441f7c550a47 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b0c8d61281d5e09cd216e1ff3f2c441f7c550a47 Author: Francisco Jerez Date: Fri Nov 17 14:06:04 2017 -0800 i965/gen6-7/sol: Restructure primitive counter into a separate type. A primitive counter encapsulates a scalar aggregating counter for each vertex stream along with a section within the primitive tally buffer which hasn't been read out yet. Defining this as a separate type will allow us to keep multiple counter objects around for the same transform feedback object without any code duplication. Tested-By: Eero Tamminen Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_context.h| 38 ++--- src/mesa/drivers/dri/i965/gen6_sol.c | 53 ++ src/mesa/drivers/dri/i965/gen7_sol_state.c | 6 +--- 3 files changed, 58 insertions(+), 39 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 0f0aad8534..950ede05fc 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -523,6 +523,36 @@ struct intel_batchbuffer { #define BRW_MAX_XFB_STREAMS 4 +struct brw_transform_feedback_counter { + /** +* Index of the first entry of this counter within the primitive count BO. +* An entry is considered to be an N-tuple of 64bit values, where N is the +* number of vertex streams supported by the platform. +*/ + unsigned bo_start; + + /** +* Index one past the last entry of this counter within the primitive +* count BO. +*/ + unsigned bo_end; + + /** +* Primitive count values accumulated while this counter was active, +* excluding any entries buffered between \c bo_start and \c bo_end, which +* haven't been accounted for yet. 
+*/ + uint64_t accum[BRW_MAX_XFB_STREAMS]; +}; + +static inline void +brw_reset_transform_feedback_counter( + struct brw_transform_feedback_counter *counter) +{ + counter->bo_start = counter->bo_end; + memset(&counter->accum, 0, sizeof(counter->accum)); +} + struct brw_transform_feedback_object { struct gl_transform_feedback_object base; @@ -541,14 +571,12 @@ struct brw_transform_feedback_object { */ unsigned max_index; + struct brw_bo *prim_count_bo; + /** * Count of primitives generated during this transform feedback operation. -* @{ */ - uint64_t prims_generated[BRW_MAX_XFB_STREAMS]; - struct brw_bo *prim_count_bo; - unsigned prim_count_buffer_index; /**< in number of uint64_t units */ - /** @} */ + struct brw_transform_feedback_counter counter; /** * Number of vertices written between last Begin/EndTransformFeedback(). diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index 7a510940c8..a909339e16 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -233,37 +233,36 @@ brw_delete_transform_feedback(struct gl_context *ctx, * Note that we expose one stream pre-Gen7, so the above is just (start, end). */ static void -tally_prims_generated(struct brw_context *brw, - struct brw_transform_feedback_object *obj) +aggregate_transform_feedback_counter( + struct brw_context *brw, + struct brw_bo *bo, + struct brw_transform_feedback_counter *counter) { - const struct gl_context *ctx = &brw->ctx; - const int streams = ctx->Const.MaxVertexStreams; + const unsigned streams = brw->ctx.Const.MaxVertexStreams; /* If the current batch is still contributing to the number of primitives * generated, flush it now so the results will be present when mapped. 
*/ - if (brw_batch_references(&brw->batch, obj->prim_count_bo)) + if (brw_batch_references(&brw->batch, bo)) intel_batchbuffer_flush(brw); - if (unlikely(brw->perf_debug && brw_bo_busy(obj->prim_count_bo))) + if (unlikely(brw->perf_debug && brw_bo_busy(bo))) perf_debug("Stalling for # of transform feedback primitives written.\n"); - uint64_t *prim_counts = brw_bo_map(brw, obj->prim_count_bo, MAP_READ); + uint64_t *prim_counts = brw_bo_map(brw, bo, MAP_READ); + prim_counts += counter->bo_start * streams; - assert(obj->prim_count_buffer_index % (2 * streams) == 0); - int pairs = obj->prim_count_buffer_index / (2 * streams); + for (unsigned i = counter->bo_start; i + 1 < counter->bo_end; i += 2) { + for (unsigned s = 0; s < streams; s++) + counter->accum[s] += prim_counts[streams + s] - prim_counts[s]; - for (int i = 0; i < pairs; i++) { - for (int s = 0; s < streams; s++) { - obj->prims_generated[s] += prim_counts[streams + s] - prim_counts[s]; - } - prim_counts += 2 * streams; /* move to the next pai
Mesa (master): i965/gen6-7/sol: Bump primitive counter BO size.
Module: Mesa Branch: master Commit: 53d8508f1d964423123b7a444e07eabe2d723f7e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=53d8508f1d964423123b7a444e07eabe2d723f7e Author: Francisco Jerez Date: Fri Nov 17 14:07:21 2017 -0800 i965/gen6-7/sol: Bump primitive counter BO size. Improves performance of SynMark2 OglGSCloth by a further 9.65%±0.59% due to the reduction in overwraps of the primitive count buffer that lead to a CPU stall on previous rendering. Cumulative performance improvement from the series 81.50% ±0.96% (data gathered on VLV). Tested-By: Eero Tamminen Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/gen6_sol.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index b1baf01bcd..355acd4218 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -197,7 +197,7 @@ brw_new_transform_feedback(struct gl_context *ctx, GLuint name) brw_obj->offset_bo = brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16, 64); brw_obj->prim_count_bo = - brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 64); + brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 16384, 64); return &brw_obj->base; } @@ -287,7 +287,8 @@ brw_save_primitives_written_counters(struct brw_context *brw, assert(obj->prim_count_bo != NULL); /* Check if there's enough space for a new pair of four values. */ - if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >= 4096) { + if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >= + obj->prim_count_bo->size) { aggregate_transform_feedback_counter(brw, obj->prim_count_bo, &obj->previous_counter); aggregate_transform_feedback_counter(brw, obj->prim_count_bo, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/fs: Initialize fs_visitor::grf_used on construction.
Module: Mesa Branch: master Commit: b3e3cb990125c71c1fd172588852bd92bcfb8904 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3e3cb990125c71c1fd172588852bd92bcfb8904 Author: Francisco Jerez Date: Sun Dec 17 00:21:13 2017 -0800 intel/fs: Initialize fs_visitor::grf_used on construction. This should shut up some Valgrind errors during pre-regalloc scheduling. The errors were harmless since they could only have led to the estimation of the bank conflict penalty of an instruction pre-regalloc, which is inaccurate at that point of the program compilation, but no less accurate than the intended "return 0" fall-back path. The scheduling pass is normally re-run after regalloc with a well-defined grf_used value and accurate bank conflict information. Fixes: acf98ff933d "intel/fs: Teach instruction scheduler about GRF bank conflict cycles." Reported-by: Eero Tamminen Reviewed-by: Ian Romanick --- src/intel/compiler/brw_fs_visitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 481d9c51e7..7a5f6451f2 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -898,6 +898,7 @@ fs_visitor::init() this->promoted_constants = 0, + this->grf_used = 0; this->spilled_any_registers = false; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/fs/bank_conflicts: Use posix_memalign() instead of overaligned new to obtain vector storage.
Module: Mesa Branch: master Commit: 1aa79d5ed5fbc9d3ee3c4d279892c49e8393fd3b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1aa79d5ed5fbc9d3ee3c4d279892c49e8393fd3b Author: Francisco Jerez Date: Sun Dec 17 13:05:55 2017 -0800 intel/fs/bank_conflicts: Use posix_memalign() instead of overaligned new to obtain vector storage. The weight_vector_type constructor was inadvertently assuming C++17 semantics of the new operator applied on a type with alignment requirement greater than the largest fundamental alignment. Unfortunately on earlier C++ dialects the implementation was allowed to raise an allocation failure when the alignment requirement of the allocated type was unsupported, in an implementation-defined fashion. It's expected that a C++ implementation recent enough to implement P0035R4 would have honored allocation requests for such over-aligned types even if the C++17 dialect wasn't active, which is likely the reason why this problem wasn't caught by our CI system. A more elegant fix would involve wrapping the __SSE2__ block in a '__cpp_aligned_new >= 201606' preprocessor conditional and continue taking advantage of the language feature, but that would yield lower compile-time performance on old compilers not implementing it (e.g. GCC versions older than 7.0). Fixes: af2c320190f3c731 "intel/fs: Implement GRF bank conflict mitigation pass." 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104226 Reported-by: Józef Kucia Reviewed-by: Ian Romanick --- src/intel/compiler/brw_fs_bank_conflicts.cpp | 22 -- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp index 0cd880d44f..e87fcbfc5e 100644 --- a/src/intel/compiler/brw_fs_bank_conflicts.cpp +++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp @@ -277,13 +277,10 @@ namespace { struct weight_vector_type { weight_vector_type() : v(NULL), size(0) {} - weight_vector_type(unsigned n) : - v(new vector_type[DIV_ROUND_UP(n, vector_width)]()), - size(n) {} + weight_vector_type(unsigned n) : v(alloc(n)), size(n) {} weight_vector_type(const weight_vector_type &u) : - v(new vector_type[DIV_ROUND_UP(u.size, vector_width)]()), - size(u.size) + v(alloc(u.size)), size(u.size) { memcpy(v, u.v, DIV_ROUND_UP(u.size, vector_width) * sizeof(vector_type)); @@ -291,7 +288,7 @@ namespace { ~weight_vector_type() { - delete[] v; + free(v); } weight_vector_type & @@ -304,6 +301,19 @@ namespace { vector_type *v; unsigned size; + + private: + static vector_type * + alloc(unsigned n) + { + const unsigned align = MAX2(sizeof(void *), __alignof__(vector_type)); + const unsigned size = DIV_ROUND_UP(n, vector_width) * sizeof(vector_type); + void *p; + if (posix_memalign(&p, align, size)) +return NULL; + memset(p, 0, size); + return reinterpret_cast(p); + } }; /** ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/fs/bank_conflicts: Don't touch Gen7 MRF hack registers.
Module: Mesa Branch: master Commit: acab52f5201683ec3f3698d25045ed1441ecdd14 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=acab52f5201683ec3f3698d25045ed1441ecdd14 Author: Francisco Jerez Date: Mon Dec 11 20:24:53 2017 -0800 intel/fs/bank_conflicts: Don't touch Gen7 MRF hack registers. Fixes: af2c320190f3c731 "intel/fs: Implement GRF bank conflict mitigation pass." Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104199 Reported-by: Darius Spitznagel Reviewed-by: Matt Turner --- src/intel/compiler/brw_fs.cpp| 2 +- src/intel/compiler/brw_fs.h | 2 +- src/intel/compiler/brw_fs_bank_conflicts.cpp | 22 +- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 0e3ab381fa..3717c50e32 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -945,7 +945,7 @@ fs_inst::flags_written() const * instruction -- the FS opcodes often generate MOVs in addition. */ int -fs_visitor::implied_mrf_writes(fs_inst *inst) +fs_visitor::implied_mrf_writes(fs_inst *inst) const { if (inst->mlen == 0) return 0; diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 9c160068a7..63373580ee 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -277,7 +277,7 @@ public: struct brw_reg interp_reg(int location, int channel); - int implied_mrf_writes(fs_inst *inst); + int implied_mrf_writes(fs_inst *inst) const; virtual void dump_instructions(); virtual void dump_instructions(const char *name); diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp index 42cdc6ef7d..0cd880d44f 100644 --- a/src/intel/compiler/brw_fs_bank_conflicts.cpp +++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp @@ -530,12 +530,12 @@ namespace { for (unsigned reg = 0; reg < 2; reg++) constrained[p.atom_of_reg(reg)] = true; - /* Assume that anything referenced via fixed GRFs is baked into the - * hardware's fixed-function 
logic and may be unsafe to move around. - * Also take into account the source GRF restrictions of EOT - * send-message instructions. - */ foreach_block_and_inst(block, fs_inst, inst, v->cfg) { + /* Assume that anything referenced via fixed GRFs is baked into the + * hardware's fixed-function logic and may be unsafe to move around. + * Also take into account the source GRF restrictions of EOT + * send-message instructions. + */ if (inst->dst.file == FIXED_GRF) constrained[p.atom_of_reg(reg_of(inst->dst))] = true; @@ -544,6 +544,18 @@ namespace { (is_grf(inst->src[i]) && inst->eot)) constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true; } + + /* The location of the Gen7 MRF hack registers is hard-coded in the + * rest of the compiler back-end. Don't attempt to move them around. + */ + if (v->devinfo->gen >= 7) { +assert(inst->dst.file != MRF); + +for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + const unsigned reg = GEN7_MRF_HACK_START + inst->base_mrf + i; + constrained[p.atom_of_reg(reg)] = true; +} + } } return constrained; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/cfg: Represent divergent control flow paths caused by non-uniform loop execution.
Module: Mesa Branch: master Commit: 4d1959e69328cf0d59f0ec7aeea5a2b704ef0c5f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4d1959e69328cf0d59f0ec7aeea5a2b704ef0c5f Author: Francisco Jerez Date: Fri Oct 13 17:52:00 2017 -0700 intel/cfg: Represent divergent control flow paths caused by non-uniform loop execution. This addresses a long-standing back-end compiler bug that could lead to cross-channel data corruption in loops executed non-uniformly. In some cases live variables extending through a loop divergence point (e.g. a non-uniform break) into a convergence point (e.g. the end of the loop) wouldn't be considered live along all physical control flow paths the SIMD thread could possibly have taken in between due to some channels remaining in the loop for additional iterations. This patch fixes the problem by extending the CFG with physical edges that don't exist in the idealized non-vectorized program, but represent valid control flow paths the SIMD EU may take due to the divergence of logical threads. This makes sense because the i965 IR is explicitly SIMD, and it's not uncommon for instructions to have an influence on neighboring channels (e.g. a force_writemask_all header setup), so the behavior of the SIMD thread as a whole needs to be considered. No changes in shader-db. 
Reviewed-by: Jason Ekstrand Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_cfg.cpp | 75 ++ 1 file changed, 69 insertions(+), 6 deletions(-) diff --git a/src/intel/compiler/brw_cfg.cpp b/src/intel/compiler/brw_cfg.cpp index fad12eec58..600b428a49 100644 --- a/src/intel/compiler/brw_cfg.cpp +++ b/src/intel/compiler/brw_cfg.cpp @@ -98,6 +98,7 @@ ends_block(const backend_instruction *inst) op == BRW_OPCODE_ELSE || op == BRW_OPCODE_CONTINUE || op == BRW_OPCODE_BREAK || + op == BRW_OPCODE_DO || op == BRW_OPCODE_WHILE; } @@ -268,13 +269,57 @@ cfg_t::cfg_t(exec_list *instructions) } cur->instructions.push_tail(inst); + + /* Represent divergent execution of the loop as a pair of alternative + * edges coming out of the DO instruction: For any physical iteration + * of the loop a given logical thread can either start off enabled + * (which is represented as the "next" successor), or disabled (if it + * has reached a non-uniform exit of the loop during a previous + * iteration, which is represented as the "cur_while" successor). + * + * The disabled edge will be taken by the logical thread anytime we + * arrive at the DO instruction through a back-edge coming from a + * conditional exit of the loop where divergent control flow started. + * + * This guarantees that there is a control-flow path from any + * divergence point of the loop into the convergence point + * (immediately past the WHILE instruction) such that it overlaps the + * whole IP region of divergent control flow (potentially the whole + * loop) *and* doesn't imply the execution of any instructions part + * of the loop (since the corresponding execution mask bit will be + * disabled for a diverging thread). 
+ * + * This way we make sure that any variables that are live throughout + * the region of divergence for an inactive logical thread are also + * considered to interfere with any other variables assigned by + * active logical threads within the same physical region of the + * program, since otherwise we would risk cross-channel data + * corruption. + */ + next = new_block(); + cur->add_successor(mem_ctx, next); + cur->add_successor(mem_ctx, cur_while); + set_next_block(&cur, next, ip); break; case BRW_OPCODE_CONTINUE: cur->instructions.push_tail(inst); + /* A conditional CONTINUE may start a region of divergent control + * flow until the start of the next loop iteration (*not* until the + * end of the loop which is why the successor is not the top-level + * divergence point at cur_do). The live interval of any variable + * extending through a CONTINUE edge is guaranteed to overlap the + * whole region of divergent execution, because any variable live-out + * at the CONTINUE instruction will also be live-in at the top of the + * loop, and therefore also live-out at the bottom-most point of the + * loop which is reachable from the top (since a control flow path + * exists from a definition of the variable through this CONTINUE + * instruction, the top of the loop, the (reachable) bottom of the + * loop, the top of the loop again, into a use of the variable). + */
Mesa (master): intel/fs: Restrict live intervals to the subset possibly reachable from any definition.
Module: Mesa Branch: master Commit: c3c1aa5aeb921caa2ec18c2320ceb94854e0f47c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3c1aa5aeb921caa2ec18c2320ceb94854e0f47c Author: Francisco Jerez Date: Thu Sep 7 00:26:03 2017 -0700 intel/fs: Restrict live intervals to the subset possibly reachable from any definition. Currently the liveness analysis pass would extend a live interval up to the top of the program when no unconditional and complete definition of the variable is found that dominates all of its uses. This can lead to a serious performance problem in shaders containing many partial writes, like scalar arithmetic, FP64 and soon FP16 operations. The number of oversize live intervals in such workloads can cause the compilation time of the shader to explode because of the worse than quadratic behavior of the register allocator and scheduler when running out of registers, and it can also cause the running time of the shader to explode due to the amount of spilling it leads to, which is orders of magnitude slower than GRF memory. This patch fixes it by computing the intersection of our current live intervals with the subset of the program that can possibly be reached from any definition of the variable. Extending the storage allocation of the variable beyond that is pretty useless because its value is guaranteed to be undefined at a point that cannot be reached from any definition. According to Jason, this improves performance of the subgroup Vulkan CTS tests significantly (e.g. the runtime of the dvec4 broadcast test improves by nearly 50x). No significant change in the running time of shader-db (with 5% statistical significance). 
shader-db results on IVB: total cycles in shared programs: 61108780 -> 60932856 (-0.29%) cycles in affected programs: 16335482 -> 16159558 (-1.08%) helped: 5121 HURT: 4347 total spills in shared programs: 1309 -> 1288 (-1.60%) spills in affected programs: 249 -> 228 (-8.43%) helped: 3 HURT: 0 total fills in shared programs: 1652 -> 1597 (-3.33%) fills in affected programs: 262 -> 207 (-20.99%) helped: 4 HURT: 0 LOST: 2 GAINED: 209 shader-db results on BDW: total cycles in shared programs: 67617262 -> 67361220 (-0.38%) cycles in affected programs: 23397142 -> 23141100 (-1.09%) helped: 8045 HURT: 6488 total spills in shared programs: 1456 -> 1252 (-14.01%) spills in affected programs: 465 -> 261 (-43.87%) helped: 3 HURT: 0 total fills in shared programs: 1720 -> 1465 (-14.83%) fills in affected programs: 471 -> 216 (-54.14%) helped: 4 HURT: 0 LOST: 2 GAINED: 162 shader-db results on SKL: total cycles in shared programs: 65436248 -> 65245186 (-0.29%) cycles in affected programs: 22560936 -> 22369874 (-0.85%) helped: 8457 HURT: 6247 total spills in shared programs: 437 -> 437 (0.00%) spills in affected programs: 0 -> 0 helped: 0 HURT: 0 total fills in shared programs: 870 -> 854 (-1.84%) fills in affected programs: 16 -> 0 helped: 1 HURT: 0 LOST: 0 GAINED: 107 Reviewed-by: Jason Ekstrand --- src/intel/compiler/brw_fs_live_variables.cpp | 34 src/intel/compiler/brw_fs_live_variables.h | 12 ++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/src/intel/compiler/brw_fs_live_variables.cpp b/src/intel/compiler/brw_fs_live_variables.cpp index c449672a51..059f076fa5 100644 --- a/src/intel/compiler/brw_fs_live_variables.cpp +++ b/src/intel/compiler/brw_fs_live_variables.cpp @@ -83,9 +83,11 @@ fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst, /* The def[] bitset marks when an initialization in a block completely * screens off previous updates of that variable (VGRF channel). 
*/ - if (inst->dst.file == VGRF && !inst->is_partial_write()) { - if (!BITSET_TEST(bd->use, var)) + if (inst->dst.file == VGRF) { + if (!inst->is_partial_write() && !BITSET_TEST(bd->use, var)) BITSET_SET(bd->def, var); + + BITSET_SET(bd->defout, var); } } @@ -199,6 +201,28 @@ fs_live_variables::compute_live_variables() } } } + + /* Propagate defin and defout down the CFG to calculate the union of live +* variables potentially defined along any possible control flow path. +*/ + do { + cont = false; + + foreach_block (block, cfg) { + const struct block_data *bd = &block_data[block->num]; + +foreach_list_typed(bblock_link, child_link, link, &block->children) { +struct block_data *child_bd = &block_data[child_link->block->num]; + + for (int i = 0; i < bitset_words; i++) { + const BITSET_WORD new_def = bd->defout[i] & ~child_bd->defin[i]; + child_bd->defin[i] |= new_def; + child_bd->defout[i] |= new_def;
Mesa (master): intel/fs: Don't let undefined values prevent copy propagation.
Module: Mesa Branch: master Commit: 9355116bdad6ee9914554de8e48ba271bd36a8eb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9355116bdad6ee9914554de8e48ba271bd36a8eb Author: Francisco Jerez Date: Mon Oct 23 13:47:10 2017 -0700 intel/fs: Don't let undefined values prevent copy propagation. This makes the dataflow propagation logic of the copy propagation pass more intelligent in cases where the destination of a copy is known to be undefined for some incoming CFG edges, building upon the definedness information provided by the last patch. Helps a few programs, and avoids a handful shader-db regressions from the next patch. shader-db results on ILK: total instructions in shared programs: 6541547 -> 6541523 (-0.00%) instructions in affected programs: 360 -> 336 (-6.67%) helped: 8 HURT: 0 LOST: 0 GAINED: 10 shader-db results on BDW: total instructions in shared programs: 8174323 -> 8173882 (-0.01%) instructions in affected programs: 7730 -> 7289 (-5.71%) helped: 5 HURT: 2 LOST: 0 GAINED: 4 shader-db results on SKL: total instructions in shared programs: 8185669 -> 8184598 (-0.01%) instructions in affected programs: 10364 -> 9293 (-10.33%) helped: 5 HURT: 2 LOST: 0 GAINED: 2 Reviewed-by: Jason Ekstrand --- src/intel/compiler/brw_fs_copy_propagation.cpp | 50 -- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp index d4d01d783c..af5635eace 100644 --- a/src/intel/compiler/brw_fs_copy_propagation.cpp +++ b/src/intel/compiler/brw_fs_copy_propagation.cpp @@ -36,9 +36,12 @@ #include "util/bitset.h" #include "brw_fs.h" +#include "brw_fs_live_variables.h" #include "brw_cfg.h" #include "brw_eu.h" +using namespace brw; + namespace { /* avoid conflict with opt_copy_propagation_elements */ struct acp_entry : public exec_node { fs_reg dst; @@ -77,12 +80,19 @@ struct block_data { * course of this block. 
*/ BITSET_WORD *kill; + + /** +* Which entries in the fs_copy_prop_dataflow acp table are guaranteed to +* have a fully uninitialized destination at the end of this block. +*/ + BITSET_WORD *undef; }; class fs_copy_prop_dataflow { public: fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg, + const fs_live_variables *live, exec_list *out_acp[ACP_HASH_SIZE]); void setup_initial_values(); @@ -92,6 +102,7 @@ public: void *mem_ctx; cfg_t *cfg; + const fs_live_variables *live; acp_entry **acp; int num_acp; @@ -102,8 +113,9 @@ public: } /* anonymous namespace */ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg, + const fs_live_variables *live, exec_list *out_acp[ACP_HASH_SIZE]) - : mem_ctx(mem_ctx), cfg(cfg) + : mem_ctx(mem_ctx), cfg(cfg), live(live) { bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks); @@ -124,6 +136,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg, bd[block->num].liveout = rzalloc_array(bd, BITSET_WORD, bitset_words); bd[block->num].copy = rzalloc_array(bd, BITSET_WORD, bitset_words); bd[block->num].kill = rzalloc_array(bd, BITSET_WORD, bitset_words); + bd[block->num].undef = rzalloc_array(bd, BITSET_WORD, bitset_words); for (int i = 0; i < ACP_HASH_SIZE; i++) { foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) { @@ -189,6 +202,18 @@ fs_copy_prop_dataflow::setup_initial_values() } } } + + /* Initialize the undef set. 
*/ + foreach_block (block, cfg) { + for (int i = 0; i < num_acp; i++) { + BITSET_SET(bd[block->num].undef, i); + for (unsigned off = 0; off < acp[i]->size_written; off += REG_SIZE) { +if (BITSET_TEST(live->block_data[block->num].defout, +live->var_from_reg(byte_offset(acp[i]->dst, off + BITSET_CLEAR(bd[block->num].undef, i); + } + } + } } /** @@ -229,13 +254,30 @@ fs_copy_prop_dataflow::run() for (int i = 0; i < bitset_words; i++) { const BITSET_WORD old_livein = bd[block->num].livein[i]; +BITSET_WORD livein_from_any_block = 0; bd[block->num].livein[i] = ~0u; foreach_list_typed(bblock_link, parent_link, link, &block->parents) { bblock_t *parent = parent_link->block; - bd[block->num].livein[i] &= bd[parent->num].liveout[i]; + /* Consider ACP entries with a known-undefined destination to +* be available from the parent. This is valid beca
Mesa (master): intel/fs: Teach instruction scheduler about GRF bank conflict cycles.
Module: Mesa Branch: master Commit: acf98ff933d338c521d7c6a57c17a010149eb344 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=acf98ff933d338c521d7c6a57c17a010149eb344 Author: Francisco Jerez Date: Wed Dec 6 11:42:54 2017 -0800 intel/fs: Teach instruction scheduler about GRF bank conflict cycles. This should allow the post-RA scheduler to do a slightly better job at hiding latency in presence of instructions incurring bank conflicts. The main purpose of this patch is not to improve performance though, but to get conflict cycles to show up in shader-db statistics in order to make sure that regressions in the bank conflict mitigation pass don't go unnoticed. Acked-by: Matt Turner --- src/intel/compiler/brw_fs.h | 1 + src/intel/compiler/brw_fs_bank_conflicts.cpp | 19 +++ src/intel/compiler/brw_schedule_instructions.cpp | 5 +++-- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 0cec6fdcba..9c160068a7 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -146,6 +146,7 @@ public: bool opt_drop_redundant_mov_to_flags(); bool opt_register_renaming(); bool opt_bank_conflicts(); + unsigned bank_conflict_cycles(const fs_inst *inst) const; bool register_coalesce(); bool compute_to_mrf(); bool eliminate_find_live_channel(); diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp index b64a3d4a8a..42cdc6ef7d 100644 --- a/src/intel/compiler/brw_fs_bank_conflicts.cpp +++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp @@ -891,3 +891,22 @@ fs_visitor::opt_bank_conflicts() delete[] constrained; return true; } + +/** + * Estimate the number of GRF bank conflict cycles incurred by an instruction. + * + * Note that this neglects conflict cycles prior to register allocation + * because we don't know which bank each VGRF is going to end up aligned to.
+ */ +unsigned +fs_visitor::bank_conflict_cycles(const fs_inst *inst) const +{ + if (grf_used && inst->is_3src(devinfo) && + is_grf(inst->src[1]) && is_grf(inst->src[2]) && + bank_of(reg_of(inst->src[1])) == bank_of(reg_of(inst->src[2])) && + !is_conflict_optimized_out(devinfo, inst)) { + return DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE); + } else { + return 0; + } +} diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index a1e825c661..692f712532 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -1543,10 +1543,11 @@ vec4_instruction_scheduler::choose_instruction_to_schedule() int fs_instruction_scheduler::issue_time(backend_instruction *inst) { + const unsigned overhead = v->bank_conflict_cycles((fs_inst *)inst); if (is_compressed((fs_inst *)inst)) - return 4; + return 4 + overhead; else - return 2; + return 2 + overhead; } int ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/fs: Implement GRF bank conflict mitigation pass.
Module: Mesa Branch: master Commit: af2c320190f3c73180f1610c8df955a7fa2a4d09 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=af2c320190f3c73180f1610c8df955a7fa2a4d09 Author: Francisco Jerez Date: Thu Jun 15 15:23:57 2017 -0700 intel/fs: Implement GRF bank conflict mitigation pass. Unnecessary GRF bank conflicts increase the issue time of ternary instructions (the overwhelmingly most common of which is MAD) by roughly 50%, leading to reduced ALU throughput. This pass attempts to minimize the number of bank conflicts by rearranging the layout of the GRF space post-register allocation. It's in general not possible to eliminate all of them without introducing extra copies, which are typically more expensive than the bank conflict itself. In a shader-db run on SKL this helps roughly 46k shaders: total conflicts in shared programs: 1008981 -> 600461 (-40.49%) conflicts in affected programs: 816222 -> 407702 (-50.05%) helped: 46234 HURT: 72 The running time of shader-db itself on SKL seems to be increased by roughly 2.52%±1.13% with n=20 due to the additional work done by the compiler back-end. On earlier generations the pass is somewhat less effective in relative terms because the hardware incurs a bank conflict anytime the last two sources of the instruction are duplicate (e.g. while trying to square a value using MAD), which is impossible to avoid without introducing copies. E.g. for a shader-db run on SNB: total conflicts in shared programs: 944636 -> 623185 (-34.03%) conflicts in affected programs: 853258 -> 531807 (-37.67%) helped: 31052 HURT: 19 And on BDW: total conflicts in shared programs: 1418393 -> 987539 (-30.38%) conflicts in affected programs: 1179787 -> 748933 (-36.52%) helped: 47592 HURT: 70 On SKL GT4e this improves performance of GpuTest Volplosion by 3.64% ±0.33% with n=16. NOTE: This patch intentionally disregards some i965 coding conventions for the sake of reviewability. 
This is addressed by the next squash patch which introduces an amount of (for the most part boring) boilerplate that might distract reviewers from the non-trivial algorithmic details of the pass. The following patch is squashed in: SQUASH: intel/fs/bank_conflicts: Roll back to the nineties. Acked-by: Matt Turner --- src/intel/Makefile.sources | 1 + src/intel/compiler/brw_fs.cpp| 2 + src/intel/compiler/brw_fs.h | 1 + src/intel/compiler/brw_fs_bank_conflicts.cpp | 893 +++ src/intel/compiler/meson.build | 1 + 5 files changed, 898 insertions(+) diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources index cdb10ece35..1c62bad816 100644 --- a/src/intel/Makefile.sources +++ b/src/intel/Makefile.sources @@ -48,6 +48,7 @@ COMPILER_FILES = \ compiler/brw_eu_util.c \ compiler/brw_eu_validate.c \ compiler/brw_fs_builder.h \ + compiler/brw_fs_bank_conflicts.cpp \ compiler/brw_fs_cmod_propagation.cpp \ compiler/brw_fs_combine_constants.cpp \ compiler/brw_fs_copy_propagation.cpp \ diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 93bb6b4673..c5d4f5634d 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -6065,6 +6065,8 @@ fs_visitor::allocate_registers(unsigned min_dispatch_width, bool allow_spilling) if (failed) return; + opt_bank_conflicts(); + schedule_instructions(SCHEDULE_POST); if (last_scratch > 0) { diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 30557324d5..0cec6fdcba 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -145,6 +145,7 @@ public: exec_list *acp); bool opt_drop_redundant_mov_to_flags(); bool opt_register_renaming(); + bool opt_bank_conflicts(); bool register_coalesce(); bool compute_to_mrf(); bool eliminate_find_live_channel(); diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp new file mode 100644 index 00..b64a3d4a8a --- /dev/null +++ 
b/src/intel/compiler/brw_fs_bank_conflicts.cpp @@ -0,0 +1,893 @@ +/* + * Copyright © 2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Softwa
Mesa (master): anv: Check that in_fence fd is valid before closing it.
Module: Mesa Branch: master Commit: e29ccaac298d04ad4272af2d8b8d7a953c523e28 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e29ccaac298d04ad4272af2d8b8d7a953c523e28 Author: Francisco Jerez Date: Fri Aug 18 12:04:55 2017 -0700 anv: Check that in_fence fd is valid before closing it. Probably harmless, but will overwrite errno with a failure status code. Reported by coverity. CID 1416600: Argument cannot be negative (NEGATIVE_RETURNS) Fixes: 5c4e4932e02 (anv: Implement support for exporting semaphores as FENCE_FD) Reviewed-by: Lionel Landwerlin --- src/intel/vulkan/anv_batch_chain.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 26b5375903..1e7455f71e 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1571,7 +1571,8 @@ anv_cmd_buffer_execbuf(struct anv_device *device, result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos); /* Execbuf does not consume the in_fence. It's our job to close it. */ - close(in_fence); + if (in_fence != -1) + close(in_fence); for (uint32_t i = 0; i < num_in_semaphores; i++) { ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): anv: Add error handling to setup_empty_execbuf().
Module: Mesa Branch: master Commit: 7ca124a6a3987fbfc09bc530761d44714c0da773 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7ca124a6a3987fbfc09bc530761d44714c0da773 Author: Francisco Jerez Date: Fri Aug 18 11:00:42 2017 -0700 anv: Add error handling to setup_empty_execbuf(). The anv_execbuf_add_bo() call can actually fail in practice, which should cause the QueueSubmit operation to fail. Reported by Coverity. CID: 1416606: Unchecked return value (CHECKED_RETURN) Fixes: 017cdb10cf (anv: Submit a dummy batch when only semaphores are provided.) Reviewed-by: Lionel Landwerlin --- src/intel/vulkan/anv_batch_chain.c | 22 +- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c index 0078cc5142..26b5375903 100644 --- a/src/intel/vulkan/anv_batch_chain.c +++ b/src/intel/vulkan/anv_batch_chain.c @@ -1424,11 +1424,13 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf, return VK_SUCCESS; } -static void +static VkResult setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device) { - anv_execbuf_add_bo(execbuf, &device->trivial_batch_bo, NULL, 0, - &device->alloc); + VkResult result = anv_execbuf_add_bo(execbuf, &device->trivial_batch_bo, +NULL, 0, &device->alloc); + if (result != VK_SUCCESS) + return result; execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { .buffers_ptr = (uintptr_t) execbuf->objects, @@ -1439,6 +1441,8 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device) .rsvd1 = device->context_id, .rsvd2 = 0, }; + + return VK_SUCCESS; } VkResult @@ -1541,13 +1545,13 @@ anv_cmd_buffer_execbuf(struct anv_device *device, } } - if (cmd_buffer) { + if (cmd_buffer) result = setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer); - if (result != VK_SUCCESS) - return result; - } else { - setup_empty_execbuf(&execbuf, device); - } + else + result = setup_empty_execbuf(&execbuf, device); + + if (result != VK_SUCCESS) + return result; if 
(execbuf.fence_count > 0) { assert(device->instance->physicalDevice.has_syncobj); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/fs: Take into account amount of data read in spilling cost heuristic.
Module: Mesa Branch: master Commit: 58324389be7bc7c5e10093b9cc0a8efa9b4c93a9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=58324389be7bc7c5e10093b9cc0a8efa9b4c93a9 Author: Francisco Jerez Date: Thu Apr 20 11:42:27 2017 -0700 intel/fs: Take into account amount of data read in spilling cost heuristic. Until now the spilling cost calculation was neglecting the amount of data read from the register during the spilling cost calculation. This caused it to make suboptimal decisions in some cases leading to higher memory bandwidth usage than necessary. Improves Unigine Heaven performance by ~4% on BDW, reversing an unintended FPS regression from my previous commit 147e71242ce539ff28e282f009c332818c35f5ac with n=12 and statistical significance 5%. In addition SynMark2 OglCSDof performance is improved by an additional ~5% on SKL, and a Kerbal Space Program apitrace around the Moho planet I can provide on request improves by ~20%. Cc: Reviewed-by: Plamena Manolova Reviewed-by: Jason Ekstrand --- src/intel/compiler/brw_fs_reg_allocate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index 2d4d46ef33..ec8e116cb3 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -822,7 +822,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) foreach_block_and_inst(block, fs_inst, inst, cfg) { for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) -spill_costs[inst->src[i].nr] += block_scale; +spill_costs[inst->src[i].nr] += regs_read(inst, i) * block_scale; } if (inst->dst.file == VGRF) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): intel/fs: Use regs_written() in spilling cost heuristic for improved accuracy.
Module: Mesa Branch: master Commit: ecc19e12dca95d2571d3761dea6dec24b061013c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ecc19e12dca95d2571d3761dea6dec24b061013c Author: Francisco Jerez Date: Thu Apr 20 11:44:01 2017 -0700 intel/fs: Use regs_written() in spilling cost heuristic for improved accuracy. This is what we use later on to compute the number of registers that will actually get spilled to memory, so it's more likely to match reality than the current open-coded approximation. Cc: Reviewed-by: Plamena Manolova Reviewed-by: Jason Ekstrand --- src/intel/compiler/brw_fs_reg_allocate.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index c981d72e4f..2d4d46ef33 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -826,8 +826,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) } if (inst->dst.file == VGRF) - spill_costs[inst->dst.nr] += DIV_ROUND_UP(inst->size_written, REG_SIZE) - * block_scale; + spill_costs[inst->dst.nr] += regs_written(inst) * block_scale; switch (inst->opcode) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: enable ARB_vertex_attrib_64bit for gen7+
Module: Mesa Branch: master Commit: 0aed1212ae54c3286c7f6e155c129b1973723c46 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0aed1212ae54c3286c7f6e155c129b1973723c46 Author: Juan A. Suarez Romero Date: Fri Oct 21 16:57:25 2016 +0200 i965: enable ARB_vertex_attrib_64bit for gen7+ Reviewed-by: Andreas Boll Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/intel_extensions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 467a0d3e84..53b5eaf8a0 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -221,6 +221,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_texture_compression_bptc = true; ctx->Extensions.ARB_texture_view = true; ctx->Extensions.ARB_shader_storage_buffer_object = true; + ctx->Extensions.ARB_vertex_attrib_64bit = true; ctx->Extensions.EXT_shader_samples_identical = true; ctx->Extensions.OES_primitive_bounding_box = true; ctx->Extensions.OES_texture_buffer = true; @@ -247,7 +248,6 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_shader_precision = true; ctx->Extensions.ARB_stencil_texturing = true; ctx->Extensions.ARB_texture_stencil8 = true; - ctx->Extensions.ARB_vertex_attrib_64bit = true; ctx->Extensions.OES_geometry_shader = true; ctx->Extensions.OES_texture_cube_map_array = true; ctx->Extensions.OES_viewport_array = true; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: enable OpenGL 4.2 in Ivybridge
Module: Mesa Branch: master Commit: 1877982aca7d50541618a8997fdd72c5286b4b67 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1877982aca7d50541618a8997fdd72c5286b4b67 Author: Juan A. Suarez Romero Date: Wed Mar 29 11:41:35 2017 +0200 i965: enable OpenGL 4.2 in Ivybridge Reviewed-by: Andreas Boll Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/intel_extensions.c | 2 +- src/mesa/drivers/dri/i965/intel_screen.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index fc974b9860..0133fa1006 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -139,7 +139,7 @@ intelInitExtensions(struct gl_context *ctx) else if (brw->is_haswell && can_do_pipelined_register_writes(brw->screen)) ctx->Const.GLSLVersion = 450; else if (brw->gen >= 7 && can_do_pipelined_register_writes(brw->screen)) - ctx->Const.GLSLVersion = 400; + ctx->Const.GLSLVersion = 420; else if (brw->gen >= 6) ctx->Const.GLSLVersion = 330; else diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 9e536f58b3..39e463d264 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1654,7 +1654,7 @@ set_max_gl_versions(struct intel_screen *screen) case 7: dri_screen->max_gl_core_version = 33; if (can_do_pipelined_register_writes(screen)) { - dri_screen->max_gl_core_version = screen->devinfo.is_haswell ? 42 : 40; + dri_screen->max_gl_core_version = 42; if (screen->devinfo.is_haswell && can_do_compute_dispatch(screen)) dri_screen->max_gl_core_version = 43; if (screen->devinfo.is_haswell && can_do_mi_math_and_lrr(screen)) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): docs: mark GL_ARB_vertex_attrib_64bit and OpenGL 4.2 as supported by i965/gen7+
Module: Mesa Branch: master Commit: 96dfc014fd33a4f38e31fa1d4c9c4ea52d85a0b8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=96dfc014fd33a4f38e31fa1d4c9c4ea52d85a0b8 Author: Francisco Jerez Date: Fri Apr 14 15:59:52 2017 -0700 docs: mark GL_ARB_vertex_attrib_64bit and OpenGL 4.2 as supported by i965/gen7+ v2 (Andreas Boll): - Mark GL 4.1 as supported by i965/gen7+ - Mark GL_ARB_shader_precision as supported by i965/gen7+ - Update release notes Reviewed-by: Andreas Boll Reviewed-by: Francisco Jerez --- docs/features.txt | 8 docs/relnotes/17.1.0.html | 3 +++ 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 3dd4094865..5f63632e82 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -136,17 +136,17 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, radeonsi GL_ARB_transform_feedback3DONE (i965/gen7+, llvmpipe, softpipe, swr) -GL 4.1, GLSL 4.10 --- all DONE: i965/hsw+, nvc0, r600, radeonsi +GL 4.1, GLSL 4.10 --- all DONE: i965/gen7+, nvc0, r600, radeonsi GL_ARB_ES2_compatibility DONE (i965, nv50, llvmpipe, softpipe, swr) GL_ARB_get_program_binary DONE (0 binary formats) GL_ARB_separate_shader_objectsDONE (all drivers) - GL_ARB_shader_precision DONE (i965/hsw+, all drivers that support GLSL 4.10) - GL_ARB_vertex_attrib_64bitDONE (i965/hsw+, llvmpipe, softpipe) + GL_ARB_shader_precision DONE (i965/gen7+, all drivers that support GLSL 4.10) + GL_ARB_vertex_attrib_64bitDONE (i965/gen7+, llvmpipe, softpipe) GL_ARB_viewport_array DONE (i965, nv50, llvmpipe, softpipe) -GL 4.2, GLSL 4.20 -- all DONE: i965/hsw+, nvc0, radeonsi +GL 4.2, GLSL 4.20 -- all DONE: i965/gen7+, nvc0, radeonsi GL_ARB_texture_compression_bptc DONE (i965, r600) GL_ARB_compressed_texture_pixel_storage DONE (all drivers) diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html index ba21b61f91..4f3e0030a8 100644 --- a/docs/relnotes/17.1.0.html +++ b/docs/relnotes/17.1.0.html @@ -44,15 +44,18 @@ Note: some of the new 
features are only available with certain drivers. +OpenGL 4.2 on i965/ivb GL_ARB_gpu_shader_fp64 on i965/ivybridge GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, llvmpipe GL_ARB_shader_ballot on nvc0, radeonsi GL_ARB_shader_clock on nv50, nvc0, radeonsi GL_ARB_shader_group_vote on radeonsi +GL_ARB_shader_precision on i965/ivb GL_ARB_shader_viewport_layer_array on radeonsi GL_ARB_sparse_buffer on radeonsi/CIK+ GL_ARB_transform_feedback2 on i965/gen6 GL_ARB_transform_feedback_overflow_query on i965/gen6+ +GL_ARB_vertex_attrib_64bit on i965/ivb GL_NV_fill_rectangle on nvc0 Geometry shaders enabled on swr ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: enable ARB_shader_precision in gen7+
Module: Mesa Branch: master Commit: 92d4dc76eaec64e99194f3d2afcc55eb7c7b46ba URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=92d4dc76eaec64e99194f3d2afcc55eb7c7b46ba Author: Samuel Iglesias Gonsálvez Date: Mon Oct 17 14:40:06 2016 +0000 i965: enable ARB_shader_precision in gen7+ Reviewed-by: Andreas Boll Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/intel_extensions.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c b/src/mesa/drivers/dri/i965/intel_extensions.c index 53b5eaf8a0..fc974b9860 100644 --- a/src/mesa/drivers/dri/i965/intel_extensions.c +++ b/src/mesa/drivers/dri/i965/intel_extensions.c @@ -216,6 +216,7 @@ intelInitExtensions(struct gl_context *ctx) ctx->Extensions.ARB_shader_clock = true; ctx->Extensions.ARB_shader_image_load_store = true; ctx->Extensions.ARB_shader_image_size = true; + ctx->Extensions.ARB_shader_precision = true; ctx->Extensions.ARB_shader_texture_image_samples = true; ctx->Extensions.ARB_tessellation_shader = true; ctx->Extensions.ARB_texture_compression_bptc = true; @@ -245,7 +246,6 @@ intelInitExtensions(struct gl_context *ctx) } if (brw->gen >= 8 || brw->is_haswell) { - ctx->Extensions.ARB_shader_precision = true; ctx->Extensions.ARB_stencil_texturing = true; ctx->Extensions.ARB_texture_stencil8 = true; ctx->Extensions.OES_geometry_shader = true; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): 30 new commits
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8973ae3162aec112b22cdf58f47d0ee12c4a09cd Author: Samuel Iglesias Gonsálvez Date: Wed Apr 5 06:23:43 2017 +0200 docs/relnotes: add GL_ARB_gpu_shader_fp64 support on i965/ivybridge Signed-off-by: Samuel Iglesias Gonsálvez Acked-by: Francisco Jerez URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef49dda2df94c8060047b845a3a027460c45ba7c Author: Samuel Iglesias Gonsálvez Date: Tue Oct 11 10:59:52 2016 +0200 docs: mark GL_ARB_gpu_shader_fp64 and OpenGL 4.0 as supported by i965/gen7+ Signed-off-by: Samuel Iglesias Gonsálvez Acked-by: Francisco Jerez URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a494afdb8e09640956743649354fbb7147231d1d Author: Samuel Iglesias Gonsálvez Date: Fri Aug 26 07:39:04 2016 +0200 i965: enable OpenGL 4.0 to Ivybridge/Baytrail Signed-off-by: Samuel Iglesias Gonsálvez Reviewed-by: Francisco Jerez URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cd0a6b2fc2ef6e04ffb262072821113cb49cd530 Author: Samuel Iglesias Gonsálvez Date: Fri Aug 26 07:37:42 2016 +0200 i965: enable ARB_gpu_shader_fp64 for Ivybridge/Baytrail Signed-off-by: Samuel Iglesias Gonsálvez Reviewed-by: Francisco Jerez URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2eeb1b0ad9453ba135b72aaeec6c0d4dbf9ac87c Author: Matt Turner Date: Fri Jan 20 13:35:33 2017 -0800 i965: Use correct VertStride on align16 instructions. In commit c35fa7a, we changed the "width" of DF source registers to 2, which is conceptually fine. Unfortunately a VertStride of 2 is not allowed by align16 instructions on IVB/BYT, and the regular VertStride of 4 works fine in any case. 
See generated_tests/spec/arb_gpu_shader_fp64/execution/built-in-functions/vs-round-double.shader_test for example: cmp.ge.f0(8)g18<1>DFg1<0>.xyxyDF-g8<2>DF{ align16 1Q }; ERROR: In Align16 mode, only VertStride of 0 or 4 is allowed cmp.ge.f0(8)g19<1>DFg1<0>.xyxyDF-g9<2>DF{ align16 2N }; ERROR: In Align16 mode, only VertStride of 0 or 4 is allowed v2: - Add spec quote (Curro). - Change the condition to only BRW_VERTICAL_STRIDE_2 (Curro) Reviewed-by: Samuel Iglesias Gonsálvez Reviewed-by: Francisco Jerez URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8441e2276912d353d4fc6c0cf6b781ab5153ee7 Author: Samuel Iglesias Gonsálvez Date: Fri Mar 17 11:57:25 2017 +0100 i965/vec4/dce: improve track of partial flag register writes This is required for correctness in presence of multiple 4-wide flag writes (e.g. 4-wide instructions with a conditional mod set) which update a different portion of the same 8-bit flag subregister. Right now we keep track of flag dataflow with 8-bit granularity and consider flag writes to have killed any previous definition of the same subregister even if the write was less than 8 channels wide, which can cause live flag register updates to be dead code-eliminated incorrectly. Signed-off-by: Samuel Iglesias Gonsálvez Reviewed-by: Francisco Jerez URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c1fc8fad47f60bda857fc45c4052c5f4effe0d84 Author: Samuel Iglesias Gonsálvez Date: Fri Mar 17 11:55:49 2017 +0100 i965/vec4: don't do horizontal stride on some register file types horiz_offset() shouldn't be doing anything for scalar registers, because all channels of any SIMD instructions will end up reading or writing the same component of the register, so shifting the register offset would be wrong. Signed-off-by: Samuel Iglesias Gonsálvez [ Francisco Jerez: Re-implement in terms of is_uniform() for simplicity. Pass argument by const reference. Clarify commit message. 
] Reviewed-by: Francisco Jerez URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=21e8e3a8484241508ac2c250fc4367234fa337df Author: Matt Turner Date: Fri Jan 20 13:35:32 2017 -0800 i965/vec4: Fix exec size for MOVs {SET,PICK}_{HIGH,LOW}_32BIT. Otherwise for a pack_double_2x32_split opcode, we emit: vec1 64 ssa_135 = pack_double_2x32_split ssa_133, ssa_134 mov(8) g5<1>UD g5<4>.xUD { align16 1Q compacted }; mov(8) g7<2>UD g5<4,4,1>UD { align1 1Q }; ERROR: When the destination spans two registers, the source must span two registers (exceptions for scalar source and packed-word to packed-dword expansion) mov(8) g8<2>UD g5.4<4,4,1>UD { align1 2N }; ERROR: The offset from the two source registers must be the same mov(8)
Mesa (master): i965/fs: Take into account lower frequency of conditional blocks in spilling cost heuristic.
Module: Mesa Branch: master Commit: 147e71242ce539ff28e282f009c332818c35f5ac URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=147e71242ce539ff28e282f009c332818c35f5ac Author: Francisco Jerez Date: Sun Apr 9 17:28:58 2017 -0700 i965/fs: Take into account lower frequency of conditional blocks in spilling cost heuristic. The individual branches of an if/else/endif construct will be executed some unknown number of times between 0 and 1 relative to the parent block. Use some factor in between as weight while approximating the cost of spill/fill instructions within a conditional if-else branch. This favors spilling registers used within conditional branches which are likely to be executed less frequently than registers used at the top level. Improves the framerate of the SynMark2 OglCSDof benchmark by ~1.9x on my SKL GT4e. Should have a comparable effect on other platforms. No significant regressions. Reviewed-by: Jason Ekstrand Reviewed-by: Kenneth Graunke Reviewed-by: Matt Turner --- src/intel/compiler/brw_fs_reg_allocate.cpp | 19 ++- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index 5c6f3d490f..c981d72e4f 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -806,7 +806,7 @@ emit_spill(const fs_builder &bld, fs_reg src, int fs_visitor::choose_spill_reg(struct ra_graph *g) { - float loop_scale = 1.0; + float block_scale = 1.0; float spill_costs[this->alloc.count]; bool no_spill[this->alloc.count]; @@ -822,23 +822,32 @@ fs_visitor::choose_spill_reg(struct ra_graph *g) foreach_block_and_inst(block, fs_inst, inst, cfg) { for (unsigned int i = 0; i < inst->sources; i++) { if (inst->src[i].file == VGRF) -spill_costs[inst->src[i].nr] += loop_scale; +spill_costs[inst->src[i].nr] += block_scale; } if (inst->dst.file == VGRF) spill_costs[inst->dst.nr] += DIV_ROUND_UP(inst->size_written, REG_SIZE) - * loop_scale; + * 
block_scale; switch (inst->opcode) { case BRW_OPCODE_DO: -loop_scale *= 10; +block_scale *= 10; break; case BRW_OPCODE_WHILE: -loop_scale /= 10; +block_scale /= 10; break; + case BRW_OPCODE_IF: + case BRW_OPCODE_IFF: + block_scale *= 0.5; + break; + + case BRW_OPCODE_ENDIF: + block_scale /= 0.5; + break; + case SHADER_OPCODE_GEN4_SCRATCH_WRITE: if (inst->src[0].file == VGRF) no_spill[inst->src[0].nr] = true; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): drirc: Set glsl_zero_init for Kerbal Space Program.
Module: Mesa Branch: master Commit: 0de17f52a515e655682b4b894c44ad9d7308794e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0de17f52a515e655682b4b894c44ad9d7308794e Author: Francisco Jerez Date: Tue Apr 4 14:12:59 2017 -0700 drirc: Set glsl_zero_init for Kerbal Space Program. This fixes the stripes of garbage rendered on the floor of the vehicle assembly building among other rendering issues. The reason for the misrendering seems to be that some of the GLSL shaders used by the application use variables before initializing them, incorrectly assuming that they will be implicitly set to zero by the implementation. Acked-by: Matt Turner --- src/mesa/drivers/dri/common/drirc | 8 1 file changed, 8 insertions(+) diff --git a/src/mesa/drivers/dri/common/drirc b/src/mesa/drivers/dri/common/drirc index 23d09fabb1..14d7713fdc 100644 --- a/src/mesa/drivers/dri/common/drirc +++ b/src/mesa/drivers/dri/common/drirc @@ -128,5 +128,13 @@ TODO: document the other workarounds. + + + + + + + + ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium/tgsi: Treat UCMP sources as floats to match the GLSL-to-TGSI pass expectations.
Module: Mesa Branch: master Commit: e6469ec43b25898e99766a30aa8f54cc64c3bc04 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e6469ec43b25898e99766a30aa8f54cc64c3bc04 Author: Francisco Jerez Date: Mon Mar 13 17:31:39 2017 -0700 gallium/tgsi: Treat UCMP sources as floats to match the GLSL-to-TGSI pass expectations. Currently the GLSL-to-TGSI translation pass assumes it can use floating point source modifiers on the UCMP instruction. See the bug report linked below for an example where an unrelated change in the GLSL built-in lowering code for atan2 (e9ffd12827ac11a2d2002a42fa8eb1) caused the generation of floating-point ir_unop_neg instructions followed by ir_triop_csel, which is translated into UCMP with a negate modifier on back-ends with native integer support. Allowing floating-point source modifiers on an integer instruction seems like rather dubious design for a transport IR, since the same semantics could be represented as a sequence of MOV+UCMP instructions instead, but supposedly this matches the expectations of TGSI back-ends other than tgsi_exec, and the expectations of the DX10 API. I take no responsibility for future headaches caused by this inconsistency. Fixes a regression of piglit glsl-fs-tan-1 on softpipe introduced by the above-mentioned glsl front-end commit. Even though the commit that triggered the regression doesn't seem to have made it to any stable branches yet, this might be worth back-porting since I don't see any reason why the bug couldn't have been reproduced before that point. 
Suggested-by: Roland Scheidegger Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99817 Reviewed-by: Roland Scheidegger --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 54 ++ src/gallium/docs/source/tgsi.rst | 8 +++-- 2 files changed, 46 insertions(+), 16 deletions(-) diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 3c15306..48d91af 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3359,6 +3359,46 @@ exec_up2h(struct tgsi_exec_machine *mach, } static void +micro_ucmp(union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2) +{ + dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0]; + dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1]; + dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2]; + dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3]; +} + +static void +exec_ucmp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + unsigned int chan; + struct tgsi_exec_vector dst; + + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + union tgsi_exec_channel src[3]; + + fetch_source(mach, &src[0], &inst->Src[0], chan, + TGSI_EXEC_DATA_UINT); + fetch_source(mach, &src[1], &inst->Src[1], chan, + TGSI_EXEC_DATA_FLOAT); + fetch_source(mach, &src[2], &inst->Src[2], chan, + TGSI_EXEC_DATA_FLOAT); + micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]); + } + } + for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { + if (inst->Dst[0].Register.WriteMask & (1 << chan)) { + store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, +TGSI_EXEC_DATA_FLOAT); + } + } +} + +static void exec_scs(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { @@ -4997,18 +5037,6 @@ micro_uarl(union tgsi_exec_channel *dst, dst->i[3] = src->u[3]; } -static void -micro_ucmp(union tgsi_exec_channel *dst, - const union 
tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2) -{ - dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; - dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; - dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; - dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; -} - /** * Signed bitfield extract (i.e. sign-extend the extracted bits) */ @@ -5911,7 +5939,7 @@ exec_instruction( break; case TGSI_OPCODE_UCMP: - exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); + exec_ucmp(mach, inst); break; case TGSI_OPCODE_IABS: diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 18b42fb..9976875 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -28,9 +28,11 @@ Modifiers TGSI supports modifiers on inputs (as well as saturat
Mesa (master): nir/spirv/glsl450: Rewrite atan2 implementation to fix accuracy and handling of zero/infinity.
Module: Mesa Branch: master Commit: 7215375c445f533e3962a09b8e3b075880c1382f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7215375c445f533e3962a09b8e3b075880c1382f Author: Francisco Jerez Date: Fri Jan 20 15:24:30 2017 -0800 nir/spirv/glsl450: Rewrite atan2 implementation to fix accuracy and handling of zero/infinity. See "glsl: Rewrite atan2 implementation to fix accuracy and handling of zero/infinity." for the rationale, but note that the instruction count benefit discussed there is somewhat less important for the SPIRV implementation, because the current code already emitted no control flow instructions -- Still this saves us one hardware instruction per scalar component on Intel SKL hardware. Fixes the following Vulkan CTS tests on Intel hardware: dEQP-VK.glsl.builtin.precision.atan2.highp_compute.scalar dEQP-VK.glsl.builtin.precision.atan2.highp_compute.vec2 dEQP-VK.glsl.builtin.precision.atan2.highp_compute.vec3 dEQP-VK.glsl.builtin.precision.atan2.highp_compute.vec4 dEQP-VK.glsl.builtin.precision.atan2.mediump_compute.vec2 dEQP-VK.glsl.builtin.precision.atan2.mediump_compute.vec4 Note that most of the test-cases above expect IEEE-compliant handling of atan2(±∞, ±∞), which this patch doesn't explicitly handle, so except for the last two the test-cases above weren't expected to pass yet. The reason they do is that the i965 back-end implementation of the NIR fmin and fmax instructions is not quite GLSL-compliant (it complies with IEEE 754 recommendations though), because fmin/fmax of a NaN and a non-NaN argument currently always return the non-NaN argument, which causes atan() to flush NaN to one and return the expected value. The front-end should probably not be relying on this behavior for correctness though because other back-ends are likely to behave differently -- A follow-up patch will handle the atan2(±∞, ±∞) corner cases explicitly. v2: Fix up argument scaling to take into account the range and precision of exotic FP24 hardware. 
Flip coordinate system for arguments along the vertical line as if they were on the left half-plane in order to avoid division by zero which may give unspecified results on non-GLSL 4.1-capable hardware. Sprinkle in some more comments. Reviewed-by: Ian Romanick --- src/compiler/spirv/vtn_glsl450.c | 77 1 file changed, 55 insertions(+), 22 deletions(-) diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index 0d32fdd..8509f64 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -302,28 +302,61 @@ build_atan(nir_builder *b, nir_ssa_def *y_over_x) static nir_ssa_def * build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) { - nir_ssa_def *zero = nir_imm_float(b, 0.0f); - - /* If |x| >= 1.0e-8 * |y|: */ - nir_ssa_def *condition = - nir_fge(b, nir_fabs(b, x), - nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y))); - - /* Then...call atan(y/x) and fix it up: */ - nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x)); - nir_ssa_def *r_then = - nir_bcsel(b, nir_flt(b, x, zero), - nir_fadd(b, atan1, - nir_bcsel(b, nir_fge(b, y, zero), -nir_imm_float(b, M_PIf), -nir_imm_float(b, -M_PIf))), - atan1); - - /* Else... */ - nir_ssa_def *r_else = - nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f)); - - return nir_bcsel(b, condition, r_then, r_else); + nir_ssa_def *zero = nir_imm_float(b, 0); + nir_ssa_def *one = nir_imm_float(b, 1); + + /* If we're on the left half-plane rotate the coordinates π/2 clock-wise +* for the y=0 discontinuity to end up aligned with the vertical +* discontinuity of atan(s/t) along t=0. This also makes sure that we +* don't attempt to divide by zero along the vertical line, which may give +* unspecified results on non-GLSL 4.1-capable hardware. 
+*/ + nir_ssa_def *flip = nir_fge(b, zero, x); + nir_ssa_def *s = nir_bcsel(b, flip, nir_fabs(b, x), y); + nir_ssa_def *t = nir_bcsel(b, flip, y, nir_fabs(b, x)); + + /* If the magnitude of the denominator exceeds some huge value, scale down +* the arguments in order to prevent the reciprocal operation from flushing +* its result to zero, which would cause precision problems, and for s +* infinite would cause us to return a NaN instead of the correct finite +* value. +* +* If fmin and fmax are respectively the smallest and largest positive +* normalized floating point values representable by the implementation, +* the constants below should be in agreement with: +* +*huge <= 1 / fmin +*scale <= 1 / fmin / fmax (for |t| >= huge) +* +* In addition scale should be a negative power of two in ord
Mesa (master): glsl: Fix constant evaluation of the rcp op.
Module: Mesa Branch: master Commit: 6643a97de308bc100a497f18fed8819f6f6f570b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6643a97de308bc100a497f18fed8819f6f6f570b Author: Francisco Jerez Date: Tue Jan 24 11:41:46 2017 -0800 glsl: Fix constant evaluation of the rcp op. Will avoid a regression in a future commit that introduces some additional rcp operations. According to the GLSL 4.10 specification: "Dividing by 0 results in the appropriately signed IEEE Inf." Reviewed-by: Ian Romanick Reviewed-by: Juan A. Suarez Romero --- src/compiler/glsl/ir_expression_operation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/glsl/ir_expression_operation.py b/src/compiler/glsl/ir_expression_operation.py index f91ac9b..4ac1ffb 100644 --- a/src/compiler/glsl/ir_expression_operation.py +++ b/src/compiler/glsl/ir_expression_operation.py @@ -422,7 +422,7 @@ ir_expression_operation = [ operation("neg", 1, source_types=numeric_types, c_expression={'u': "-((int) {src0})", 'default': "-{src0}"}), operation("abs", 1, source_types=signed_numeric_types, c_expression={'i': "{src0} < 0 ? -{src0} : {src0}", 'f': "fabsf({src0})", 'd': "fabs({src0})", 'i64': "{src0} < 0 ? -{src0} : {src0}"}), operation("sign", 1, source_types=signed_numeric_types, c_expression={'i': "({src0} > 0) - ({src0} < 0)", 'f': "float(({src0} > 0.0F) - ({src0} < 0.0F))", 'd': "double(({src0} > 0.0) - ({src0} < 0.0))", 'i64': "({src0} > 0) - ({src0} < 0)"}), - operation("rcp", 1, source_types=real_types, c_expression={'f': "{src0} != 0.0F ? 1.0F / {src0} : 0.0F", 'd': "{src0} != 0.0 ? 
1.0 / {src0} : 0.0"}), + operation("rcp", 1, source_types=real_types, c_expression={'f': "1.0F / {src0}", 'd': "1.0 / {src0}"}), operation("rsq", 1, source_types=real_types, c_expression={'f': "1.0F / sqrtf({src0})", 'd': "1.0 / sqrt({src0})"}), operation("sqrt", 1, source_types=real_types, c_expression={'f': "sqrtf({src0})", 'd': "sqrt({src0})"}), operation("exp", 1, source_types=(float_type,), c_expression="expf({src0})"), # Log base e on gentype ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Rewrite atan2 implementation to fix accuracy and handling of zero/infinity.
Module: Mesa Branch: master Commit: e9ffd12827ac11a2d2002a42fa8eb1df847153ba URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e9ffd12827ac11a2d2002a42fa8eb1df847153ba Author: Francisco Jerez Date: Sat Jan 21 13:41:08 2017 -0800 glsl: Rewrite atan2 implementation to fix accuracy and handling of zero/infinity. This addresses several issues of the current atan2 implementation: - Negative zero (and negative denorms which end up getting flushed to zero) isn't handled correctly by the current implementation. The reason is that it does 'y >= 0' and 'x < 0' comparisons to decide on which side of the branch cut the argument is, which causes us to return incorrect results (off by up to 2π) for very small negative values. - There is a serious precision problem for x values of large enough magnitude introduced by the floating point division operation being implemented as a mul+rcp sequence. This can lead to the quotient getting flushed to zero in some cases introducing an error of over 8e6 ULP in the result -- Or in the most catastrophic case will cause us to return NaN instead of the correct value ±π/2 for y=±∞ and x very large. We can fix this easily by scaling down both arguments when the absolute value of the denominator goes above certain threshold. The error of this atan2 implementation remains below 25 ULP in most of its domain except for a neighborhood of y=0 where it reaches a maximum error of about 180 ULP. - It emits a bunch of instructions including no less than three if-else branches per scalar component that don't seem to get optimized out later on. This implementation uses about 13% less instructions on Intel SKL hardware and doesn't emit any control flow instructions. v2: Fix up argument scaling to take into account the range and precision of exotic FP24 hardware. 
Flip coordinate system for arguments along the vertical line as if they were on the left half-plane in order to avoid division by zero which may give unspecified results on non-GLSL 4.1-capable hardware. Sprinkle in some more comments. Reviewed-by: Ian Romanick --- src/compiler/glsl/builtin_functions.cpp | 96 - 1 file changed, 60 insertions(+), 36 deletions(-) diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 4a6c5af..432df65 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3560,44 +3560,68 @@ builtin_builder::_acos(const glsl_type *type) ir_function_signature * builtin_builder::_atan2(const glsl_type *type) { - ir_variable *vec_y = in_var(type, "vec_y"); - ir_variable *vec_x = in_var(type, "vec_x"); - MAKE_SIG(type, always_available, 2, vec_y, vec_x); - - ir_variable *vec_result = body.make_temp(type, "vec_result"); - ir_variable *r = body.make_temp(glsl_type::float_type, "r"); - for (int i = 0; i < type->vector_elements; i++) { - ir_variable *y = body.make_temp(glsl_type::float_type, "y"); - ir_variable *x = body.make_temp(glsl_type::float_type, "x"); - body.emit(assign(y, swizzle(vec_y, i, 1))); - body.emit(assign(x, swizzle(vec_x, i, 1))); - - /* If |x| >= 1.0e-8 * |y|: */ - ir_if *outer_if = - new(mem_ctx) ir_if(greater(abs(x), mul(imm(1.0e-8f), abs(y; - - ir_factory outer_then(&outer_if->then_instructions, mem_ctx); - - /* Then...call atan(y/x) */ - do_atan(outer_then, glsl_type::float_type, r, div(y, x)); - - /* ...and fix it up: */ - ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f))); - inner_if->then_instructions.push_tail( - if_tree(gequal(y, imm(0.0f)), - assign(r, add(r, imm(M_PIf))), - assign(r, sub(r, imm(M_PIf); - outer_then.emit(inner_if); - - /* Else... 
*/ - outer_if->else_instructions.push_tail( - assign(r, mul(sign(y), imm(M_PI_2f; + const unsigned n = type->vector_elements; + ir_variable *y = in_var(type, "y"); + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, always_available, 2, y, x); - body.emit(outer_if); + /* If we're on the left half-plane rotate the coordinates π/2 clock-wise +* for the y=0 discontinuity to end up aligned with the vertical +* discontinuity of atan(s/t) along t=0. This also makes sure that we +* don't attempt to divide by zero along the vertical line, which may give +* unspecified results on non-GLSL 4.1-capable hardware. +*/ + ir_variable *flip = body.make_temp(glsl_type::bvec(n), "flip"); + body.emit(assign(flip, gequal(imm(0.0f, n), x))); + ir_variable *s = body.make_temp(type, "s"); + body.emit(assign(s, csel(flip, abs(x), y))); + ir_variable *t = bo
Mesa (master): i965/fs: Fix nir_op_fsign of absolute value.
Module: Mesa Branch: master Commit: 69042a5be4664c7928a21bd23e4f6795bfb19f60 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=69042a5be4664c7928a21bd23e4f6795bfb19f60 Author: Francisco Jerez Date: Tue Jan 24 12:26:54 2017 -0800 i965/fs: Fix nir_op_fsign of absolute value. This does point at the front-end emitting silly code that could have been optimized out, but the current fsign implementation would emit bogus IR if abs was set for the argument (because it would apply the abs modifier on an unsigned integer type), and we shouldn't rely on the upper layer's optimization passes for correctness. Reviewed-by: Ian Romanick --- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index e1ab598..e0c2fa0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -701,7 +701,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr) break; case nir_op_fsign: { - if (type_sz(op[0].type) < 8) { + if (op[0].abs) { + /* Straightforward since the source can be assumed to be + * non-negative. + */ + set_condmod(BRW_CONDITIONAL_NZ, bld.MOV(result, op[0])); + set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(result, brw_imm_f(1.0f))); + + } else if (type_sz(op[0].type) < 8) { /* AND(val, 0x80000000) gives the sign bit. * * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Implement IEEE-compliant handling of atan2(±∞, ±∞).
Module: Mesa Branch: master Commit: 013d40d1ceb1c23e8a95c8e4dbbb8cab581be919 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=013d40d1ceb1c23e8a95c8e4dbbb8cab581be919 Author: Francisco Jerez Date: Tue Jan 24 13:43:07 2017 -0800 glsl: Implement IEEE-compliant handling of atan2(±∞, ±∞). Reviewed-by: Ian Romanick Reviewed-by: Juan A. Suarez Romero --- src/compiler/glsl/builtin_functions.cpp | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp index 432df65..b8f9d8f 100644 --- a/src/compiler/glsl/builtin_functions.cpp +++ b/src/compiler/glsl/builtin_functions.cpp @@ -3604,11 +3604,31 @@ builtin_builder::_atan2(const glsl_type *type) body.emit(assign(rcp_scaled_t, rcp(mul(t, scale; ir_expression *s_over_t = mul(mul(s, scale), rcp_scaled_t); + /* For |x| = |y| assume tan = 1 even if infinite (i.e. pretend momentarily +* that ∞/∞ = 1) in order to comply with the rather artificial rules +* inherited from IEEE 754-2008, namely: +* +* "atan2(±∞, −∞) is ±3π/4 +* atan2(±∞, +∞) is ±π/4" +* +* Note that this is inconsistent with the rules for the neighborhood of +* zero that are based on iterated limits: +* +* "atan2(±0, −0) is ±π +* atan2(±0, +0) is ±0" +* +* but GLSL specifically allows implementations to deviate from IEEE rules +* at (0,0), so we take that license (i.e. pretend that 0/0 = 1 here as +* well). +*/ + ir_expression *tan = csel(equal(abs(x), abs(y)), + imm(1.0f, n), abs(s_over_t)); + /* Calculate the arctangent and fix up the result if we had flipped the * coordinate system. */ ir_variable *arc = body.make_temp(type, "arc"); - do_atan(body, type, arc, abs(s_over_t)); + do_atan(body, type, arc, tan); body.emit(assign(arc, add(arc, mul(b2f(flip), imm(M_PI_2f); /* Rather convoluted calculation of the sign of the result. When x < 0 we ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): nir/spirv/glsl450: Implement IEEE-compliant handling of atan2(±∞, ±∞).
Module: Mesa Branch: master Commit: 11e9ebbf15ecf49d7ef02c2ec6c2d9d3ff0f1b6e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=11e9ebbf15ecf49d7ef02c2ec6c2d9d3ff0f1b6e Author: Francisco Jerez Date: Mon Jan 23 23:36:46 2017 -0800 nir/spirv/glsl450: Implement IEEE-compliant handling of atan2(±∞, ±∞). Reviewed-by: Ian Romanick Reviewed-by: Juan A. Suarez Romero --- src/compiler/spirv/vtn_glsl450.c | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c index 8509f64..dd38cc9 100644 --- a/src/compiler/spirv/vtn_glsl450.c +++ b/src/compiler/spirv/vtn_glsl450.c @@ -339,12 +339,32 @@ build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x) nir_ssa_def *rcp_scaled_t = nir_frcp(b, nir_fmul(b, t, scale)); nir_ssa_def *s_over_t = nir_fmul(b, nir_fmul(b, s, scale), rcp_scaled_t); + /* For |x| = |y| assume tan = 1 even if infinite (i.e. pretend momentarily +* that ∞/∞ = 1) in order to comply with the rather artificial rules +* inherited from IEEE 754-2008, namely: +* +* "atan2(±∞, −∞) is ±3π/4 +* atan2(±∞, +∞) is ±π/4" +* +* Note that this is inconsistent with the rules for the neighborhood of +* zero that are based on iterated limits: +* +* "atan2(±0, −0) is ±π +* atan2(±0, +0) is ±0" +* +* but GLSL specifically allows implementations to deviate from IEEE rules +* at (0,0), so we take that license (i.e. pretend that 0/0 = 1 here as +* well). +*/ + nir_ssa_def *tan = nir_bcsel(b, nir_feq(b, nir_fabs(b, x), nir_fabs(b, y)), +one, nir_fabs(b, s_over_t)); + /* Calculate the arctangent and fix up the result if we had flipped the * coordinate system. */ nir_ssa_def *arc = nir_fadd(b, nir_fmul(b, nir_b2f(b, flip), nir_imm_float(b, M_PI_2f)), - build_atan(b, nir_fabs(b, s_over_t))); + build_atan(b, tan)); /* Rather convoluted calculation of the sign of the result. 
When x < 0 we * cannot use fsign because we need to be able to distinguish between ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa/program: Translate csel operation from GLSL IR.
Module: Mesa Branch: master Commit: e81130d7a146fe6a750bf903e910dc2c7c90d513 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e81130d7a146fe6a750bf903e910dc2c7c90d513 Author: Francisco Jerez Date: Mon Jan 23 23:53:03 2017 -0800 mesa/program: Translate csel operation from GLSL IR. This will be used internally by the GLSL front-end in order to implement some built-in functions. Plumb it through MESA IR for back-ends that rely on this translation pass. v2: Add comment. Reviewed-by: Ian Romanick Reviewed-by: Juan A. Suarez Romero --- src/mesa/program/ir_to_mesa.cpp | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 0ae797f..dc5f801 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -1360,13 +1360,20 @@ ir_to_mesa_visitor::visit(ir_expression *ir) emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]); break; + case ir_triop_csel: + /* We assume that boolean true and false are 1.0 and 0.0. OPCODE_CMP + * selects src1 if src0 is < 0, src2 otherwise. + */ + op[0].negate = ~op[0].negate; + emit(ir, OPCODE_CMP, result_dst, op[0], op[1], op[2]); + break; + case ir_binop_vector_extract: case ir_triop_fma: case ir_triop_bitfield_extract: case ir_triop_vector_insert: case ir_quadop_bitfield_insert: case ir_binop_ldexp: - case ir_triop_csel: case ir_binop_carry: case ir_binop_borrow: case ir_binop_imul_high: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl/ir_builder: Add rcp builder.
Module: Mesa Branch: master Commit: 7ec3af3f8ff6584542f029c28abc2bcae1402259 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7ec3af3f8ff6584542f029c28abc2bcae1402259 Author: Francisco Jerez Date: Mon Jan 23 23:59:45 2017 -0800 glsl/ir_builder: Add rcp builder. Reviewed-by: Ian Romanick Reviewed-by: Juan A. Suarez Romero --- src/compiler/glsl/ir_builder.cpp | 6 ++ src/compiler/glsl/ir_builder.h | 1 + 2 files changed, 7 insertions(+) diff --git a/src/compiler/glsl/ir_builder.cpp b/src/compiler/glsl/ir_builder.cpp index 0cee856..8d61533 100644 --- a/src/compiler/glsl/ir_builder.cpp +++ b/src/compiler/glsl/ir_builder.cpp @@ -315,6 +315,12 @@ exp(operand a) } ir_expression * +rcp(operand a) +{ + return expr(ir_unop_rcp, a); +} + +ir_expression * rsq(operand a) { return expr(ir_unop_rsq, a); diff --git a/src/compiler/glsl/ir_builder.h b/src/compiler/glsl/ir_builder.h index 5ee9412..ff1ff70 100644 --- a/src/compiler/glsl/ir_builder.h +++ b/src/compiler/glsl/ir_builder.h @@ -148,6 +148,7 @@ ir_expression *neg(operand a); ir_expression *sin(operand a); ir_expression *cos(operand a); ir_expression *exp(operand a); +ir_expression *rcp(operand a); ir_expression *rsq(operand a); ir_expression *sqrt(operand a); ir_expression *log(operand a); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Check for executables before enqueueing a kernel
Module: Mesa Branch: master Commit: 4e0d171d7eb6accbf8f381530eedbc9ff86b54fb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4e0d171d7eb6accbf8f381530eedbc9ff86b54fb Author: Pierre Moreau Date: Fri Dec 30 00:29:20 2016 +0100 clover: Check for executables before enqueueing a kernel Without this check, the kernel::bind() method would fail with a std::out_of_range exception, letting an exception escape from the library into the client, rather than returning the corresponding error code CL_INVALID_PROGRAM_EXECUTABLE. Signed-off-by: Pierre Moreau Reviewed-by: Francisco Jerez --- src/gallium/state_trackers/clover/api/kernel.cpp | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp b/src/gallium/state_trackers/clover/api/kernel.cpp index 73ba34a..b665773 100644 --- a/src/gallium/state_trackers/clover/api/kernel.cpp +++ b/src/gallium/state_trackers/clover/api/kernel.cpp @@ -215,7 +215,10 @@ namespace { }, kern.args())) throw error(CL_INVALID_KERNEL_ARGS); - if (!count(q.device(), kern.program().devices())) + // If the command queue's device is not associated to the program, we get + // a module, with no sections, which will also fail the following test. + auto &m = kern.program().build(q.device()).binary; + if (!any_of(type_equals(module::section::text_executable), m.secs)) throw error(CL_INVALID_PROGRAM_EXECUTABLE); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Use Clang's diagnostics
Module: Mesa Branch: master Commit: d9fef848a651b47520cbeb72c38b93d4fbf842a8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d9fef848a651b47520cbeb72c38b93d4fbf842a8 Author: Vedran Miletić Date: Wed Dec 21 13:49:36 2016 +0100 clover: Use Clang's diagnostics Presently errors from frontend are handled only if they occur in clang::CompilerInvocation::CreateFromArgs(). This patch uses clang::DiagnosticsEngine to detect errors such as invalid values for Clang frontend arguments. Fixes Piglit's cl/program/build/fail/invalid-version-declaration.cl test. v2: fix inconsistent code formatting Signed-off-by: Vedran Miletić Reviewed-by: Francisco Jerez Tested-by: Aaron Watry --- src/gallium/state_trackers/clover/llvm/invocation.cpp | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 675cf19..f63ff3d 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -98,8 +98,9 @@ namespace { const std::vector &opts, std::string &r_log) { std::unique_ptr c { new clang::CompilerInstance }; + clang::TextDiagnosticBuffer *diag_buffer = new clang::TextDiagnosticBuffer; clang::DiagnosticsEngine diag { new clang::DiagnosticIDs, -new clang::DiagnosticOptions, new clang::TextDiagnosticBuffer }; +new clang::DiagnosticOptions, diag_buffer }; // Parse the compiler options. 
A file name should be present at the end // and must have the .cl extension in order for the CompilerInvocation @@ -111,6 +112,10 @@ namespace { c->getInvocation(), copts.data(), copts.data() + copts.size(), diag)) throw invalid_build_options_error(); + diag_buffer->FlushDiagnostics(diag); + if (diag.hasErrorOccurred()) + throw invalid_build_options_error(); + c->getTargetOpts().CPU = target.cpu; c->getTargetOpts().Triple = target.triple; c->getLangOpts().NoBuiltin = true; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): anv: Fix uniform and storage buffer offset alignment limits.
Module: Mesa Branch: master Commit: 79d08ed3d21bef21881303f320706ebb2098a50a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=79d08ed3d21bef21881303f320706ebb2098a50a Author: Francisco Jerez Date: Thu Dec 15 13:34:02 2016 -0800 anv: Fix uniform and storage buffer offset alignment limits. This fixes a regression in a bunch of image store vulkan CTS tests from commit ad38ba113491869ab0dffed937f7b3dd50e8a735, which started using OWORD block read messages to implement UBO loads. The reason for the failure is that we were giving bogus buffer alignment limits to the application (1B), so the CTS would happily come back with descriptor sets pointing at not even word-aligned uniform buffer addresses. Surprisingly the sampler messages used to fetch pull constants before that commit were able to cope with the non-texel aligned addresses, but the dataport messages used to fetch pull constants after that commit and the ones used to access storage buffers (before and after the same commit) aren't as permissive with unaligned addresses. Cc: Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99097 Reported-by: Mark Janes Reviewed-by: Jason Ekstrand --- src/intel/vulkan/anv_device.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index e3d278d..9245e5c 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -582,8 +582,8 @@ void anv_GetPhysicalDeviceProperties( .viewportSubPixelBits = 13, /* We take a float? */ .minMemoryMapAlignment= 4096, /* A page */ .minTexelBufferOffsetAlignment= 1, - .minUniformBufferOffsetAlignment = 1, - .minStorageBufferOffsetAlignment = 1, + .minUniformBufferOffsetAlignment = 16, + .minStorageBufferOffsetAlignment = 4, .minTexelOffset = -8, .maxTexelOffset = 7, .minTexelGatherOffset = -32, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/fs: Remove the FS_OPCODE_SET_SIMD4X2_OFFSET virtual opcode.
Module: Mesa Branch: master Commit: 23caf75182d010a60e2d8c8633acaacb3e7c065d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=23caf75182d010a60e2d8c8633acaacb3e7c065d Author: Francisco Jerez Date: Wed Apr 22 21:37:46 2015 +0300 i965/fs: Remove the FS_OPCODE_SET_SIMD4X2_OFFSET virtual opcode. Not used anymore. It was just a scalar MOV. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_defines.h| 1 - src/mesa/drivers/dri/i965/brw_fs.h | 3 --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27 -- src/mesa/drivers/dri/i965/brw_shader.cpp | 2 -- 4 files changed, 33 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 1875380..a07d307 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1119,7 +1119,6 @@ enum opcode { FS_OPCODE_MOV_DISPATCH_TO_FLAGS, FS_OPCODE_DISCARD_JUMP, FS_OPCODE_SET_SAMPLE_ID, - FS_OPCODE_SET_SIMD4X2_OFFSET, FS_OPCODE_PACK_HALF_2x16_SPLIT, FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 941c05f..d0e272b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -442,9 +442,6 @@ private: struct brw_reg src0, struct brw_reg src1); - void generate_set_simd4x2_offset(fs_inst *inst, -struct brw_reg dst, -struct brw_reg offset); void generate_discard_jump(fs_inst *inst); void generate_pack_half_2x16_split(fs_inst *inst, diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index db61d8e..aed3c72 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1379,29 +1379,6 @@ fs_generator::generate_pixel_interpolator_query(fs_inst *inst, inst->size_written / REG_SIZE); } - -/** - * Sets the first word of a vgrf for gen7+ simd4x2 uniform pull constant - * sampler LD messages. 
- * - * We don't want to bake it into the send message's code generation because - * that means we don't get a chance to schedule the instructions. - */ -void -fs_generator::generate_set_simd4x2_offset(fs_inst *inst, - struct brw_reg dst, - struct brw_reg value) -{ - assert(value.file == BRW_IMMEDIATE_VALUE); - - brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); - brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value); - brw_pop_insn_state(p); -} - /* Sets vstride=1, width=4, hstride=0 of register src1 during * the ADD instruction. */ @@ -2004,10 +1981,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_memory_fence(p, dst); break; - case FS_OPCODE_SET_SIMD4X2_OFFSET: - generate_set_simd4x2_offset(inst, dst, src[0]); - break; - case SHADER_OPCODE_FIND_LIVE_CHANNEL: { const struct brw_reg mask = brw_stage_has_packed_dispatch(devinfo, stage, diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 25f745d..afab4aa 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -363,8 +363,6 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case FS_OPCODE_SET_SAMPLE_ID: return "set_sample_id"; - case FS_OPCODE_SET_SIMD4X2_OFFSET: - return "set_simd4x2_offset"; case FS_OPCODE_PACK_HALF_2x16_SPLIT: return "pack_half_2x16_split"; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/fs: Expose arbitrary pull constant load sizes to the IR.
Module: Mesa Branch: master Commit: 9b22a0d295316b7547667ebbfe1e1b6182439186 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b22a0d295316b7547667ebbfe1e1b6182439186 Author: Francisco Jerez Date: Thu Dec 8 20:05:18 2016 -0800 i965/fs: Expose arbitrary pull constant load sizes to the IR. Change the FS generator to ask the dataport for enough owords worth of constants to fill the execution size of the instruction -- Which means that the visitor now needs to set the execution size correctly for uniform pull constant load instructions, which we were kind of neglecting until now. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_eu_emit.c| 15 +++--- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27 -- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 9 + 4 files changed, 26 insertions(+), 27 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 6141bfb..8536a13 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2256,7 +2256,7 @@ gen7_block_read_scratch(struct brw_codegen *p, } /** - * Read a float[4] vector from the data port constant cache. + * Read float[4] vectors from the data port constant cache. * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. */ @@ -2270,6 +2270,7 @@ void brw_oword_block_read(struct brw_codegen *p, const unsigned target_cache = (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE : BRW_DATAPORT_READ_TARGET_DATA_CACHE); + const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current); /* On newer hardware, offset is in units of owords. 
*/ if (devinfo->gen >= 6) @@ -2278,11 +2279,12 @@ void brw_oword_block_read(struct brw_codegen *p, mrf = retype(mrf, BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); - brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_set_default_predicate_control(p, BRW_PREDICATE_NONE); brw_set_default_compression_control(p, BRW_COMPRESSION_NONE); brw_set_default_mask_control(p, BRW_MASK_DISABLE); + brw_push_insn_state(p); + brw_set_default_exec_size(p, BRW_EXECUTE_8); brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); /* set message header global offset field (reg 0, element 2) */ @@ -2291,6 +2293,7 @@ void brw_oword_block_read(struct brw_codegen *p, mrf.nr, 2), BRW_REGISTER_TYPE_UD), brw_imm_ud(offset)); + brw_pop_insn_state(p); brw_inst *insn = next_insn(p, BRW_OPCODE_SEND); @@ -2305,15 +2308,13 @@ void brw_oword_block_read(struct brw_codegen *p, brw_inst_set_base_mrf(devinfo, insn, mrf.nr); } - brw_set_dp_read_message(p, - insn, - bind_table_index, - BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW, + brw_set_dp_read_message(p, insn, bind_table_index, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size), BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, target_cache, 1, /* msg_length */ true, /* header_present */ - 1); /* response_length (1 reg, 2 owords!) 
*/ + DIV_ROUND_UP(exec_size, 8)); /* response_length */ brw_pop_insn_state(p); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b22dc9a..977fd8c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2121,7 +2121,7 @@ fs_visitor::lower_constant_loads() assert(inst->src[i].stride == 0); - const fs_builder ubld = ibld.exec_all().group(8, 0); + const fs_builder ubld = ibld.exec_all().group(4, 0); struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, dst, brw_imm_ud(index), offset); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 8b9fa8e..93f4c41 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1127,6 +1127,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, struct brw_reg index, struct brw_reg offset) { + assert(type_sz(dst.type) == 4); assert(inst->mlen != 0); assert(index.file == BRW_IMMEDIATE_VALUE && @@ -1149,27 +1150,25 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, { assert(index.type == BRW_REGISTER_TYPE_UD); assert(pa
Mesa (master): i965/fs: Switch to the constant cache for uniform pull constants.
Module: Mesa Branch: master Commit: ad38ba113491869ab0dffed937f7b3dd50e8a735 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ad38ba113491869ab0dffed937f7b3dd50e8a735 Author: Francisco Jerez Date: Wed Oct 26 14:25:06 2016 -0700 i965/fs: Switch to the constant cache for uniform pull constants. This reverts to using the oword block read messages for uniform pull constant loads, as used to be the case until 4c1fdae0a01b3f92ec03b61aac1d3df5. There are two important differences though: Now the L3 cacheability bits are set up correctly for UBOs (since 11f5d8a5d4fbb861ec161f68593e429cbd65d1cd), and we target the constant cache instead of the data cache. The latter used to get no L3 way allocation on boot on all platforms that existed at the time, so oword read messages wouldn't get cached on L3 regardless of the MOCS bits, what probably explains the apparent slowness of oword fetches. Constant cache loads seem to perform better than SIMD4x2 sampler loads in a number of cases, they alleviate some of the cache thrashing caused by the competition with textures for the L1/L2 sampler caches, and they allow fetching up to 128B worth of constants with a single oword fetch message. Note that IVB devices suffer from a hardware bug that leads to serialization of L3 read requests overlapping the same cacheline as result of a (on IVB buggy) mechanism of the L3 to preserve coherency. Since read requests for matching cachelines from any L3 client are not pipelined, throughput may decrease in cases where there are no non-overlapping requests left in the queue that can be processed between them. This situation should be relatively uncommon as long as we make sure that we don't use the 1/2 oword messages in cases where the shader intends to read from any other location of the same cacheline at some other point. 
This is generally a good idea anyway on all generations because using the 1 and 2 oword messages is expected to waste bandwidth since the minimum L3 request size for the DC is exactly 4 owords (i.e. one cacheline). A future commit will have this effect. I haven't been able to find any real-world example where this would still result in a regression on IVB, but if someone happens to find one it shouldn't be too difficult to add an IVB-specific check to have it fall back to the sampler cache for pull constant loads. Note that on SKL+ this change has the additional benefit of reducing the register footprint of pull constant loads. The following table summarizes the effect of the whole series on several shader-db stats: Total instructions Total cycles BWR: 4571248 -> 4568342 (-0.06%) 123375740 -> 123373296 (-0.00%) ELK: 3989020 -> 3985402 (-0.09%) 98757068 -> 98754058 (-0.00%) ILK: 6383591 -> 6376787 (-0.11%) 143649910 -> 143648914 (-0.00%) SNB: 7528395 -> 7501446 (-0.36%) 103503796 -> 102460370 (-1.01%) IVB: 6949221 -> 6943317 (-0.08%) 60592262 -> 60584422 (-0.01%) HSW: 6409753 -> 6403702 (-0.09%) 60609070 -> 60604414 (-0.01%) BDW: 8043467 -> 7976364 (-0.83%) 68427730 -> 68483042 (0.08%) CHV: 8045019 -> 7977916 (-0.83%) 68297426 -> 68352756 (0.08%) SKL: 8204037 -> 7939086 (-3.23%) 66583900 -> 65624378 (-1.44%) Lost->Gained Total spills Total fills BWR: 5 -> 5 1488 -> 1488 (0.00%) 1957 -> 1957 (0.00%) ELK: 5 -> 5 1489 -> 1489 (0.00%) 1958 -> 1958 (0.00%) ILK: 1 -> 4 1449 -> 1449 (0.00%) 1921 -> 1921 (0.00%) SNB: 0 -> 0 549 -> 549 (0.00%) 52 -> 52 (0.00%) IVB: 13 -> 3 1271 -> 1271 (0.00%) 1162 -> 1162 (0.00%) HSW: 11 -> 0 1271 -> 1271 (0.00%) 1162 -> 1162 (0.00%) BDW: 12 -> 0 1340 -> 1340 (0.00%) 1452 -> 1452 (0.00%) CHV: 12 -> 0 1340 -> 1340 (0.00%) 1452 -> 1452 (0.00%) SKL: 0 -> 120 1269 -> 375 (-70.45%) 1563 -> 690 (-55.85%) v3: Non-trivial rebase.
Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_eu_emit.c| 5 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 42 +++--- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 78 +- 4 files changed, 36 insertions(+), 91 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 72b6df6..341f543 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2266,7 +2266,7 @@ gen7_block_read_scratch(struct brw_codegen *p, } /** - * Read a float[4] vector from the data port Data Cache (const buffer). + * Read a float[4] vector from the data port constant cache. * Location (in buffer) should be a multiple of 16. * Used for fetching shader constants. */ @@ -2278,8 +2278,7 @@ void brw_oword_block_read(struct brw_codegen *p, { const struct gen_device_info *devinfo = p->de
Mesa (master): i965/fs: Drop useless access mode override from pull constant generator code.
Module: Mesa Branch: master Commit: e014058195540a3e54085903821beca70f8f2ec5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e014058195540a3e54085903821beca70f8f2ec5 Author: Francisco Jerez Date: Thu Dec 8 19:08:33 2016 -0800 i965/fs: Drop useless access mode override from pull constant generator code. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 93f4c41..db61d8e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -1175,7 +1175,6 @@ fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_access_mode(p, BRW_ALIGN_1); /* a0.0 = surf_index & 0xff */ brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); @@ -1311,7 +1310,6 @@ fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); - brw_set_default_access_mode(p, BRW_ALIGN_1); /* a0.0 = surf_index & 0xff */ brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/disasm: Decode dataport constant cache control fields.
Module: Mesa Branch: master Commit: fd3120d85c295eeeb3b6c9a60372506ae48f5fdb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fd3120d85c295eeeb3b6c9a60372506ae48f5fdb Author: Francisco Jerez Date: Thu Dec 8 22:14:59 2016 -0800 i965/disasm: Decode dataport constant cache control fields. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_disasm.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c b/src/mesa/drivers/dri/i965/brw_disasm.c index 5e51be7..5930e44 100644 --- a/src/mesa/drivers/dri/i965/brw_disasm.c +++ b/src/mesa/drivers/dri/i965/brw_disasm.c @@ -1410,6 +1410,7 @@ brw_disassemble_inst(FILE *file, const struct gen_device_info *devinfo, } break; case GEN6_SFID_DATAPORT_SAMPLER_CACHE: + case GEN6_SFID_DATAPORT_CONSTANT_CACHE: /* aka BRW_SFID_DATAPORT_READ on Gen4-5 */ if (devinfo->gen >= 6) { format(file, " (%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64")", ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/gen6+: Invalidate constant cache on brw_emit_mi_flush().
Module: Mesa Branch: master Commit: 591e14ec08b13e8d50636feb1afa578257175b9d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=591e14ec08b13e8d50636feb1afa578257175b9d Author: Francisco Jerez Date: Thu Dec 8 18:00:17 2016 -0800 i965/gen6+: Invalidate constant cache on brw_emit_mi_flush(). In order to make sure that the constant cache is coherent with previous rendering when we start using it for pull constant loads. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_pipe_control.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c b/src/mesa/drivers/dri/i965/brw_pipe_control.c index dd426bf..b8f7406 100644 --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c @@ -351,6 +351,7 @@ brw_emit_mi_flush(struct brw_context *brw) int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH; if (brw->gen >= 6) { flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE | + PIPE_CONTROL_CONST_CACHE_INVALIDATE | PIPE_CONTROL_DEPTH_CACHE_FLUSH | PIPE_CONTROL_VF_CACHE_INVALIDATE | PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE | ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965: Let the caller of brw_set_dp_write/read_message control the target cache.
Module: Mesa Branch: master Commit: 3c78d31374422b028b19afa5799689c404a5b73e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c78d31374422b028b19afa5799689c404a5b73e Author: Francisco Jerez Date: Thu Apr 23 14:36:16 2015 +0300 i965: Let the caller of brw_set_dp_write/read_message control the target cache. brw_set_dp_read_message already had a target_cache argument, but its interpretation was rather convoluted (on Gen6 the render cache was used if the caller asked for it, otherwise it was ignored using the sampler cache instead), and the constant cache wasn't representable at all. brw_set_dp_write_message used the data cache on Gen7+ except for RENDER_TARGET_WRITE messages, in which case it would use the render cache. On Gen6 the render cache was always used. Instead of the above, provide the shared unit SFID that the caller expects will be used. Makes no functional changes. v3: Non-trivial rebase. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c | 69 +++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 -- 3 files changed, 43 insertions(+), 42 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 737a335..c44896b 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -233,6 +233,7 @@ void brw_set_dp_write_message(struct brw_codegen *p, unsigned binding_table_index, unsigned msg_control, unsigned msg_type, + unsigned target_cache, unsigned msg_length, bool header_present, unsigned last_render_target, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index ca04221..72b6df6 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -706,6 +706,7 @@ brw_set_dp_write_message(struct brw_codegen *p, unsigned binding_table_index, unsigned msg_control, unsigned msg_type, + unsigned target_cache, unsigned msg_length, bool 
header_present, unsigned last_render_target, @@ -714,20 +715,8 @@ brw_set_dp_write_message(struct brw_codegen *p, unsigned send_commit_msg) { const struct gen_device_info *devinfo = p->devinfo; - unsigned sfid; - - if (devinfo->gen >= 7) { - /* Use the Render Cache for RT writes; otherwise use the Data Cache */ - if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE) -sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - else -sfid = GEN7_SFID_DATAPORT_DATA_CACHE; - } else if (devinfo->gen == 6) { - /* Use the render cache for all write messages. */ - sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - } else { - sfid = BRW_SFID_DATAPORT_WRITE; - } + const unsigned sfid = (devinfo->gen >= 6 ? target_cache : + BRW_SFID_DATAPORT_WRITE); brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, header_present, end_of_thread); @@ -753,26 +742,8 @@ brw_set_dp_read_message(struct brw_codegen *p, unsigned response_length) { const struct gen_device_info *devinfo = p->devinfo; - unsigned sfid; - - if (devinfo->gen >= 7) { - if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) - sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - else if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE) - sfid = GEN7_SFID_DATAPORT_DATA_CACHE; - else if (target_cache == BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE) - sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; - else - unreachable("Invalid target cache"); - - } else if (devinfo->gen == 6) { - if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE) -sfid = GEN6_SFID_DATAPORT_RENDER_CACHE; - else -sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE; - } else { - sfid = BRW_SFID_DATAPORT_READ; - } + const unsigned sfid = (devinfo->gen >= 6 ? 
target_cache : + BRW_SFID_DATAPORT_READ); brw_set_message_descriptor(p, insn, sfid, msg_length, response_length, header_present, false); @@ -2073,6 +2044,10 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, unsigned offset) { const struct gen_device_info *devinfo = p->devinfo; + const unsigned target_cache = + (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : + devinfo->gen >= 6 ? GEN6_SFID_DAT
Mesa (master): i965/fs: Fetch one cacheline of pull constants at a time.
Module: Mesa Branch: master Commit: b56fa830c6095f8226456b2aeb62f2dfad804be5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b56fa830c6095f8226456b2aeb62f2dfad804be5 Author: Francisco Jerez Date: Thu Dec 8 19:18:00 2016 -0800 i965/fs: Fetch one cacheline of pull constants at a time. Asking the DC for less than one cacheline (4 owords) of data for uniform pull constants is suboptimal because the DC cannot request less than that from L3, resulting in wasted bandwidth and unnecessary message dispatch overhead, and exacerbating the IVB L3 serialization bug. The following table summarizes the overall framerate improvement (with statistical significance of 5% and sample size ~10) from the whole series up to this patch for several benchmarks and hardware generations: | SKL | BDW | HSW SynMark2 OglShMapPcf | 24.63% ±0.45% | 4.01% ±0.70% | 10.31% ±0.38% GfxBench4 gl_manhattan31 | 5.93% ±0.35% | 3.92% ±0.31% | 6.62% ±0.22% GfxBench4 gl_4 | 2.52% ±0.44% | 1.23% ±0.10% | N/A Unigine Valley | 0.83% ±0.17% | 0.23% ±0.05% | 0.74% ±0.45% Note that there are two versions of the Manhattan demo shipped with GfxBench4, one of them is the original gl_manhattan demo which doesn't use UBOs, so this patch will have no effect on it, and another one is the gl_manhattan31 demo based on GL 4.3/GLES 3.1, which this patch benefits as shown above. I haven't observed any statistically significant regressions in the benchmarks I have at hand. Note that the comparatively huge improvement on SKL in the OglShMapPcf test case is due to the combined effect of this patch and the register pressure benefit on SKL+ of "i965/fs: Switch to the constant cache for uniform pull constants.", part of the same series. Going up to 8 oword blocks would improve performance of pull constants even more, but at the cost of some additional bandwidth and register pressure, so it would have to be done on-demand based on the number of constants actually used by the shader. v2: Fix for Gen4 and 5. 
v3: Non-trivial rebase. Rework to allow the visitor to specify arbitrary pull constant block sizes. Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_fs.cpp | 21 + src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +--- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 977fd8c..671b44b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2111,25 +2111,22 @@ fs_visitor::lower_constant_loads() if (pull_index == -1) continue; - const unsigned index = stage_prog_data->binding_table.pull_constants_start; - fs_reg dst; - - if (type_sz(inst->src[i].type) <= 4) -dst = vgrf(glsl_type::float_type); - else -dst = vgrf(glsl_type::double_type); - assert(inst->src[i].stride == 0); - const fs_builder ubld = ibld.exec_all().group(4, 0); - struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15); + const unsigned index = stage_prog_data->binding_table.pull_constants_start; + const unsigned block_sz = 64; /* Fetch one cacheline at a time. */ + const fs_builder ubld = ibld.exec_all().group(block_sz / 4, 0); + const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD); + const unsigned base = pull_index * 4; + ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, - dst, brw_imm_ud(index), offset); + dst, brw_imm_ud(index), brw_imm_ud(base & ~(block_sz - 1))); /* Rewrite the instruction to use the temporary VGRF. 
*/ inst->src[i].file = VGRF; inst->src[i].nr = dst.nr; - inst->src[i].offset = (pull_index & 3) * 4 + inst->src[i].offset % 4; + inst->src[i].offset = (base & (block_sz - 1)) + + inst->src[i].offset % 4; brw_mark_surface_used(prog_data, index); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 7df7423..9f2729a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -4059,21 +4059,23 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr * and we have to split it if necessary. */ const unsigned type_size = type_sz(dest.type); - const fs_builder ubld = bld.exec_all().group(4, 0); - const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F); + const unsigned block_sz = 64; /* Fetch one cacheline at a time. */ + const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0); + const fs_reg packed_consts = ubld.vgrf(BRW_REGI
Mesa (master): i965: Factor out oword block read and write message control calculation.
Module: Mesa Branch: master Commit: 7a6aadb76ff3f6ef73216b53b0dc5edda5bae978 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a6aadb76ff3f6ef73216b53b0dc5edda5bae978 Author: Francisco Jerez Date: Thu Dec 8 19:58:25 2016 -0800 i965: Factor out oword block read and write message control calculation. We'll need roughly the same logic in other places and it would be annoying to duplicate it. Instead factor it out into a function-like macro that takes the number of dwords per block (which will prove more convenient than taking the same value in owords or some other unit). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_defines.h | 6 ++ src/mesa/drivers/dri/i965/brw_eu_emit.c | 14 ++ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index b1b6248..1875380 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1669,6 +1669,12 @@ enum brw_message_target { #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2 #define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3 #define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4 +#define BRW_DATAPORT_OWORD_BLOCK_DWORDS(n) \ + ((n) == 4 ? BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW :\ +(n) == 8 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS : \ +(n) == 16 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS : \ +(n) == 32 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : \ +(abort(), ~0)) #define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 #define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS2 diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 341f543..6141bfb 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2056,11 +2056,6 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, mrf = retype(mrf, BRW_REGISTER_TYPE_UD); const unsigned mlen = 1 + num_regs; - const unsigned msg_control = - (num_regs == 1 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS : - num_regs == 2 ? 
BRW_DATAPORT_OWORD_BLOCK_4_OWORDS : - num_regs == 4 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : 0); - assert(msg_control); /* Set up the message header. This is g0, with g0.2 filled with * the offset. We don't want to leave our offset around in g0 or @@ -2134,7 +2129,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p, brw_set_dp_write_message(p, insn, brw_scratch_surface_idx(p), - msg_control, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), msg_type, target_cache, mlen, @@ -2181,11 +2176,6 @@ brw_oword_block_read_scratch(struct brw_codegen *p, dest = retype(dest, BRW_REGISTER_TYPE_UW); const unsigned rlen = num_regs; - const unsigned msg_control = - (num_regs == 1 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS : - num_regs == 2 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS : - num_regs == 4 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : 0); - assert(msg_control); const unsigned target_cache = (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE : devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE : @@ -,7 +2212,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p, brw_set_dp_read_message(p, insn, brw_scratch_surface_idx(p), - msg_control, + BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8), BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */ target_cache, 1, /* msg_length */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Restore support for LLVM <= 3.9.
Module: Mesa Branch: master Commit: 95ddb37708ca16ccbd0f607d17a82be2de0d07b6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=95ddb37708ca16ccbd0f607d17a82be2de0d07b6 Author: Vedran Miletić Date: Tue Nov 22 20:25:34 2016 +0100 clover: Restore support for LLVM <= 3.9. The commit 8e430ff8b060b4e8e922bae24b3c57837da6ea77 broke support for LLVM 3.9 and older versions in Clover. This patch restores it and refactors the support using Clover compatibility layer for LLVM. v2: merged #ifdef blocks v3: added support for LLVM 3.6-3.8 v4: add missing #ifdef around v5: simplify using templates and lambda Signed-off-by: Vedran Miletić Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98740 Tested-by[v4]: Pierre Moreau Tested-by: Vinson Lee Reviewed-by: Francisco Jerez Reviewed-by: Jan Vesely --- .../state_trackers/clover/llvm/codegen/bitcode.cpp | 9 +++-- src/gallium/state_trackers/clover/llvm/compat.hpp | 18 ++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp b/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp index 5dcc4f8..d09207b 100644 --- a/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp +++ b/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp @@ -32,6 +32,7 @@ /// #include "llvm/codegen.hpp" +#include "llvm/compat.hpp" #include "llvm/metadata.hpp" #include "core/error.hpp" #include "util/algorithm.hpp" @@ -99,13 +100,9 @@ clover::llvm::parse_module_library(const module &m, ::llvm::LLVMContext &ctx, auto mod = ::llvm::parseBitcodeFile(::llvm::MemoryBufferRef( as_string(m.secs[0].data), " "), ctx); - if (::llvm::Error err = mod.takeError()) { - std::string msg; - ::llvm::handleAllErrors(std::move(err), [&](::llvm::ErrorInfoBase &EIB) { - msg = EIB.message(); - fail(r_log, error(CL_INVALID_PROGRAM), msg.c_str()); + compat::handle_module_error(mod, [&](const std::string &s) { + fail(r_log, error(CL_INVALID_PROGRAM), s); }); - } return 
std::unique_ptr<::llvm::Module>(std::move(*mod)); } diff --git a/src/gallium/state_trackers/clover/llvm/compat.hpp b/src/gallium/state_trackers/clover/llvm/compat.hpp index a963cff..81592ce 100644 --- a/src/gallium/state_trackers/clover/llvm/compat.hpp +++ b/src/gallium/state_trackers/clover/llvm/compat.hpp @@ -39,6 +39,11 @@ #include #include #include +#if HAVE_LLVM >= 0x0400 +#include +#else +#include +#endif #if HAVE_LLVM >= 0x0307 #include @@ -158,6 +163,19 @@ namespace clover { #else const auto default_reloc_model = ::llvm::Reloc::Default; #endif + + template void + handle_module_error(M &mod, const F &f) { +#if HAVE_LLVM >= 0x0400 +if (::llvm::Error err = mod.takeError()) + ::llvm::handleAllErrors(std::move(err), [&](::llvm::ErrorInfoBase &eib) { + f(eib.message()); + }); +#else +if (!mod) + f(mod.getError().message()); +#endif + } } } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform().
Module: Mesa Branch: master Commit: f3d387867f74ae758b41168f23992671f7dce254 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3d387867f74ae758b41168f23992671f7dce254 Author: Francisco Jerez Date: Tue Nov 1 11:56:13 2016 -0700 nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform(). Assuming the hardware is set up to use a screen coordinate system flipped vertically with respect to the GL's window coordinate system, the SYSTEM_VALUE_SAMPLE_POS vector will also be flipped vertically with respect to the value expected by the GL, so we need to give it the same treatment as gl_FragCoord. Fixes the following CTS tests on i965: ES31-CTS.functional.shaders.multisample_interpolation.interpolate_at_offset.at_sample_position.default_framebuffer ES31-CTS.functional.shaders.sample_variables.sample_pos.correctness.default_framebuffer when run with any multisample configuration, e.g. rgbad24s8ms4. Cc: Reviewed-by: Kenneth Graunke Reviewed-by: Anuj Phogat --- src/compiler/nir/nir_lower_wpos_ytransform.c | 24 1 file changed, 24 insertions(+) diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c b/src/compiler/nir/nir_lower_wpos_ytransform.c index 173f058..f211c73 100644 --- a/src/compiler/nir/nir_lower_wpos_ytransform.c +++ b/src/compiler/nir/nir_lower_wpos_ytransform.c @@ -273,6 +273,26 @@ lower_interp_var_at_offset(lower_wpos_ytransform_state *state, } static void +lower_load_sample_pos(lower_wpos_ytransform_state *state, + nir_intrinsic_instr *intr) +{ + nir_builder *b = &state->b; + b->cursor = nir_after_instr(&intr->instr); + + nir_ssa_def *pos = &intr->dest.ssa; + nir_ssa_def *scale = nir_channel(b, get_transform(state), 0); + nir_ssa_def *neg_scale = nir_channel(b, get_transform(state), 2); + /* Either y or 1-y for scale equal to 1 or -1 respectively. 
*/ + nir_ssa_def *flipped_y = + nir_fadd(b, nir_fmax(b, neg_scale, nir_imm_float(b, 0.0)), +nir_fmul(b, nir_channel(b, pos, 1), scale)); + nir_ssa_def *flipped_pos = nir_vec2(b, nir_channel(b, pos, 0), flipped_y); + + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, nir_src_for_ssa(flipped_pos), + flipped_pos->parent_instr); +} + +static void lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block) { nir_foreach_instr_safe(instr, block) { @@ -287,6 +307,10 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block /* gl_FragCoord should not have array/struct deref's: */ assert(dvar->deref.child == NULL); lower_fragcoord(state, intr); +} else if (var->data.mode == nir_var_system_value && + var->data.location == SYSTEM_VALUE_SAMPLE_POS) { + assert(dvar->deref.child == NULL); + lower_load_sample_pos(state, intr); } } else if (intr->intrinsic == nir_intrinsic_interp_var_at_offset) { lower_interp_var_at_offset(state, intr); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Implement clGetExtensionFunctionAddressForPlatform.
Module: Mesa Branch: master Commit: cb0879985a40bcde1516e5341c5a3e5ea0968b87 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb0879985a40bcde1516e5341c5a3e5ea0968b87 Author: Serge Martin Date: Sun Sep 27 11:15:14 2015 +0200 clover: Implement clGetExtensionFunctionAddressForPlatform. Add clGetExtensionFunctionAddressForPlatform (CL 1.2). Reviewed-by: Francisco Jerez --- src/gallium/state_trackers/clover/api/dispatch.cpp | 2 +- src/gallium/state_trackers/clover/api/dispatch.hpp | 4 src/gallium/state_trackers/clover/api/platform.cpp | 16 3 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/clover/api/dispatch.cpp b/src/gallium/state_trackers/clover/api/dispatch.cpp index f10babe..8f4cfdc 100644 --- a/src/gallium/state_trackers/clover/api/dispatch.cpp +++ b/src/gallium/state_trackers/clover/api/dispatch.cpp @@ -131,7 +131,7 @@ namespace clover { clEnqueueMigrateMemObjects, clEnqueueMarkerWithWaitList, clEnqueueBarrierWithWaitList, - NULL, // clGetExtensionFunctionAddressForPlatform + GetExtensionFunctionAddressForPlatform, NULL, // clCreateFromGLTexture NULL, // clGetDeviceIDsFromD3D11KHR NULL, // clCreateFromD3D11BufferKHR diff --git a/src/gallium/state_trackers/clover/api/dispatch.hpp b/src/gallium/state_trackers/clover/api/dispatch.hpp index 7f62282..0ec1b51 100644 --- a/src/gallium/state_trackers/clover/api/dispatch.hpp +++ b/src/gallium/state_trackers/clover/api/dispatch.hpp @@ -777,6 +777,10 @@ namespace clover { void * GetExtensionFunctionAddress(const char *p_name); + void * + GetExtensionFunctionAddressForPlatform(cl_platform_id d_platform, + const char *p_name); + cl_int IcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id *rd_platforms, cl_uint *rnum_platforms); diff --git a/src/gallium/state_trackers/clover/api/platform.cpp b/src/gallium/state_trackers/clover/api/platform.cpp index b1b1fdf..ed86163 100644 --- a/src/gallium/state_trackers/clover/api/platform.cpp +++ 
b/src/gallium/state_trackers/clover/api/platform.cpp @@ -92,6 +92,16 @@ clover::GetPlatformInfo(cl_platform_id d_platform, cl_platform_info param, } void * +clover::GetExtensionFunctionAddressForPlatform(cl_platform_id d_platform, + const char *p_name) try { + obj(d_platform); + return GetExtensionFunctionAddress(p_name); + +} catch (error &e) { + return NULL; +} + +void * clover::GetExtensionFunctionAddress(const char *p_name) { std::string name { p_name }; @@ -118,6 +128,12 @@ clGetExtensionFunctionAddress(const char *p_name) { return GetExtensionFunctionAddress(p_name); } +CLOVER_ICD_API void * +clGetExtensionFunctionAddressForPlatform(cl_platform_id d_platform, + const char *p_name) { + return GetExtensionFunctionAddressForPlatform(d_platform, p_name); +} + CLOVER_ICD_API cl_int clIcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id *rd_platforms, cl_uint *rnum_platforms) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Introduce CLOVER_EXTRA_*_OPTIONS environment variables
Module: Mesa Branch: master Commit: 2fba72046da09dd28f54df02794b358773899d13 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2fba72046da09dd28f54df02794b358773899d13 Author: Vedran Miletić Date: Wed Sep 28 16:18:24 2016 +0200 clover: Introduce CLOVER_EXTRA_*_OPTIONS environment variables The options specified in the CLOVER_EXTRA_BUILD_OPTIONS shell variable are appended to the options specified by the OpenCL program in the clBuildProgram function call, if any. Analogously, the options specified in the CLOVER_EXTRA_COMPILE_OPTIONS and CLOVER_EXTRA_LINK_OPTIONS variables are appended to the options specified in clCompileProgram and clLinkProgram function calls, respectively. v2: * rename to CLOVER_EXTRA_COMPILER_OPTIONS * use debug_get_option * append to linker options as well v3: code cleanups v4: separate CLOVER_EXTRA_LINKER_OPTIONS options v5: * fix documentation typo * use CLOVER_EXTRA_COMPILER_OPTIONS in link stage v6: * separate in CLOVER_EXTRA_{BUILD,COMPILE,LINK}_OPTIONS * append options in cl{Build,Compile,Link}Program Signed-off-by: Vedran Miletić Reviewed-by[v1]: Edward O'Callaghan v7 [Francisco Jerez]: Slight simplification. Reviewed-by: Francisco Jerez --- docs/envvars.html | 15 +++ src/gallium/state_trackers/clover/api/program.cpp | 10 +++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/docs/envvars.html b/docs/envvars.html index cf57ca5..af1a30c 100644 --- a/docs/envvars.html +++ b/docs/envvars.html @@ -235,6 +235,21 @@ Setting to "tgsi", for example, will print all the TGSI shaders. See src/mesa/state_tracker/st_debug.c for other options. +Clover state tracker environment variables + + +CLOVER_EXTRA_BUILD_OPTIONS - allows specifying additional compiler and linker +options. Specified options are appended after the options set by the OpenCL +program in clBuildProgram. +CLOVER_EXTRA_COMPILE_OPTIONS - allows specifying additional compiler +options. 
Specified options are appended after the options set by the OpenCL +program in clCompileProgram. +CLOVER_EXTRA_LINK_OPTIONS - allows specifying additional linker +options. Specified options are appended after the options set by the OpenCL +program in clLinkProgram. + + + Softpipe driver environment variables SOFTPIPE_DUMP_FS - if set, the softpipe driver will print fragment shaders diff --git a/src/gallium/state_trackers/clover/api/program.cpp b/src/gallium/state_trackers/clover/api/program.cpp index c3f9cb9..ba4ce7a 100644 --- a/src/gallium/state_trackers/clover/api/program.cpp +++ b/src/gallium/state_trackers/clover/api/program.cpp @@ -22,6 +22,7 @@ #include "api/util.hpp" #include "core/program.hpp" +#include "util/u_debug.h" #include @@ -177,7 +178,8 @@ clBuildProgram(cl_program d_prog, cl_uint num_devs, auto &prog = obj(d_prog); auto devs = (d_devs ? objs(d_devs, num_devs) : ref_vector(prog.context().devices())); - auto opts = (p_opts ? p_opts : ""); + const auto opts = std::string(p_opts ? p_opts : "") + " " + + debug_get_option("CLOVER_EXTRA_BUILD_OPTIONS", ""); validate_build_common(prog, num_devs, d_devs, pfn_notify, user_data); @@ -202,7 +204,8 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs, auto &prog = obj(d_prog); auto devs = (d_devs ? objs(d_devs, num_devs) : ref_vector(prog.context().devices())); - auto opts = (p_opts ? p_opts : ""); + const auto opts = std::string(p_opts ? p_opts : "") + " " + + debug_get_option("CLOVER_EXTRA_COMPILE_OPTIONS", ""); header_map headers; validate_build_common(prog, num_devs, d_devs, pfn_notify, user_data); @@ -271,7 +274,8 @@ clLinkProgram(cl_context d_ctx, cl_uint num_devs, const cl_device_id *d_devs, void (*pfn_notify) (cl_program, void *), void *user_data, cl_int *r_errcode) try { auto &ctx = obj(d_ctx); - auto opts = (p_opts ? p_opts : ""); + const auto opts = std::string(p_opts ? 
p_opts : "") + " " + + debug_get_option("CLOVER_EXTRA_LINK_OPTIONS", ""); auto progs = objs(d_progs, num_progs); auto prog = create(ctx); auto devs = validate_link_devices(progs, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): clover: Pass unquoted compiler arguments to Clang
Module: Mesa Branch: master Commit: e3272865c216933168e6c08766d266a33d0e1497 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e3272865c216933168e6c08766d266a33d0e1497 Author: Vedran Miletić Date: Wed Sep 28 17:11:43 2016 +0200 clover: Pass unquoted compiler arguments to Clang OpenCL apps can quote arguments they pass to the OpenCL compiler, most commonly include paths containing spaces. If the Clang OpenCL compiler was called via a shell, the shell would split the arguments with respect to quotes and then remove quotes before passing the arguments to the compiler. Since we call Clang as a library, we have to split the argument with respect to quotes and then remove quotes before passing the arguments. v2: move to tokenize(), remove throwing of CL_INVALID_COMPILER_OPTIONS v3: simplify parsing logic, use more C++11 v4: restore error throwing, clarify a comment Signed-off-by: Vedran Miletić Reviewed-by: Francisco Jerez --- src/gallium/state_trackers/clover/llvm/util.hpp | 40 ++--- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/src/gallium/state_trackers/clover/llvm/util.hpp b/src/gallium/state_trackers/clover/llvm/util.hpp index 8db6f20..222becd 100644 --- a/src/gallium/state_trackers/clover/llvm/util.hpp +++ b/src/gallium/state_trackers/clover/llvm/util.hpp @@ -24,6 +24,7 @@ #ifndef CLOVER_LLVM_UTIL_HPP #define CLOVER_LLVM_UTIL_HPP +#include "core/error.hpp" #include "util/u_debug.h" #include @@ -42,11 +43,42 @@ namespace clover { inline std::vector tokenize(const std::string &s) { std::vector ss; - std::istringstream iss(s); - std::string t; + std::ostringstream oss; - while (getline(iss, t, ' ')) -ss.push_back(t); + // OpenCL programs can pass a quoted argument, most frequently the + // include path. This is useful so that path containing spaces is + // treated as a single argument instead of being split by the spaces. + // Additionally, the argument should also be unquoted before being + // passed to the compiler. 
We avoid using std::string::replace here to + // remove quotes, as the single and double quote characters can be a + // part of the file name. + bool escape_next = false; + bool in_quote_double = false; + bool in_quote_single = false; + + for (auto c : s) { +if (escape_next) { + oss.put(c); + escape_next = false; +} else if (c == '\\') { + escape_next = true; +} else if (c == '"' && !in_quote_single) { + in_quote_double = !in_quote_double; +} else if (c == '\'' && !in_quote_double) { + in_quote_single = !in_quote_single; +} else if (c != ' ' || in_quote_single || in_quote_double) { + oss.put(c); +} else if (oss.tellp() > 0) { + ss.emplace_back(oss.str()); + oss.str(""); +} + } + + if (oss.tellp() > 0) +ss.emplace_back(oss.str()); + + if (in_quote_double || in_quote_single) +throw invalid_build_options_error(); return ss; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glapi: Move PrimitiveBoundingBox and BlendBarrier definitions into ES3.2 category.
Module: Mesa Branch: master Commit: 15a084a03998c5c86206137fdaf6f43b5f98485a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=15a084a03998c5c86206137fdaf6f43b5f98485a Author: Francisco Jerez Date: Tue Oct 18 14:53:20 2016 -0700 glapi: Move PrimitiveBoundingBox and BlendBarrier definitions into ES3.2 category. These two GLES 3.2 entry points were being defined in the category of the ARB_ES3_2_compatibility and KHR_blend_equation_advanced extensions respectively instead of in the ES3.2 category. Defining them in the ES3.2 category makes sure that the gl_procs.py generator emits declarations in the glprocs.h header file for the unsuffixed GLES-only entry points that PrimitiveBoundingBoxARB and BlendBarrierKHR respectively alias. This should avoid a compilation failure during scons builds in combination with "mapi: export all GLES 3.2 functions in libGLESv2.so". Cc: mesa-sta...@lists.freedesktop.org Reviewed-by: Dylan Baker --- src/mapi/glapi/gen/gl_API.xml | 30 +- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml index 5998ccf..00c9bb7 100644 --- a/src/mapi/glapi/gen/gl_API.xml +++ b/src/mapi/glapi/gen/gl_API.xml @@ -8296,6 +8296,23 @@ http://www.w3.org/2001/XInclude"/> + + + + + + + + + + + + + + + + @@ -8316,7 +8333,6 @@ - @@ -8332,18 +8348,6 @@ - - - - - - - - - - - ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): Revert "Revert "mapi: export all GLES 3.2 functions in libGLESv2.so""
Module: Mesa Branch: master Commit: 811eb7f178b8b85ac299121ac09a3180b9b55da2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=811eb7f178b8b85ac299121ac09a3180b9b55da2 Author: Francisco Jerez Date: Tue Oct 18 20:44:10 2016 -0700 Revert "Revert "mapi: export all GLES 3.2 functions in libGLESv2.so"" This reverts commit 85e9bbc14d93fa7166c9ae075ee7ae29a8313e3f. The previous commit should help with the scons build failure caused by the original commit. Cc: mesa-sta...@lists.freedesktop.org Reviewed-by: Dylan Baker --- src/mapi/glapi/gen/static_data.py | 12 1 file changed, 12 insertions(+) diff --git a/src/mapi/glapi/gen/static_data.py b/src/mapi/glapi/gen/static_data.py index 2f403e9..25e78bf 100644 --- a/src/mapi/glapi/gen/static_data.py +++ b/src/mapi/glapi/gen/static_data.py @@ -484,17 +484,22 @@ functions = [ "BindVertexBuffer", "BindVertexBuffers", "Bitmap", +"BlendBarrier", "BlendColor", "BlendColorEXT", "BlendEquation", "BlendEquationEXT", +"BlendEquationi", "BlendEquationiARB", "BlendEquationSeparate", +"BlendEquationSeparatei", "BlendEquationSeparateiARB", "BlendFunc", +"BlendFunci", "BlendFunciARB", "BlendFuncSeparate", "BlendFuncSeparateEXT", +"BlendFuncSeparatei", "BlendFuncSeparateiARB", "BlitFramebuffer", "BufferData", @@ -825,6 +830,7 @@ functions = [ "GetFramebufferAttachmentParameteriv", "GetFramebufferAttachmentParameterivEXT", "GetFramebufferParameteriv", +"GetGraphicsResetStatus", "GetGraphicsResetStatusARB", "GetHandleARB", "GetHistogram", @@ -864,8 +870,11 @@ functions = [ "GetnSeparableFilterARB", "GetnTexImageARB", "GetnUniformdvARB", +"GetnUniformfv", "GetnUniformfvARB", +"GetnUniformiv", "GetnUniformivARB", +"GetnUniformuiv", "GetnUniformuivARB", "GetObjectLabel", "GetObjectParameterfvARB", @@ -1160,6 +1169,7 @@ functions = [ "Orthof", "Orthox", "PassThrough", +"PatchParameteri", "PauseTransformFeedback", "PixelMapfv", "PixelMapuiv", @@ -1191,6 +1201,7 @@ functions = [ "PopDebugGroup", "PopMatrix", "PopName", +"PrimitiveBoundingBox", 
"PrimitiveRestartIndex", "PrimitiveRestartIndexNV", "PrimitiveRestartNV", @@ -1273,6 +1284,7 @@ functions = [ "RasterPos4s", "RasterPos4sv", "ReadBuffer", +"ReadnPixels", "ReadnPixelsARB", "ReadPixels", "Rectd", ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/reg: Make brw_sr0_reg take a subnr and return a vec1 reg
Module: Mesa Branch: master Commit: a2392cee48076f1fe6feab7d49214990cfa6a551 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2392cee48076f1fe6feab7d49214990cfa6a551 Author: Jason Ekstrand Date: Wed Sep 14 15:09:32 2016 -0700 i965/reg: Make brw_sr0_reg take a subnr and return a vec1 reg The state register sr0 is really a collection of dwords not a SIMD8 anything. It's much more convenient for brw_sr0_reg to return the particular dword you're looking for rather than a giant blob you have to massage into what you want. Signed-off-by: Jason Ekstrand [ Francisco Jerez: Trivial simplification of brw_ud1_reg(). ] Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_reg.h | 20 2 files changed, 9 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index d026bbd..5c44007 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -6185,7 +6185,7 @@ fs_visitor::run_cs() if (devinfo->is_haswell && prog_data->total_shared > 0) { /* Move SLM index from g0.0[27:24] to sr0.1[11:8] */ const fs_builder abld = bld.exec_all().group(1, 0); - abld.MOV(retype(suboffset(brw_sr0_reg(), 1), BRW_REGISTER_TYPE_UW), + abld.MOV(retype(brw_sr0_reg(1), BRW_REGISTER_TYPE_UW), suboffset(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW), 1)); } diff --git a/src/mesa/drivers/dri/i965/brw_reg.h b/src/mesa/drivers/dri/i965/brw_reg.h index d6f22ed..b71c63b 100644 --- a/src/mesa/drivers/dri/i965/brw_reg.h +++ b/src/mesa/drivers/dri/i965/brw_reg.h @@ -567,6 +567,12 @@ brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) } static inline struct brw_reg +brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) +{ + return retype(brw_vec1_reg(file, nr, subnr), BRW_REGISTER_TYPE_UD); +} + +static inline struct brw_reg brw_imm_reg(enum brw_reg_type type) { return brw_reg(BRW_IMMEDIATE_VALUE, @@ -789,19 +795,9 @@ 
brw_notification_reg(void) } static inline struct brw_reg -brw_sr0_reg(void) +brw_sr0_reg(unsigned subnr) { - return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, - BRW_ARF_STATE, - 0, - 0, - 0, - BRW_REGISTER_TYPE_UD, - BRW_VERTICAL_STRIDE_8, - BRW_WIDTH_8, - BRW_HORIZONTAL_STRIDE_1, - BRW_SWIZZLE_XYZW, - WRITEMASK_XYZW); + return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_STATE, subnr); } static inline struct brw_reg ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/ir: Skip eliminate_find_live_channel() for stages with sparse thread dispatch.
Module: Mesa Branch: master Commit: f57f526fc5cfaedf26b2becf8f1899d5de0d0461 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f57f526fc5cfaedf26b2becf8f1899d5de0d0461 Author: Francisco Jerez Date: Thu Sep 15 17:20:23 2016 -0700 i965/ir: Skip eliminate_find_live_channel() for stages with sparse thread dispatch. The eliminate_find_live_channel optimization eliminates FIND_LIVE_CHANNEL instructions in cases where control flow is known to be uniform, and replaces them with 'MOV 0', which in turn unblocks subsequent elimination of the BROADCAST instruction frequently used on the result of FIND_LIVE_CHANNEL. This is however not correct in per-sample fragment shader dispatch because the PSD can dispatch a fully unlit sample under certain conditions. Disable the optimization in that case. Reviewed-by: Jason Ekstrand v2: Add devinfo argument to brw_stage_has_packed_dispatch() to implement hardware generation check. --- src/mesa/drivers/dri/i965/brw_compiler.h | 49 src/mesa/drivers/dri/i965/brw_fs.cpp | 8 ++ src/mesa/drivers/dri/i965/brw_vec4.cpp | 8 ++ 3 files changed, 65 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index 84d3dde..445c166 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -868,6 +868,55 @@ encode_slm_size(unsigned gen, uint32_t bytes) return slm_size; } +/** + * Return true if the given shader stage is dispatched contiguously by the + * relevant fixed function starting from channel 0 of the SIMD thread, which + * implies that the dispatch mask of a thread can be assumed to have the form + * '2^n - 1' for some n. 
+ */ +static inline bool +brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo, + gl_shader_stage stage, + const struct brw_stage_prog_data *prog_data) +{ + /* The code below makes assumptions about the hardware's thread dispatch +* behavior that could be proven wrong in future generations -- Make sure +* to do a full test run with brw_fs_test_dispatch_packing() hooked up to +* the NIR front-end before changing this assertion. +*/ + assert(devinfo->gen <= 9); + + switch (stage) { + case MESA_SHADER_FRAGMENT: { + /* The PSD discards subspans coming in with no lit samples, which in the + * per-pixel shading case implies that each subspan will either be fully + * lit (due to the VMask being used to allow derivative computations), + * or not dispatched at all. In per-sample dispatch mode individual + * samples from the same subspan have a fixed relative location within + * the SIMD thread, so dispatch of unlit samples cannot be avoided in + * general and we should return false. + */ + const struct brw_wm_prog_data *wm_prog_data = + (const struct brw_wm_prog_data *)prog_data; + return !wm_prog_data->persample_dispatch; + } + case MESA_SHADER_COMPUTE: + /* Compute shaders will be spawned with either a fully enabled dispatch + * mask or with whatever bottom/right execution mask was given to the + * GPGPU walker command to be used along the workgroup edges -- In both + * cases the dispatch mask is required to be tightly packed for our + * invocation index calculations to work. + */ + return true; + default: + /* Most remaining fixed functions are limited to use a packed dispatch + * mask due to the hardware representation of the dispatch mask as a + * single counter representing the number of enabled channels. 
+ */ + return true; + } +} + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5c44007..b60ec71 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -2835,6 +2835,14 @@ fs_visitor::eliminate_find_live_channel() bool progress = false; unsigned depth = 0; + if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) { + /* The optimization below assumes that channel zero is live on thread + * dispatch, which may not be the case if the fixed function dispatches + * threads sparsely. + */ + return false; + } + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { switch (inst->opcode) { case BRW_OPCODE_IF: diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 58c8a8a..6aa9102 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -1291,6 +1291,14 @@ vec4_visitor::eliminate_find_live_channel() bool progress = false; unsigned depth = 0; + if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data))
Mesa (master): i965/ir: Test thread dispatch packing assumptions.
Module: Mesa Branch: master Commit: e5311ba1acba738346a18ef661b0f8bbc33bba8e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e5311ba1acba738346a18ef661b0f8bbc33bba8e Author: Francisco Jerez Date: Thu Sep 15 21:43:18 2016 -0700 i965/ir: Test thread dispatch packing assumptions. Not [originally] intended for upstream. Should cause a GPU hang if some thread is executed with a non-contiguous dispatch mask breaking assumptions of brw_stage_has_packed_dispatch(). Doesn't cause any CTS, DEQP or Piglit regressions, while replacing brw_stage_has_packed_dispatch() with a dummy implementation that unconditionally returns true on top of this patch causes multiple GPU hangs. v2: Refactor into a separate function instead of emitting the test code directly from emit_nir_code(), drop VEC4 test and clean up slightly for upstream. (Jason) Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs.cpp | 30 ++ 1 file changed, 30 insertions(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index b60ec71..1483f41 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -6786,3 +6786,33 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data, return g.get_assembly(final_assembly_size); } + +/** + * Test the dispatch mask packing assumptions of + * brw_stage_has_packed_dispatch(). Call this from e.g. the top of + * fs_visitor::emit_nir_code() to cause a GPU hang if any shader invocation is + * executed with an unexpected dispatch mask. + */ +static UNUSED void +brw_fs_test_dispatch_packing(const fs_builder &bld) +{ + const gl_shader_stage stage = bld.shader->stage; + + if (brw_stage_has_packed_dispatch(bld.shader->devinfo, stage, + bld.shader->stage_prog_data)) { + const fs_builder ubld = bld.exec_all().group(1, 0); + const fs_reg tmp = component(bld.vgrf(BRW_REGISTER_TYPE_UD), 0); + const fs_reg mask = (stage == MESA_SHADER_FRAGMENT ? 
brw_vmask_reg() : + brw_dmask_reg()); + + ubld.ADD(tmp, mask, brw_imm_ud(1)); + ubld.AND(tmp, mask, tmp); + + /* This will loop forever if the dispatch mask doesn't have the expected + * form '2^n-1', in which case tmp will be non-zero. + */ + bld.emit(BRW_OPCODE_DO); + bld.CMP(bld.null_reg_ud(), tmp, brw_imm_ud(0), BRW_CONDITIONAL_NZ); + set_predicate(BRW_PREDICATE_NORMAL, bld.emit(BRW_OPCODE_WHILE)); + } +} ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/ir: Pass identity mask to brw_find_live_channel() in the packed dispatch case.
Module: Mesa Branch: master Commit: c05a4f11a03dd5614a9462b5cb28e8b630bfddc0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c05a4f11a03dd5614a9462b5cb28e8b630bfddc0 Author: Francisco Jerez Date: Thu Sep 15 17:24:10 2016 -0700 i965/ir: Pass identity mask to brw_find_live_channel() in the packed dispatch case. This avoids emitting a few extra instructions required to take the dispatch mask into account when it's known to be tightly packed. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 - src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 9 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index c510f42..842e125 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -2045,7 +2045,10 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) case SHADER_OPCODE_FIND_LIVE_CHANNEL: { const struct brw_reg mask = -stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() : brw_dmask_reg(); +brw_stage_has_packed_dispatch(devinfo, stage, + prog_data) ? brw_imm_ud(~0u) : +stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() : +brw_dmask_reg(); brw_find_live_channel(p, dst, mask); break; } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index f9e6d1c..163cf9d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -1862,9 +1862,14 @@ generate_code(struct brw_codegen *p, brw_memory_fence(p, dst); break; - case SHADER_OPCODE_FIND_LIVE_CHANNEL: - brw_find_live_channel(p, dst, brw_dmask_reg()); + case SHADER_OPCODE_FIND_LIVE_CHANNEL: { + const struct brw_reg mask = +brw_stage_has_packed_dispatch(devinfo, nir->stage, + &prog_data->base) ? 
brw_imm_ud(~0u) : +brw_dmask_reg(); + brw_find_live_channel(p, dst, mask); break; + } case SHADER_OPCODE_BROADCAST: assert(inst->force_writemask_all); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/fs: Take Dispatch/Vector mask into account in FIND_LIVE_CHANNEL
Module: Mesa Branch: master Commit: 8a468d186e6fc27c26dd12ba989192e7596f667a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8a468d186e6fc27c26dd12ba989192e7596f667a Author: Jason Ekstrand Date: Wed Sep 14 15:09:33 2016 -0700 i965/fs: Take Dispatch/Vector mask into account in FIND_LIVE_CHANNEL On at least Sky Lake, ce0 does not contain the full story as far as enabled channels goes. It is possible to have completely disabled channels where the corresponding bits in ce0 are 1. In order to get the correct execution mask, you have to mask off those channels which were disabled from the beginning by taking the AND of ce0 with either sr0.2 or sr0.3 depending on the shader stage. Failure to do so can result in FIND_LIVE_CHANNEL returning a completely dead channel. Signed-off-by: Jason Ekstrand Cc: Francisco Jerez [ Francisco Jerez: Fix a couple of typos, add mask register type assertion, clarify reason why ce0 can have bits set for disabled channels, clarify that this may only be a problem when thread dispatch doesn't pack channels tightly in the SIMD thread. Apply same treatment to Align16 path. 
] Reviewed-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_eu.h | 3 +- src/mesa/drivers/dri/i965/brw_eu_emit.c | 39 ++-- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 7 +++-- src/mesa/drivers/dri/i965/brw_reg.h | 12 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 +- 5 files changed, 50 insertions(+), 13 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index 3e52764..737a335 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -488,7 +488,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p, void brw_find_live_channel(struct brw_codegen *p, - struct brw_reg dst); + struct brw_reg dst, + struct brw_reg mask); void brw_broadcast(struct brw_codegen *p, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 3b12030..c98867a 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -3361,7 +3361,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p, } void -brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) +brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst, + struct brw_reg mask) { const struct gen_device_info *devinfo = p->devinfo; const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current); @@ -3369,6 +3370,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) brw_inst *inst; assert(devinfo->gen >= 7); + assert(mask.type == BRW_REGISTER_TYPE_UD); brw_push_insn_state(p); @@ -3377,18 +3379,32 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst) if (devinfo->gen >= 8) { /* Getting the first active channel index is easy on Gen8: Just find - * the first bit set in the mask register. The same register exists - * on HSW already but it reads back as all ones when the current + * the first bit set in the execution mask. 
The register exists on + * HSW already but it reads back as all ones when the current * instruction has execution masking disabled, so it's kind of * useless. */ - inst = brw_FBL(p, vec1(dst), -retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD)); + struct brw_reg exec_mask = +retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD); + + if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) { +/* Unfortunately, ce0 does not take into account the thread + * dispatch mask, which may be a problem in cases where it's not + * tightly packed (i.e. it doesn't have the form '2^n - 1' for + * some n). Combine ce0 with the given dispatch (or vector) mask + * to mask off those channels which were never dispatched by the + * hardware. + */ +brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8)); +brw_AND(p, vec1(dst), exec_mask, vec1(dst)); +exec_mask = vec1(dst); + } /* Quarter control has the effect of magically shifting the value of - * this register so you'll get the first active channel relative to - * the specified quarter control as result. + * ce0 so you'll get the first active channel relative to the + * specified quarter control as result. */ + inst = brw_FBL(p, vec1(dst), exec_mask); } else { const struct brw_reg flag = brw_flag_reg(1, 0); @@ -3422,9 +3438,14 @@ brw_find_liv
Mesa (master): 57 new commits
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6d861968ca2f3e196ce4bcad4f2b91b5a63ce0f1 Author: Francisco Jerez Date: Thu Sep 1 22:37:57 2016 -0700 i965/vec4: Assert that pull constant load offsets are 16B-aligned. Non-16B-aligned pull constant loads are unlikely to be particularly useful given that you can get roughly the same effect by using swizzles on the result. Reviewed-by: Iago Toral Quiroga URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5ca35c63673dad28854c00ce34ec6f085ba4ec5e Author: Francisco Jerez Date: Thu Sep 1 22:39:00 2016 -0700 i965/vec4: Assert that ATTR regions are register-aligned. It might be useful to actually handle this once copy propagation becomes smarter about register-misaligned offsets. Reviewed-by: Iago Toral Quiroga URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f33a8f8fcfb6ce3baa8813b32d5eff20506f3df1 Author: Francisco Jerez Date: Thu Sep 1 22:36:15 2016 -0700 i965/vec4: Don't spill non-GRF-aligned register regions. A better fix would be to do something along the lines of the FS back-end spilling code and emit a scratch read before any instruction that overwrites the register to spill partially due to a non-zero sub-register offset. In the meantime mark registers used with a non-zero sub-register offset as no-spill to prevent the spilling code from miscompiling the program. Reviewed-by: Iago Toral Quiroga URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8531f943d9aac13489a02e5a5b4bfa381c465a44 Author: Francisco Jerez Date: Thu Sep 1 22:31:43 2016 -0700 i965/vec4: Fix copy propagation for non-register-aligned regions. This prevents it from trying to propagate a copy through a register-misaligned region. MOV instructions with a misaligned destination shouldn't be treated as a direct GRF copy, because they only define the destination GRFs partially. 
Also fix the interference check implemented with is_channel_updated() to consider overlapping regions with different register offset to interfere, since the writemask check implemented in the function is only valid under the assumption that the source and destination regions are aligned component by component. Reviewed-by: Iago Toral Quiroga URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0e657b7b55bc7c83c8eb5258cd9522b0e5e581b7 Author: Francisco Jerez Date: Thu Sep 1 22:26:59 2016 -0700 i965/vec4: Compare full register offsets in cmod propagation. Cmod propagation would misoptimize the program if the destination offset of the generating instruction wasn't exactly the same as the source region offset of the copy instruction. In preparation for adding support for sub-GRF offsets to the VEC4 IR. Reviewed-by: Iago Toral Quiroga URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8bed1adfc144d9ae8d55ccb9b277942da8a78064 Author: Francisco Jerez Date: Thu Sep 1 22:12:04 2016 -0700 i965/vec4: Assign correct destination offset to rewritten instruction in register coalesce. Because the pass already checks that the destination offset of each 'scan_inst' that needs to be rewritten matches 'inst->src[0].offset' exactly, the final offset of the rewritten instruction is just the original destination offset of the copy. This is in preparation for adding support for sub-GRF offsets to the VEC4 IR. Reviewed-by: Iago Toral Quiroga URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3a74e437fdec02c28749c94bc1bcf21c3c4b48d7 Author: Francisco Jerez Date: Thu Sep 1 22:08:29 2016 -0700 i965/vec4: Don't coalesce registers with overlapping writes not matching the MOV source. In preparation for adding support for sub-GRF offsets to the VEC4 IR. 
Reviewed-by: Iago Toral Quiroga URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1bb5074474445ea9f54d0f52383f99ac0fa6128f Author: Francisco Jerez Date: Thu Sep 1 22:04:02 2016 -0700 i965/vec4: Compare full register offsets in opt_register_coalesce nop move check. In preparation for adding support for sub-GRF offsets to the VEC4 IR. Reviewed-by: Iago Toral Quiroga URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3be0d6d040753c62b25077fb6b85ad1f0808b258 Author: Francisco Jerez Date: Thu Sep 1 22:02:00 2016 -0700 i965/vec4: Check that the write offsets match when setting dependency controls. For simplicity just assume that two writes to the same GRF with different sub-GRF offsets will potentially interfere and break the dependency control chain. This is in preparation for adding sub-GRF offset support to the VEC4 IR. Reviewed-by: Iago Toral Quiroga URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b52fefc4d55a4627bf0d59c78ac531603f
Mesa (master): st/clover: Define __OPENCL_VERSION__ on the device side
Module: Mesa Branch: master Commit: cfa914a1b4e20e7ef416171f5212f21e8224befc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cfa914a1b4e20e7ef416171f5212f21e8224befc Author: Niels Ole Salscheider Date: Sun Aug 28 16:42:34 2016 +0200 st/clover: Define __OPENCL_VERSION__ on the device side This is required by the OpenCL standard. Signed-off-by: Niels Ole Salscheider Reviewed-by: Edward O'Callaghan Reviewed-by: Vedran Miletić --- src/gallium/state_trackers/clover/llvm/invocation.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp index 5490d72..b5e8b52 100644 --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp @@ -153,6 +153,9 @@ namespace { // Add libclc include c.getPreprocessorOpts().Includes.push_back("clc/clc.h"); + // Add definition for the OpenCL version + c.getPreprocessorOpts().addMacroDef("__OPENCL_VERSION__=110"); + // clc.h requires that this macro be defined: c.getPreprocessorOpts().addMacroDef("cl_clang_storage_class_specifiers"); c.getPreprocessorOpts().addRemappedFile( ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Fix gl_program::OutputsWritten computation for dual-source blending.
Module: Mesa Branch: master Commit: fd04d048aec8f850d77f6908c0d13f88195df0da URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fd04d048aec8f850d77f6908c0d13f88195df0da Author: Francisco Jerez Date: Sat Aug 20 14:55:19 2016 -0700 glsl: Fix gl_program::OutputsWritten computation for dual-source blending. In the fragment shader OutputsWritten is a bitset of FRAG_RESULT_* enumerants, which represent the location of each color output written by the shader. The secondary and primary color outputs of a given render target using dual-source blending have the same location, so the 'idx' computation below will give the wrong bit as result if the 'var->data.index' term is non-zero -- E.g. if the shader writes the primary and secondary colors of the FRAG_RESULT_COLOR output, ir_set_program_inouts will think that the shader writes both FRAG_RESULT_COLOR and FRAG_RESULT_SAMPLE_MASK, which is just bogus. That would cause the brw_wm_prog_key::nr_color_regions computation done in the i965 driver during fragment shader precompilation to be wrong, which currently leads to unnecessary recompilation of shaders that use dual-source blending, and triggers an assertion failure in fs_visitor::emit_fb_writes() on my i965-fb-fetch branch. 
Reviewed-by: Ilia Mirkin --- src/compiler/glsl/ir_set_program_inouts.cpp | 2 +- src/mesa/state_tracker/st_program.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp b/src/compiler/glsl/ir_set_program_inouts.cpp index 06d9973..4f6c886 100644 --- a/src/compiler/glsl/ir_set_program_inouts.cpp +++ b/src/compiler/glsl/ir_set_program_inouts.cpp @@ -96,7 +96,7 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len, for (int i = 0; i < len; i++) { assert(var->data.location != -1); - int idx = var->data.location + var->data.index + offset + i; + int idx = var->data.location + offset + i; bool is_patch_generic = var->data.patch && idx != VARYING_SLOT_TESS_LEVEL_INNER && idx != VARYING_SLOT_TESS_LEVEL_OUTER; diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 429d0c9..2a4edfa 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -586,9 +586,7 @@ bool st_translate_fragment_program(struct st_context *st, struct st_fragment_program *stfp) { - GLuint outputMapping[2 * FRAG_RESULT_MAX] = - { 0 /* XXX - Avoid temporary regression due to bogus OutputsWritten - * bitset. */ }; + GLuint outputMapping[2 * FRAG_RESULT_MAX]; GLuint inputMapping[VARYING_SLOT_MAX]; GLuint inputSlotToAttr[VARYING_SLOT_MAX]; GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Fix incorrect hard-coded location of the gl_SecondaryFragColorEXT built-in.
Module: Mesa Branch: master Commit: 965934f38ab36b77672b70693b5b7b9c983f852b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=965934f38ab36b77672b70693b5b7b9c983f852b Author: Francisco Jerez Date: Thu Jun 23 00:05:37 2016 -0700 glsl: Fix incorrect hard-coded location of the gl_SecondaryFragColorEXT built-in. gl_SecondaryFragColorEXT should have the same location as gl_FragColor for the secondary fragment color to be replicated to all fragment outputs. The incorrect location of gl_SecondaryFragColorEXT would cause the linker to mark both FRAG_RESULT_COLOR and FRAG_RESULT_DATA0 as being written to, which isn't allowed by the spec and would ultimately lead to an assertion failure in fs_visitor::emit_fb_writes() on my i965-fb-fetch branch. This should also fix the code below for multiple dual-source-blended render targets, which no driver currently supports but we have plans to enable eventually in the i965 driver (the comment saying that no hardware will ever support it seems rather hilarious). Reviewed-by: Ilia Mirkin --- src/compiler/glsl/builtin_variables.cpp | 9 ++--- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp index d379de6..f4ddeb9 100644 --- a/src/compiler/glsl/builtin_variables.cpp +++ b/src/compiler/glsl/builtin_variables.cpp @@ -1147,13 +1147,8 @@ builtin_variable_generator::generate_fs_special_vars() } if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) { - /* We make an assumption here that there will only ever be one dual-source draw buffer - * In case this assumption is ever proven to be false, make sure to assert here - * since we don't handle this case. - * In practice, this issue will never arise since no hardware will support it. 
- */ - assert(state->Const.MaxDualSourceDrawBuffers <= 1); - add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, "gl_SecondaryFragColorEXT"); + add_index_output(FRAG_RESULT_COLOR, 1, vec4_t, + "gl_SecondaryFragColorEXT"); add_index_output(FRAG_RESULT_DATA0, 1, array(vec4_t, state->Const.MaxDualSourceDrawBuffers), "gl_SecondaryFragDataEXT"); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Calculate bitset of secondary outputs written in ir_set_program_inouts.
Module: Mesa Branch: master Commit: cb4b38af41952c2e5ee77253592f0d0833aefd28 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb4b38af41952c2e5ee77253592f0d0833aefd28 Author: Francisco Jerez Date: Tue Aug 23 11:15:57 2016 -0700 glsl: Calculate bitset of secondary outputs written in ir_set_program_inouts. Reviewed-by: Ilia Mirkin --- src/compiler/glsl/ir_set_program_inouts.cpp | 9 +++-- src/mesa/main/mtypes.h | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp b/src/compiler/glsl/ir_set_program_inouts.cpp index fcfbcd4..06d9973 100644 --- a/src/compiler/glsl/ir_set_program_inouts.cpp +++ b/src/compiler/glsl/ir_set_program_inouts.cpp @@ -135,10 +135,14 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len, prog->SystemValuesRead |= bitfield; } else { assert(var->data.mode == ir_var_shader_out); - if (is_patch_generic) + if (is_patch_generic) { prog->PatchOutputsWritten |= bitfield; - else if (!var->data.read_only) + } else if (!var->data.read_only) { prog->OutputsWritten |= bitfield; +if (var->data.index > 0) + prog->SecondaryOutputsWritten |= bitfield; + } + if (var->data.fb_fetch_output) prog->OutputsRead |= bitfield; } @@ -446,6 +450,7 @@ do_set_program_inouts(exec_list *instructions, struct gl_program *prog, prog->InputsRead = 0; prog->OutputsWritten = 0; + prog->SecondaryOutputsWritten = 0; prog->OutputsRead = 0; prog->PatchInputsRead = 0; prog->PatchOutputsWritten = 0; diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h index 4013ca7..09b84f1 100644 --- a/src/mesa/main/mtypes.h +++ b/src/mesa/main/mtypes.h @@ -1919,6 +1919,7 @@ struct gl_program GLbitfield64 InputsRead; /**< Bitmask of which input regs are read */ GLbitfield64 DoubleInputsRead; /**< Bitmask of which input regs are read and are doubles */ GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written */ + GLbitfield64 SecondaryOutputsWritten; /**< Subset of OutputsWritten outputs written with 
non-zero index. */ GLbitfield64 OutputsRead; /**< Bitmask of which output regs are read */ GLbitfield PatchInputsRead; /**< VAR[0..31] usage for patch inputs (user-defined only) */ GLbitfield PatchOutputsWritten; /**< VAR[0..31] usage for patch outputs (user-defined only) */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): st/glsl_to_tgsi: Use SecondaryOutputsWritten to determine dual-source fragment outputs.
Module: Mesa Branch: master Commit: 342f945b1320d588e61e4efe1ccc7852a3c8ad9f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=342f945b1320d588e61e4efe1ccc7852a3c8ad9f Author: Francisco Jerez Date: Tue Aug 23 11:18:19 2016 -0700 st/glsl_to_tgsi: Use SecondaryOutputsWritten to determine dual-source fragment outputs. Currently the mesa state tracker relies on there being two bits set per dual-source output in the gl_program::OutputsWritten bitset, but that only worked due to a GLSL front-end bug that caused it to set the OutputsWritten bit for both location and location+1 even though at the GLSL level the primary and secondary color outputs used for dual-source blending have the same location. Fix it by extending outputMapping[] to 2*FRAG_RESULT_MAX elements in order to represent a mapping from a (location, index) pair to its TGSI output, which should also make it slightly easier to add support for dual-source blending in combination with multiple render targets in the long run. No Piglit regressions on llvmpipe. 
Reviewed-by: Ilia Mirkin --- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 5 +++-- src/mesa/state_tracker/st_program.c| 18 -- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index b7e47db..507a782 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -2419,7 +2419,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) entry = new(mem_ctx) variable_storage(var, PROGRAM_OUTPUT, var->data.location - + var->data.index); + + FRAG_RESULT_MAX * +var->data.index); } this->variables.push_tail(entry); break; @@ -5367,7 +5368,7 @@ dst_register(struct st_translate *t, gl_register_file file, unsigned index, case PROGRAM_OUTPUT: if (!array_id) { if (t->procType == PIPE_SHADER_FRAGMENT) -assert(index < FRAG_RESULT_MAX); +assert(index < 2 * FRAG_RESULT_MAX); else if (t->procType == PIPE_SHADER_TESS_CTRL || t->procType == PIPE_SHADER_TESS_EVAL) assert(index < VARYING_SLOT_TESS_MAX); diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 03a685c..429d0c9 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -586,7 +586,9 @@ bool st_translate_fragment_program(struct st_context *st, struct st_fragment_program *stfp) { - GLuint outputMapping[FRAG_RESULT_MAX]; + GLuint outputMapping[2 * FRAG_RESULT_MAX] = + { 0 /* XXX - Avoid temporary regression due to bogus OutputsWritten + * bitset. */ }; GLuint inputMapping[VARYING_SLOT_MAX]; GLuint inputSlotToAttr[VARYING_SLOT_MAX]; GLuint interpMode[PIPE_MAX_SHADER_INPUTS]; /* XXX size? 
*/ @@ -810,9 +812,13 @@ st_translate_fragment_program(struct st_context *st, } /* handle remaining outputs (color) */ - for (attr = 0; attr < FRAG_RESULT_MAX; attr++) { - if (outputsWritten & BITFIELD64_BIT(attr)) { -switch (attr) { + for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) { + const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten : +stfp->Base.Base.SecondaryOutputsWritten; + const unsigned loc = attr % FRAG_RESULT_MAX; + + if (written & BITFIELD64_BIT(loc)) { +switch (loc) { case FRAG_RESULT_DEPTH: case FRAG_RESULT_STENCIL: case FRAG_RESULT_SAMPLE_MASK: @@ -822,8 +828,8 @@ st_translate_fragment_program(struct st_context *st, case FRAG_RESULT_COLOR: write_all = GL_TRUE; /* fallthrough */ default: - assert(attr == FRAG_RESULT_COLOR || - (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX)); + assert(loc == FRAG_RESULT_COLOR || + (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX)); fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; fs_output_semantic_index[fs_num_outputs] = numColors; outputMapping[attr] = fs_num_outputs; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): i965/fs: Assert that the number of color targets is one when dual-source blend is enabled.
Module: Mesa Branch: master Commit: 6df215d97eab6e18a8c70c9966014f6ab2bbc20a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6df215d97eab6e18a8c70c9966014f6ab2bbc20a Author: Francisco Jerez Date: Thu Aug 25 18:35:06 2016 -0700 i965/fs: Assert that the number of color targets is one when dual-source blend is enabled. Requested by Anuj during review of 4a87e4ade778e56d4c65a58752b15a00ce69, adding as follow-up since it led to assertion failures due to various GLSL bugs that should be fixed now. --- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index cfb5bb6..48b5f40 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -487,6 +487,7 @@ fs_visitor::emit_fb_writes() } prog_data->dual_src_blend = (this->dual_src_output.file != BAD_FILE); + assert(!prog_data->dual_src_blend || key->nr_color_regions == 1); if (inst == NULL) { /* Even if there's no color buffers enabled, we still need to send ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): 32 new commits
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=da85b5a9f1b22a8f6cae1a3b335dc5f31011bcb1 Author: Francisco Jerez Date: Fri Jul 22 15:52:49 2016 -0700 i965: Expose shader framebuffer fetch extensions on Gen9+. Reviewed-by: Kenneth Graunke URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4135fc22ff735a40c36fcf051c1735fe23d154f2 Author: Francisco Jerez Date: Thu Aug 18 22:12:37 2016 -0700 i965/fs: Hook up coherent framebuffer reads to the NIR front-end. Reviewed-by: Kenneth Graunke URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=be12a1f36efcdd4628f199d4e11b01cc06787e8a Author: Francisco Jerez Date: Thu Jul 21 16:56:05 2016 -0700 i965/fs: Remove special casing of framebuffer writes in scheduler code. The reason why it was safe for the scheduler to ignore the side effects of framebuffer write instructions was that its side effects couldn't have had any influence on any other instruction in the program, because we weren't doing framebuffer reads, and framebuffer writes were always non-overlapping. We need actual memory dependency analysis in order to determine whether a side-effectful instruction can be reordered with respect to other instructions in the program. Reviewed-by: Kenneth Graunke URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3daa0fae4b39a271f50f473edbe44712b6c8f040 Author: Francisco Jerez Date: Wed Jul 6 20:49:58 2016 -0700 i965/fs: Don't CSE render target messages with different target index. We weren't checking the fs_inst::target field when comparing whether two instructions are equal. For FB writes it doesn't matter because they aren't CSE-able anyway, but this would have become a problem with FB reads which are expression-like instructions. Reviewed-by: Kenneth Graunke URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=db123df74773f458e573a9c034ee783570a3ed0f Author: Francisco Jerez Date: Thu Jul 21 16:55:45 2016 -0700 i965/fs: Define logical framebuffer read opcode and lower it to physical reads. 
Reviewed-by: Kenneth Graunke URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f2f75b0cf05d2519d618c71b19d2187b8ed0d545 Author: Francisco Jerez Date: Thu Jul 21 16:52:33 2016 -0700 i965/fs: Define framebuffer read virtual opcode. Reviewed-by: Kenneth Graunke URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=71d639f69ee868fbeadd0a1b8bbdd76e17398b43 Author: Francisco Jerez Date: Tue Jul 19 11:52:23 2016 -0700 i965/disasm: Fix RC message type strings on Gen7+. Reviewed-by: Kenneth Graunke URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=26ac16fe2f73507041062f63646286dea60053da Author: Francisco Jerez Date: Thu Jul 21 19:13:55 2016 -0700 i965/eu: Add codegen support for the Gen9+ render target read message. Reviewed-by: Kenneth Graunke URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=29eb8059fd7906d2595ea99bc65a27691b9fbe53 Author: Francisco Jerez Date: Thu Jul 21 18:49:36 2016 -0700 i965/eu: Take into account the target cache argument in brw_set_dp_read_message. brw_set_dp_read_message() was setting the data cache as send message SFID on Gen7+ hardware, ignoring the target cache specified by the caller. Some of the callers were passing a bogus target cache value as argument relying on brw_set_dp_read_message not to take it into account. Fix them too. Reviewed-by: Iago Toral Quiroga Reviewed-by: Kenneth Graunke URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8a2f19a7772c80fcac85d6bdfa8e588d6cea1beb Author: Francisco Jerez Date: Tue Jul 19 15:23:30 2016 -0700 i965: Flip the non-coherent framebuffer fetch extension bit on G45-Gen8 hardware. This is not enabled on the original Gen4 part because it lacks surface state tile offsets so it may not be possible to sample from arbitrary non-zero layers of the framebuffer depending on the miptree layout (it should be possible to work around this by allocating a scratch surface and doing the same hack currently used for render targets, but meh...). 
On Gen9+ even though it should mostly work (feel free to force-enable it in order to compare the coherent and non-coherent paths in terms of performance), there are some corner cases like 1D array layered framebuffers that cannot be handled easily by the non-coherent path because of the incompatible layout in memory of 1D and 2D miptrees (it should be possible to work around this too by doing state-dependent recompiles, but it's hard to care enough since Gen9 has native support for coherent render target reads...) Reviewed-by: Kenneth Graunke URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ecc4800383fb67cd274154469d933c6050782208 Author: Francisco Jer
Mesa (master): mesa: Add support for querying GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT.
Module: Mesa Branch: master Commit: 642aa58577bb0064c86fdd1a261a76a131886f06 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=642aa58577bb0064c86fdd1a261a76a131886f06 Author: Francisco Jerez Date: Tue Jul 5 21:28:11 2016 -0700 mesa: Add support for querying GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT. This can currently only give true as result since the only way you can expose EXT_shader_framebuffer_fetch right now is by flipping the MESA_shader_framebuffer_fetch bit, but that could potentially change in the future, see [1] for an explanation. [1] https://lists.freedesktop.org/archives/mesa-dev/2016-July/124028.html Reviewed-by: Kenneth Graunke --- src/mesa/main/get.c | 7 +++ src/mesa/main/get_hash_params.py | 4 src/mesa/main/glheader.h | 3 +++ 3 files changed, 14 insertions(+) diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c index 8cb0cc7..b017827 100644 --- a/src/mesa/main/get.c +++ b/src/mesa/main/get.c @@ -530,6 +530,13 @@ static const int extra_core_ARB_color_buffer_float_and_new_buffers[] = { EXTRA_END }; +static const int extra_EXT_shader_framebuffer_fetch[] = { + EXTRA_API_ES2, + EXTRA_API_ES3, + EXT(MESA_shader_framebuffer_fetch), + EXTRA_END +}; + /* This is the big table describing all the enums we accept in * glGet*v(). The table is partitioned into six parts: enums * understood by all GL APIs (OpenGL, GLES and GLES2), enums shared diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index cd8e47f..89d164d 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -424,6 +424,10 @@ descriptor=[ ]}, { "apis": ["GLES", "GLES2"], "params": [ +# GL_EXT_shader_framebuffer_fetch. Should be true if the MESA framebuffer +# fetch extension is supported since the latter imposes no restrictions on +# non-uniform per-sample discard. 
+ [ "FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT", "CONTEXT_BOOL(Extensions.MESA_shader_framebuffer_fetch), extra_EXT_shader_framebuffer_fetch" ], # GL_OES_EGL_image_external [ "TEXTURE_BINDING_EXTERNAL_OES", "LOC_CUSTOM, TYPE_INT, TEXTURE_EXTERNAL_INDEX, extra_OES_EGL_image_external" ], [ "TEXTURE_EXTERNAL_OES", "LOC_CUSTOM, TYPE_BOOLEAN, 0, extra_OES_EGL_image_external" ], diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h index 40fada1..3f2a923 100644 --- a/src/mesa/main/glheader.h +++ b/src/mesa/main/glheader.h @@ -140,6 +140,9 @@ typedef void *GLeglImageOES; #define GL_ETC1_RGB8_OES0x8D64 #endif +#ifndef GL_EXT_shader_framebuffer_fetch +#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52 +#endif /* Inexplicably, GL_HALF_FLOAT_OES has a different value than GL_HALF_FLOAT. */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Add support for representing framebuffer fetch in the GLSL IR.
Module: Mesa Branch: master Commit: b49d8f20f43ec429e6c17e7d92c7c2d3f926ee5e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b49d8f20f43ec429e6c17e7d92c7c2d3f926ee5e Author: Francisco Jerez Date: Tue Jul 19 20:07:47 2016 -0700 glsl: Add support for representing framebuffer fetch in the GLSL IR. The GLSL IR representation of framebuffer fetch amounts to a single bit in the ir_variable object applicable to fragment shader outputs. The flag indicates that the variable will be implicitly initialized to the previous contents of the render buffer at the same fragment coordinates and sample index. Reviewed-by: Kenneth Graunke --- src/compiler/glsl/ir.cpp | 1 + src/compiler/glsl/ir.h | 8 2 files changed, 9 insertions(+) diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp index 2aa4aff..4dadfd2 100644 --- a/src/compiler/glsl/ir.cpp +++ b/src/compiler/glsl/ir.cpp @@ -1686,6 +1686,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name, this->data.image_volatile = false; this->data.image_restrict = false; this->data.from_ssbo_unsized_array = false; + this->data.fb_fetch_output = false; if (type != NULL) { if (type->base_type == GLSL_TYPE_SAMPLER) diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h index 68e774c..5e1e9bf 100644 --- a/src/compiler/glsl/ir.h +++ b/src/compiler/glsl/ir.h @@ -831,6 +831,14 @@ public: unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. */ unsigned implicit_sized_array:1; + + /** + * Whether this is a fragment shader output implicitly initialized with + * the previous contents of the specified render target at the + * framebuffer location corresponding to this shader invocation. + */ + unsigned fb_fetch_output:1; + /** * Emit a warning if this variable is accessed. */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl/linker: Allow fragment output overlap for gl_LastFragData.
Module: Mesa Branch: master Commit: 913ae618c6bdb42366f4d87265a6e35a88656e70 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=913ae618c6bdb42366f4d87265a6e35a88656e70 Author: Francisco Jerez Date: Thu Jul 14 12:57:14 2016 -0700 glsl/linker: Allow fragment output overlap for gl_LastFragData. gl_LastFragData overlaps gl_FragData by definition. Reviewed-by: Kenneth Graunke --- src/compiler/glsl/linker.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp index a1a65ef..4b404ff 100644 --- a/src/compiler/glsl/linker.cpp +++ b/src/compiler/glsl/linker.cpp @@ -2673,6 +2673,9 @@ assign_attribute_or_color_locations(void *mem_ctx, } } + if (strcmp(var->name, "gl_LastFragData") == 0) + continue; + /* From GL4.5 core spec, section 15.2 (Shader Execution): * * "Output binding assignments will cause LinkProgram to fail: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Handle the inout qualifier in fragment shader output declarations.
Module: Mesa Branch: master Commit: 19e929a1774938cb826f68592dc87c520d048597 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=19e929a1774938cb826f68592dc87c520d048597 Author: Francisco Jerez Date: Tue Jul 19 20:10:21 2016 -0700 glsl: Handle the inout qualifier in fragment shader output declarations. According to the EXT_shader_framebuffer_fetch extension the inout qualifier can be used on ESSL 3.0+ shaders to declare a special kind of fragment output that gets implicitly initialized with the previous framebuffer contents at the current fragment coordinates. In addition we allow using the same language to define FB fetch outputs in GLSL 1.3+ shaders in preparation for the desktop MESA_shader_framebuffer_fetch extensions. Reviewed-by: Kenneth Graunke --- src/compiler/glsl/ast_to_hir.cpp | 5 - src/compiler/glsl/glsl_parser.yy | 12 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp index c05fb17..c91ed53 100644 --- a/src/compiler/glsl/ast_to_hir.cpp +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -3713,7 +3713,7 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, */ assert(var->data.mode != ir_var_temporary); if (qual->flags.q.in && qual->flags.q.out) - var->data.mode = ir_var_function_inout; + var->data.mode = is_parameter ? ir_var_function_inout : ir_var_shader_out; else if (qual->flags.q.in) var->data.mode = is_parameter ? ir_var_function_in : ir_var_shader_in; else if (qual->flags.q.attribute @@ -3730,6 +3730,9 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, else if (qual->flags.q.shared_storage) var->data.mode = ir_var_shader_shared; + var->data.fb_fetch_output = state->stage == MESA_SHADER_FRAGMENT && + qual->flags.q.in && qual->flags.q.out; + if (!is_parameter && is_varying_var(var, state->stage)) { /* User-defined ins/outs are not permitted in compute shaders. 
*/ if (state->stage == MESA_SHADER_COMPUTE) { diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy index 5b65861..f2853da 100644 --- a/src/compiler/glsl/glsl_parser.yy +++ b/src/compiler/glsl/glsl_parser.yy @@ -1944,6 +1944,18 @@ storage_qualifier: $$.xfb_buffer = state->out_qualifier->xfb_buffer; } } + | INOUT_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.in = 1; + $$.flags.q.out = 1; + + if (!state->has_framebuffer_fetch() || + !state->is_version(130, 300) || + state->stage != MESA_SHADER_FRAGMENT) + _mesa_glsl_error(&@1, state, "A single interface variable cannot be " + "declared as both input and output"); + } | UNIFORM { memset(& $$, 0, sizeof($$)); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: Move shader memory barrier functions into barrier.c.
Module: Mesa Branch: master Commit: 6a976bbf84c9c8790fa61bbeb5eb24a2e646c76c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6a976bbf84c9c8790fa61bbeb5eb24a2e646c76c Author: Francisco Jerez Date: Tue Jul 5 23:18:18 2016 -0700 mesa: Move shader memory barrier functions into barrier.c. Reviewed-by: Kenneth Graunke --- src/mesa/main/barrier.c | 51 + src/mesa/main/barrier.h | 6 ++ src/mesa/main/shaderimage.c | 51 - src/mesa/main/shaderimage.h | 6 -- 4 files changed, 57 insertions(+), 57 deletions(-) diff --git a/src/mesa/main/barrier.c b/src/mesa/main/barrier.c index beb48fb..7ae8fc6 100644 --- a/src/mesa/main/barrier.c +++ b/src/mesa/main/barrier.c @@ -57,3 +57,54 @@ _mesa_TextureBarrierNV(void) ctx->Driver.TextureBarrier(ctx); } + +void GLAPIENTRY +_mesa_MemoryBarrier(GLbitfield barriers) +{ + GET_CURRENT_CONTEXT(ctx); + + if (ctx->Driver.MemoryBarrier) + ctx->Driver.MemoryBarrier(ctx, barriers); +} + +void GLAPIENTRY +_mesa_MemoryBarrierByRegion(GLbitfield barriers) +{ + GET_CURRENT_CONTEXT(ctx); + + GLbitfield all_allowed_bits = GL_ATOMIC_COUNTER_BARRIER_BIT | + GL_FRAMEBUFFER_BARRIER_BIT | + GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | + GL_SHADER_STORAGE_BARRIER_BIT | + GL_TEXTURE_FETCH_BARRIER_BIT | + GL_UNIFORM_BARRIER_BIT; + + if (ctx->Driver.MemoryBarrier) { + /* From section 7.11.2 of the OpenGL ES 3.1 specification: + * + *"When barriers is ALL_BARRIER_BITS, shader memory accesses will be + * synchronized relative to all these barrier bits, but not to other + * barrier bits specific to MemoryBarrier." + * + * That is, if barriers is the special value GL_ALL_BARRIER_BITS, then all + * barriers allowed by glMemoryBarrierByRegion should be activated." 
+ */ + if (barriers == GL_ALL_BARRIER_BITS) { + ctx->Driver.MemoryBarrier(ctx, all_allowed_bits); + return; + } + + /* From section 7.11.2 of the OpenGL ES 3.1 specification: + * + *"An INVALID_VALUE error is generated if barriers is not the special + * value ALL_BARRIER_BITS, and has any bits set other than those + * described above." + */ + if ((barriers & ~all_allowed_bits) != 0) { + _mesa_error(ctx, GL_INVALID_VALUE, + "glMemoryBarrierByRegion(unsupported barrier bit"); + } + + ctx->Driver.MemoryBarrier(ctx, barriers); + } +} diff --git a/src/mesa/main/barrier.h b/src/mesa/main/barrier.h index 0652d14..8eee583 100644 --- a/src/mesa/main/barrier.h +++ b/src/mesa/main/barrier.h @@ -41,4 +41,10 @@ _mesa_init_barrier_functions(struct dd_function_table *driver); extern void GLAPIENTRY _mesa_TextureBarrierNV(void); +void GLAPIENTRY +_mesa_MemoryBarrier(GLbitfield barriers); + +void GLAPIENTRY +_mesa_MemoryBarrierByRegion(GLbitfield barriers); + #endif /* BARRIER_H */ diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c index 90643c4..db36e3b 100644 --- a/src/mesa/main/shaderimage.c +++ b/src/mesa/main/shaderimage.c @@ -753,54 +753,3 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const GLuint *textures) _mesa_end_texture_lookups(ctx); } - -void GLAPIENTRY -_mesa_MemoryBarrier(GLbitfield barriers) -{ - GET_CURRENT_CONTEXT(ctx); - - if (ctx->Driver.MemoryBarrier) - ctx->Driver.MemoryBarrier(ctx, barriers); -} - -void GLAPIENTRY -_mesa_MemoryBarrierByRegion(GLbitfield barriers) -{ - GET_CURRENT_CONTEXT(ctx); - - GLbitfield all_allowed_bits = GL_ATOMIC_COUNTER_BARRIER_BIT | - GL_FRAMEBUFFER_BARRIER_BIT | - GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | - GL_SHADER_STORAGE_BARRIER_BIT | - GL_TEXTURE_FETCH_BARRIER_BIT | - GL_UNIFORM_BARRIER_BIT; - - if (ctx->Driver.MemoryBarrier) { - /* From section 7.11.2 of the OpenGL ES 3.1 specification: - * - *"When barriers is ALL_BARRIER_BITS, shader memory accesses will be - * synchronized relative to all these 
barrier bits, but not to other - * barrier bits specific to MemoryBarrier." - * - * That is, if barriers is the special value GL_ALL_BARRIER_BITS, then all - * barriers allowed by glMemoryBarrierByRegion should be activated." - */ - if (barriers == GL_ALL_BARRIER_BITS) { - ctx->Driver.MemoryBarrier(ctx, all_allowed_bits); - return; - } - - /* From section 7.11.2 of the OpenGL ES 3.1 specification: - * - *"An INVALID_VALUE error is g
Mesa (master): glsl: Don't consider read-only fragment outputs to be written to.
Module: Mesa Branch: master Commit: 711213fb7226f25a7da4962aa7526d7265d38356 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=711213fb7226f25a7da4962aa7526d7265d38356 Author: Francisco Jerez Date: Tue Jul 19 20:29:55 2016 -0700 glsl: Don't consider read-only fragment outputs to be written to. Since they cannot be written. This prevents adding fragment outputs to the OutputsWritten set that are only read from via the gl_LastFragData array but never written to. Reviewed-by: Kenneth Graunke --- src/compiler/glsl/ir_set_program_inouts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp b/src/compiler/glsl/ir_set_program_inouts.cpp index 060bea8..a6475b5 100644 --- a/src/compiler/glsl/ir_set_program_inouts.cpp +++ b/src/compiler/glsl/ir_set_program_inouts.cpp @@ -137,7 +137,7 @@ mark(struct gl_program *prog, ir_variable *var, int offset, int len, assert(var->data.mode == ir_var_shader_out); if (is_patch_generic) prog->PatchOutputsWritten |= bitfield; - else + else if (!var->data.read_only) prog->OutputsWritten |= bitfield; } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Define a gl_LastFragData built-in for older GLSL versions.
Module: Mesa Branch: master Commit: 6b33eab959433fdcb4f3fce7c571a83e8050cdf0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6b33eab959433fdcb4f3fce7c571a83e8050cdf0 Author: Francisco Jerez Date: Tue Jul 19 20:11:53 2016 -0700 glsl: Define a gl_LastFragData built-in for older GLSL versions. The EXT_shader_framebuffer_fetch extension defines alternative language for GLES2 shaders where user-defined fragment outputs are not allowed. Instead of using inout user-defined fragment outputs the shader is expected to read from the gl_LastFragData built-in array. In addition this allows using the same language on desktop GLSL versions prior to 4.2 that support the deprecated gl_FragData built-in in preparation for the MESA_shader_framebuffer_fetch desktop GL extension. Both legacy and user-defined inout outputs have a common representation at the GLSL IR level, so it shouldn't make any difference for optimization passes and back-ends whether the application is using gl_LastFragData or user-defined outputs, all they'll see is a variable dereference of a fragment output at a certain interface location with the fb_fetch_output bit set to one. v2: Don't define the built-in variable on GLSL versions for which gl_FragData exists but is deprecated. 
(Ken) Reviewed-by: Kenneth Graunke --- src/compiler/glsl/builtin_variables.cpp | 10 ++ 1 file changed, 10 insertions(+) diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp index c9d8b1c..cb5f730 100644 --- a/src/compiler/glsl/builtin_variables.cpp +++ b/src/compiler/glsl/builtin_variables.cpp @@ -1134,6 +1134,16 @@ builtin_variable_generator::generate_fs_special_vars() array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData"); } + if (state->has_framebuffer_fetch() && !state->is_version(130, 300)) { + ir_variable *const var = + add_output(FRAG_RESULT_DATA0, +array(vec4_t, state->Const.MaxDrawBuffers), +"gl_LastFragData"); + var->data.precision = GLSL_PRECISION_MEDIUM; + var->data.read_only = 1; + var->data.fb_fetch_output = 1; + } + if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) { /* We make an assumption here that there will only ever be one dual-source draw buffer * In case this assumption is ever proven to be false, make sure to assert here ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): glsl: Add parser state enables for the framebuffer fetch extensions.
Module: Mesa Branch: master Commit: d7cd7b9c49ab01b954702783493fe22cd2bb38f1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d7cd7b9c49ab01b954702783493fe22cd2bb38f1 Author: Francisco Jerez Date: Mon Jul 25 17:24:52 2016 -0700 glsl: Add parser state enables for the framebuffer fetch extensions. Reviewed-by: Kenneth Graunke --- src/compiler/glsl/glsl_parser_extras.cpp | 1 + src/compiler/glsl/glsl_parser_extras.h | 13 + 2 files changed, 14 insertions(+) diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp index 14a5540..a185759 100644 --- a/src/compiler/glsl/glsl_parser_extras.cpp +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -652,6 +652,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { EXT(EXT_clip_cull_distance), EXT(EXT_gpu_shader5), EXT(EXT_separate_shader_objects), + EXT(EXT_shader_framebuffer_fetch), EXT(EXT_shader_integer_mix), EXT(EXT_shader_io_blocks), EXT(EXT_shader_samples_identical), diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h index 0294ef7..3311688 100644 --- a/src/compiler/glsl/glsl_parser_extras.h +++ b/src/compiler/glsl/glsl_parser_extras.h @@ -308,6 +308,13 @@ struct _mesa_glsl_parse_state { is_version(450, 0); } + bool has_framebuffer_fetch() const + { + return EXT_shader_framebuffer_fetch_enable || + MESA_shader_framebuffer_fetch_enable || + MESA_shader_framebuffer_fetch_non_coherent_enable; + } + void process_version_directive(YYLTYPE *locp, int version, const char *ident); @@ -696,6 +703,8 @@ struct _mesa_glsl_parse_state { bool EXT_gpu_shader5_warn; bool EXT_separate_shader_objects_enable; bool EXT_separate_shader_objects_warn; + bool EXT_shader_framebuffer_fetch_enable; + bool EXT_shader_framebuffer_fetch_warn; bool EXT_shader_integer_mix_enable; bool EXT_shader_integer_mix_warn; bool EXT_shader_io_blocks_enable; @@ -710,6 +719,10 @@ struct _mesa_glsl_parse_state { bool EXT_texture_array_warn; bool 
EXT_texture_buffer_enable; bool EXT_texture_buffer_warn; + bool MESA_shader_framebuffer_fetch_enable; + bool MESA_shader_framebuffer_fetch_warn; + bool MESA_shader_framebuffer_fetch_non_coherent_enable; + bool MESA_shader_framebuffer_fetch_non_coherent_warn; bool MESA_shader_integer_functions_enable; bool MESA_shader_integer_functions_warn; /*@}*/ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-commit