from:"\"Francisco Jerez\""

Mesa (master): i965: Use intel_bufferobj_buffer() wrapper in image surface state setup.

2018-05-23 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 936cd3c87a212c28fe89a5c059fc4febd8b52ab7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=936cd3c87a212c28fe89a5c059fc4febd8b52ab7

Author: Francisco Jerez 
Date:   Fri Mar 16 14:28:59 2018 -0700

i965: Use intel_bufferobj_buffer() wrapper in image surface state setup.

Instead of directly using intel_obj->buffer.  Among other things
intel_bufferobj_buffer() will update intel_buffer_object::
gpu_active_start/end, which are used by glBufferSubData() to decide
which path to take.  Fixes a failure in the Piglit
ARB_shader_image_load_store-host-mem-barrier Buffer Update/WaW tests,
which could be reproduced with a non-standard glGetTexSubImage
implementation (see bug report).

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105351
Reported-by: Nanley Chery 
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Nanley Chery 

---

 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 39e898243d..73cae9ef7c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1520,14 +1520,16 @@ update_image_surface(struct brw_context *brw,
   const unsigned format = get_image_format(brw, u->_ActualFormat, access);
 
   if (obj->Target == GL_TEXTURE_BUFFER) {
- struct intel_buffer_object *intel_obj =
-intel_buffer_object(obj->BufferObject);
  const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
   
_mesa_get_format_bytes(u->_ActualFormat));
  const unsigned buffer_size = buffer_texture_range_size(brw, obj);
+ struct brw_bo *const bo = !obj->BufferObject ? NULL :
+intel_bufferobj_buffer(brw, intel_buffer_object(obj->BufferObject),
+   obj->BufferOffset, buffer_size,
+   access != GL_READ_ONLY);
 
  brw_emit_buffer_surface_state(
-brw, surf_offset, intel_obj->buffer, obj->BufferOffset,
+brw, surf_offset, bo, obj->BufferOffset,
 format, buffer_size, texel_size,
 access != GL_READ_ONLY ? RELOC_WRITE : 0);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965: Handle non-zero texture buffer offsets in buffer object range calculation.

2018-05-23 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e989acb03ba802737f762627dd16ac1d0b9f0d13
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e989acb03ba802737f762627dd16ac1d0b9f0d13

Author: Francisco Jerez 
Date:   Fri Mar 16 14:35:10 2018 -0700

i965: Handle non-zero texture buffer offsets in buffer object range calculation.

Otherwise the specified surface state will allow the GPU to access
memory up to BufferOffset bytes past the end of the buffer.  Found by
inspection.

v2: Protect against out-of-range BufferOffset (Nanley).
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Nanley Chery 

---

 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index af629a17bf..39e898243d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -647,6 +647,7 @@ buffer_texture_range_size(struct brw_context *brw,
const unsigned texel_size = 
_mesa_get_format_bytes(obj->_BufferObjectFormat);
const unsigned buffer_size = (!obj->BufferObject ? 0 :
  obj->BufferObject->Size);
+   const unsigned buffer_offset = MIN2(buffer_size, obj->BufferOffset);
 
/* The ARB_texture_buffer_specification says:
 *
@@ -664,7 +665,8 @@ buffer_texture_range_size(struct brw_context *brw,
 * so that when ISL divides by stride to obtain the number of texels, that
 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
 */
-   return MIN3((unsigned)obj->BufferSize, buffer_size,
+   return MIN3((unsigned)obj->BufferSize,
+   buffer_size - buffer_offset,
brw->ctx.Const.MaxTextureBufferSize * texel_size);
 }
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965: Move buffer texture size calculation into a common helper function.

2018-05-23 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 156d2c6e621d836c4d45c636b87669e1de3d4464
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=156d2c6e621d836c4d45c636b87669e1de3d4464

Author: Francisco Jerez 
Date:   Fri Mar 16 13:06:26 2018 -0700

i965: Move buffer texture size calculation into a common helper function.

The buffer texture size calculations (should be easy enough, right?)
are repeated in three different places, each of them subtly broken in
a different way.  E.g. the image load/store path was never fixed to
clamp to MaxTextureBufferSize, and none of them are taking into
account the buffer offset correctly.  It's easier to fix it all in one
place.

Cc: mesa-stable@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106481
Reviewed-by: Nanley Chery 

---

 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 55 ++--
 1 file changed, 32 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 67438b0f7e..af629a17bf 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -639,26 +639,14 @@ brw_emit_buffer_surface_state(struct brw_context *brw,
  .mocs = brw_get_bo_mocs(devinfo, bo));
 }
 
-void
-brw_update_buffer_texture_surface(struct gl_context *ctx,
-  unsigned unit,
-  uint32_t *surf_offset)
+static unsigned
+buffer_texture_range_size(struct brw_context *brw,
+  struct gl_texture_object *obj)
 {
-   struct brw_context *brw = brw_context(ctx);
-   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
-   struct intel_buffer_object *intel_obj =
-  intel_buffer_object(tObj->BufferObject);
-   uint32_t size = tObj->BufferSize;
-   struct brw_bo *bo = NULL;
-   mesa_format format = tObj->_BufferObjectFormat;
-   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
-   int texel_size = _mesa_get_format_bytes(format);
-
-   if (intel_obj) {
-  size = MIN2(size, intel_obj->Base.Size);
-  bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
-  false);
-   }
+   assert(obj->Target == GL_TEXTURE_BUFFER);
+   const unsigned texel_size = 
_mesa_get_format_bytes(obj->_BufferObjectFormat);
+   const unsigned buffer_size = (!obj->BufferObject ? 0 :
+ obj->BufferObject->Size);
 
/* The ARB_texture_buffer_specification says:
 *
@@ -676,7 +664,28 @@ brw_update_buffer_texture_surface(struct gl_context *ctx,
 * so that when ISL divides by stride to obtain the number of texels, that
 * texel count is clamped to MAX_TEXTURE_BUFFER_SIZE.
 */
-   size = MIN2(size, ctx->Const.MaxTextureBufferSize * (unsigned) texel_size);
+   return MIN3((unsigned)obj->BufferSize, buffer_size,
+   brw->ctx.Const.MaxTextureBufferSize * texel_size);
+}
+
+void
+brw_update_buffer_texture_surface(struct gl_context *ctx,
+  unsigned unit,
+  uint32_t *surf_offset)
+{
+   struct brw_context *brw = brw_context(ctx);
+   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
+   struct intel_buffer_object *intel_obj =
+  intel_buffer_object(tObj->BufferObject);
+   const unsigned size = buffer_texture_range_size(brw, tObj);
+   struct brw_bo *bo = NULL;
+   mesa_format format = tObj->_BufferObjectFormat;
+   const enum isl_format isl_format = brw_isl_format_for_mesa_format(format);
+   int texel_size = _mesa_get_format_bytes(format);
+
+   if (intel_obj)
+  bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size,
+  false);
 
if (isl_format == ISL_FORMAT_UNSUPPORTED) {
   _mesa_problem(NULL, "bad format %s for texture buffer\n",
@@ -1477,8 +1486,7 @@ update_buffer_image_param(struct brw_context *brw,
   unsigned surface_idx,
   struct brw_image_param *param)
 {
-   struct gl_buffer_object *obj = u->TexObj->BufferObject;
-   const uint32_t size = MIN2((uint32_t)u->TexObj->BufferSize, obj->Size);
+   const unsigned size = buffer_texture_range_size(brw, u->TexObj);
update_default_image_param(brw, u, surface_idx, param);
 
param->size[0] = size / _mesa_get_format_bytes(u->_ActualFormat);
@@ -1514,10 +1522,11 @@ update_image_surface(struct brw_context *brw,
 intel_buffer_object(obj->BufferObject);
  const unsigned texel_size = (format == ISL_FORMAT_RAW ? 1 :
   
_mesa_get_format_bytes(u->_ActualFormat));
+ const unsigned buffer_size = buffer_texture_range_size(brw, obj);
 
  brw_emit_buffer_surface_state(

Mesa (master): Revert "mesa: simplify _mesa_is_image_unit_valid for buffers"

2018-05-23 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 5a6814780322988a7adee525899bca8a83907ab7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5a6814780322988a7adee525899bca8a83907ab7

Author: Francisco Jerez 
Date:   Fri Mar 16 13:43:27 2018 -0700

Revert "mesa: simplify _mesa_is_image_unit_valid for buffers"

This reverts commit c0ed52f6146c7e24e1275451773bd47c1eda3145.  It was
preventing the image format validation from being done on buffer
textures, which is required to ensure that the application doesn't
attempt to bind a buffer texture with an internal format incompatible
with the image unit format (e.g. of different texel size), which is
not allowed by the spec (it's not allowed for *any* texture target,
whether or not there is spec wording restricting this behavior
specifically for buffer textures) and will cause the driver to
calculate texel bounds incorrectly and potentially crash instead of
the expected behavior.

Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Marek Olšák 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106465
Reviewed-by: Nanley Chery 

---

 src/mesa/main/shaderimage.c | 25 ++++++++++++-------------
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
index feff8ccd91..31ac852d37 100644
--- a/src/mesa/main/shaderimage.c
+++ b/src/mesa/main/shaderimage.c
@@ -478,13 +478,6 @@ _mesa_is_image_unit_valid(struct gl_context *ctx, struct 
gl_image_unit *u)
if (!t)
   return GL_FALSE;
 
-   /* The GL 4.5 Core spec doesn't say anything about buffers. In practice,
-* the image buffer format is always compatible with the underlying
-* buffer storage.
-*/
-   if (t->Target == GL_TEXTURE_BUFFER)
-  return GL_TRUE;
-
if (!t->_BaseComplete && !t->_MipmapComplete)
_mesa_test_texobj_completeness(ctx, t);
 
@@ -498,14 +491,20 @@ _mesa_is_image_unit_valid(struct gl_context *ctx, struct 
gl_image_unit *u)
u->_Layer >= _mesa_get_texture_layers(t, u->Level))
   return GL_FALSE;
 
-   struct gl_texture_image *img = (t->Target == GL_TEXTURE_CUBE_MAP ?
-   t->Image[u->_Layer][u->Level] :
-   t->Image[0][u->Level]);
+   if (t->Target == GL_TEXTURE_BUFFER) {
+  tex_format = _mesa_get_shader_image_format(t->BufferObjectFormat);
 
-   if (!img || img->Border || img->NumSamples > ctx->Const.MaxImageSamples)
-  return GL_FALSE;
+   } else {
+  struct gl_texture_image *img = (t->Target == GL_TEXTURE_CUBE_MAP ?
+  t->Image[u->_Layer][u->Level] :
+  t->Image[0][u->Level]);
+
+  if (!img || img->Border || img->NumSamples > ctx->Const.MaxImageSamples)
+ return GL_FALSE;
+
+  tex_format = _mesa_get_shader_image_format(img->InternalFormat);
+   }
 
-   tex_format = _mesa_get_shader_image_format(img->InternalFormat);
if (!tex_format)
   return GL_FALSE;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/compiler: Memory fence commit must always be enabled for gen10+

2018-03-02 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 56dc9f9f49638e0769d6bc696ff7f5dafccec9fc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=56dc9f9f49638e0769d6bc696ff7f5dafccec9fc

Author: Anuj Phogat 
Date:   Tue Feb  6 17:09:09 2018 -0800

intel/compiler: Memory fence commit must always be enabled for gen10+

The commit bit in the message descriptor (Bit 13) must always be set
to true on CNL+ for memory fence messages. This also fixes a piglit
GPU hang on CNL+ in the simulation environment.
Piglit test: arb_shader_image_load_store-shader-mem-barrier
See HSD ES # 1404612949

Signed-off-by: Anuj Phogat 
Cc: mesa-stable@lists.freedesktop.org
Reviewed-by: Francisco Jerez 

---

 src/intel/compiler/brw_eu_emit.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 44abede16b..f8102e014e 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3287,7 +3287,9 @@ brw_memory_fence(struct brw_codegen *p,
  struct brw_reg dst)
 {
const struct gen_device_info *devinfo = p->devinfo;
-   const bool commit_enable = devinfo->gen == 7 && !devinfo->is_haswell;
+   const bool commit_enable =
+  devinfo->gen >= 10 || /* HSD ES # 1404612949 */
+  (devinfo->gen == 7 && !devinfo->is_haswell);
struct brw_inst *insn;
 
brw_push_insn_state(p);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/fs: Handle surface opcode sample masks via predication.

2018-03-02 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: c063e88909e630bb4605037eb0fc072f40f8c2a2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c063e88909e630bb4605037eb0fc072f40f8c2a2

Author: Francisco Jerez 
Date:   Tue Dec 12 12:05:04 2017 -0800

intel/fs: Handle surface opcode sample masks via predication.

The main motivation is to enable HDC surface opcodes on ICL which no
longer allows the sample mask to be provided in a message header, but
this is enabled all the way back to IVB when possible because it
decreases the instruction count of some shaders using HDC messages
significantly, e.g. one of the SynMark2 CSDof compute shaders
decreases instruction count by about 40% due to the removal of header
setup boilerplate which in turn makes a number of send message
payloads more easily CSE-able.  Shader-db results on SKL:

 total instructions in shared programs: 15325319 -> 15314384 (-0.07%)
 instructions in affected programs: 311532 -> 300597 (-3.51%)
 helped: 491
 HURT: 1

Shader-db results on BDW where the optimization needs to be disabled
in some cases due to hardware restrictions:

 total instructions in shared programs: 15604794 -> 15598028 (-0.04%)
 instructions in affected programs: 220863 -> 214097 (-3.06%)
 helped: 351
 HURT: 0

The FPS of SynMark2 CSDof improves by 5.09% ±0.36% (n=10) on my SKL
laptop with this change.  According to Eero this improves performance
of the same test by 9% on BYT and by 7-8% on BXT J4205 and on SKL GT2
desktop.

Reviewed-by: Kenneth Graunke 
Tested-By: Eero Tamminen 

---

 src/intel/compiler/brw_fs.cpp | 43 ++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index c255a3b23b..b1e1d98f6e 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -4460,6 +4460,8 @@ static void
 lower_surface_logical_send(const fs_builder &bld, fs_inst *inst, opcode op,
const fs_reg &sample_mask)
 {
+   const gen_device_info *devinfo = bld.shader->devinfo;
+
/* Get the logical send arguments. */
const fs_reg &addr = inst->src[0];
const fs_reg &src = inst->src[1];
@@ -4470,7 +4472,20 @@ lower_surface_logical_send(const fs_builder &bld, 
fs_inst *inst, opcode op,
/* Calculate the total number of components of the payload. */
const unsigned addr_sz = inst->components_read(0);
const unsigned src_sz = inst->components_read(1);
-   const unsigned header_sz = (sample_mask.file == BAD_FILE ? 0 : 1);
+   /* From the BDW PRM Volume 7, page 147:
+*
+*  "For the Data Cache Data Port*, the header must be present for the
+*   following message types: [...] Typed read/write/atomics"
+*
+* Earlier generations have a similar wording.  Because of this restriction
+* we don't attempt to implement sample masks via predication for such
+* messages prior to Gen9, since we have to provide a header anyway.  On
+* Gen11+ the header has been removed so we can only use predication.
+*/
+   const unsigned header_sz = devinfo->gen < 9 &&
+  (op == SHADER_OPCODE_TYPED_SURFACE_READ ||
+   op == SHADER_OPCODE_TYPED_SURFACE_WRITE ||
+   op == SHADER_OPCODE_TYPED_ATOMIC) ? 1 : 0;
const unsigned sz = header_sz + addr_sz + src_sz;
 
/* Allocate space for the payload. */
@@ -4490,6 +4505,32 @@ lower_surface_logical_send(const fs_builder &bld, 
fs_inst *inst, opcode op,
 
bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
 
+   /* Predicate the instruction on the sample mask if no header is
+* provided.
+*/
+   if (!header_sz && sample_mask.file != BAD_FILE &&
+   sample_mask.file != IMM) {
+  const fs_builder ubld = bld.group(1, 0).exec_all();
+  if (inst->predicate) {
+ assert(inst->predicate == BRW_PREDICATE_NORMAL);
+ assert(!inst->predicate_inverse);
+ assert(inst->flag_subreg < 2);
+ /* Combine the sample mask with the existing predicate by using a
+  * vertical predication mode.
+  */
+ inst->predicate = BRW_PREDICATE_ALIGN1_ALLV;
+ ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg + 2),
+ sample_mask.type),
+  sample_mask);
+  } else {
+ inst->flag_subreg = 2;
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->predicate_inverse = false;
+ ubld.MOV(retype(brw_flag_subreg(inst->flag_subreg), sample_mask.type),
+  sample_mask);
+  }
+   }
+
/* Update the original instruction. */
inst->opcode = op;
inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/ir: Allow representing additional flag subregisters in the IR.

2018-03-02 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: cc0fc8b8ac608b036d260007a689eeeb8e815031
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cc0fc8b8ac608b036d260007a689eeeb8e815031

Author: Francisco Jerez 
Date:   Tue Dec 12 12:05:02 2017 -0800

intel/ir: Allow representing additional flag subregisters in the IR.

This allows representing conditional mods and predicates on f1.0-f1.1
at the IR level by adding an extra bit to the flag_subreg
backend_instruction field.

Reviewed-by: Jordan Justen 
Reviewed-by: Kenneth Graunke 

---

 src/intel/compiler/brw_fs.cpp| 12 +++-
 src/intel/compiler/brw_fs_generator.cpp  |  4 ++--
 src/intel/compiler/brw_reg.h |  7 +++
 src/intel/compiler/brw_schedule_instructions.cpp |  2 +-
 src/intel/compiler/brw_shader.h  |  4 ++--
 src/intel/compiler/brw_vec4.cpp  |  7 ---
 src/intel/compiler/brw_vec4_generator.cpp|  2 +-
 7 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 0d7988dae4..16b6a06c69 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5488,9 +5488,10 @@ fs_visitor::dump_instruction(backend_instruction 
*be_inst, FILE *file)
fs_inst *inst = (fs_inst *)be_inst;
 
if (inst->predicate) {
-  fprintf(file, "(%cf0.%d) ",
- inst->predicate_inverse ? '-' : '+',
- inst->flag_subreg);
+  fprintf(file, "(%cf%d.%d) ",
+  inst->predicate_inverse ? '-' : '+',
+  inst->flag_subreg / 2,
+  inst->flag_subreg % 2);
}
 
fprintf(file, "%s", brw_instruction_name(devinfo, inst->opcode));
@@ -5502,7 +5503,8 @@ fs_visitor::dump_instruction(backend_instruction 
*be_inst, FILE *file)
   (devinfo->gen < 5 || (inst->opcode != BRW_OPCODE_SEL &&
 inst->opcode != BRW_OPCODE_IF &&
 inst->opcode != BRW_OPCODE_WHILE))) {
- fprintf(file, ".f0.%d", inst->flag_subreg);
+ fprintf(file, ".f%d.%d", inst->flag_subreg / 2,
+ inst->flag_subreg % 2);
   }
}
fprintf(file, "(%d) ", inst->exec_size);
@@ -5888,7 +5890,7 @@ fs_visitor::calculate_register_pressure()
 bool
 fs_visitor::opt_drop_redundant_mov_to_flags()
 {
-   bool flag_mov_found[2] = {false};
+   bool flag_mov_found[4] = {false};
bool progress = false;
 
/* Instructions removed by this pass can only be added if this were true */
diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp
index a5a821a13b..557b098c20 100644
--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -1508,7 +1508,7 @@ 
fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
 void
 fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst)
 {
-   struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg);
+   struct brw_reg flags = brw_flag_subreg(inst->flag_subreg);
struct brw_reg dispatch_mask;
 
if (devinfo->gen >= 6)
@@ -1764,7 +1764,7 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_predicate_control(p, inst->predicate);
   brw_set_default_predicate_inverse(p, inst->predicate_inverse);
-  brw_set_default_flag_reg(p, 0, inst->flag_subreg);
+  brw_set_default_flag_reg(p, inst->flag_subreg / 2, inst->flag_subreg % 
2);
   brw_set_default_saturate(p, inst->saturate);
   brw_set_default_mask_control(p, inst->force_writemask_all);
   brw_set_default_acc_write_control(p, inst->writes_accumulator);
diff --git a/src/intel/compiler/brw_reg.h b/src/intel/compiler/brw_reg.h
index 17d5b97bf3..c41408104f 100644
--- a/src/intel/compiler/brw_reg.h
+++ b/src/intel/compiler/brw_reg.h
@@ -842,6 +842,13 @@ brw_flag_reg(int reg, int subreg)
   BRW_ARF_FLAG + reg, subreg);
 }
 
+static inline struct brw_reg
+brw_flag_subreg(unsigned subreg)
+{
+   return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
+  BRW_ARF_FLAG + subreg / 2, subreg % 2);
+}
+
 /**
  * Return the mask register present in Gen4-5, or the related register present
  * in Gen7.5 and later hardware referred to as "channel enable" register in
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp
index 692f712532..0e793de4dd 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -974,7 +974,7 @@ fs_instruction_scheduler::calculate_deps()
 */
schedule_node *last_grf_write[grf_count * 16];
schedule_node *last_mrf_write[BRW_MA

Mesa (master): intel/ir: Allow arbitrary scratch flag registers for SHADER_OPCODE_FIND_LIVE_CHANNEL.

2018-03-02 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 6edb332b44b2570abac8fea2123050ea0f84e1e6
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6edb332b44b2570abac8fea2123050ea0f84e1e6

Author: Francisco Jerez 
Date:   Thu Feb 22 12:49:01 2018 -0800

intel/ir: Allow arbitrary scratch flag registers for SHADER_OPCODE_FIND_LIVE_CHANNEL.

This shouldn't cause any functional change at this point.  It changes
SHADER_OPCODE_FIND_LIVE_CHANNEL to use the flag register specified at
the IR level instead of the hard-coded f1.0, now that it can be
represented in backend_instruction::flag_subreg.  This will be
necessary for scheduling to behave correctly once more things start
making use of f1.0.

Reviewed-by: Jordan Justen 
Reviewed-by: Kenneth Graunke 

---

 src/intel/compiler/brw_eu_emit.c| 5 +++--
 src/intel/compiler/brw_fs.cpp   | 3 ++-
 src/intel/compiler/brw_fs_builder.h | 2 +-
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index a96fe43556..14b1c592b6 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -3399,7 +3399,9 @@ brw_find_live_channel(struct brw_codegen *p, struct 
brw_reg dst,
   */
  inst = brw_FBL(p, vec1(dst), exec_mask);
   } else {
- const struct brw_reg flag = brw_flag_reg(1, 0);
+ const struct brw_reg flag = brw_flag_reg(
+brw_inst_flag_reg_nr(devinfo, p->current),
+brw_inst_flag_subreg_nr(devinfo, p->current));
 
  brw_set_default_exec_size(p, BRW_EXECUTE_1);
  brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
@@ -3418,7 +3420,6 @@ brw_find_live_channel(struct brw_codegen *p, struct 
brw_reg dst,
 brw_inst_set_mask_control(devinfo, inst, BRW_MASK_ENABLE);
 brw_inst_set_group(devinfo, inst, lower_size * i + 8 * 
qtr_control);
 brw_inst_set_cond_modifier(devinfo, inst, BRW_CONDITIONAL_Z);
-brw_inst_set_flag_reg_nr(devinfo, inst, 1);
 brw_inst_set_exec_size(devinfo, inst, cvt(lower_size) - 1);
  }
 
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 16b6a06c69..c255a3b23b 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -931,7 +931,8 @@ fs_inst::flags_written() const
if ((conditional_mod && (opcode != BRW_OPCODE_SEL &&
 opcode != BRW_OPCODE_IF &&
 opcode != BRW_OPCODE_WHILE)) ||
-   opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS) {
+   opcode == FS_OPCODE_MOV_DISPATCH_TO_FLAGS ||
+   opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL) {
   return flag_mask(this);
} else {
   return flag_mask(dst, size_written);
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h
index 874272b7af..b157e33c39 100644
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -406,7 +406,7 @@ namespace brw {
  const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD);
  const dst_reg dst = vgrf(src.type);
 
- ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index);
+ ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index)->flag_subreg = 
2;
  ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 
0));
 
  return src_reg(component(dst, 0));

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/l3: Don't allocate SLM partition on ICL+.

2018-03-02 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 9ec3362e0ba293f20d08493753edeb29d13baadf
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9ec3362e0ba293f20d08493753edeb29d13baadf

Author: Francisco Jerez 
Date:   Tue Dec 12 12:05:00 2017 -0800

intel/l3: Don't allocate SLM partition on ICL+.

SLM has a chunk of special-purpose memory separate from L3 on ICL+, so we
shouldn't allocate a partition for it on L3 anymore.

Reviewed-by: Jordan Justen 
Reviewed-by: Kenneth Graunke 

---

 src/intel/common/gen_l3_config.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/common/gen_l3_config.c b/src/intel/common/gen_l3_config.c
index aff13c06ec..7d58ad8d7c 100644
--- a/src/intel/common/gen_l3_config.c
+++ b/src/intel/common/gen_l3_config.c
@@ -232,7 +232,7 @@ gen_get_default_l3_weights(const struct gen_device_info 
*devinfo,
 {
struct gen_l3_weights w = {{ 0 }};
 
-   w.w[GEN_L3P_SLM] = needs_slm;
+   w.w[GEN_L3P_SLM] = devinfo->gen < 11 && needs_slm;
w.w[GEN_L3P_URB] = 1.0;
 
if (devinfo->gen >= 8) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): Revert "i965/fs: Predicate byte scattered writes if needed"

2018-03-02 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 4b4838b1ae46a0ce9fed88f275cc01167302cf24
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4b4838b1ae46a0ce9fed88f275cc01167302cf24

Author: Francisco Jerez 
Date:   Sat Feb 24 16:05:21 2018 -0800

Revert "i965/fs: Predicate byte scattered writes if needed"

This reverts commit a4031bdfa927fb4c3c5d0bdadc70634f3c1a5eac.  It's
redundant with the sample mask predication done at this point by the
common logical send lowering infrastructure, and rather buggy because
it wasn't applying the correct sample mask in shaders using discard,
since the dispatch mask returned by FS_OPCODE_MOV_DISPATCH_TO_FLAGS
doesn't reflect samples discarded by the shader, so it could have led
to data corruption in fragment shader invocations that execute discard
based on a non-dynamically uniform condition.

Reviewed-by: Kenneth Graunke 

---

 src/intel/compiler/brw_fs_nir.cpp | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 47247875e8..554d61d71a 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4207,25 +4207,12 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
  * to rely on byte scattered in order to write 16-bit elements.
  * The byte_scattered_write message needs that every written 16-bit
  * type to be aligned 32-bits (stride=2).
- * Additionally, while on Untyped Surface messages the
- * bits of the execution mask are ANDed with the corresponding
- * bits of the Pixel/Sample Mask, that is not the case for byte
- * scattered writes. That is needed to avoid ssbo stores writing
- * on helper invocations. So when that can affect, we load the
- * sample mask, and predicate the send message.
  */
-brw_predicate pred = BRW_PREDICATE_NONE;
-
-if (stage == MESA_SHADER_FRAGMENT) {
-   bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
-   pred = BRW_PREDICATE_NORMAL;
-}
-
 emit_byte_scattered_write(bld, surf_index, offset_reg,
   write_src,
   1 /* dims */, 1,
   bit_size,
-  pred);
+  BRW_PREDICATE_NONE);
  } else {
 assert(num_components * type_size <= 16);
 assert((num_components * type_size) % 4 == 0);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/eu: Plumb header present bit to codegen helpers for HDC messages.

2018-03-02 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e7c9adca5726a8c96de20ae7c5f21a30061db392
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e7c9adca5726a8c96de20ae7c5f21a30061db392

Author: Francisco Jerez 
Date:   Tue Dec 12 12:05:03 2017 -0800

intel/eu: Plumb header present bit to codegen helpers for HDC messages.

This makes sure that the header-present bit of the message descriptor
is in sync with the IR instruction fields, which gives the optimizer
more control to avoid the overhead of setting up a message header when
it's possible to do so.

Reviewed-by: Jordan Justen 
Reviewed-by: Kenneth Graunke 

---

 src/intel/compiler/brw_eu.h   | 18 --
 src/intel/compiler/brw_eu_emit.c  | 30 ++
 src/intel/compiler/brw_fs_generator.cpp   | 20 ++--
 src/intel/compiler/brw_vec4_generator.cpp | 11 ++-
 4 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index 2d0f56f793..a5f28d8fc6 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -444,7 +444,8 @@ brw_untyped_atomic(struct brw_codegen *p,
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
-   bool response_expected);
+   bool response_expected,
+   bool header_present);
 
 void
 brw_untyped_surface_read(struct brw_codegen *p,
@@ -459,7 +460,8 @@ brw_untyped_surface_write(struct brw_codegen *p,
   struct brw_reg payload,
   struct brw_reg surface,
   unsigned msg_length,
-  unsigned num_channels);
+  unsigned num_channels,
+  bool header_present);
 
 void
 brw_typed_atomic(struct brw_codegen *p,
@@ -468,7 +470,8 @@ brw_typed_atomic(struct brw_codegen *p,
  struct brw_reg surface,
  unsigned atomic_op,
  unsigned msg_length,
- bool response_expected);
+ bool response_expected,
+ bool header_present);
 
 void
 brw_typed_surface_read(struct brw_codegen *p,
@@ -476,14 +479,16 @@ brw_typed_surface_read(struct brw_codegen *p,
struct brw_reg payload,
struct brw_reg surface,
unsigned msg_length,
-   unsigned num_channels);
+   unsigned num_channels,
+   bool header_present);
 
 void
 brw_typed_surface_write(struct brw_codegen *p,
 struct brw_reg payload,
 struct brw_reg surface,
 unsigned msg_length,
-unsigned num_channels);
+unsigned num_channels,
+bool header_present);
 
 void
 brw_byte_scattered_read(struct brw_codegen *p,
@@ -498,7 +503,8 @@ brw_byte_scattered_write(struct brw_codegen *p,
  struct brw_reg payload,
  struct brw_reg surface,
  unsigned msg_length,
- unsigned bit_size);
+ unsigned bit_size,
+ bool header_present);
 
 void
 brw_memory_fence(struct brw_codegen *p,
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 14b1c592b6..44abede16b 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -2883,7 +2883,8 @@ brw_untyped_atomic(struct brw_codegen *p,
struct brw_reg surface,
unsigned atomic_op,
unsigned msg_length,
-   bool response_expected)
+   bool response_expected,
+   bool header_present)
 {
const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
@@ -2901,7 +2902,7 @@ brw_untyped_atomic(struct brw_codegen *p,
   p, sfid, brw_writemask(dst, mask), payload, surface, msg_length,
   brw_surface_payload_size(p, response_expected,
devinfo->gen >= 8 || devinfo->is_haswell, true),
-  align1);
+  header_present);
 
brw_set_dp_untyped_atomic_message(
   p, insn, atomic_op, response_expected);
@@ -2984,7 +2985,8 @@ brw_untyped_surface_write(struct brw_codegen *p,
   struct brw_reg payload,
   struct brw_reg surface,
   unsigned msg_length,
-  unsigned num_channels)
+  unsigned num_channels,
+  bool header_present)
 {
const struct gen_device_info *devinfo = p->devinfo;
const unsigned sfid = (devinfo->gen >= 8 || devin

Mesa (master): intel/ir: Fix invalid type aliasing with undefined behavior in test_eu_compact.

2018-02-27 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: cb309d27c52e9a6dbddb82a0f6eb75a6f263
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb309d27c52e9a6dbddb82a0f6eb75a6f263

Author: Francisco Jerez 
Date:   Fri Jan 26 11:48:02 2018 -0800

intel/ir: Fix invalid type aliasing with undefined behavior in test_eu_compact.

test_fuzz_compact_instruction() was attempting to modify the uint64_t
data array of a brw_inst through a pointer to uint32_t, which has
undefined behavior.  This was causing the test_eu_compact unit test to
fail mysteriously for me on GCC 7 with some additional
harmless-looking changes I had applied to my tree.  Those changes
happened to affect the order in which GCC emits instructions, causing
the bit twiddling to be done after the clear_pad_bits() call, which is
supposed to overwrite the same data through a pointer of a different
type, leading to data corruption.  A similar failure has been reported by
Vinson Lee on the master branch built with GCC 8.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105052
Tested-by: Vinson Lee 
Reviewed-by: Matt Turner 

---

 src/intel/compiler/test_eu_compact.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/test_eu_compact.cpp 
b/src/intel/compiler/test_eu_compact.cpp
index 1532e3b984..f6924abd36 100644
--- a/src/intel/compiler/test_eu_compact.cpp
+++ b/src/intel/compiler/test_eu_compact.cpp
@@ -149,13 +149,13 @@ test_fuzz_compact_instruction(struct brw_codegen *p, 
brw_inst src)
 
   for (int bit1 = 0; bit1 < 128; bit1++) {
  brw_inst instr = src;
-uint32_t *bits = (uint32_t *)&instr;
+uint64_t *bits = instr.data;
 
  if (skip_bit(p->devinfo, &src, bit1))
continue;
 
-bits[bit0 / 32] ^= (1 << (bit0 & 31));
-bits[bit1 / 32] ^= (1 << (bit1 & 31));
+bits[bit0 / 64] ^= (1ull << (bit0 & 63));
+bits[bit1 / 64] ^= (1ull << (bit1 & 63));
 
  clear_pad_bits(p->devinfo, &instr);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): util/bitset: Make C++ wrapper trivially constructible.

2018-02-27 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 69b4a9d21d00e1f72b52e818cc059ee1642f263e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=69b4a9d21d00e1f72b52e818cc059ee1642f263e

Author: Francisco Jerez 
Date:   Sat Feb 24 18:37:34 2018 -0800

util/bitset: Make C++ wrapper trivially constructible.

This fixes a build failure on compilers that do not implement
unrestricted unions, which are a C++11 feature.

v2: Provide signed integer comparison and assignment operators instead
of BITSET_WORD ones to avoid spurious ambiguity warnings on
comparisons with a signed integer literal.

Fixes: ba79a90fb52e1e81fb "glsl: Switch ast_type_qualifier to a 128-bit bitset."
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105238
Tested-by: Roland Scheidegger 
Tested-By: George Kyriazis 
Reviewed-by: Roland Scheidegger 

---

 src/compiler/glsl/ast.h  |  2 --
 src/compiler/glsl/glsl_parser.yy |  1 -
 src/util/bitset.h| 37 -
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index e5e4b572ff..a1ec0d566f 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -477,8 +477,6 @@ struct ast_type_qualifier {
DECLARE_BITSET_T(bitset_t, 128);
 
union flags {
-  flags() : i(0) {}
-
   struct {
 unsigned invariant:1;
  unsigned precise:1;
diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy
index f1986ed0a8..e5ea41d4df 100644
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -96,7 +96,6 @@ static bool match_layout_qualifier(const char *s1, const char 
*s2,
 %parse-param {struct _mesa_glsl_parse_state *state}
 
 %union {
-   YYSTYPE() {}
int n;
int64_t n64;
float real;
diff --git a/src/util/bitset.h b/src/util/bitset.h
index 7bb5f3c83c..b4c2152023 100644
--- a/src/util/bitset.h
+++ b/src/util/bitset.h
@@ -142,23 +142,6 @@ __bitset_next_set(unsigned i, BITSET_WORD *tmp,
  * it as, and N is the number of bits in the bitset.
  */
 #define DECLARE_BITSET_T(T, N) struct T {   \
-  /* XXX - Replace this with an implicitly-defined  \
-   * constructor when support for C++11 defaulted   \
-   * constructors can be assumed (available on GCC 4.4 and  \
-   * later) in order to make the object trivially   \
-   * constructible like a fundamental integer type for  \
-   * convenience.   \
-   */   \
-  T()   \
-  { \
-  } \
-\
-  T(BITSET_WORD x)  \
-  { \
- for (unsigned i = 0; i < BITSET_WORDS(N); i++, x = 0)  \
-words[i] = x;   \
-  } \
-\
   EXPLICIT_CONVERSION   \
   operator bool() const \
   { \
@@ -168,6 +151,13 @@ __bitset_next_set(unsigned i, BITSET_WORD *tmp,
  return false;  \
   } \
 \
+  T &   \
+  operator=(int x)  \
+  { \
+ const T c = {{ (BITSET_WORD)x }};  \
+ return *this = c;  \
+  } \
+\
   friend bool   \
   operator==(const T &b, const T &c)\
   { \
@@ -180,6 +170,19 @@ __bitset_next_set(unsigned i, BITSET_WORD *tmp,
  return !(b == c);  \
   } \
 \
+  friend bool   \
+  operator==(const T &b, int x) \
+  { \
+ const T c = {{ (BITSET_WORD)x }};

Mesa (master): util/bitset: Add C++ wrapper for static-size bitsets.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: bdbc2ffa4219b39e47a27decbc603d445286d92d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=bdbc2ffa4219b39e47a27decbc603d445286d92d

Author: Francisco Jerez 
Date:   Mon Feb 12 14:09:24 2018 -0800

util/bitset: Add C++ wrapper for static-size bitsets.

Reviewed-by: Plamena Manolova 

---

 src/util/bitset.h | 114 ++
 1 file changed, 114 insertions(+)

diff --git a/src/util/bitset.h b/src/util/bitset.h
index 2404ce7f63..7bb5f3c83c 100644
--- a/src/util/bitset.h
+++ b/src/util/bitset.h
@@ -132,4 +132,118 @@ __bitset_next_set(unsigned i, BITSET_WORD *tmp,
for (__tmp = *(__set), __i = 0; \
 (__i = __bitset_next_set(__i, &__tmp, __set, __size)) < __size;)
 
+#ifdef __cplusplus
+
+/**
+ * Simple C++ wrapper of a bitset type of static size, with value semantics
+ * and basic bitwise arithmetic operators.  The operators defined below are
+ * expected to have the same semantics as the same operator applied to other
+ * fundamental integer types.  T is the name of the struct to instantiate
+ * it as, and N is the number of bits in the bitset.
+ */
+#define DECLARE_BITSET_T(T, N) struct T {   \
+  /* XXX - Replace this with an implicitly-defined  \
+   * constructor when support for C++11 defaulted   \
+   * constructors can be assumed (available on GCC 4.4 and  \
+   * later) in order to make the object trivially   \
+   * constructible like a fundamental integer type for  \
+   * convenience.   \
+   */   \
+  T()   \
+  { \
+  } \
+\
+  T(BITSET_WORD x)  \
+  { \
+ for (unsigned i = 0; i < BITSET_WORDS(N); i++, x = 0)  \
+words[i] = x;   \
+  } \
+\
+  EXPLICIT_CONVERSION   \
+  operator bool() const \
+  { \
+ for (unsigned i = 0; i < BITSET_WORDS(N); i++) \
+if (words[i])   \
+   return true; \
+ return false;  \
+  } \
+\
+  friend bool   \
+  operator==(const T &b, const T &c)\
+  { \
+ return BITSET_EQUAL(b.words, c.words); \
+  } \
+\
+  friend bool   \
+  operator!=(const T &b, const T &c)\
+  { \
+ return !(b == c);  \
+  } \
+\
+  friend T  \
+  operator~(const T &b) \
+  { \
+ T c;   \
+ for (unsigned i = 0; i < BITSET_WORDS(N); i++) \
+c.words[i] = ~b.words[i];   \
+ return c;  \
+  } \
+\
+  T &   \
+  operator|=(const T &b)\
+  { \
+ for (unsigned i = 0; i < BITSET_WORDS(N); i++) \
+words[i] |= b.words[i]; \
+ return *this;  \
+  } \
+\
+  friend T

Mesa (master): glsl: Switch ast_type_qualifier to a 128-bit bitset.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: ba79a90fb52e1e81fbfb38113e85a56b13497c50
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ba79a90fb52e1e81fbfb38113e85a56b13497c50

Author: Francisco Jerez 
Date:   Mon Feb 12 14:18:15 2018 -0800

glsl: Switch ast_type_qualifier to a 128-bit bitset.

This should end the drought of bits in the ast_type_qualifier object.
The bitset_t type works pretty much as a drop-in replacement for the
current uint64_t bitset.

The only catch is that the bitset_t type as defined in the previous
commit doesn't have a trivial constructor (because it has a
user-defined constructor), so it cannot be used as union member
without providing a user-defined constructor for the union (which
causes it in turn to be non-trivially constructible).  This annoyance
could be easily addressed in C++11 by declaring the default
constructor of bitset_t to be the implicitly defined one -- IMO one
more reason to drop support for GCC 4.2-4.3.

The other minor change was required because glsl_parser_extras.cpp was
hard-coding the type of bitset temporaries as uint64_t.  Unlike what
would have happened if the uint64_t had been replaced with e.g. an
__int128, this would otherwise have caused a build failure: the
boolean conversion operator of bitset_t is marked explicit (if C++11
is available), so the bitset won't be silently truncated down to 1 bit
in order to initialize the uint64_t temporaries (yikes).

Reviewed-by: Plamena Manolova 

---

 src/compiler/glsl/ast.h  | 8 ++--
 src/compiler/glsl/glsl_parser.yy | 1 +
 src/compiler/glsl/glsl_parser_extras.cpp | 4 ++--
 3 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index eee2248281..2a38a4b1f7 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -28,6 +28,7 @@
 #include "list.h"
 #include "glsl_parser_extras.h"
 #include "compiler/glsl_types.h"
+#include "util/bitset.h"
 
 struct _mesa_glsl_parse_state;
 
@@ -473,8 +474,11 @@ enum {
 
 struct ast_type_qualifier {
DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier);
+   DECLARE_BITSET_T(bitset_t, 128);
+
+   union flags {
+  flags() : i(0) {}
 
-   union {
   struct {
 unsigned invariant:1;
  unsigned precise:1;
@@ -636,7 +640,7 @@ struct ast_type_qualifier {
   q;
 
   /** \brief Set of flags, accessed as a bitmask. */
-  uint64_t i;
+  bitset_t i;
} flags;
 
/** Precision of the type (highp/medium/lowp). */
diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy
index 19147c7a3e..4faf9602a0 100644
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -96,6 +96,7 @@ static bool match_layout_qualifier(const char *s1, const char 
*s2,
 %parse-param {struct _mesa_glsl_parse_state *state}
 
 %union {
+   YYSTYPE() {}
int n;
int64_t n64;
float real;
diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 81d74e92ce..106417c5c3 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -1011,7 +1011,7 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
"an instance name are not allowed");
}
 
-   uint64_t interface_type_mask;
+   ast_type_qualifier::bitset_t interface_type_mask;
struct ast_type_qualifier temp_type_qualifier;
 
/* Get a bitmask containing only the in/out/uniform/buffer
@@ -1030,7 +1030,7 @@ _mesa_ast_process_interface_block(YYLTYPE *locp,
 * production rule guarantees that only one bit will be set (and
 * it will be in/out/uniform).
 */
-   uint64_t block_interface_qualifier = q.flags.i;
+   ast_type_qualifier::bitset_t block_interface_qualifier = q.flags.i;
 
block->default_layout.flags.i |= block_interface_qualifier;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): mesa: Expose EXT_shader_framebuffer_fetch(_non_coherent) on desktop and embedded GL.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 51562ea7a0678b8067f438f17a3d5fbe5280a997
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=51562ea7a0678b8067f438f17a3d5fbe5280a997

Author: Francisco Jerez 
Date:   Fri Feb 23 18:35:59 2018 -0800

mesa: Expose EXT_shader_framebuffer_fetch(_non_coherent) on desktop and 
embedded GL.

Reviewed-by: Plamena Manolova 

---

 docs/relnotes/18.1.0.html| 2 ++
 src/mesa/main/extensions_table.h | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
index 8dd2550ced..1d5201717f 100644
--- a/docs/relnotes/18.1.0.html
+++ b/docs/relnotes/18.1.0.html
@@ -48,6 +48,8 @@ Note: some of the new features are only available with 
certain drivers.
 GL_ARB_bindless_texture on nvc0/maxwell+
 GL_EXT_semaphore on radeonsi
 GL_EXT_semaphore_fd on radeonsi
+GL_EXT_shader_framebuffer_fetch on i965 on desktop GL (GLES was already 
supported)
+GL_EXT_shader_framebuffer_fetch_non_coherent on i965
 Disk shader cache support for i965 enabled by default
 
 
diff --git a/src/mesa/main/extensions_table.h b/src/mesa/main/extensions_table.h
index 6be16c4407..492f7c3d20 100644
--- a/src/mesa/main/extensions_table.h
+++ b/src/mesa/main/extensions_table.h
@@ -252,7 +252,8 @@ EXT(EXT_semaphore   , EXT_semaphore
 EXT(EXT_semaphore_fd, EXT_semaphore_fd 
  , GLL, GLC,  x , ES2, 2017)
 EXT(EXT_separate_shader_objects , dummy_true   
  ,  x ,  x ,  x , ES2, 2013)
 EXT(EXT_separate_specular_color , dummy_true   
  , GLL,  x ,  x ,  x , 1997)
-EXT(EXT_shader_framebuffer_fetch, EXT_shader_framebuffer_fetch 
  ,  x ,  x ,  x , ES2, 2013)
+EXT(EXT_shader_framebuffer_fetch, EXT_shader_framebuffer_fetch 
  , GLL, GLC,  x , ES2, 2013)
+EXT(EXT_shader_framebuffer_fetch_non_coherent, 
EXT_shader_framebuffer_fetch_non_coherent, GLL, GLC,  x, ES2, 2018)
 EXT(EXT_shader_integer_mix  , EXT_shader_integer_mix   
  , GLL, GLC,  x ,  30, 2013)
 EXT(EXT_shader_io_blocks, dummy_true   
  ,  x ,  x ,  x ,  31, 2014)
 EXT(EXT_shader_samples_identical, EXT_shader_samples_identical 
  , GLL, GLC,  x ,  31, 2015)

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Replace MESA_shader_framebuffer_fetch extension flags with EXT ones.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 6ebefb0fd5065bde02611172928a7cdeb9d32726
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6ebefb0fd5065bde02611172928a7cdeb9d32726

Author: Francisco Jerez 
Date:   Mon Feb 12 14:54:27 2018 -0800

glsl: Replace MESA_shader_framebuffer_fetch extension flags with EXT ones.

Reviewed-by: Plamena Manolova 

---

 src/compiler/glsl/glsl_parser_extras.cpp | 1 +
 src/compiler/glsl/glsl_parser_extras.h   | 9 +++--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/compiler/glsl/glsl_parser_extras.cpp 
b/src/compiler/glsl/glsl_parser_extras.cpp
index 106417c5c3..275c4d7571 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -707,6 +707,7 @@ static const _mesa_glsl_extension 
_mesa_glsl_supported_extensions[] = {
EXT_AEP(EXT_primitive_bounding_box),
EXT(EXT_separate_shader_objects),
EXT(EXT_shader_framebuffer_fetch),
+   EXT(EXT_shader_framebuffer_fetch_non_coherent),
EXT(EXT_shader_integer_mix),
EXT_AEP(EXT_shader_io_blocks),
EXT(EXT_shader_samples_identical),
diff --git a/src/compiler/glsl/glsl_parser_extras.h 
b/src/compiler/glsl/glsl_parser_extras.h
index f88cb78347..66bd1a3db6 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -317,8 +317,7 @@ struct _mesa_glsl_parse_state {
bool has_framebuffer_fetch() const
{
   return EXT_shader_framebuffer_fetch_enable ||
- MESA_shader_framebuffer_fetch_enable ||
- MESA_shader_framebuffer_fetch_non_coherent_enable;
+ EXT_shader_framebuffer_fetch_non_coherent_enable;
}
 
bool has_texture_cube_map_array() const
@@ -782,6 +781,8 @@ struct _mesa_glsl_parse_state {
bool EXT_separate_shader_objects_warn;
bool EXT_shader_framebuffer_fetch_enable;
bool EXT_shader_framebuffer_fetch_warn;
+   bool EXT_shader_framebuffer_fetch_non_coherent_enable;
+   bool EXT_shader_framebuffer_fetch_non_coherent_warn;
bool EXT_shader_integer_mix_enable;
bool EXT_shader_integer_mix_warn;
bool EXT_shader_io_blocks_enable;
@@ -800,10 +801,6 @@ struct _mesa_glsl_parse_state {
bool EXT_texture_cube_map_array_warn;
bool INTEL_conservative_rasterization_enable;
bool INTEL_conservative_rasterization_warn;
-   bool MESA_shader_framebuffer_fetch_enable;
-   bool MESA_shader_framebuffer_fetch_warn;
-   bool MESA_shader_framebuffer_fetch_non_coherent_enable;
-   bool MESA_shader_framebuffer_fetch_non_coherent_warn;
bool MESA_shader_integer_functions_enable;
bool MESA_shader_integer_functions_warn;
bool NV_image_formats_enable;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Specify framebuffer fetch coherency mode in lower_blend_equation_advanced().

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 537bb1da98c34eafbed714d468c56fc0af543e49
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=537bb1da98c34eafbed714d468c56fc0af543e49

Author: Francisco Jerez 
Date:   Wed Feb 14 11:53:49 2018 -0800

glsl: Specify framebuffer fetch coherency mode in 
lower_blend_equation_advanced().

This requires passing an extra argument to the lowering pass because
the KHR_blend_equation_advanced specification doesn't seem to define
any mechanism for the implementation to determine at compile-time
whether coherent blending can ever be used (not even an "#extension
KHR_blend_equation_advanced_coherent" directive seems to be required
in the shader source AFAICT).

In the long run we'll probably want to do state-dependent recompiles
based on the value of ctx->Color.BlendCoherent, but right now there
would be no benefit from that because the only driver that supports
coherent framebuffer fetch is i965 on SKL+ hardware, which is unable
to support the non-coherent path for the moment because of texture
layout issues, so framebuffer fetch coherency is always enabled for
it.

Reviewed-by: Plamena Manolova 

---

 src/compiler/glsl/ir_optimization.h | 2 +-
 src/compiler/glsl/lower_blend_equation_advanced.cpp | 3 ++-
 src/mesa/drivers/dri/i965/brw_link.cpp  | 3 ++-
 src/mesa/state_tracker/st_glsl_to_tgsi.cpp  | 3 ++-
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/compiler/glsl/ir_optimization.h 
b/src/compiler/glsl/ir_optimization.h
index 2b8c195151..81049a479e 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -166,7 +166,7 @@ bool lower_tess_level(gl_linked_shader *shader);
 
 bool lower_vertex_id(gl_linked_shader *shader);
 bool lower_cs_derived(gl_linked_shader *shader);
-bool lower_blend_equation_advanced(gl_linked_shader *shader);
+bool lower_blend_equation_advanced(gl_linked_shader *shader, bool coherent);
 
 bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state 
*state);
 void propagate_invariance(exec_list *instructions);
diff --git a/src/compiler/glsl/lower_blend_equation_advanced.cpp 
b/src/compiler/glsl/lower_blend_equation_advanced.cpp
index c6db58142c..b05a2e0f0b 100644
--- a/src/compiler/glsl/lower_blend_equation_advanced.cpp
+++ b/src/compiler/glsl/lower_blend_equation_advanced.cpp
@@ -462,7 +462,7 @@ get_main(gl_linked_shader *sh)
 }
 
 bool
-lower_blend_equation_advanced(struct gl_linked_shader *sh)
+lower_blend_equation_advanced(struct gl_linked_shader *sh, bool coherent)
 {
if (sh->Program->sh.fs.BlendSupport == 0)
   return false;
@@ -480,6 +480,7 @@ lower_blend_equation_advanced(struct gl_linked_shader *sh)
fb->data.location = FRAG_RESULT_DATA0;
fb->data.read_only = 1;
fb->data.fb_fetch_output = 1;
+   fb->data.memory_coherent = coherent;
fb->data.how_declared = ir_var_hidden;
 
ir_variable *mode = new(mem_ctx) ir_variable(glsl_type::uint_type,
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp 
b/src/mesa/drivers/dri/i965/brw_link.cpp
index f0598f591a..b08b56a935 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -99,7 +99,8 @@ process_glsl_ir(struct brw_context *brw,
 
ralloc_adopt(mem_ctx, shader->ir);
 
-   lower_blend_equation_advanced(shader);
+   lower_blend_equation_advanced(
+  shader, ctx->Extensions.KHR_blend_equation_advanced_coherent);
 
/* lower_packing_builtins() inserts arithmetic instructions, so it
 * must precede lower_instructions().
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index 7fef93949e..ccf4dabcc9 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7056,7 +7056,8 @@ st_link_shader(struct gl_context *ctx, struct 
gl_shader_program *prog)
   do_mat_op_to_vec(ir);
 
   if (stage == MESA_SHADER_FRAGMENT)
- lower_blend_equation_advanced(shader);
+ lower_blend_equation_advanced(
+shader, ctx->Extensions.KHR_blend_equation_advanced_coherent);
 
   lower_instructions(ir,
  MOD_TO_FLOOR |

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glapi: Update XML for last revision of EXT_shader_framebuffer_fetch.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e4124f9bc119ae22e34daea6f44bd3ddec454ec7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e4124f9bc119ae22e34daea6f44bd3ddec454ec7

Author: Francisco Jerez 
Date:   Mon Feb 12 14:46:39 2018 -0800

glapi: Update XML for last revision of EXT_shader_framebuffer_fetch.

Desktop GL is now supported, and there is an additional entry-point
for EXT_shader_framebuffer_fetch_non_coherent.

Reviewed-by: Plamena Manolova 

---

 src/mapi/glapi/gen/es_EXT.xml | 5 -
 src/mapi/glapi/gen/gl_API.xml | 6 ++
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/mapi/glapi/gen/es_EXT.xml b/src/mapi/glapi/gen/es_EXT.xml
index e5104259b6..a53fcd1e8a 100644
--- a/src/mapi/glapi/gen/es_EXT.xml
+++ b/src/mapi/glapi/gen/es_EXT.xml
@@ -842,11 +842,6 @@
 
 
 
-
-
-
-
-
 
 
 
diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index d13a3bfd83..38c1921047 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -12886,6 +12886,12 @@
 
 http://www.w3.org/2001/XInclude"/>
 
+
+
+
+
+
+
 
 
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): mesa: Rename MESA_shader_framebuffer_fetch gl_extensions bits to EXT.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 6a8ec78c2ab12d75f16e4a2f95e9be014dae021e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6a8ec78c2ab12d75f16e4a2f95e9be014dae021e

Author: Francisco Jerez 
Date:   Mon Feb 12 14:31:32 2018 -0800

mesa: Rename MESA_shader_framebuffer_fetch gl_extensions bits to EXT.

The changes I had originally planned for the MESA_shader_framebuffer_fetch
extension have been merged into the EXT spec, so there's no point in
keeping the MESA_shader_framebuffer_fetch extension enables.

Reviewed-by: Plamena Manolova 

---

 src/mesa/drivers/dri/i965/brw_draw.c | 2 +-
 src/mesa/drivers/dri/i965/brw_program.c  | 2 +-
 src/mesa/drivers/dri/i965/brw_wm.c   | 4 ++--
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 2 +-
 src/mesa/drivers/dri/i965/intel_extensions.c | 4 ++--
 src/mesa/main/barrier.c  | 2 +-
 src/mesa/main/extensions_table.h | 2 +-
 src/mesa/main/get.c  | 2 +-
 src/mesa/main/get_hash_params.py | 7 +++
 src/mesa/main/mtypes.h   | 4 ++--
 10 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_draw.c 
b/src/mesa/drivers/dri/i965/brw_draw.c
index 50cf8b12c7..299e7f929e 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -513,7 +513,7 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw,
}
 
/* Resolve color buffers for non-coherent framebuffer fetch. */
-   if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
+   if (!ctx->Extensions.EXT_shader_framebuffer_fetch &&
ctx->FragmentProgram._Current &&
ctx->FragmentProgram._Current->info.outputs_read) {
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 684890e8ba..527f003977 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -318,7 +318,7 @@ brw_framebuffer_fetch_barrier(struct gl_context *ctx)
struct brw_context *brw = brw_context(ctx);
const struct gen_device_info *devinfo = &brw->screen->devinfo;
 
-   if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
+   if (!ctx->Extensions.EXT_shader_framebuffer_fetch) {
   if (devinfo->gen >= 6) {
  brw_emit_pipe_control_flush(brw,
  PIPE_CONTROL_RENDER_TARGET_FLUSH |
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index cfc2d47a67..68d4ab88d7 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -573,7 +573,7 @@ brw_wm_populate_key(struct brw_context *brw, struct 
brw_wm_prog_key *key)
key->program_string_id = fp->id;
 
/* Whether reads from the framebuffer should behave coherently. */
-   key->coherent_fb_fetch = ctx->Extensions.MESA_shader_framebuffer_fetch;
+   key->coherent_fb_fetch = ctx->Extensions.EXT_shader_framebuffer_fetch;
 }
 
 void
@@ -645,7 +645,7 @@ brw_fs_precompile(struct gl_context *ctx, struct gl_program 
*prog)
key.program_string_id = bfp->id;
 
/* Whether reads from the framebuffer should behave coherently. */
-   key.coherent_fb_fetch = ctx->Extensions.MESA_shader_framebuffer_fetch;
+   key.coherent_fb_fetch = ctx->Extensions.EXT_shader_framebuffer_fetch;
 
uint32_t old_prog_offset = brw->wm.base.prog_offset;
struct brw_stage_prog_data *old_prog_data = brw->wm.base.prog_data;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 32d9e2c70f..23bf5a266c 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1032,7 +1032,7 @@ update_renderbuffer_read_surfaces(struct brw_context *brw)
   brw_wm_prog_data(brw->wm.base.prog_data);
 
if (wm_prog_data->has_render_target_reads &&
-   !ctx->Extensions.MESA_shader_framebuffer_fetch) {
+   !ctx->Extensions.EXT_shader_framebuffer_fetch) {
   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;
 
diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 5a6b12e52a..127371c5b8 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -153,7 +153,7 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.MESA_shader_integer_functions = ctx->Const.GLSLVersion >= 
130;
 
if (devinfo->is_g4x || devinfo->gen >= 5) {
-  ctx->Extensions.MESA_shader_framebuffer_fetch_non_coherent = true;
+  ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent = true;
   ctx->Extensions.KHR_blend_equation_advanced =

Mesa (master): glsl: Add support for the framebuffer fetch layout(noncoherent) qualifier.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: ef9e3f63ca369e3549b4f17b39934dc4b3cbbb05
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef9e3f63ca369e3549b4f17b39934dc4b3cbbb05

Author: Francisco Jerez 
Date:   Mon Feb 12 15:54:33 2018 -0800

glsl: Add support for the framebuffer fetch layout(noncoherent) qualifier.

This allows the application to request framebuffer fetch coherency
with per-fragment output granularity.  Coherent framebuffer fetch
outputs (which is the default if no qualifier is present for
compatibility with older versions of the EXT_shader_framebuffer_fetch
extension) will have ir_variable_data::memory_coherent set to true.

Reviewed-by: Plamena Manolova 

---

 src/compiler/glsl/ast.h |  5 
 src/compiler/glsl/ast_to_hir.cpp| 45 +
 src/compiler/glsl/ast_type.cpp  |  6 +++--
 src/compiler/glsl/builtin_variables.cpp |  1 +
 src/compiler/glsl/glsl_parser.yy|  6 +
 5 files changed, 61 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index 2a38a4b1f7..e5e4b572ff 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -635,6 +635,11 @@ struct ast_type_qualifier {
  unsigned bound_sampler:1;
  unsigned bound_image:1;
  /** \} */
+
+ /** \name Layout qualifiers for 
GL_EXT_shader_framebuffer_fetch_non_coherent */
+ /** \{ */
+ unsigned non_coherent:1;
+ /** \} */
   }
   /** \brief Set of flags, accessed by name. */
   q;
diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 966450ca78..5acbaa321a 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -2008,6 +2008,20 @@ ast_expression::do_hir(exec_list *instructions,
 _mesa_glsl_warning(&loc, state, "`%s' used uninitialized",
this->primary_expression.identifier);
  }
+
+ /* From the EXT_shader_framebuffer_fetch spec:
+  *
+  *   "Unless the GL_EXT_shader_framebuffer_fetch extension has been
+  *enabled in addition, it's an error to use gl_LastFragData if it
+  *hasn't been explicitly redeclared with layout(noncoherent)."
+  */
+ if (var->data.fb_fetch_output && var->data.memory_coherent &&
+ !state->EXT_shader_framebuffer_fetch_enable) {
+_mesa_glsl_error(&loc, state,
+ "invalid use of framebuffer fetch output not "
+ "qualified with layout(noncoherent)");
+ }
+
   } else {
  _mesa_glsl_error(& loc, state, "`%s' undeclared",
   this->primary_expression.identifier);
@@ -4002,6 +4016,33 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
  var->data.fb_fetch_output = (strcmp(var->name, "gl_LastFragData") == 
0);
}
 
+   if (var->data.fb_fetch_output) {
+  var->data.memory_coherent = !qual->flags.q.non_coherent;
+
+  /* From the EXT_shader_framebuffer_fetch spec:
+   *
+   *   "It is an error to declare an inout fragment output not qualified
+   *with layout(noncoherent) if the GL_EXT_shader_framebuffer_fetch
+   *extension hasn't been enabled."
+   */
+  if (var->data.memory_coherent &&
+  !state->EXT_shader_framebuffer_fetch_enable)
+ _mesa_glsl_error(loc, state,
+  "invalid declaration of framebuffer fetch output not 
"
+  "qualified with layout(noncoherent)");
+
+   } else {
+  /* From the EXT_shader_framebuffer_fetch spec:
+   *
+   *   "Fragment outputs declared inout may specify the following layout
+   *qualifier: [...] noncoherent"
+   */
+  if (qual->flags.q.non_coherent)
+ _mesa_glsl_error(loc, state,
+  "invalid layout(noncoherent) qualifier not part of "
+  "framebuffer fetch output declaration");
+   }
+
if (!is_parameter && is_varying_var(var, state->stage)) {
   /* User-defined ins/outs are not permitted in compute shaders. */
   if (state->stage == MESA_SHADER_COMPUTE) {
@@ -4268,8 +4309,12 @@ get_variable_being_redeclared(ir_variable **var_ptr, 
YYLTYPE loc,
*   "By default, gl_LastFragData is declared with the mediump precision
*qualifier. This can be changed by redeclaring the corresponding
*variables with the desired precision qualifier."
+   *
+   *   "Fragment shaders may specify the following layout qualifier only 
for
+   *redeclaring the built-in gl_LastFragData array

Mesa (master): glsl: Silence warnings when reading from a framebuffer fetch output.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: c6c64d4d6a134231cbdbe09e3c6c87adb811ac7d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c6c64d4d6a134231cbdbe09e3c6c87adb811ac7d

Author: Francisco Jerez 
Date:   Mon Feb 12 15:55:13 2018 -0800

glsl: Silence warnings when reading from a framebuffer fetch output.

Framebuffer fetch outputs are implicitly initialized upon entry to the
fragment shader.

Reviewed-by: Plamena Manolova 

---

 src/compiler/glsl/ast_to_hir.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 5acbaa321a..badfbe6816 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -4017,6 +4017,7 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
}
 
if (var->data.fb_fetch_output) {
+  var->data.assigned = true;
   var->data.memory_coherent = !qual->flags.q.non_coherent;
 
   /* From the EXT_shader_framebuffer_fetch spec:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): util: Add EXPLICIT_CONVERSION macro.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 8d1f1ce4124c1e0dbfc5f3d0578fbee6e24140c8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8d1f1ce4124c1e0dbfc5f3d0578fbee6e24140c8

Author: Francisco Jerez 
Date:   Mon Feb 12 16:32:20 2018 -0800

util: Add EXPLICIT_CONVERSION macro.

This can be used to specify that a C++ conversion operator is not
meant to be used for implicit conversions, which can lead to
unintended loss of information in some cases.  Implemented as a macro
in order to keep old GCC versions happy.

Reviewed-by: Plamena Manolova 

---

 src/util/macros.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/util/macros.h b/src/util/macros.h
index e3c785af50..6d3df90408 100644
--- a/src/util/macros.h
+++ b/src/util/macros.h
@@ -285,4 +285,14 @@ do {   \
 #define MIN3( A, B, C ) ((A) < (B) ? MIN2(A, C) : MIN2(B, C))
 #define MAX3( A, B, C ) ((A) > (B) ? MAX2(A, C) : MAX2(B, C))
 
+/**
+ * Macro for declaring an explicit conversion operator.  Defaults to an
+ * implicit conversion if C++11 is not supported.
+ */
+#if __cplusplus >= 201103L
+#define EXPLICIT_CONVERSION explicit
+#elif defined(__cplusplus)
+#define EXPLICIT_CONVERSION
+#endif
+
 #endif /* UTIL_MACROS_H */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965: Fix KHR_blend_equation_advanced with some render targets.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 27c829da28ab3cfac0195d02ffb13afa8fe0e23d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=27c829da28ab3cfac0195d02ffb13afa8fe0e23d

Author: Francisco Jerez 
Date:   Tue Feb 13 14:16:03 2018 -0800

i965: Fix KHR_blend_equation_advanced with some render targets.

This reverts two bogus and seemingly useless changes from the commits
referenced below, which broke KHR_blend_equation_advanced (and
EXT_shader_framebuffer_fetch_non_coherent which wasn't exposed yet)
for any kind of render target surface that would cause the
get_isl_surf() call in brw_emit_surface_state() to do anything useful
(notice how the result of get_isl_surf() is completely ignored by the
caller right now), as was the case while using those extensions with
1D array or 3D framebuffers in particular.

Fixes: f5859b45b1686e8116380d87 "i965/miptree: Switch remaining surfaces to isl"
Fixes: bf24c3539e4b6989512968ca "i965/miptree: Clean-up unused"
Cc: mesa-sta...@lists.freedesktop.org
Reviewed-by: Plamena Manolova 

---

 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 0b6016427b..32d9e2c70f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -89,6 +89,8 @@ get_isl_surf(struct brw_context *brw, struct 
intel_mipmap_tree *mt,
const enum isl_dim_layout dim_layout =
   get_isl_dim_layout(devinfo, mt->surf.tiling, target);
 
+   surf->dim = get_isl_surf_dim(target);
+
if (surf->dim_layout == dim_layout)
   return;
 
@@ -184,7 +186,7 @@ brw_emit_surface_state(struct brw_context *brw,
  brw->isl_dev.ss.align,
  surf_offset);
 
-   isl_surf_fill_state(&brw->isl_dev, state, .surf = &mt->surf, .view = &view,
+   isl_surf_fill_state(&brw->isl_dev, state, .surf = &surf, .view = &view,
.address = brw_state_reloc(&brw->batch,
   *surf_offset + 
brw->isl_dev.ss.addr_offset,
   mt->bo, offset, reloc_flags),

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): mesa: Rename dd_function_table::BlendBarrier to match latest EXT spec.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: d0bef79f12aca8d3db323cc49881100be16905fb
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d0bef79f12aca8d3db323cc49881100be16905fb

Author: Francisco Jerez 
Date:   Mon Feb 12 14:23:25 2018 -0800

mesa: Rename dd_function_table::BlendBarrier to match latest EXT spec.

This GL entry point was renamed to glFramebufferFetchBarrier() in the
EXT extension on request from Khronos members.  Update the Mesa
codebase to match the latest spec.

Reviewed-by: Plamena Manolova 

---

 src/mesa/drivers/dri/i965/brw_program.c   | 4 ++--
 src/mesa/main/barrier.c   | 2 +-
 src/mesa/main/dd.h| 6 +++---
 src/mesa/state_tracker/st_cb_texturebarrier.c | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index a513499516..684890e8ba 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -313,7 +313,7 @@ brw_memory_barrier(struct gl_context *ctx, GLbitfield 
barriers)
 }
 
 static void
-brw_blend_barrier(struct gl_context *ctx)
+brw_framebuffer_fetch_barrier(struct gl_context *ctx)
 {
struct brw_context *brw = brw_context(ctx);
const struct gen_device_info *devinfo = &brw->screen->devinfo;
@@ -443,7 +443,7 @@ void brwInitFragProgFuncs( struct dd_function_table 
*functions )
functions->LinkShader = brw_link_shader;
 
functions->MemoryBarrier = brw_memory_barrier;
-   functions->BlendBarrier = brw_blend_barrier;
+   functions->FramebufferFetchBarrier = brw_framebuffer_fetch_barrier;
 }
 
 struct shader_times {
diff --git a/src/mesa/main/barrier.c b/src/mesa/main/barrier.c
index 5284f28dc0..2c8194e6eb 100644
--- a/src/mesa/main/barrier.c
+++ b/src/mesa/main/barrier.c
@@ -134,5 +134,5 @@ _mesa_BlendBarrier(void)
   return;
}
 
-   ctx->Driver.BlendBarrier(ctx);
+   ctx->Driver.FramebufferFetchBarrier(ctx);
 }
diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
index 7a39f939c9..3e6a0418a2 100644
--- a/src/mesa/main/dd.h
+++ b/src/mesa/main/dd.h
@@ -963,15 +963,15 @@ struct dd_function_table {
/** @} */
 
/**
-* GL_MESA_shader_framebuffer_fetch_non_coherent rendering barrier.
+* GL_EXT_shader_framebuffer_fetch_non_coherent rendering barrier.
 *
 * On return from this function any framebuffer contents written by
 * previous draw commands are guaranteed to be visible from subsequent
 * fragment shader invocations using the
-* MESA_shader_framebuffer_fetch_non_coherent interface.
+* EXT_shader_framebuffer_fetch_non_coherent interface.
 */
/** @{ */
-   void (*BlendBarrier)(struct gl_context *ctx);
+   void (*FramebufferFetchBarrier)(struct gl_context *ctx);
/** @} */
 
/**
diff --git a/src/mesa/state_tracker/st_cb_texturebarrier.c 
b/src/mesa/state_tracker/st_cb_texturebarrier.c
index 29cd37c16c..2bff03b484 100644
--- a/src/mesa/state_tracker/st_cb_texturebarrier.c
+++ b/src/mesa/state_tracker/st_cb_texturebarrier.c
@@ -55,10 +55,10 @@ st_TextureBarrier(struct gl_context *ctx)
 
 
 /**
- * Called via ctx->Driver.BlendBarrier()
+ * Called via ctx->Driver.FramebufferFetchBarrier()
  */
 static void
-st_BlendBarrier(struct gl_context *ctx)
+st_FramebufferFetchBarrier(struct gl_context *ctx)
 {
struct pipe_context *pipe = st_context(ctx)->pipe;
 
@@ -130,6 +130,6 @@ st_MemoryBarrier(struct gl_context *ctx, GLbitfield 
barriers)
 void st_init_texture_barrier_functions(struct dd_function_table *functions)
 {
functions->TextureBarrier = st_TextureBarrier;
-   functions->BlendBarrier = st_BlendBarrier;
+   functions->FramebufferFetchBarrier = st_FramebufferFetchBarrier;
functions->MemoryBarrier = st_MemoryBarrier;
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Initialize ir_variable_data::fb_fetch_output earlier for GL(ES) 2.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 1bc01db95fb5162f01a2c4a9b2473dd7a5eddcd8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1bc01db95fb5162f01a2c4a9b2473dd7a5eddcd8

Author: Francisco Jerez 
Date:   Mon Feb 12 15:24:39 2018 -0800

glsl: Initialize ir_variable_data::fb_fetch_output earlier for GL(ES) 2.

At the same point where it is initialized on GL(ES) 3.0+ so we can
implement some common layout qualifier handling in a future commit.
Until now the fb_fetch_output flag would be inherited from the
original implicit gl_LastFragData declaration at a later point in the
AST to GLSL IR translation.

Reviewed-by: Plamena Manolova 

---

 src/compiler/glsl/ast_to_hir.cpp | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index 41e74815f3..966450ca78 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -3994,8 +3994,13 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
else if (qual->flags.q.shared_storage)
   var->data.mode = ir_var_shader_shared;
 
-   var->data.fb_fetch_output = state->stage == MESA_SHADER_FRAGMENT &&
-   qual->flags.q.in && qual->flags.q.out;
+   if (!is_parameter && state->has_framebuffer_fetch() &&
+   state->stage == MESA_SHADER_FRAGMENT) {
+  if (state->is_version(130, 300))
+ var->data.fb_fetch_output = qual->flags.q.in && qual->flags.q.out;
+  else
+ var->data.fb_fetch_output = (strcmp(var->name, "gl_LastFragData") == 
0);
+   }
 
if (!is_parameter && is_varying_var(var, state->stage)) {
   /* User-defined ins/outs are not permitted in compute shaders. */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Allow layout token for EXT_shader_framebuffer_fetch_non_coherent.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 0aeec504b484cb37b856dd574974d739f35e968b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0aeec504b484cb37b856dd574974d739f35e968b

Author: Francisco Jerez 
Date:   Mon Feb 12 15:26:45 2018 -0800

glsl: Allow layout token for EXT_shader_framebuffer_fetch_non_coherent.

EXT_shader_framebuffer_fetch_non_coherent requires layout qualifiers
even on GL(ES) 2.

Reviewed-by: Plamena Manolova 

---

 src/compiler/glsl/glsl_lexer.ll | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
index ed7a80a2bb..b7cf10018d 100644
--- a/src/compiler/glsl/glsl_lexer.ll
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -502,7 +502,8 @@ layout  {
  || yyextra->ARB_fragment_coord_conventions_enable
   || yyextra->ARB_shading_language_420pack_enable
   || yyextra->ARB_compute_shader_enable
-  || yyextra->ARB_tessellation_shader_enable) {
+  || yyextra->ARB_tessellation_shader_enable
+  || 
yyextra->EXT_shader_framebuffer_fetch_non_coherent_enable) {
  return LAYOUT_TOK;
   } else {
  return classify_identifier(yyextra, yytext, yyleng, 
yylval);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): mesa: Implement glFramebufferFetchBarrierEXT entry point.

2018-02-25 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 378e918e2891b2712b64c4ad1ef92bfc539a13e7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=378e918e2891b2712b64c4ad1ef92bfc539a13e7

Author: Francisco Jerez 
Date:   Mon Feb 12 14:48:20 2018 -0800

mesa: Implement glFramebufferFetchBarrierEXT entry point.

Reviewed-by: Plamena Manolova 

---

 src/mesa/main/barrier.c | 17 +++--
 src/mesa/main/barrier.h |  3 +++
 src/mesa/main/tests/dispatch_sanity.cpp |  6 ++
 3 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/barrier.c b/src/mesa/main/barrier.c
index 0f0b0a210d..2be30220e4 100644
--- a/src/mesa/main/barrier.c
+++ b/src/mesa/main/barrier.c
@@ -127,8 +127,7 @@ _mesa_BlendBarrier(void)
 {
GET_CURRENT_CONTEXT(ctx);
 
-   if (!ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent &&
-   !ctx->Extensions.KHR_blend_equation_advanced) {
+   if (!ctx->Extensions.KHR_blend_equation_advanced) {
   _mesa_error(ctx, GL_INVALID_OPERATION,
   "glBlendBarrier(not supported)");
   return;
@@ -136,3 +135,17 @@ _mesa_BlendBarrier(void)
 
ctx->Driver.FramebufferFetchBarrier(ctx);
 }
+
+void GLAPIENTRY
+_mesa_FramebufferFetchBarrierEXT(void)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (!ctx->Extensions.EXT_shader_framebuffer_fetch_non_coherent) {
+  _mesa_error(ctx, GL_INVALID_OPERATION,
+  "glFramebufferFetchBarrierEXT(not supported)");
+  return;
+   }
+
+   ctx->Driver.FramebufferFetchBarrier(ctx);
+}
diff --git a/src/mesa/main/barrier.h b/src/mesa/main/barrier.h
index 53ecf863f0..acc15c6779 100644
--- a/src/mesa/main/barrier.h
+++ b/src/mesa/main/barrier.h
@@ -53,4 +53,7 @@ _mesa_MemoryBarrierByRegion(GLbitfield barriers);
 void GLAPIENTRY
 _mesa_BlendBarrier(void);
 
+void GLAPIENTRY
+_mesa_FramebufferFetchBarrierEXT(void);
+
 #endif /* BARRIER_H */
diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
b/src/mesa/main/tests/dispatch_sanity.cpp
index d697343627..83a4b04654 100644
--- a/src/mesa/main/tests/dispatch_sanity.cpp
+++ b/src/mesa/main/tests/dispatch_sanity.cpp
@@ -1023,6 +1023,9 @@ const struct function common_desktop_functions_possible[] 
= {
/* GL_ARB_gl_spirv */
{ "glSpecializeShaderARB", 45, -1 },
 
+   /* GL_EXT_shader_framebuffer_fetch_non_coherent */
+   { "glFramebufferFetchBarrierEXT", 20, -1 },
+
{ NULL, 0, -1 }
 };
 
@@ -2446,6 +2449,9 @@ const struct function gles2_functions_possible[] = {
{ "glGetQueryObjectui64vEXT", 20, -1 },
{ "glQueryCounterEXT", 20, -1 },
 
+   /* GL_EXT_shader_framebuffer_fetch_non_coherent */
+   { "glFramebufferFetchBarrierEXT", 20, -1 },
+
{ NULL, 0, -1 }
 };
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/fs: Optimize and simplify the copy propagation dataflow logic.

2018-01-18 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 11674dad8acef294bc920e7f02ef45185420fbce
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=11674dad8acef294bc920e7f02ef45185420fbce

Author: Francisco Jerez 
Date:   Mon Dec 18 15:22:04 2017 -0800

intel/fs: Optimize and simplify the copy propagation dataflow logic.

Previously the dataflow propagation algorithm would calculate the ACP
live-in and -out sets in a two-pass fixed-point algorithm.  The first
pass would update the live-out sets of all basic blocks of the program
based on their live-in sets, while the second pass would update the
live-in sets based on the live-out sets.  This is incredibly
inefficient in the typical case where the CFG of the program is
approximately acyclic, because it can take up to 2*n passes for an ACP
entry introduced at the top of the program to reach the bottom (where
n is the number of basic blocks in the program), until which point the
algorithm won't be able to reach a fixed point.

The same effect can be achieved in a single pass by computing the
live-in and -out sets in lock-step, because that makes sure that
processing of any basic block will pick up the updated live-out sets
of the lexically preceding blocks.  This gives the dataflow
propagation algorithm effectively O(n) run-time instead of O(n^2) in
the acyclic case.

The time spent in dataflow propagation is reduced by 30x in the
GLES31.functional.ssbo.layout.random.all_shared_buffer.5 dEQP
test-case on my CHV system (the improvement is likely to be of the
same order of magnitude on other platforms).  This more than reverses
an apparent run-time regression in this test-case from my previous
copy-propagation undefined-value handling patch, which was ultimately
caused by the additional work introduced in that commit to account for
undefined values being multiplied by a huge quadratic factor.

According to Chad this test was failing on CHV due to a 30s time-out
imposed by the Android CTS (this was the case regardless of my
undefined-value handling patch, even though my patch substantially
exacerbated the issue).  On my CHV system this patch reduces the
overall run-time of the test by approximately 12x, getting us to
around 13s, well below the time-out.

v2: Initialize live-out set to the universal set to avoid rather
pessimistic dataflow estimation in shaders with cycles (Addresses
performance regression reported by Eero in GpuTest Piano).
Performance numbers given above still apply.  No shader-db changes
with respect to master.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104271
Reported-by: Chad Versace 
Reviewed-by: Ian Romanick 

---

 src/intel/compiler/brw_fs_copy_propagation.cpp | 35 --
 1 file changed, 11 insertions(+), 24 deletions(-)

diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp 
b/src/intel/compiler/brw_fs_copy_propagation.cpp
index af5635eace..92cc0a8de5 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -186,8 +186,7 @@ fs_copy_prop_dataflow::setup_initial_values()
 
/* Populate the initial values for the livein and liveout sets.  For the
 * block at the start of the program, livein = 0 and liveout = copy.
-* For the others, set liveout to 0 (the empty set) and livein to ~0
-* (the universal set).
+* For the others, set liveout and livein to ~0 (the universal set).
 */
foreach_block (block, cfg) {
   if (block->parents.is_empty()) {
@@ -197,7 +196,7 @@ fs_copy_prop_dataflow::setup_initial_values()
  }
   } else {
  for (int i = 0; i < bitset_words; i++) {
-bd[block->num].liveout[i] = 0u;
+bd[block->num].liveout[i] = ~0u;
 bd[block->num].livein[i] = ~0u;
  }
   }
@@ -228,34 +227,17 @@ fs_copy_prop_dataflow::run()
do {
   progress = false;
 
-  /* Update liveout for all blocks. */
   foreach_block (block, cfg) {
  if (block->parents.is_empty())
 continue;
 
  for (int i = 0; i < bitset_words; i++) {
 const BITSET_WORD old_liveout = bd[block->num].liveout[i];
-
-bd[block->num].liveout[i] =
-   bd[block->num].copy[i] | (bd[block->num].livein[i] &
- ~bd[block->num].kill[i]);
-
-if (old_liveout != bd[block->num].liveout[i])
-   progress = true;
- }
-  }
-
-  /* Update livein for all blocks.  If a copy is live out of all parent
-   * blocks, it's live coming in to this block.
-   */
-  foreach_block (block, cfg) {
- if (block->parents.is_empty())
-continue;
-
- for (int i = 0; i < bitset_words; i++) {
-const BITSET_WORD old_livein = bd[block->num].livein[i];
 BITSET_WORD livein_from_any_block = 0;
 
+/* Update livein for this block.  If a co

Mesa (master): i965/gen6-7/sol: Keep independent counters for the current and previous begin/end block.

2018-01-16 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: f476b3f6e7b9f61c5bd93cf463005fd88aacaeba
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f476b3f6e7b9f61c5bd93cf463005fd88aacaeba

Author: Francisco Jerez 
Date:   Thu Nov 16 14:27:41 2017 -0800

i965/gen6-7/sol: Keep independent counters for the current and previous 
begin/end block.

This allows us to aggregate the primitive counts of a completed
transform feedback begin/end block lazily, which in the most typical
case (where glDrawTransformFeedback is not used) will allow us to
avoid aggregating the primitive counters on the CPU altogether,
preventing a stall on previous rendering during
glBeginTransformFeedback(), which dramatically improves performance of
applications that rely heavily on transform feedback.

Improves performance of SynMark2 OglGSCloth by 65.52% ±0.25% (data
gathered on VLV).

Tested-By: Eero Tamminen 
Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_context.h|  9 ---
 src/mesa/drivers/dri/i965/gen6_sol.c   | 39 +-
 src/mesa/drivers/dri/i965/gen7_sol_state.c | 15 ++--
 3 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 950ede05fc..8d8ab71093 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -579,6 +579,12 @@ struct brw_transform_feedback_object {
struct brw_transform_feedback_counter counter;
 
/**
+* Count of primitives generated during the previous transform feedback
+* operation.  Used to implement DrawTransformFeedback().
+*/
+   struct brw_transform_feedback_counter previous_counter;
+
+   /**
 * Number of vertices written between last Begin/EndTransformFeedback().
 *
 * Used to implement DrawTransformFeedback().
@@ -1519,9 +1525,6 @@ brw_resume_transform_feedback(struct gl_context *ctx,
 void
 brw_save_primitives_written_counters(struct brw_context *brw,
  struct brw_transform_feedback_object 
*obj);
-void
-brw_compute_xfb_vertices_written(struct brw_context *brw,
- struct brw_transform_feedback_object *obj);
 GLsizei
 brw_get_transform_feedback_vertex_count(struct gl_context *ctx,
 struct gl_transform_feedback_object 
*obj,
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c 
b/src/mesa/drivers/dri/i965/gen6_sol.c
index a909339e16..b1baf01bcd 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -289,6 +289,8 @@ brw_save_primitives_written_counters(struct brw_context 
*brw,
/* Check if there's enough space for a new pair of four values. */
if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >= 4096) {
   aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
+   &obj->previous_counter);
+  aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
&obj->counter);
}
 
@@ -316,6 +318,7 @@ brw_save_primitives_written_counters(struct brw_context 
*brw,
 static void
 compute_vertices_written_so_far(struct brw_context *brw,
 struct brw_transform_feedback_object *obj,
+struct brw_transform_feedback_counter *counter,
 uint64_t *vertices_written)
 {
const struct gl_context *ctx = &brw->ctx;
@@ -336,25 +339,26 @@ compute_vertices_written_so_far(struct brw_context *brw,
}
 
/* Get the number of primitives generated. */
-   aggregate_transform_feedback_counter(brw, obj->prim_count_bo, 
&obj->counter);
+   aggregate_transform_feedback_counter(brw, obj->prim_count_bo, counter);
 
for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) {
-  vertices_written[i] = vertices_per_prim * obj->counter.accum[i];
+  vertices_written[i] = vertices_per_prim * counter->accum[i];
}
 }
 
 /**
- * Compute the number of vertices written by this transform feedback operation.
+ * Compute the number of vertices written by the last transform feedback
+ * begin/end block.
  */
-void
-brw_compute_xfb_vertices_written(struct brw_context *brw,
- struct brw_transform_feedback_object *obj)
+static void
+compute_xfb_vertices_written(struct brw_context *brw,
+ struct brw_transform_feedback_object *obj)
 {
if (obj->vertices_written_valid || !obj->base.EndedAnytime)
   return;
 
-   compute_vertices_written_so_far(brw, obj, obj->vertices_written);
-
+   compute_vertices_written_so_far(brw, obj, &obj->previous_counter,
+   obj->vertices_written);
obj->vertices_written_valid = true;
 }
 
@@ -376,7 +380,7 @@ brw_get_transform_feedback_vertex_

Mesa (master): i965/gen6-7/sol: Restructure primitive counter into a separate type.

2018-01-16 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: b0c8d61281d5e09cd216e1ff3f2c441f7c550a47
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b0c8d61281d5e09cd216e1ff3f2c441f7c550a47

Author: Francisco Jerez 
Date:   Fri Nov 17 14:06:04 2017 -0800

i965/gen6-7/sol: Restructure primitive counter into a separate type.

A primitive counter encapsulates a scalar aggregating counter for each
vertex stream along with a section within the primitive tally buffer
which hasn't been read out yet.  Defining this as a separate type will
allow us to keep multiple counter objects around for the same
transform feedback object without any code duplication.

Tested-By: Eero Tamminen 
Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_context.h| 38 ++---
 src/mesa/drivers/dri/i965/gen6_sol.c   | 53 ++
 src/mesa/drivers/dri/i965/gen7_sol_state.c |  6 +---
 3 files changed, 58 insertions(+), 39 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 0f0aad8534..950ede05fc 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -523,6 +523,36 @@ struct intel_batchbuffer {
 
 #define BRW_MAX_XFB_STREAMS 4
 
+struct brw_transform_feedback_counter {
+   /**
+* Index of the first entry of this counter within the primitive count BO.
+* An entry is considered to be an N-tuple of 64bit values, where N is the
+* number of vertex streams supported by the platform.
+*/
+   unsigned bo_start;
+
+   /**
+* Index one past the last entry of this counter within the primitive
+* count BO.
+*/
+   unsigned bo_end;
+
+   /**
+* Primitive count values accumulated while this counter was active,
+* excluding any entries buffered between \c bo_start and \c bo_end, which
+* haven't been accounted for yet.
+*/
+   uint64_t accum[BRW_MAX_XFB_STREAMS];
+};
+
+static inline void
+brw_reset_transform_feedback_counter(
+   struct brw_transform_feedback_counter *counter)
+{
+   counter->bo_start = counter->bo_end;
+   memset(&counter->accum, 0, sizeof(counter->accum));
+}
+
 struct brw_transform_feedback_object {
struct gl_transform_feedback_object base;
 
@@ -541,14 +571,12 @@ struct brw_transform_feedback_object {
 */
unsigned max_index;
 
+   struct brw_bo *prim_count_bo;
+
/**
 * Count of primitives generated during this transform feedback operation.
-*  @{
 */
-   uint64_t prims_generated[BRW_MAX_XFB_STREAMS];
-   struct brw_bo *prim_count_bo;
-   unsigned prim_count_buffer_index; /**< in number of uint64_t units */
-   /** @} */
+   struct brw_transform_feedback_counter counter;
 
/**
 * Number of vertices written between last Begin/EndTransformFeedback().
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c 
b/src/mesa/drivers/dri/i965/gen6_sol.c
index 7a510940c8..a909339e16 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -233,37 +233,36 @@ brw_delete_transform_feedback(struct gl_context *ctx,
  * Note that we expose one stream pre-Gen7, so the above is just (start, end).
  */
 static void
-tally_prims_generated(struct brw_context *brw,
-  struct brw_transform_feedback_object *obj)
+aggregate_transform_feedback_counter(
+   struct brw_context *brw,
+   struct brw_bo *bo,
+   struct brw_transform_feedback_counter *counter)
 {
-   const struct gl_context *ctx = &brw->ctx;
-   const int streams = ctx->Const.MaxVertexStreams;
+   const unsigned streams = brw->ctx.Const.MaxVertexStreams;
 
/* If the current batch is still contributing to the number of primitives
 * generated, flush it now so the results will be present when mapped.
 */
-   if (brw_batch_references(&brw->batch, obj->prim_count_bo))
+   if (brw_batch_references(&brw->batch, bo))
   intel_batchbuffer_flush(brw);
 
-   if (unlikely(brw->perf_debug && brw_bo_busy(obj->prim_count_bo)))
+   if (unlikely(brw->perf_debug && brw_bo_busy(bo)))
   perf_debug("Stalling for # of transform feedback primitives written.\n");
 
-   uint64_t *prim_counts = brw_bo_map(brw, obj->prim_count_bo, MAP_READ);
+   uint64_t *prim_counts = brw_bo_map(brw, bo, MAP_READ);
+   prim_counts += counter->bo_start * streams;
 
-   assert(obj->prim_count_buffer_index % (2 * streams) == 0);
-   int pairs = obj->prim_count_buffer_index / (2 * streams);
+   for (unsigned i = counter->bo_start; i + 1 < counter->bo_end; i += 2) {
+  for (unsigned s = 0; s < streams; s++)
+ counter->accum[s] += prim_counts[streams + s] - prim_counts[s];
 
-   for (int i = 0; i < pairs; i++) {
-  for (int s = 0; s < streams; s++) {
- obj->prims_generated[s] += prim_counts[streams + s] - prim_counts[s];
-  }
-  prim_counts += 2 * streams; /* move to the next pai

Mesa (master): i965/gen6-7/sol: Bump primitive counter BO size.

2018-01-16 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 53d8508f1d964423123b7a444e07eabe2d723f7e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=53d8508f1d964423123b7a444e07eabe2d723f7e

Author: Francisco Jerez 
Date:   Fri Nov 17 14:07:21 2017 -0800

i965/gen6-7/sol: Bump primitive counter BO size.

Improves performance of SynMark2 OglGSCloth by a further 9.65% ±0.59%
due to the reduction in overwraps of the primitive count buffer that
lead to a CPU stall on previous rendering.  Cumulative performance
improvement from the series 81.50% ±0.96% (data gathered on VLV).

Tested-By: Eero Tamminen 
Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/gen6_sol.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c 
b/src/mesa/drivers/dri/i965/gen6_sol.c
index b1baf01bcd..355acd4218 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -197,7 +197,7 @@ brw_new_transform_feedback(struct gl_context *ctx, GLuint 
name)
brw_obj->offset_bo =
   brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16, 64);
brw_obj->prim_count_bo =
-  brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 64);
+  brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 16384, 64);
 
return &brw_obj->base;
 }
@@ -287,7 +287,8 @@ brw_save_primitives_written_counters(struct brw_context 
*brw,
assert(obj->prim_count_bo != NULL);
 
/* Check if there's enough space for a new pair of four values. */
-   if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >= 4096) {
+   if ((obj->counter.bo_end + 2) * streams * sizeof(uint64_t) >=
+   obj->prim_count_bo->size) {
   aggregate_transform_feedback_counter(brw, obj->prim_count_bo,
&obj->previous_counter);
   aggregate_transform_feedback_counter(brw, obj->prim_count_bo,

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/fs: Initialize fs_visitor::grf_used on construction.

2017-12-21 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: b3e3cb990125c71c1fd172588852bd92bcfb8904
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3e3cb990125c71c1fd172588852bd92bcfb8904

Author: Francisco Jerez 
Date:   Sun Dec 17 00:21:13 2017 -0800

intel/fs: Initialize fs_visitor::grf_used on construction.

This should shut up some Valgrind errors during pre-regalloc
scheduling.  The errors were harmless since they could only have led
to the estimation of the bank conflict penalty of an instruction
pre-regalloc, which is inaccurate at that point of the program
compilation, but no less accurate than the intended "return 0"
fall-back path.  The scheduling pass is normally re-run after regalloc
with a well-defined grf_used value and accurate bank conflict
information.

Fixes: acf98ff933d "intel/fs: Teach instruction scheduler about GRF bank 
conflict cycles."
Reported-by: Eero Tamminen 
Reviewed-by: Ian Romanick 

---

 src/intel/compiler/brw_fs_visitor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/intel/compiler/brw_fs_visitor.cpp 
b/src/intel/compiler/brw_fs_visitor.cpp
index 481d9c51e7..7a5f6451f2 100644
--- a/src/intel/compiler/brw_fs_visitor.cpp
+++ b/src/intel/compiler/brw_fs_visitor.cpp
@@ -898,6 +898,7 @@ fs_visitor::init()
 
this->promoted_constants = 0,
 
+   this->grf_used = 0;
this->spilled_any_registers = false;
 }
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/fs/bank_conflicts: Use posix_memalign() instead of overaligned new to obtain vector storage.

2017-12-21 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 1aa79d5ed5fbc9d3ee3c4d279892c49e8393fd3b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1aa79d5ed5fbc9d3ee3c4d279892c49e8393fd3b

Author: Francisco Jerez 
Date:   Sun Dec 17 13:05:55 2017 -0800

intel/fs/bank_conflicts: Use posix_memalign() instead of overaligned new to 
obtain vector storage.

The weight_vector_type constructor was inadvertently assuming C++17
semantics of the new operator applied on a type with alignment
requirement greater than the largest fundamental alignment.
Unfortunately on earlier C++ dialects the implementation was allowed
to raise an allocation failure when the alignment requirement of the
allocated type was unsupported, in an implementation-defined fashion.
It's expected that a C++ implementation recent enough to implement
P0035R4 would have honored allocation requests for such over-aligned
types even if the C++17 dialect wasn't active, which is likely the
reason why this problem wasn't caught by our CI system.

A more elegant fix would involve wrapping the __SSE2__ block in a
'__cpp_aligned_new >= 201606' preprocessor conditional and continue
taking advantage of the language feature, but that would yield lower
compile-time performance on old compilers not implementing it
(e.g. GCC versions older than 7.0).

Fixes: af2c320190f3c731 "intel/fs: Implement GRF bank conflict mitigation pass."
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104226
Reported-by: Józef Kucia 
Reviewed-by: Ian Romanick 

---

 src/intel/compiler/brw_fs_bank_conflicts.cpp | 22 --
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp 
b/src/intel/compiler/brw_fs_bank_conflicts.cpp
index 0cd880d44f..e87fcbfc5e 100644
--- a/src/intel/compiler/brw_fs_bank_conflicts.cpp
+++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp
@@ -277,13 +277,10 @@ namespace {
struct weight_vector_type {
   weight_vector_type() : v(NULL), size(0) {}
 
-  weight_vector_type(unsigned n) :
- v(new vector_type[DIV_ROUND_UP(n, vector_width)]()),
- size(n) {}
+  weight_vector_type(unsigned n) : v(alloc(n)), size(n) {}
 
   weight_vector_type(const weight_vector_type &u) :
- v(new vector_type[DIV_ROUND_UP(u.size, vector_width)]()),
- size(u.size)
+ v(alloc(u.size)), size(u.size)
   {
  memcpy(v, u.v,
 DIV_ROUND_UP(u.size, vector_width) * sizeof(vector_type));
@@ -291,7 +288,7 @@ namespace {
 
   ~weight_vector_type()
   {
- delete[] v;
+ free(v);
   }
 
   weight_vector_type &
@@ -304,6 +301,19 @@ namespace {
 
   vector_type *v;
   unsigned size;
+
+   private:
+  static vector_type *
+  alloc(unsigned n)
+  {
+ const unsigned align = MAX2(sizeof(void *), __alignof__(vector_type));
+ const unsigned size = DIV_ROUND_UP(n, vector_width) * 
sizeof(vector_type);
+ void *p;
+ if (posix_memalign(&p, align, size))
+return NULL;
+ memset(p, 0, size);
+ return reinterpret_cast<vector_type *>(p);
+  }
};
 
/**

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/fs/bank_conflicts: Don't touch Gen7 MRF hack registers.

2017-12-12 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: acab52f5201683ec3f3698d25045ed1441ecdd14
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=acab52f5201683ec3f3698d25045ed1441ecdd14

Author: Francisco Jerez 
Date:   Mon Dec 11 20:24:53 2017 -0800

intel/fs/bank_conflicts: Don't touch Gen7 MRF hack registers.

Fixes: af2c320190f3c731 "intel/fs: Implement GRF bank conflict mitigation pass."
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=104199
Reported-by: Darius Spitznagel 
Reviewed-by: Matt Turner 

---

 src/intel/compiler/brw_fs.cpp|  2 +-
 src/intel/compiler/brw_fs.h  |  2 +-
 src/intel/compiler/brw_fs_bank_conflicts.cpp | 22 +-
 3 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 0e3ab381fa..3717c50e32 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -945,7 +945,7 @@ fs_inst::flags_written() const
  * instruction -- the FS opcodes often generate MOVs in addition.
  */
 int
-fs_visitor::implied_mrf_writes(fs_inst *inst)
+fs_visitor::implied_mrf_writes(fs_inst *inst) const
 {
if (inst->mlen == 0)
   return 0;
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 9c160068a7..63373580ee 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -277,7 +277,7 @@ public:
 
struct brw_reg interp_reg(int location, int channel);
 
-   int implied_mrf_writes(fs_inst *inst);
+   int implied_mrf_writes(fs_inst *inst) const;
 
virtual void dump_instructions();
virtual void dump_instructions(const char *name);
diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp 
b/src/intel/compiler/brw_fs_bank_conflicts.cpp
index 42cdc6ef7d..0cd880d44f 100644
--- a/src/intel/compiler/brw_fs_bank_conflicts.cpp
+++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp
@@ -530,12 +530,12 @@ namespace {
   for (unsigned reg = 0; reg < 2; reg++)
  constrained[p.atom_of_reg(reg)] = true;
 
-  /* Assume that anything referenced via fixed GRFs is baked into the
-   * hardware's fixed-function logic and may be unsafe to move around.
-   * Also take into account the source GRF restrictions of EOT
-   * send-message instructions.
-   */
   foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
+ /* Assume that anything referenced via fixed GRFs is baked into the
+  * hardware's fixed-function logic and may be unsafe to move around.
+  * Also take into account the source GRF restrictions of EOT
+  * send-message instructions.
+  */
  if (inst->dst.file == FIXED_GRF)
 constrained[p.atom_of_reg(reg_of(inst->dst))] = true;
 
@@ -544,6 +544,18 @@ namespace {
 (is_grf(inst->src[i]) && inst->eot))
constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true;
  }
+
+ /* The location of the Gen7 MRF hack registers is hard-coded in the
+  * rest of the compiler back-end.  Don't attempt to move them around.
+  */
+ if (v->devinfo->gen >= 7) {
+assert(inst->dst.file != MRF);
+
+for (int i = 0; i < v->implied_mrf_writes(inst); i++) {
+   const unsigned reg = GEN7_MRF_HACK_START + inst->base_mrf + i;
+   constrained[p.atom_of_reg(reg)] = true;
+}
+ }
   }
 
   return constrained;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/cfg: Represent divergent control flow paths caused by non-uniform loop execution.

2017-12-07 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 4d1959e69328cf0d59f0ec7aeea5a2b704ef0c5f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4d1959e69328cf0d59f0ec7aeea5a2b704ef0c5f

Author: Francisco Jerez 
Date:   Fri Oct 13 17:52:00 2017 -0700

intel/cfg: Represent divergent control flow paths caused by non-uniform loop 
execution.

This addresses a long-standing back-end compiler bug that could lead
to cross-channel data corruption in loops executed non-uniformly.  In
some cases live variables extending through a loop divergence point
(e.g. a non-uniform break) into a convergence point (e.g. the end of
the loop) wouldn't be considered live along all physical control flow
paths the SIMD thread could possibly have taken in between due to some
channels remaining in the loop for additional iterations.

This patch fixes the problem by extending the CFG with physical edges
that don't exist in the idealized non-vectorized program, but
represent valid control flow paths the SIMD EU may take due to the
divergence of logical threads.  This makes sense because the i965 IR
is explicitly SIMD, and it's not uncommon for instructions to have an
influence on neighboring channels (e.g. a force_writemask_all header
setup), so the behavior of the SIMD thread as a whole needs to be
considered.

No changes in shader-db.

Reviewed-by: Jason Ekstrand 
Reviewed-by: Kenneth Graunke 

---

 src/intel/compiler/brw_cfg.cpp | 75 ++
 1 file changed, 69 insertions(+), 6 deletions(-)

diff --git a/src/intel/compiler/brw_cfg.cpp b/src/intel/compiler/brw_cfg.cpp
index fad12eec58..600b428a49 100644
--- a/src/intel/compiler/brw_cfg.cpp
+++ b/src/intel/compiler/brw_cfg.cpp
@@ -98,6 +98,7 @@ ends_block(const backend_instruction *inst)
   op == BRW_OPCODE_ELSE ||
   op == BRW_OPCODE_CONTINUE ||
   op == BRW_OPCODE_BREAK ||
+  op == BRW_OPCODE_DO ||
   op == BRW_OPCODE_WHILE;
 }
 
@@ -268,13 +269,57 @@ cfg_t::cfg_t(exec_list *instructions)
  }
 
  cur->instructions.push_tail(inst);
+
+ /* Represent divergent execution of the loop as a pair of alternative
+  * edges coming out of the DO instruction: For any physical iteration
+  * of the loop a given logical thread can either start off enabled
+  * (which is represented as the "next" successor), or disabled (if it
+  * has reached a non-uniform exit of the loop during a previous
+  * iteration, which is represented as the "cur_while" successor).
+  *
+  * The disabled edge will be taken by the logical thread anytime we
+  * arrive at the DO instruction through a back-edge coming from a
+  * conditional exit of the loop where divergent control flow started.
+  *
+  * This guarantees that there is a control-flow path from any
+  * divergence point of the loop into the convergence point
+  * (immediately past the WHILE instruction) such that it overlaps the
+  * whole IP region of divergent control flow (potentially the whole
+  * loop) *and* doesn't imply the execution of any instructions part
+  * of the loop (since the corresponding execution mask bit will be
+  * disabled for a diverging thread).
+  *
+  * This way we make sure that any variables that are live throughout
+  * the region of divergence for an inactive logical thread are also
+  * considered to interfere with any other variables assigned by
+  * active logical threads within the same physical region of the
+  * program, since otherwise we would risk cross-channel data
+  * corruption.
+  */
+ next = new_block();
+ cur->add_successor(mem_ctx, next);
+ cur->add_successor(mem_ctx, cur_while);
+ set_next_block(&cur, next, ip);
 break;
 
   case BRW_OPCODE_CONTINUE:
  cur->instructions.push_tail(inst);
 
+ /* A conditional CONTINUE may start a region of divergent control
+  * flow until the start of the next loop iteration (*not* until the
+  * end of the loop which is why the successor is not the top-level
+  * divergence point at cur_do).  The live interval of any variable
+  * extending through a CONTINUE edge is guaranteed to overlap the
+  * whole region of divergent execution, because any variable live-out
+  * at the CONTINUE instruction will also be live-in at the top of the
+  * loop, and therefore also live-out at the bottom-most point of the
+  * loop which is reachable from the top (since a control flow path
+  * exists from a definition of the variable through this CONTINUE
+  * instruction, the top of the loop, the (reachable) bottom of the
+  * loop, the top of the loop again, into a use of the variable).
+  */

Mesa (master): intel/fs: Restrict live intervals to the subset possibly reachable from any definition.

2017-12-07 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: c3c1aa5aeb921caa2ec18c2320ceb94854e0f47c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3c1aa5aeb921caa2ec18c2320ceb94854e0f47c

Author: Francisco Jerez 
Date:   Thu Sep  7 00:26:03 2017 -0700

intel/fs: Restrict live intervals to the subset possibly reachable from any 
definition.

Currently the liveness analysis pass would extend a live interval up
to the top of the program when no unconditional and complete
definition of the variable is found that dominates all of its uses.

This can lead to a serious performance problem in shaders containing
many partial writes, like scalar arithmetic, FP64 and soon FP16
operations.  The number of oversize live intervals in such workloads
can cause the compilation time of the shader to explode because of the
worse than quadratic behavior of the register allocator and scheduler
when running out of registers, and it can also cause the running time
of the shader to explode due to the amount of spilling it leads to,
which is orders of magnitude slower than GRF memory.

This patch fixes it by computing the intersection of our current live
intervals with the subset of the program that can possibly be reached
from any definition of the variable.  Extending the storage allocation
of the variable beyond that is pretty useless because its value is
guaranteed to be undefined at a point that cannot be reached from any
definition.

According to Jason, this improves performance of the subgroup Vulkan
CTS tests significantly (e.g. the runtime of the dvec4 broadcast test
improves by nearly 50x).

No significant change in the running time of shader-db (with 5%
statistical significance).

shader-db results on IVB:

  total cycles in shared programs: 61108780 -> 60932856 (-0.29%)
  cycles in affected programs: 16335482 -> 16159558 (-1.08%)
  helped: 5121
  HURT: 4347

  total spills in shared programs: 1309 -> 1288 (-1.60%)
  spills in affected programs: 249 -> 228 (-8.43%)
  helped: 3
  HURT: 0

  total fills in shared programs: 1652 -> 1597 (-3.33%)
  fills in affected programs: 262 -> 207 (-20.99%)
  helped: 4
  HURT: 0

  LOST:   2
  GAINED: 209

shader-db results on BDW:

  total cycles in shared programs: 67617262 -> 67361220 (-0.38%)
  cycles in affected programs: 23397142 -> 23141100 (-1.09%)
  helped: 8045
  HURT: 6488

  total spills in shared programs: 1456 -> 1252 (-14.01%)
  spills in affected programs: 465 -> 261 (-43.87%)
  helped: 3
  HURT: 0

  total fills in shared programs: 1720 -> 1465 (-14.83%)
  fills in affected programs: 471 -> 216 (-54.14%)
  helped: 4
  HURT: 0

  LOST:   2
  GAINED: 162

shader-db results on SKL:

  total cycles in shared programs: 65436248 -> 65245186 (-0.29%)
  cycles in affected programs: 22560936 -> 22369874 (-0.85%)
  helped: 8457
  HURT: 6247

  total spills in shared programs: 437 -> 437 (0.00%)
  spills in affected programs: 0 -> 0
  helped: 0
  HURT: 0

  total fills in shared programs: 870 -> 854 (-1.84%)
  fills in affected programs: 16 -> 0
  helped: 1
  HURT: 0

  LOST:   0
  GAINED: 107

Reviewed-by: Jason Ekstrand 

---

 src/intel/compiler/brw_fs_live_variables.cpp | 34 
 src/intel/compiler/brw_fs_live_variables.h   | 12 ++
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/src/intel/compiler/brw_fs_live_variables.cpp 
b/src/intel/compiler/brw_fs_live_variables.cpp
index c449672a51..059f076fa5 100644
--- a/src/intel/compiler/brw_fs_live_variables.cpp
+++ b/src/intel/compiler/brw_fs_live_variables.cpp
@@ -83,9 +83,11 @@ fs_live_variables::setup_one_write(struct block_data *bd, 
fs_inst *inst,
/* The def[] bitset marks when an initialization in a block completely
 * screens off previous updates of that variable (VGRF channel).
 */
-   if (inst->dst.file == VGRF && !inst->is_partial_write()) {
-  if (!BITSET_TEST(bd->use, var))
+   if (inst->dst.file == VGRF) {
+  if (!inst->is_partial_write() && !BITSET_TEST(bd->use, var))
  BITSET_SET(bd->def, var);
+
+  BITSET_SET(bd->defout, var);
}
 }
 
@@ -199,6 +201,28 @@ fs_live_variables::compute_live_variables()
  }
   }
}
+
+   /* Propagate defin and defout down the CFG to calculate the union of live
+* variables potentially defined along any possible control flow path.
+*/
+   do {
+  cont = false;
+
+  foreach_block (block, cfg) {
+ const struct block_data *bd = &block_data[block->num];
+
+foreach_list_typed(bblock_link, child_link, link, &block->children) {
+struct block_data *child_bd = &block_data[child_link->block->num];
+
+   for (int i = 0; i < bitset_words; i++) {
+   const BITSET_WORD new_def = bd->defout[i] & ~child_bd->defin[i];
+   child_bd->defin[i] |= new_def;
+   child_bd->defout[i] |= new_def;

Mesa (master): intel/fs: Don't let undefined values prevent copy propagation.

2017-12-07 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 9355116bdad6ee9914554de8e48ba271bd36a8eb
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9355116bdad6ee9914554de8e48ba271bd36a8eb

Author: Francisco Jerez 
Date:   Mon Oct 23 13:47:10 2017 -0700

intel/fs: Don't let undefined values prevent copy propagation.

This makes the dataflow propagation logic of the copy propagation pass
more intelligent in cases where the destination of a copy is known to
be undefined for some incoming CFG edges, building upon the
definedness information provided by the last patch.  Helps a few
programs, and avoids a handful of shader-db regressions from the next
patch.

shader-db results on ILK:

  total instructions in shared programs: 6541547 -> 6541523 (-0.00%)
  instructions in affected programs: 360 -> 336 (-6.67%)
  helped: 8
  HURT: 0

  LOST:   0
  GAINED: 10

shader-db results on BDW:

  total instructions in shared programs: 8174323 -> 8173882 (-0.01%)
  instructions in affected programs: 7730 -> 7289 (-5.71%)
  helped: 5
  HURT: 2

  LOST:   0
  GAINED: 4

shader-db results on SKL:

  total instructions in shared programs: 8185669 -> 8184598 (-0.01%)
  instructions in affected programs: 10364 -> 9293 (-10.33%)
  helped: 5
  HURT: 2

  LOST:   0
  GAINED: 2

Reviewed-by: Jason Ekstrand 

---

 src/intel/compiler/brw_fs_copy_propagation.cpp | 50 --
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp 
b/src/intel/compiler/brw_fs_copy_propagation.cpp
index d4d01d783c..af5635eace 100644
--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
+++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
@@ -36,9 +36,12 @@
 
 #include "util/bitset.h"
 #include "brw_fs.h"
+#include "brw_fs_live_variables.h"
 #include "brw_cfg.h"
 #include "brw_eu.h"
 
+using namespace brw;
+
 namespace { /* avoid conflict with opt_copy_propagation_elements */
 struct acp_entry : public exec_node {
fs_reg dst;
@@ -77,12 +80,19 @@ struct block_data {
 * course of this block.
 */
BITSET_WORD *kill;
+
+   /**
+* Which entries in the fs_copy_prop_dataflow acp table are guaranteed to
+* have a fully uninitialized destination at the end of this block.
+*/
+   BITSET_WORD *undef;
 };
 
 class fs_copy_prop_dataflow
 {
 public:
fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
+ const fs_live_variables *live,
  exec_list *out_acp[ACP_HASH_SIZE]);
 
void setup_initial_values();
@@ -92,6 +102,7 @@ public:
 
void *mem_ctx;
cfg_t *cfg;
+   const fs_live_variables *live;
 
acp_entry **acp;
int num_acp;
@@ -102,8 +113,9 @@ public:
 } /* anonymous namespace */
 
 fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
+ const fs_live_variables *live,
  exec_list *out_acp[ACP_HASH_SIZE])
-   : mem_ctx(mem_ctx), cfg(cfg)
+   : mem_ctx(mem_ctx), cfg(cfg), live(live)
 {
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
 
@@ -124,6 +136,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, 
cfg_t *cfg,
   bd[block->num].liveout = rzalloc_array(bd, BITSET_WORD, bitset_words);
   bd[block->num].copy = rzalloc_array(bd, BITSET_WORD, bitset_words);
   bd[block->num].kill = rzalloc_array(bd, BITSET_WORD, bitset_words);
+  bd[block->num].undef = rzalloc_array(bd, BITSET_WORD, bitset_words);
 
   for (int i = 0; i < ACP_HASH_SIZE; i++) {
  foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) {
@@ -189,6 +202,18 @@ fs_copy_prop_dataflow::setup_initial_values()
  }
   }
}
+
+   /* Initialize the undef set. */
+   foreach_block (block, cfg) {
+  for (int i = 0; i < num_acp; i++) {
+ BITSET_SET(bd[block->num].undef, i);
+ for (unsigned off = 0; off < acp[i]->size_written; off += REG_SIZE) {
+if (BITSET_TEST(live->block_data[block->num].defout,
+live->var_from_reg(byte_offset(acp[i]->dst, off))))
+   BITSET_CLEAR(bd[block->num].undef, i);
+ }
+  }
+   }
 }
 
 /**
@@ -229,13 +254,30 @@ fs_copy_prop_dataflow::run()
 
  for (int i = 0; i < bitset_words; i++) {
 const BITSET_WORD old_livein = bd[block->num].livein[i];
+BITSET_WORD livein_from_any_block = 0;
 
 bd[block->num].livein[i] = ~0u;
 foreach_list_typed(bblock_link, parent_link, link, 
&block->parents) {
bblock_t *parent = parent_link->block;
-   bd[block->num].livein[i] &= bd[parent->num].liveout[i];
+   /* Consider ACP entries with a known-undefined destination to
+* be available from the parent.  This is valid beca

Mesa (master): intel/fs: Teach instruction scheduler about GRF bank conflict cycles.

2017-12-07 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: acf98ff933d338c521d7c6a57c17a010149eb344
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=acf98ff933d338c521d7c6a57c17a010149eb344

Author: Francisco Jerez 
Date:   Wed Dec  6 11:42:54 2017 -0800

intel/fs: Teach instruction scheduler about GRF bank conflict cycles.

This should allow the post-RA scheduler to do a slightly better job at
hiding latency in presence of instructions incurring bank conflicts.
The main purpose of this patch is not to improve performance though,
but to get conflict cycles to show up in shader-db statistics in order
to make sure that regressions in the bank conflict mitigation pass
don't go unnoticed.

Acked-by: Matt Turner 

---

 src/intel/compiler/brw_fs.h  |  1 +
 src/intel/compiler/brw_fs_bank_conflicts.cpp | 19 +++
 src/intel/compiler/brw_schedule_instructions.cpp |  5 +++--
 3 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 0cec6fdcba..9c160068a7 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -146,6 +146,7 @@ public:
bool opt_drop_redundant_mov_to_flags();
bool opt_register_renaming();
bool opt_bank_conflicts();
+   unsigned bank_conflict_cycles(const fs_inst *inst) const;
bool register_coalesce();
bool compute_to_mrf();
bool eliminate_find_live_channel();
diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp 
b/src/intel/compiler/brw_fs_bank_conflicts.cpp
index b64a3d4a8a..42cdc6ef7d 100644
--- a/src/intel/compiler/brw_fs_bank_conflicts.cpp
+++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp
@@ -891,3 +891,22 @@ fs_visitor::opt_bank_conflicts()
delete[] constrained;
return true;
 }
+
+/**
+ * Estimate the number of GRF bank conflict cycles incurred by an instruction.
+ *
+ * Note that this neglects conflict cycles prior to register allocation
+ * because we don't know which bank each VGRF is going to end up aligned to.
+ */
+unsigned
+fs_visitor::bank_conflict_cycles(const fs_inst *inst) const
+{
+   if (grf_used && inst->is_3src(devinfo) &&
+   is_grf(inst->src[1]) && is_grf(inst->src[2]) &&
+   bank_of(reg_of(inst->src[1])) == bank_of(reg_of(inst->src[2])) &&
+   !is_conflict_optimized_out(devinfo, inst)) {
+  return DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE);
+   } else {
+  return 0;
+   }
+}
diff --git a/src/intel/compiler/brw_schedule_instructions.cpp 
b/src/intel/compiler/brw_schedule_instructions.cpp
index a1e825c661..692f712532 100644
--- a/src/intel/compiler/brw_schedule_instructions.cpp
+++ b/src/intel/compiler/brw_schedule_instructions.cpp
@@ -1543,10 +1543,11 @@ 
vec4_instruction_scheduler::choose_instruction_to_schedule()
 int
 fs_instruction_scheduler::issue_time(backend_instruction *inst)
 {
+   const unsigned overhead = v->bank_conflict_cycles((fs_inst *)inst);
if (is_compressed((fs_inst *)inst))
-  return 4;
+  return 4 + overhead;
else
-  return 2;
+  return 2 + overhead;
 }
 
 int

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/fs: Implement GRF bank conflict mitigation pass.

2017-12-07 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: af2c320190f3c73180f1610c8df955a7fa2a4d09
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=af2c320190f3c73180f1610c8df955a7fa2a4d09

Author: Francisco Jerez 
Date:   Thu Jun 15 15:23:57 2017 -0700

intel/fs: Implement GRF bank conflict mitigation pass.

Unnecessary GRF bank conflicts increase the issue time of ternary
instructions (the overwhelmingly most common of which is MAD) by
roughly 50%, leading to reduced ALU throughput.  This pass attempts to
minimize the number of bank conflicts by rearranging the layout of the
GRF space post-register allocation.  It's in general not possible to
eliminate all of them without introducing extra copies, which are
typically more expensive than the bank conflict itself.

In a shader-db run on SKL this helps roughly 46k shaders:

   total conflicts in shared programs: 1008981 -> 600461 (-40.49%)
   conflicts in affected programs: 816222 -> 407702 (-50.05%)
   helped: 46234
   HURT: 72

The running time of shader-db itself on SKL seems to be increased by
roughly 2.52%±1.13% with n=20 due to the additional work done by the
compiler back-end.

On earlier generations the pass is somewhat less effective in relative
terms because the hardware incurs a bank conflict anytime the last two
sources of the instruction are duplicate (e.g. while trying to square
a value using MAD), which is impossible to avoid without introducing
copies.  E.g. for a shader-db run on SNB:

   total conflicts in shared programs: 944636 -> 623185 (-34.03%)
   conflicts in affected programs: 853258 -> 531807 (-37.67%)
   helped: 31052
   HURT: 19

And on BDW:

   total conflicts in shared programs: 1418393 -> 987539 (-30.38%)
   conflicts in affected programs: 1179787 -> 748933 (-36.52%)
   helped: 47592
   HURT: 70

On SKL GT4e this improves performance of GpuTest Volplosion by 3.64%
±0.33% with n=16.

NOTE: This patch intentionally disregards some i965 coding conventions
  for the sake of reviewability.  This is addressed by the next
  squash patch which introduces an amount of (for the most part
  boring) boilerplate that might distract reviewers from the
  non-trivial algorithmic details of the pass.

The following patch is squashed in:

SQUASH: intel/fs/bank_conflicts: Roll back to the nineties.

Acked-by: Matt Turner 

---

 src/intel/Makefile.sources   |   1 +
 src/intel/compiler/brw_fs.cpp|   2 +
 src/intel/compiler/brw_fs.h  |   1 +
 src/intel/compiler/brw_fs_bank_conflicts.cpp | 893 +++
 src/intel/compiler/meson.build   |   1 +
 5 files changed, 898 insertions(+)

diff --git a/src/intel/Makefile.sources b/src/intel/Makefile.sources
index cdb10ece35..1c62bad816 100644
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -48,6 +48,7 @@ COMPILER_FILES = \
compiler/brw_eu_util.c \
compiler/brw_eu_validate.c \
compiler/brw_fs_builder.h \
+   compiler/brw_fs_bank_conflicts.cpp \
compiler/brw_fs_cmod_propagation.cpp \
compiler/brw_fs_combine_constants.cpp \
compiler/brw_fs_copy_propagation.cpp \
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 93bb6b4673..c5d4f5634d 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -6065,6 +6065,8 @@ fs_visitor::allocate_registers(unsigned 
min_dispatch_width, bool allow_spilling)
if (failed)
   return;
 
+   opt_bank_conflicts();
+
schedule_instructions(SCHEDULE_POST);
 
if (last_scratch > 0) {
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index 30557324d5..0cec6fdcba 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -145,6 +145,7 @@ public:
exec_list *acp);
bool opt_drop_redundant_mov_to_flags();
bool opt_register_renaming();
+   bool opt_bank_conflicts();
bool register_coalesce();
bool compute_to_mrf();
bool eliminate_find_live_channel();
diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp 
b/src/intel/compiler/brw_fs_bank_conflicts.cpp
new file mode 100644
index 0000000000..b64a3d4a8a
--- /dev/null
+++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp
@@ -0,0 +1,893 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Softwa

Mesa (master): anv: Check that in_fence fd is valid before closing it.

2017-08-22 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e29ccaac298d04ad4272af2d8b8d7a953c523e28
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e29ccaac298d04ad4272af2d8b8d7a953c523e28

Author: Francisco Jerez 
Date:   Fri Aug 18 12:04:55 2017 -0700

anv: Check that in_fence fd is valid before closing it.

Probably harmless, but will overwrite errno with a failure status
code.  Reported by coverity.

CID 1416600: Argument cannot be negative (NEGATIVE_RETURNS)
Fixes: 5c4e4932e02 (anv: Implement support for exporting semaphores as FENCE_FD)
Reviewed-by: Lionel Landwerlin 

---

 src/intel/vulkan/anv_batch_chain.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/anv_batch_chain.c 
b/src/intel/vulkan/anv_batch_chain.c
index 26b5375903..1e7455f71e 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1571,7 +1571,8 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos);
 
/* Execbuf does not consume the in_fence.  It's our job to close it. */
-   close(in_fence);
+   if (in_fence != -1)
+  close(in_fence);
 
for (uint32_t i = 0; i < num_in_semaphores; i++) {
   ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): anv: Add error handling to setup_empty_execbuf().

2017-08-22 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 7ca124a6a3987fbfc09bc530761d44714c0da773
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7ca124a6a3987fbfc09bc530761d44714c0da773

Author: Francisco Jerez 
Date:   Fri Aug 18 11:00:42 2017 -0700

anv: Add error handling to setup_empty_execbuf().

The anv_execbuf_add_bo() call can actually fail in practice, which
should cause the QueueSubmit operation to fail.  Reported by Coverity.

CID: 1416606: Unchecked return value (CHECKED_RETURN)
Fixes: 017cdb10cf (anv: Submit a dummy batch when only semaphores are provided.)
Reviewed-by: Lionel Landwerlin 

---

 src/intel/vulkan/anv_batch_chain.c | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/intel/vulkan/anv_batch_chain.c 
b/src/intel/vulkan/anv_batch_chain.c
index 0078cc5142..26b5375903 100644
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1424,11 +1424,13 @@ setup_execbuf_for_cmd_buffer(struct anv_execbuf 
*execbuf,
return VK_SUCCESS;
 }
 
-static void
+static VkResult
 setup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device)
 {
-   anv_execbuf_add_bo(execbuf, &device->trivial_batch_bo, NULL, 0,
-  &device->alloc);
+   VkResult result = anv_execbuf_add_bo(execbuf, &device->trivial_batch_bo,
+NULL, 0, &device->alloc);
+   if (result != VK_SUCCESS)
+  return result;
 
execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
   .buffers_ptr = (uintptr_t) execbuf->objects,
@@ -1439,6 +1441,8 @@ setup_empty_execbuf(struct anv_execbuf *execbuf, struct 
anv_device *device)
   .rsvd1 = device->context_id,
   .rsvd2 = 0,
};
+
+   return VK_SUCCESS;
 }
 
 VkResult
@@ -1541,13 +1545,13 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
   }
}
 
-   if (cmd_buffer) {
+   if (cmd_buffer)
   result = setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer);
-  if (result != VK_SUCCESS)
- return result;
-   } else {
-  setup_empty_execbuf(&execbuf, device);
-   }
+   else
+  result = setup_empty_execbuf(&execbuf, device);
+
+   if (result != VK_SUCCESS)
+  return result;
 
if (execbuf.fence_count > 0) {
   assert(device->instance->physicalDevice.has_syncobj);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/fs: Take into account amount of data read in spilling cost heuristic.

2017-04-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 58324389be7bc7c5e10093b9cc0a8efa9b4c93a9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=58324389be7bc7c5e10093b9cc0a8efa9b4c93a9

Author: Francisco Jerez 
Date:   Thu Apr 20 11:42:27 2017 -0700

intel/fs: Take into account amount of data read in spilling cost heuristic.

Until now the spilling cost calculation was neglecting the amount of
data read from the register.
This caused it to make suboptimal decisions in some cases leading to
higher memory bandwidth usage than necessary.

Improves Unigine Heaven performance by ~4% on BDW, reversing an
unintended FPS regression from my previous commit
147e71242ce539ff28e282f009c332818c35f5ac with n=12 and statistical
significance 5%.  In addition SynMark2 OglCSDof performance is
improved by an additional ~5% on SKL, and a Kerbal Space Program
apitrace around the Moho planet I can provide on request improves by
~20%.

Cc: 
Reviewed-by: Plamena Manolova 
Reviewed-by: Jason Ekstrand 

---

 src/intel/compiler/brw_fs_reg_allocate.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp 
b/src/intel/compiler/brw_fs_reg_allocate.cpp
index 2d4d46ef33..ec8e116cb3 100644
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -822,7 +822,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
foreach_block_and_inst(block, fs_inst, inst, cfg) {
   for (unsigned int i = 0; i < inst->sources; i++) {
 if (inst->src[i].file == VGRF)
-spill_costs[inst->src[i].nr] += block_scale;
+spill_costs[inst->src[i].nr] += regs_read(inst, i) * block_scale;
   }
 
   if (inst->dst.file == VGRF)

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): intel/fs: Use regs_written() in spilling cost heuristic for improved accuracy.

2017-04-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: ecc19e12dca95d2571d3761dea6dec24b061013c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ecc19e12dca95d2571d3761dea6dec24b061013c

Author: Francisco Jerez 
Date:   Thu Apr 20 11:44:01 2017 -0700

intel/fs: Use regs_written() in spilling cost heuristic for improved accuracy.

This is what we use later on to compute the number of registers that
will actually get spilled to memory, so it's more likely to match
reality than the current open-coded approximation.

Cc: 
Reviewed-by: Plamena Manolova 
Reviewed-by: Jason Ekstrand 

---

 src/intel/compiler/brw_fs_reg_allocate.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp 
b/src/intel/compiler/brw_fs_reg_allocate.cpp
index c981d72e4f..2d4d46ef33 100644
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -826,8 +826,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
   }
 
   if (inst->dst.file == VGRF)
- spill_costs[inst->dst.nr] += DIV_ROUND_UP(inst->size_written, 
REG_SIZE)
-  * block_scale;
+ spill_costs[inst->dst.nr] += regs_written(inst) * block_scale;
 
   switch (inst->opcode) {
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965: enable ARB_vertex_attrib_64bit for gen7+

2017-04-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 0aed1212ae54c3286c7f6e155c129b1973723c46
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0aed1212ae54c3286c7f6e155c129b1973723c46

Author: Juan A. Suarez Romero 
Date:   Fri Oct 21 16:57:25 2016 +0200

i965: enable ARB_vertex_attrib_64bit for gen7+

Reviewed-by: Andreas Boll 
Reviewed-by: Francisco Jerez 

---

 src/mesa/drivers/dri/i965/intel_extensions.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 467a0d3e84..53b5eaf8a0 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -221,6 +221,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_texture_compression_bptc = true;
   ctx->Extensions.ARB_texture_view = true;
   ctx->Extensions.ARB_shader_storage_buffer_object = true;
+  ctx->Extensions.ARB_vertex_attrib_64bit = true;
   ctx->Extensions.EXT_shader_samples_identical = true;
   ctx->Extensions.OES_primitive_bounding_box = true;
   ctx->Extensions.OES_texture_buffer = true;
@@ -247,7 +248,6 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_shader_precision = true;
   ctx->Extensions.ARB_stencil_texturing = true;
   ctx->Extensions.ARB_texture_stencil8 = true;
-  ctx->Extensions.ARB_vertex_attrib_64bit = true;
   ctx->Extensions.OES_geometry_shader = true;
   ctx->Extensions.OES_texture_cube_map_array = true;
   ctx->Extensions.OES_viewport_array = true;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965: enable OpenGL 4.2 in Ivybridge

2017-04-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 1877982aca7d50541618a8997fdd72c5286b4b67
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1877982aca7d50541618a8997fdd72c5286b4b67

Author: Juan A. Suarez Romero 
Date:   Wed Mar 29 11:41:35 2017 +0200

i965: enable OpenGL 4.2 in Ivybridge

Reviewed-by: Andreas Boll 
Reviewed-by: Francisco Jerez 

---

 src/mesa/drivers/dri/i965/intel_extensions.c | 2 +-
 src/mesa/drivers/dri/i965/intel_screen.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index fc974b9860..0133fa1006 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -139,7 +139,7 @@ intelInitExtensions(struct gl_context *ctx)
else if (brw->is_haswell && can_do_pipelined_register_writes(brw->screen))
   ctx->Const.GLSLVersion = 450;
else if (brw->gen >= 7 && can_do_pipelined_register_writes(brw->screen))
-  ctx->Const.GLSLVersion = 400;
+  ctx->Const.GLSLVersion = 420;
else if (brw->gen >= 6)
   ctx->Const.GLSLVersion = 330;
else
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 9e536f58b3..39e463d264 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1654,7 +1654,7 @@ set_max_gl_versions(struct intel_screen *screen)
case 7:
   dri_screen->max_gl_core_version = 33;
   if (can_do_pipelined_register_writes(screen)) {
- dri_screen->max_gl_core_version = screen->devinfo.is_haswell ? 42 : 
40;
+ dri_screen->max_gl_core_version = 42;
  if (screen->devinfo.is_haswell && can_do_compute_dispatch(screen))
 dri_screen->max_gl_core_version = 43;
  if (screen->devinfo.is_haswell && can_do_mi_math_and_lrr(screen))

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): docs: mark GL_ARB_vertex_attrib_64bit and OpenGL 4.2 as supported by i965/gen7+

2017-04-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 96dfc014fd33a4f38e31fa1d4c9c4ea52d85a0b8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=96dfc014fd33a4f38e31fa1d4c9c4ea52d85a0b8

Author: Francisco Jerez 
Date:   Fri Apr 14 15:59:52 2017 -0700

docs: mark GL_ARB_vertex_attrib_64bit and OpenGL 4.2 as supported by i965/gen7+

v2 (Andreas Boll):
- Mark GL 4.1 as supported by i965/gen7+
- Mark GL_ARB_shader_precision as supported by i965/gen7+
- Update release notes

Reviewed-by: Andreas Boll 
Reviewed-by: Francisco Jerez 

---

 docs/features.txt | 8 
 docs/relnotes/17.1.0.html | 3 +++
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 3dd4094865..5f63632e82 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -136,17 +136,17 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, 
radeonsi
   GL_ARB_transform_feedback3DONE (i965/gen7+, 
llvmpipe, softpipe, swr)
 
 
-GL 4.1, GLSL 4.10 --- all DONE: i965/hsw+, nvc0, r600, radeonsi
+GL 4.1, GLSL 4.10 --- all DONE: i965/gen7+, nvc0, r600, radeonsi
 
   GL_ARB_ES2_compatibility  DONE (i965, nv50, 
llvmpipe, softpipe, swr)
   GL_ARB_get_program_binary DONE (0 binary formats)
   GL_ARB_separate_shader_objectsDONE (all drivers)
-  GL_ARB_shader_precision   DONE (i965/hsw+, all 
drivers that support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bitDONE (i965/hsw+, 
llvmpipe, softpipe)
+  GL_ARB_shader_precision   DONE (i965/gen7+, all 
drivers that support GLSL 4.10)
+  GL_ARB_vertex_attrib_64bitDONE (i965/gen7+, 
llvmpipe, softpipe)
   GL_ARB_viewport_array DONE (i965, nv50, 
llvmpipe, softpipe)
 
 
-GL 4.2, GLSL 4.20 -- all DONE: i965/hsw+, nvc0, radeonsi
+GL 4.2, GLSL 4.20 -- all DONE: i965/gen7+, nvc0, radeonsi
 
   GL_ARB_texture_compression_bptc   DONE (i965, r600)
   GL_ARB_compressed_texture_pixel_storage   DONE (all drivers)
diff --git a/docs/relnotes/17.1.0.html b/docs/relnotes/17.1.0.html
index ba21b61f91..4f3e0030a8 100644
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -44,15 +44,18 @@ Note: some of the new features are only available with 
certain drivers.
 
 
 
+OpenGL 4.2 on i965/ivb
 GL_ARB_gpu_shader_fp64 on i965/ivybridge
 GL_ARB_gpu_shader_int64 on i965/gen8+, nvc0, radeonsi, softpipe, 
llvmpipe
 GL_ARB_shader_ballot on nvc0, radeonsi
 GL_ARB_shader_clock on nv50, nvc0, radeonsi
 GL_ARB_shader_group_vote on radeonsi
+GL_ARB_shader_precision on i965/ivb
 GL_ARB_shader_viewport_layer_array on radeonsi
 GL_ARB_sparse_buffer on radeonsi/CIK+
 GL_ARB_transform_feedback2 on i965/gen6
 GL_ARB_transform_feedback_overflow_query on i965/gen6+
+GL_ARB_vertex_attrib_64bit on i965/ivb
 GL_NV_fill_rectangle on nvc0
 Geometry shaders enabled on swr
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965: enable ARB_shader_precision in gen7+

2017-04-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 92d4dc76eaec64e99194f3d2afcc55eb7c7b46ba
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=92d4dc76eaec64e99194f3d2afcc55eb7c7b46ba

Author: Samuel Iglesias Gonsálvez 
Date:   Mon Oct 17 14:40:06 2016 +

i965: enable ARB_shader_precision in gen7+

Reviewed-by: Andreas Boll 
Reviewed-by: Francisco Jerez 

---

 src/mesa/drivers/dri/i965/intel_extensions.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 53b5eaf8a0..fc974b9860 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -216,6 +216,7 @@ intelInitExtensions(struct gl_context *ctx)
   ctx->Extensions.ARB_shader_clock = true;
   ctx->Extensions.ARB_shader_image_load_store = true;
   ctx->Extensions.ARB_shader_image_size = true;
+  ctx->Extensions.ARB_shader_precision = true;
   ctx->Extensions.ARB_shader_texture_image_samples = true;
   ctx->Extensions.ARB_tessellation_shader = true;
   ctx->Extensions.ARB_texture_compression_bptc = true;
@@ -245,7 +246,6 @@ intelInitExtensions(struct gl_context *ctx)
}
 
if (brw->gen >= 8 || brw->is_haswell) {
-  ctx->Extensions.ARB_shader_precision = true;
   ctx->Extensions.ARB_stencil_texturing = true;
   ctx->Extensions.ARB_texture_stencil8 = true;
   ctx->Extensions.OES_geometry_shader = true;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): 30 new commits

2017-04-14 Thread Francisco Jerez

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8973ae3162aec112b22cdf58f47d0ee12c4a09cd
Author: Samuel Iglesias Gonsálvez 
Date:   Wed Apr 5 06:23:43 2017 +0200

docs/relnotes: add GL_ARB_gpu_shader_fp64 support on i965/ivybridge

Signed-off-by: Samuel Iglesias Gonsálvez 
Acked-by: Francisco Jerez 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef49dda2df94c8060047b845a3a027460c45ba7c
Author: Samuel Iglesias Gonsálvez 
Date:   Tue Oct 11 10:59:52 2016 +0200

docs: mark GL_ARB_gpu_shader_fp64 and OpenGL 4.0 as supported by i965/gen7+

Signed-off-by: Samuel Iglesias Gonsálvez 
Acked-by: Francisco Jerez 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a494afdb8e09640956743649354fbb7147231d1d
Author: Samuel Iglesias Gonsálvez 
Date:   Fri Aug 26 07:39:04 2016 +0200

i965: enable OpenGL 4.0 to Ivybridge/Baytrail

Signed-off-by: Samuel Iglesias Gonsálvez 
Reviewed-by: Francisco Jerez 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cd0a6b2fc2ef6e04ffb262072821113cb49cd530
Author: Samuel Iglesias Gonsálvez 
Date:   Fri Aug 26 07:37:42 2016 +0200

i965: enable ARB_gpu_shader_fp64 for Ivybridge/Baytrail

Signed-off-by: Samuel Iglesias Gonsálvez 
Reviewed-by: Francisco Jerez 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2eeb1b0ad9453ba135b72aaeec6c0d4dbf9ac87c
Author: Matt Turner 
Date:   Fri Jan 20 13:35:33 2017 -0800

i965: Use correct VertStride on align16 instructions.

In commit c35fa7a, we changed the "width" of DF source registers to 2,
which is conceptually fine. Unfortunately a VertStride of 2 is not
allowed by align16 instructions on IVB/BYT, and the regular VertStride
of 4 works fine in any case.

See 
generated_tests/spec/arb_gpu_shader_fp64/execution/built-in-functions/vs-round-double.shader_test
for example:

cmp.ge.f0(8)    g18<1>DF    g1<0>.xyxyDF    -g8<2>DF    { align16 1Q };
ERROR: In Align16 mode, only VertStride of 0 or 4 is allowed
cmp.ge.f0(8)    g19<1>DF    g1<0>.xyxyDF    -g9<2>DF    { align16 2N };
ERROR: In Align16 mode, only VertStride of 0 or 4 is allowed

v2:
- Add spec quote (Curro).
- Change the condition to only BRW_VERTICAL_STRIDE_2 (Curro)

Reviewed-by: Samuel Iglesias Gonsálvez 
Reviewed-by: Francisco Jerez 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d8441e2276912d353d4fc6c0cf6b781ab5153ee7
Author: Samuel Iglesias Gonsálvez 
Date:   Fri Mar 17 11:57:25 2017 +0100

i965/vec4/dce: improve tracking of partial flag register writes

This is required for correctness in presence of multiple 4-wide flag
writes (e.g. 4-wide instructions with a conditional mod set) which
update a different portion of the same 8-bit flag subregister.

Right now we keep track of flag dataflow with 8-bit granularity and
consider flag writes to have killed any previous definition of the
same subregister even if the write was less than 8 channels wide,
which can cause live flag register updates to be dead
code-eliminated incorrectly.

Signed-off-by: Samuel Iglesias Gonsálvez 
Reviewed-by: Francisco Jerez 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c1fc8fad47f60bda857fc45c4052c5f4effe0d84
Author: Samuel Iglesias Gonsálvez 
Date:   Fri Mar 17 11:55:49 2017 +0100

i965/vec4: don't do horizontal stride on some register file types

horiz_offset() shouldn't be doing anything for scalar registers,
because all channels of any SIMD instructions will end up reading or
writing the same component of the register, so shifting the register
offset would be wrong.
    
Signed-off-by: Samuel Iglesias Gonsálvez 
[ Francisco Jerez: Re-implement in terms of is_uniform() for
  simplicity.  Pass argument by const reference.  Clarify commit
  message. ]
Reviewed-by: Francisco Jerez 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=21e8e3a8484241508ac2c250fc4367234fa337df
Author: Matt Turner 
Date:   Fri Jan 20 13:35:32 2017 -0800

i965/vec4: Fix exec size for MOVs {SET,PICK}_{HIGH,LOW}_32BIT.

Otherwise for a pack_double_2x32_split opcode, we emit:

   vec1 64 ssa_135 = pack_double_2x32_split ssa_133, ssa_134
mov(8)  g5<1>UD g5<4>.xUD   { align16 
1Q compacted };
mov(8)  g7<2>UD g5<4,4,1>UD { align1 1Q 
};
ERROR: When the destination spans two registers, the source must 
span two registers
   (exceptions for scalar source and packed-word to 
packed-dword expansion)
mov(8)  g8<2>UD g5.4<4,4,1>UD   { align1 2N 
};
ERROR: The offset from the two source registers must be the same
mov(8)

Mesa (master): i965/fs: Take into account lower frequency of conditional blocks in spilling cost heuristic.

2017-04-11 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 147e71242ce539ff28e282f009c332818c35f5ac
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=147e71242ce539ff28e282f009c332818c35f5ac

Author: Francisco Jerez 
Date:   Sun Apr  9 17:28:58 2017 -0700

i965/fs: Take into account lower frequency of conditional blocks in spilling cost heuristic.

The individual branches of an if/else/endif construct will be executed
some unknown number of times between 0 and 1 relative to the parent
block.  Use some factor in between as weight while approximating the
cost of spill/fill instructions within a conditional if-else branch.
This favors spilling registers used within conditional branches which
are likely to be executed less frequently than registers used at the
top level.

Improves the framerate of the SynMark2 OglCSDof benchmark by ~1.9x on
my SKL GT4e.  Should have a comparable effect on other platforms.  No
significant regressions.

Reviewed-by: Jason Ekstrand 
Reviewed-by: Kenneth Graunke 
Reviewed-by: Matt Turner 

---

 src/intel/compiler/brw_fs_reg_allocate.cpp | 19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp 
b/src/intel/compiler/brw_fs_reg_allocate.cpp
index 5c6f3d490f..c981d72e4f 100644
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -806,7 +806,7 @@ emit_spill(const fs_builder &bld, fs_reg src,
 int
 fs_visitor::choose_spill_reg(struct ra_graph *g)
 {
-   float loop_scale = 1.0;
+   float block_scale = 1.0;
float spill_costs[this->alloc.count];
bool no_spill[this->alloc.count];
 
@@ -822,23 +822,32 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
foreach_block_and_inst(block, fs_inst, inst, cfg) {
   for (unsigned int i = 0; i < inst->sources; i++) {
 if (inst->src[i].file == VGRF)
-spill_costs[inst->src[i].nr] += loop_scale;
+spill_costs[inst->src[i].nr] += block_scale;
   }
 
   if (inst->dst.file == VGRF)
  spill_costs[inst->dst.nr] += DIV_ROUND_UP(inst->size_written, 
REG_SIZE)
-  * loop_scale;
+  * block_scale;
 
   switch (inst->opcode) {
 
   case BRW_OPCODE_DO:
-loop_scale *= 10;
+block_scale *= 10;
 break;
 
   case BRW_OPCODE_WHILE:
-loop_scale /= 10;
+block_scale /= 10;
 break;
 
+  case BRW_OPCODE_IF:
+  case BRW_OPCODE_IFF:
+ block_scale *= 0.5;
+ break;
+
+  case BRW_OPCODE_ENDIF:
+ block_scale /= 0.5;
+ break;
+
   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
 if (inst->src[0].file == VGRF)
 no_spill[inst->src[0].nr] = true;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): drirc: Set glsl_zero_init for Kerbal Space Program.

2017-04-04 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 0de17f52a515e655682b4b894c44ad9d7308794e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0de17f52a515e655682b4b894c44ad9d7308794e

Author: Francisco Jerez 
Date:   Tue Apr  4 14:12:59 2017 -0700

drirc: Set glsl_zero_init for Kerbal Space Program.

This fixes the stripes of garbage rendered on the floor of the vehicle
assembly building among other rendering issues.  The reason for the
misrendering seems to be that some of the GLSL shaders used by the
application use variables before initializing them, incorrectly
assuming that they will be implicitly set to zero by the
implementation.

Acked-by: Matt Turner 

---

 src/mesa/drivers/dri/common/drirc | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/common/drirc 
b/src/mesa/drivers/dri/common/drirc
index 23d09fabb1..14d7713fdc 100644
--- a/src/mesa/drivers/dri/common/drirc
+++ b/src/mesa/drivers/dri/common/drirc
@@ -128,5 +128,13 @@ TODO: document the other workarounds.
 
 
 
+
+
+
+
+
+
+
+
 
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): gallium/tgsi: Treat UCMP sources as floats to match the GLSL-to-TGSI pass expectations.

2017-03-15 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e6469ec43b25898e99766a30aa8f54cc64c3bc04
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e6469ec43b25898e99766a30aa8f54cc64c3bc04

Author: Francisco Jerez 
Date:   Mon Mar 13 17:31:39 2017 -0700

gallium/tgsi: Treat UCMP sources as floats to match the GLSL-to-TGSI pass expectations.

Currently the GLSL-to-TGSI translation pass assumes it can use
floating point source modifiers on the UCMP instruction.  See the bug
report linked below for an example where an unrelated change in the
GLSL built-in lowering code for atan2 (e9ffd12827ac11a2d2002a42fa8eb1)
caused the generation of floating-point ir_unop_neg instructions
followed by ir_triop_csel, which is translated into UCMP with a negate
modifier on back-ends with native integer support.

Allowing floating-point source modifiers on an integer instruction
seems like rather dubious design for a transport IR, since the same
semantics could be represented as a sequence of MOV+UCMP instructions
instead, but supposedly this matches the expectations of TGSI
back-ends other than tgsi_exec, and the expectations of the DX10 API.
I take no responsibility for future headaches caused by this
inconsistency.

Fixes a regression of piglit glsl-fs-tan-1 on softpipe introduced by
the above-mentioned glsl front-end commit.  Even though the commit
that triggered the regression doesn't seem to have made it to any
stable branches yet, this might be worth back-porting since I don't
see any reason why the bug couldn't have been reproduced before that
point.

Suggested-by: Roland Scheidegger 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99817
Reviewed-by: Roland Scheidegger 

---

 src/gallium/auxiliary/tgsi/tgsi_exec.c | 54 ++
 src/gallium/docs/source/tgsi.rst   |  8 +++--
 2 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 3c15306..48d91af 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3359,6 +3359,46 @@ exec_up2h(struct tgsi_exec_machine *mach,
 }
 
 static void
+micro_ucmp(union tgsi_exec_channel *dst,
+   const union tgsi_exec_channel *src0,
+   const union tgsi_exec_channel *src1,
+   const union tgsi_exec_channel *src2)
+{
+   dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
+   dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
+   dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
+   dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
+}
+
+static void
+exec_ucmp(struct tgsi_exec_machine *mach,
+  const struct tgsi_full_instruction *inst)
+{
+   unsigned int chan;
+   struct tgsi_exec_vector dst;
+
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+  if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ union tgsi_exec_channel src[3];
+
+ fetch_source(mach, &src[0], &inst->Src[0], chan,
+  TGSI_EXEC_DATA_UINT);
+ fetch_source(mach, &src[1], &inst->Src[1], chan,
+  TGSI_EXEC_DATA_FLOAT);
+ fetch_source(mach, &src[2], &inst->Src[2], chan,
+  TGSI_EXEC_DATA_FLOAT);
+ micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
+  }
+   }
+   for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
+  if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
+ store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan,
+TGSI_EXEC_DATA_FLOAT);
+  }
+   }
+}
+
+static void
 exec_scs(struct tgsi_exec_machine *mach,
  const struct tgsi_full_instruction *inst)
 {
@@ -4997,18 +5037,6 @@ micro_uarl(union tgsi_exec_channel *dst,
dst->i[3] = src->u[3];
 }
 
-static void
-micro_ucmp(union tgsi_exec_channel *dst,
-   const union tgsi_exec_channel *src0,
-   const union tgsi_exec_channel *src1,
-   const union tgsi_exec_channel *src2)
-{
-   dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0];
-   dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1];
-   dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2];
-   dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3];
-}
-
 /**
  * Signed bitfield extract (i.e. sign-extend the extracted bits)
  */
@@ -5911,7 +5939,7 @@ exec_instruction(
   break;
 
case TGSI_OPCODE_UCMP:
-  exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, 
TGSI_EXEC_DATA_UINT);
+  exec_ucmp(mach, inst);
   break;
 
case TGSI_OPCODE_IABS:
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 18b42fb..9976875 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -28,9 +28,11 @@ Modifiers
 
 TGSI supports modifiers on inputs (as well as saturat

Mesa (master): nir/spirv/glsl450: Rewrite atan2 implementation to fix accuracy and handling of zero/ infinity.

2017-01-31 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 7215375c445f533e3962a09b8e3b075880c1382f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7215375c445f533e3962a09b8e3b075880c1382f

Author: Francisco Jerez 
Date:   Fri Jan 20 15:24:30 2017 -0800

nir/spirv/glsl450: Rewrite atan2 implementation to fix accuracy and handling of zero/infinity.

See "glsl: Rewrite atan2 implementation to fix accuracy and handling
of zero/infinity." for the rationale, but note that the instruction
count benefit discussed there is somewhat less important for the SPIRV
implementation, because the current code already emitted no control
flow instructions -- Still this saves us one hardware instruction per
scalar component on Intel SKL hardware.

Fixes the following Vulkan CTS tests on Intel hardware:

dEQP-VK.glsl.builtin.precision.atan2.highp_compute.scalar
dEQP-VK.glsl.builtin.precision.atan2.highp_compute.vec2
dEQP-VK.glsl.builtin.precision.atan2.highp_compute.vec3
dEQP-VK.glsl.builtin.precision.atan2.highp_compute.vec4
dEQP-VK.glsl.builtin.precision.atan2.mediump_compute.vec2
dEQP-VK.glsl.builtin.precision.atan2.mediump_compute.vec4

Note that most of the test-cases above expect IEEE-compliant handling
of atan2(±∞, ±∞), which this patch doesn't explicitly handle, so
except for the last two the test-cases above weren't expected to pass
yet.  The reason they do is that the i965 back-end implementation of
the NIR fmin and fmax instructions is not quite GLSL-compliant (it
complies with IEEE 754 recommendations though), because fmin/fmax of a
NaN and a non-NaN argument currently always return the non-NaN
argument, which causes atan() to flush NaN to one and return the
expected value.  The front-end should probably not be relying on this
behavior for correctness though because other back-ends are likely to
behave differently -- A follow-up patch will handle the atan2(±∞, ±∞)
corner cases explicitly.

v2: Fix up argument scaling to take into account the range and
precision of exotic FP24 hardware.  Flip coordinate system for
arguments along the vertical line as if they were on the left
half-plane in order to avoid division by zero which may give
unspecified results on non-GLSL 4.1-capable hardware.  Sprinkle in
some more comments.

Reviewed-by: Ian Romanick 

---

 src/compiler/spirv/vtn_glsl450.c | 77 
 1 file changed, 55 insertions(+), 22 deletions(-)

diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index 0d32fdd..8509f64 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -302,28 +302,61 @@ build_atan(nir_builder *b, nir_ssa_def *y_over_x)
 static nir_ssa_def *
 build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def *x)
 {
-   nir_ssa_def *zero = nir_imm_float(b, 0.0f);
-
-   /* If |x| >= 1.0e-8 * |y|: */
-   nir_ssa_def *condition =
-  nir_fge(b, nir_fabs(b, x),
-  nir_fmul(b, nir_imm_float(b, 1.0e-8f), nir_fabs(b, y)));
-
-   /* Then...call atan(y/x) and fix it up: */
-   nir_ssa_def *atan1 = build_atan(b, nir_fdiv(b, y, x));
-   nir_ssa_def *r_then =
-  nir_bcsel(b, nir_flt(b, x, zero),
-   nir_fadd(b, atan1,
-   nir_bcsel(b, nir_fge(b, y, zero),
-nir_imm_float(b, M_PIf),
-nir_imm_float(b, -M_PIf))),
-   atan1);
-
-   /* Else... */
-   nir_ssa_def *r_else =
-  nir_fmul(b, nir_fsign(b, y), nir_imm_float(b, M_PI_2f));
-
-   return nir_bcsel(b, condition, r_then, r_else);
+   nir_ssa_def *zero = nir_imm_float(b, 0);
+   nir_ssa_def *one = nir_imm_float(b, 1);
+
+   /* If we're on the left half-plane rotate the coordinates π/2 clock-wise
+* for the y=0 discontinuity to end up aligned with the vertical
+* discontinuity of atan(s/t) along t=0.  This also makes sure that we
+* don't attempt to divide by zero along the vertical line, which may give
+* unspecified results on non-GLSL 4.1-capable hardware.
+*/
+   nir_ssa_def *flip = nir_fge(b, zero, x);
+   nir_ssa_def *s = nir_bcsel(b, flip, nir_fabs(b, x), y);
+   nir_ssa_def *t = nir_bcsel(b, flip, y, nir_fabs(b, x));
+
+   /* If the magnitude of the denominator exceeds some huge value, scale down
+* the arguments in order to prevent the reciprocal operation from flushing
+* its result to zero, which would cause precision problems, and for s
+* infinite would cause us to return a NaN instead of the correct finite
+* value.
+*
+* If fmin and fmax are respectively the smallest and largest positive
+* normalized floating point values representable by the implementation,
+* the constants below should be in agreement with:
+*
+*huge <= 1 / fmin
+*scale <= 1 / fmin / fmax (for |t| >= huge)
+*
+* In addition scale should be a negative power of two in ord

Mesa (master): glsl: Fix constant evaluation of the rcp op.

2017-01-31 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 6643a97de308bc100a497f18fed8819f6f6f570b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6643a97de308bc100a497f18fed8819f6f6f570b

Author: Francisco Jerez 
Date:   Tue Jan 24 11:41:46 2017 -0800

glsl: Fix constant evaluation of the rcp op.

Will avoid a regression in a future commit that introduces some
additional rcp operations.  According to the GLSL 4.10 specification:

"Dividing by 0 results in the appropriately signed IEEE Inf."

Reviewed-by: Ian Romanick 
Reviewed-by: Juan A. Suarez Romero 

---

 src/compiler/glsl/ir_expression_operation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_expression_operation.py 
b/src/compiler/glsl/ir_expression_operation.py
index f91ac9b..4ac1ffb 100644
--- a/src/compiler/glsl/ir_expression_operation.py
+++ b/src/compiler/glsl/ir_expression_operation.py
@@ -422,7 +422,7 @@ ir_expression_operation = [
operation("neg", 1, source_types=numeric_types, c_expression={'u': "-((int) 
{src0})", 'default': "-{src0}"}),
operation("abs", 1, source_types=signed_numeric_types, c_expression={'i': 
"{src0} < 0 ? -{src0} : {src0}", 'f': "fabsf({src0})", 'd': "fabs({src0})", 
'i64': "{src0} < 0 ? -{src0} : {src0}"}),
operation("sign", 1, source_types=signed_numeric_types, c_expression={'i': 
"({src0} > 0) - ({src0} < 0)", 'f': "float(({src0} > 0.0F) - ({src0} < 0.0F))", 
'd': "double(({src0} > 0.0) - ({src0} < 0.0))", 'i64': "({src0} > 0) - ({src0} 
< 0)"}),
-   operation("rcp", 1, source_types=real_types, c_expression={'f': "{src0} != 
0.0F ? 1.0F / {src0} : 0.0F", 'd': "{src0} != 0.0 ? 1.0 / {src0} : 0.0"}),
+   operation("rcp", 1, source_types=real_types, c_expression={'f': "1.0F / 
{src0}", 'd': "1.0 / {src0}"}),
operation("rsq", 1, source_types=real_types, c_expression={'f': "1.0F / 
sqrtf({src0})", 'd': "1.0 / sqrt({src0})"}),
operation("sqrt", 1, source_types=real_types, c_expression={'f': 
"sqrtf({src0})", 'd': "sqrt({src0})"}),
operation("exp", 1, source_types=(float_type,), 
c_expression="expf({src0})"), # Log base e on gentype

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Rewrite atan2 implementation to fix accuracy and handling of zero/infinity.

2017-01-31 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e9ffd12827ac11a2d2002a42fa8eb1df847153ba
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e9ffd12827ac11a2d2002a42fa8eb1df847153ba

Author: Francisco Jerez 
Date:   Sat Jan 21 13:41:08 2017 -0800

glsl: Rewrite atan2 implementation to fix accuracy and handling of 
zero/infinity.

This addresses several issues of the current atan2 implementation:

 - Negative zero (and negative denorms which end up getting flushed to
   zero) isn't handled correctly by the current implementation.  The
   reason is that it does 'y >= 0' and 'x < 0' comparisons to decide
   on which side of the branch cut the argument is, which causes us to
   return incorrect results (off by up to 2π) for very small negative
   values.

 - There is a serious precision problem for x values of large enough
   magnitude introduced by the floating point division operation being
   implemented as a mul+rcp sequence.  This can lead to the quotient
   getting flushed to zero in some cases introducing an error of over
   8e6 ULP in the result -- Or in the most catastrophic case will
   cause us to return NaN instead of the correct value ±π/2 for y=±∞
   and x very large.  We can fix this easily by scaling down both
   arguments when the absolute value of the denominator goes above a
   certain threshold.  The error of this atan2 implementation remains
   below 25 ULP in most of its domain except for a neighborhood of y=0
   where it reaches a maximum error of about 180 ULP.

 - It emits a bunch of instructions including no less than three
   if-else branches per scalar component that don't seem to get
   optimized out later on.  This implementation uses about 13% fewer
   instructions on Intel SKL hardware and doesn't emit any control
   flow instructions.

v2: Fix up argument scaling to take into account the range and
precision of exotic FP24 hardware.  Flip coordinate system for
arguments along the vertical line as if they were on the left
half-plane in order to avoid division by zero which may give
unspecified results on non-GLSL 4.1-capable hardware.  Sprinkle in
some more comments.

Reviewed-by: Ian Romanick 

---

 src/compiler/glsl/builtin_functions.cpp | 96 -
 1 file changed, 60 insertions(+), 36 deletions(-)

diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index 4a6c5af..432df65 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -3560,44 +3560,68 @@ builtin_builder::_acos(const glsl_type *type)
 ir_function_signature *
 builtin_builder::_atan2(const glsl_type *type)
 {
-   ir_variable *vec_y = in_var(type, "vec_y");
-   ir_variable *vec_x = in_var(type, "vec_x");
-   MAKE_SIG(type, always_available, 2, vec_y, vec_x);
-
-   ir_variable *vec_result = body.make_temp(type, "vec_result");
-   ir_variable *r = body.make_temp(glsl_type::float_type, "r");
-   for (int i = 0; i < type->vector_elements; i++) {
-  ir_variable *y = body.make_temp(glsl_type::float_type, "y");
-  ir_variable *x = body.make_temp(glsl_type::float_type, "x");
-  body.emit(assign(y, swizzle(vec_y, i, 1)));
-  body.emit(assign(x, swizzle(vec_x, i, 1)));
-
-  /* If |x| >= 1.0e-8 * |y|: */
-  ir_if *outer_if =
- new(mem_ctx) ir_if(greater(abs(x), mul(imm(1.0e-8f), abs(y))));
-
-  ir_factory outer_then(&outer_if->then_instructions, mem_ctx);
-
-  /* Then...call atan(y/x) */
-  do_atan(outer_then, glsl_type::float_type, r, div(y, x));
-
-  /* ...and fix it up: */
-  ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f)));
-  inner_if->then_instructions.push_tail(
- if_tree(gequal(y, imm(0.0f)),
- assign(r, add(r, imm(M_PIf))),
- assign(r, sub(r, imm(M_PIf)))));
-  outer_then.emit(inner_if);
-
-  /* Else... */
-  outer_if->else_instructions.push_tail(
- assign(r, mul(sign(y), imm(M_PI_2f))));
+   const unsigned n = type->vector_elements;
+   ir_variable *y = in_var(type, "y");
+   ir_variable *x = in_var(type, "x");
+   MAKE_SIG(type, always_available, 2, y, x);
 
-  body.emit(outer_if);
+   /* If we're on the left half-plane rotate the coordinates π/2 clock-wise
+* for the y=0 discontinuity to end up aligned with the vertical
+* discontinuity of atan(s/t) along t=0.  This also makes sure that we
+* don't attempt to divide by zero along the vertical line, which may give
+* unspecified results on non-GLSL 4.1-capable hardware.
+*/
+   ir_variable *flip = body.make_temp(glsl_type::bvec(n), "flip");
+   body.emit(assign(flip, gequal(imm(0.0f, n), x)));
+   ir_variable *s = body.make_temp(type, "s");
+   body.emit(assign(s, csel(flip, abs(x), y)));
+   ir_variable *t = bo

Mesa (master): i965/fs: Fix nir_op_fsign of absolute value.

2017-01-31 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 69042a5be4664c7928a21bd23e4f6795bfb19f60
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=69042a5be4664c7928a21bd23e4f6795bfb19f60

Author: Francisco Jerez 
Date:   Tue Jan 24 12:26:54 2017 -0800

i965/fs: Fix nir_op_fsign of absolute value.

This does point at the front-end emitting silly code that could have
been optimized out, but the current fsign implementation would emit
bogus IR if abs was set for the argument (because it would apply the
abs modifier on an unsigned integer type), and we shouldn't rely on
the upper layer's optimization passes for correctness.

Reviewed-by: Ian Romanick 

---

 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index e1ab598..e0c2fa0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -701,7 +701,14 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, 
nir_alu_instr *instr)
   break;
 
case nir_op_fsign: {
-  if (type_sz(op[0].type) < 8) {
+  if (op[0].abs) {
+ /* Straightforward since the source can be assumed to be
+  * non-negative.
+  */
+ set_condmod(BRW_CONDITIONAL_NZ, bld.MOV(result, op[0]));
+ set_predicate(BRW_PREDICATE_NORMAL, bld.MOV(result, brw_imm_f(1.0f)));
+
+  } else if (type_sz(op[0].type) < 8) {
  /* AND(val, 0x80000000) gives the sign bit.
   *
   * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Implement IEEE-compliant handling of atan2(±∞, ±∞).

2017-01-31 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 013d40d1ceb1c23e8a95c8e4dbbb8cab581be919
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=013d40d1ceb1c23e8a95c8e4dbbb8cab581be919

Author: Francisco Jerez 
Date:   Tue Jan 24 13:43:07 2017 -0800

glsl: Implement IEEE-compliant handling of atan2(±∞, ±∞).

Reviewed-by: Ian Romanick 
Reviewed-by: Juan A. Suarez Romero 

---

 src/compiler/glsl/builtin_functions.cpp | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/builtin_functions.cpp 
b/src/compiler/glsl/builtin_functions.cpp
index 432df65..b8f9d8f 100644
--- a/src/compiler/glsl/builtin_functions.cpp
+++ b/src/compiler/glsl/builtin_functions.cpp
@@ -3604,11 +3604,31 @@ builtin_builder::_atan2(const glsl_type *type)
body.emit(assign(rcp_scaled_t, rcp(mul(t, scale))));
ir_expression *s_over_t = mul(mul(s, scale), rcp_scaled_t);
 
+   /* For |x| = |y| assume tan = 1 even if infinite (i.e. pretend momentarily
+* that ∞/∞ = 1) in order to comply with the rather artificial rules
+* inherited from IEEE 754-2008, namely:
+*
+*  "atan2(±∞, −∞) is ±3π/4
+*   atan2(±∞, +∞) is ±π/4"
+*
+* Note that this is inconsistent with the rules for the neighborhood of
+* zero that are based on iterated limits:
+*
+*  "atan2(±0, −0) is ±π
+*   atan2(±0, +0) is ±0"
+*
+* but GLSL specifically allows implementations to deviate from IEEE rules
+* at (0,0), so we take that license (i.e. pretend that 0/0 = 1 here as
+* well).
+*/
+   ir_expression *tan = csel(equal(abs(x), abs(y)),
+ imm(1.0f, n), abs(s_over_t));
+
/* Calculate the arctangent and fix up the result if we had flipped the
 * coordinate system.
 */
ir_variable *arc = body.make_temp(type, "arc");
-   do_atan(body, type, arc, abs(s_over_t));
+   do_atan(body, type, arc, tan);
body.emit(assign(arc, add(arc, mul(b2f(flip), imm(M_PI_2f)))));
 
/* Rather convoluted calculation of the sign of the result.  When x < 0 we

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): nir/spirv/glsl450: Implement IEEE-compliant handling of atan2(±∞, ±∞).

2017-01-31 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 11e9ebbf15ecf49d7ef02c2ec6c2d9d3ff0f1b6e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=11e9ebbf15ecf49d7ef02c2ec6c2d9d3ff0f1b6e

Author: Francisco Jerez 
Date:   Mon Jan 23 23:36:46 2017 -0800

nir/spirv/glsl450: Implement IEEE-compliant handling of atan2(±∞, ±∞).

Reviewed-by: Ian Romanick 
Reviewed-by: Juan A. Suarez Romero 

---

 src/compiler/spirv/vtn_glsl450.c | 22 +-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index 8509f64..dd38cc9 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -339,12 +339,32 @@ build_atan2(nir_builder *b, nir_ssa_def *y, nir_ssa_def 
*x)
nir_ssa_def *rcp_scaled_t = nir_frcp(b, nir_fmul(b, t, scale));
nir_ssa_def *s_over_t = nir_fmul(b, nir_fmul(b, s, scale), rcp_scaled_t);
 
+   /* For |x| = |y| assume tan = 1 even if infinite (i.e. pretend momentarily
+* that ∞/∞ = 1) in order to comply with the rather artificial rules
+* inherited from IEEE 754-2008, namely:
+*
+*  "atan2(±∞, −∞) is ±3π/4
+*   atan2(±∞, +∞) is ±π/4"
+*
+* Note that this is inconsistent with the rules for the neighborhood of
+* zero that are based on iterated limits:
+*
+*  "atan2(±0, −0) is ±π
+*   atan2(±0, +0) is ±0"
+*
+* but GLSL specifically allows implementations to deviate from IEEE rules
+* at (0,0), so we take that license (i.e. pretend that 0/0 = 1 here as
+* well).
+*/
+   nir_ssa_def *tan = nir_bcsel(b, nir_feq(b, nir_fabs(b, x), nir_fabs(b, y)),
+one, nir_fabs(b, s_over_t));
+
/* Calculate the arctangent and fix up the result if we had flipped the
 * coordinate system.
 */
nir_ssa_def *arc = nir_fadd(b, nir_fmul(b, nir_b2f(b, flip),
nir_imm_float(b, M_PI_2f)),
-   build_atan(b, nir_fabs(b, s_over_t)));
+   build_atan(b, tan));
 
/* Rather convoluted calculation of the sign of the result.  When x < 0 we
 * cannot use fsign because we need to be able to distinguish between

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): mesa/program: Translate csel operation from GLSL IR.

2017-01-31 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e81130d7a146fe6a750bf903e910dc2c7c90d513
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e81130d7a146fe6a750bf903e910dc2c7c90d513

Author: Francisco Jerez 
Date:   Mon Jan 23 23:53:03 2017 -0800

mesa/program: Translate csel operation from GLSL IR.

This will be used internally by the GLSL front-end in order to
implement some built-in functions. Plumb it through MESA IR for
back-ends that rely on this translation pass.

v2: Add comment.

Reviewed-by: Ian Romanick 
Reviewed-by: Juan A. Suarez Romero 

---

 src/mesa/program/ir_to_mesa.cpp | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp
index 0ae797f..dc5f801 100644
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -1360,13 +1360,20 @@ ir_to_mesa_visitor::visit(ir_expression *ir)
   emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
   break;
 
+   case ir_triop_csel:
+  /* We assume that boolean true and false are 1.0 and 0.0.  OPCODE_CMP
+   * selects src1 if src0 is < 0, src2 otherwise.
+   */
+  op[0].negate = ~op[0].negate;
+  emit(ir, OPCODE_CMP, result_dst, op[0], op[1], op[2]);
+  break;
+
case ir_binop_vector_extract:
case ir_triop_fma:
case ir_triop_bitfield_extract:
case ir_triop_vector_insert:
case ir_quadop_bitfield_insert:
case ir_binop_ldexp:
-   case ir_triop_csel:
case ir_binop_carry:
case ir_binop_borrow:
case ir_binop_imul_high:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl/ir_builder: Add rcp builder.

2017-01-31 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 7ec3af3f8ff6584542f029c28abc2bcae1402259
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7ec3af3f8ff6584542f029c28abc2bcae1402259

Author: Francisco Jerez 
Date:   Mon Jan 23 23:59:45 2017 -0800

glsl/ir_builder: Add rcp builder.

Reviewed-by: Ian Romanick 
Reviewed-by: Juan A. Suarez Romero 

---

 src/compiler/glsl/ir_builder.cpp | 6 ++
 src/compiler/glsl/ir_builder.h   | 1 +
 2 files changed, 7 insertions(+)

diff --git a/src/compiler/glsl/ir_builder.cpp b/src/compiler/glsl/ir_builder.cpp
index 0cee856..8d61533 100644
--- a/src/compiler/glsl/ir_builder.cpp
+++ b/src/compiler/glsl/ir_builder.cpp
@@ -315,6 +315,12 @@ exp(operand a)
 }
 
 ir_expression *
+rcp(operand a)
+{
+   return expr(ir_unop_rcp, a);
+}
+
+ir_expression *
 rsq(operand a)
 {
return expr(ir_unop_rsq, a);
diff --git a/src/compiler/glsl/ir_builder.h b/src/compiler/glsl/ir_builder.h
index 5ee9412..ff1ff70 100644
--- a/src/compiler/glsl/ir_builder.h
+++ b/src/compiler/glsl/ir_builder.h
@@ -148,6 +148,7 @@ ir_expression *neg(operand a);
 ir_expression *sin(operand a);
 ir_expression *cos(operand a);
 ir_expression *exp(operand a);
+ir_expression *rcp(operand a);
 ir_expression *rsq(operand a);
 ir_expression *sqrt(operand a);
 ir_expression *log(operand a);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): clover: Check for executables before enqueueing a kernel

2017-01-11 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 4e0d171d7eb6accbf8f381530eedbc9ff86b54fb
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4e0d171d7eb6accbf8f381530eedbc9ff86b54fb

Author: Pierre Moreau 
Date:   Fri Dec 30 00:29:20 2016 +0100

clover: Check for executables before enqueueing a kernel

Without this check, the kernel::bind() method would fail with a
std::out_of_range exception, letting an exception escape from the
library into the client, rather than returning the corresponding error
code CL_INVALID_PROGRAM_EXECUTABLE.

Signed-off-by: Pierre Moreau 
Reviewed-by: Francisco Jerez 

---

 src/gallium/state_trackers/clover/api/kernel.cpp | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/kernel.cpp 
b/src/gallium/state_trackers/clover/api/kernel.cpp
index 73ba34a..b665773 100644
--- a/src/gallium/state_trackers/clover/api/kernel.cpp
+++ b/src/gallium/state_trackers/clover/api/kernel.cpp
@@ -215,7 +215,10 @@ namespace {
 }, kern.args()))
  throw error(CL_INVALID_KERNEL_ARGS);
 
-  if (!count(q.device(), kern.program().devices()))
+  // If the command queue's device is not associated to the program, we get
+  // a module, with no sections, which will also fail the following test.
+  auto &m = kern.program().build(q.device()).binary;
+  if (!any_of(type_equals(module::section::text_executable), m.secs))
  throw error(CL_INVALID_PROGRAM_EXECUTABLE);
}
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): clover: Use Clang's diagnostics

2016-12-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: d9fef848a651b47520cbeb72c38b93d4fbf842a8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d9fef848a651b47520cbeb72c38b93d4fbf842a8

Author: Vedran Miletić 
Date:   Wed Dec 21 13:49:36 2016 +0100

clover: Use Clang's diagnostics

Presently errors from the frontend are handled only if they occur in
clang::CompilerInvocation::CreateFromArgs(). This patch uses
clang::DiagnosticsEngine to detect errors such as invalid values for
Clang frontend arguments.

Fixes Piglit's cl/program/build/fail/invalid-version-declaration.cl
test.

v2: fix inconsistent code formatting

Signed-off-by: Vedran Miletić 
Reviewed-by: Francisco Jerez 
Tested-by: Aaron Watry 

---

 src/gallium/state_trackers/clover/llvm/invocation.cpp | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 675cf19..f63ff3d 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -98,8 +98,9 @@ namespace {
 const std::vector<std::string> &opts,
 std::string &r_log) {
   std::unique_ptr<clang::CompilerInstance> c { new clang::CompilerInstance };
+  clang::TextDiagnosticBuffer *diag_buffer = new 
clang::TextDiagnosticBuffer;
   clang::DiagnosticsEngine diag { new clang::DiagnosticIDs,
-new clang::DiagnosticOptions, new clang::TextDiagnosticBuffer };
+new clang::DiagnosticOptions, diag_buffer };
 
   // Parse the compiler options.  A file name should be present at the end
   // and must have the .cl extension in order for the CompilerInvocation
@@ -111,6 +112,10 @@ namespace {
  c->getInvocation(), copts.data(), copts.data() + copts.size(), 
diag))
  throw invalid_build_options_error();
 
+  diag_buffer->FlushDiagnostics(diag);
+  if (diag.hasErrorOccurred())
+ throw invalid_build_options_error();
+
   c->getTargetOpts().CPU = target.cpu;
   c->getTargetOpts().Triple = target.triple;
   c->getLangOpts().NoBuiltin = true;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): anv: Fix uniform and storage buffer offset alignment limits.

2016-12-16 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 79d08ed3d21bef21881303f320706ebb2098a50a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=79d08ed3d21bef21881303f320706ebb2098a50a

Author: Francisco Jerez 
Date:   Thu Dec 15 13:34:02 2016 -0800

anv: Fix uniform and storage buffer offset alignment limits.

This fixes a regression in a bunch of image store vulkan CTS tests
from commit ad38ba113491869ab0dffed937f7b3dd50e8a735, which started
using OWORD block read messages to implement UBO loads.  The reason
for the failure is that we were giving bogus buffer alignment limits
to the application (1B), so the CTS would happily come back with
descriptor sets pointing at not even word-aligned uniform buffer
addresses.

Surprisingly the sampler messages used to fetch pull constants before
that commit were able to cope with the non-texel aligned addresses,
but the dataport messages used to fetch pull constants after that
commit and the ones used to access storage buffers (before and after
the same commit) aren't as permissive with unaligned addresses.

Cc: 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99097
Reported-by: Mark Janes 
Reviewed-by: Jason Ekstrand 

---

 src/intel/vulkan/anv_device.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index e3d278d..9245e5c 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -582,8 +582,8 @@ void anv_GetPhysicalDeviceProperties(
   .viewportSubPixelBits = 13, /* We take a float? */
   .minMemoryMapAlignment= 4096, /* A page */
   .minTexelBufferOffsetAlignment= 1,
-  .minUniformBufferOffsetAlignment  = 1,
-  .minStorageBufferOffsetAlignment  = 1,
+  .minUniformBufferOffsetAlignment  = 16,
+  .minStorageBufferOffsetAlignment  = 4,
   .minTexelOffset   = -8,
   .maxTexelOffset   = 7,
   .minTexelGatherOffset = -32,

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965/fs: Remove the FS_OPCODE_SET_SIMD4X2_OFFSET virtual opcode.

2016-12-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 23caf75182d010a60e2d8c8633acaacb3e7c065d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=23caf75182d010a60e2d8c8633acaacb3e7c065d

Author: Francisco Jerez 
Date:   Wed Apr 22 21:37:46 2015 +0300

i965/fs: Remove the FS_OPCODE_SET_SIMD4X2_OFFSET virtual opcode.

Not used anymore.  It was just a scalar MOV.

Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_defines.h|  1 -
 src/mesa/drivers/dri/i965/brw_fs.h |  3 ---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27 --
 src/mesa/drivers/dri/i965/brw_shader.cpp   |  2 --
 4 files changed, 33 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 1875380..a07d307 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1119,7 +1119,6 @@ enum opcode {
FS_OPCODE_MOV_DISPATCH_TO_FLAGS,
FS_OPCODE_DISCARD_JUMP,
FS_OPCODE_SET_SAMPLE_ID,
-   FS_OPCODE_SET_SIMD4X2_OFFSET,
FS_OPCODE_PACK_HALF_2x16_SPLIT,
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X,
FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 941c05f..d0e272b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -442,9 +442,6 @@ private:
struct brw_reg src0,
struct brw_reg src1);
 
-   void generate_set_simd4x2_offset(fs_inst *inst,
-struct brw_reg dst,
-struct brw_reg offset);
void generate_discard_jump(fs_inst *inst);
 
void generate_pack_half_2x16_split(fs_inst *inst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index db61d8e..aed3c72 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1379,29 +1379,6 @@ fs_generator::generate_pixel_interpolator_query(fs_inst 
*inst,
  inst->size_written / REG_SIZE);
 }
 
-
-/**
- * Sets the first word of a vgrf for gen7+ simd4x2 uniform pull constant
- * sampler LD messages.
- *
- * We don't want to bake it into the send message's code generation because
- * that means we don't get a chance to schedule the instructions.
- */
-void
-fs_generator::generate_set_simd4x2_offset(fs_inst *inst,
-  struct brw_reg dst,
-  struct brw_reg value)
-{
-   assert(value.file == BRW_IMMEDIATE_VALUE);
-
-   brw_push_insn_state(p);
-   brw_set_default_exec_size(p, BRW_EXECUTE_8);
-   brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
-   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value);
-   brw_pop_insn_state(p);
-}
-
 /* Sets vstride=1, width=4, hstride=0 of register src1 during
  * the ADD instruction.
  */
@@ -2004,10 +1981,6 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
  brw_memory_fence(p, dst);
  break;
 
-  case FS_OPCODE_SET_SIMD4X2_OFFSET:
- generate_set_simd4x2_offset(inst, dst, src[0]);
- break;
-
   case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
  const struct brw_reg mask =
 brw_stage_has_packed_dispatch(devinfo, stage,
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 25f745d..afab4aa 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -363,8 +363,6 @@ brw_instruction_name(const struct gen_device_info *devinfo, 
enum opcode op)
 
case FS_OPCODE_SET_SAMPLE_ID:
   return "set_sample_id";
-   case FS_OPCODE_SET_SIMD4X2_OFFSET:
-  return "set_simd4x2_offset";
 
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
   return "pack_half_2x16_split";

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965/fs: Expose arbitrary pull constant load sizes to the IR.

2016-12-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 9b22a0d295316b7547667ebbfe1e1b6182439186
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9b22a0d295316b7547667ebbfe1e1b6182439186

Author: Francisco Jerez 
Date:   Thu Dec  8 20:05:18 2016 -0800

i965/fs: Expose arbitrary pull constant load sizes to the IR.

Change the FS generator to ask the dataport for enough owords worth of
constants to fill the execution size of the instruction -- Which means
that the visitor now needs to set the execution size correctly for
uniform pull constant load instructions, which we were kind of
neglecting until now.

Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_eu_emit.c| 15 +++---
 src/mesa/drivers/dri/i965/brw_fs.cpp   |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 27 --
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp   |  9 +
 4 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 6141bfb..8536a13 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2256,7 +2256,7 @@ gen7_block_read_scratch(struct brw_codegen *p,
 }
 
 /**
- * Read a float[4] vector from the data port constant cache.
+ * Read float[4] vectors from the data port constant cache.
  * Location (in buffer) should be a multiple of 16.
  * Used for fetching shader constants.
  */
@@ -2270,6 +2270,7 @@ void brw_oword_block_read(struct brw_codegen *p,
const unsigned target_cache =
   (devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
BRW_DATAPORT_READ_TARGET_DATA_CACHE);
+   const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
 
/* On newer hardware, offset is in units of owords. */
if (devinfo->gen >= 6)
@@ -2278,11 +2279,12 @@ void brw_oword_block_read(struct brw_codegen *p,
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
brw_push_insn_state(p);
-   brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
 
+   brw_push_insn_state(p);
+   brw_set_default_exec_size(p, BRW_EXECUTE_8);
brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
 
/* set message header global offset field (reg 0, element 2) */
@@ -2291,6 +2293,7 @@ void brw_oword_block_read(struct brw_codegen *p,
   mrf.nr,
   2), BRW_REGISTER_TYPE_UD),
   brw_imm_ud(offset));
+   brw_pop_insn_state(p);
 
brw_inst *insn = next_insn(p, BRW_OPCODE_SEND);
 
@@ -2305,15 +2308,13 @@ void brw_oword_block_read(struct brw_codegen *p,
   brw_inst_set_base_mrf(devinfo, insn, mrf.nr);
}
 
-   brw_set_dp_read_message(p,
-  insn,
-  bind_table_index,
-  BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
+   brw_set_dp_read_message(p, insn, bind_table_index,
+   BRW_DATAPORT_OWORD_BLOCK_DWORDS(exec_size),
   BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
   target_cache,
   1, /* msg_length */
true, /* header_present */
-  1); /* response_length (1 reg, 2 owords!) */
+  DIV_ROUND_UP(exec_size, 8)); /* response_length */
 
brw_pop_insn_state(p);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b22dc9a..977fd8c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2121,7 +2121,7 @@ fs_visitor::lower_constant_loads()
 
  assert(inst->src[i].stride == 0);
 
- const fs_builder ubld = ibld.exec_all().group(8, 0);
+ const fs_builder ubld = ibld.exec_all().group(4, 0);
  struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
  ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
dst, brw_imm_ud(index), offset);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 8b9fa8e..93f4c41 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1127,6 +1127,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst 
*inst,
   struct brw_reg index,
   struct brw_reg offset)
 {
+   assert(type_sz(dst.type) == 4);
assert(inst->mlen != 0);
 
assert(index.file == BRW_IMMEDIATE_VALUE &&
@@ -1149,27 +1150,25 @@ 
fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
 {
assert(index.type == BRW_REGISTER_TYPE_UD);
assert(pa

Mesa (master): i965/fs: Switch to the constant cache for uniform pull constants.

2016-12-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: ad38ba113491869ab0dffed937f7b3dd50e8a735
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ad38ba113491869ab0dffed937f7b3dd50e8a735

Author: Francisco Jerez 
Date:   Wed Oct 26 14:25:06 2016 -0700

i965/fs: Switch to the constant cache for uniform pull constants.

This reverts to using the oword block read messages for uniform pull
constant loads, as used to be the case until
4c1fdae0a01b3f92ec03b61aac1d3df5.  There are two important differences
though: Now the L3 cacheability bits are set up correctly for UBOs
(since 11f5d8a5d4fbb861ec161f68593e429cbd65d1cd), and we target the
constant cache instead of the data cache.  The latter used to get no
L3 way allocation on boot on all platforms that existed at the time,
so oword read messages wouldn't get cached on L3 regardless of the
MOCS bits, which probably explains the apparent slowness of oword
fetches.

Constant cache loads seem to perform better than SIMD4x2 sampler loads
in a number of cases, they alleviate some of the cache thrashing
caused by the competition with textures for the L1/L2 sampler caches,
and they allow fetching up to 128B worth of constants with a single
oword fetch message.

Note that IVB devices suffer from a hardware bug that leads to
serialization of L3 read requests overlapping the same cacheline as a
result of a mechanism of the L3 (buggy on IVB) to preserve coherency.
Since read requests for matching cachelines from any L3 client are not
pipelined, throughput may decrease in cases where there are no
non-overlapping requests left in the queue that can be processed
between them.

This situation should be relatively uncommon as long as we make sure
that we don't use the 1/2 oword messages in cases where the shader
intends to read from any other location of the same cacheline at some
other point.  This is generally a good idea anyway on all generations
because using the 1 and 2 oword messages is expected to waste
bandwidth since the minimum L3 request size for the DC is exactly 4
owords (i.e. one cacheline).  A future commit will have this effect.
I haven't been able to find any real-world example where this would
still result in a regression on IVB, but if someone happens to find
one it shouldn't be too difficult to add an IVB-specific check to have
it fall back to the sampler cache for pull constant loads.

Note that on SKL+ this change has the additional benefit of reducing
the register footprint of pull constant loads.  The following table
summarizes the effect of the whole series on several shader-db stats:

 Total instructions  Total cycles
BWR: 4571248 -> 4568342 (-0.06%) 123375740 -> 123373296 (-0.00%)
ELK: 3989020 -> 3985402 (-0.09%)  98757068 -> 98754058 (-0.00%)
ILK: 6383591 -> 6376787 (-0.11%) 143649910 -> 143648914 (-0.00%)
SNB: 7528395 -> 7501446 (-0.36%) 103503796 -> 102460370 (-1.01%)
IVB: 6949221 -> 6943317 (-0.08%)  60592262 -> 60584422 (-0.01%)
HSW: 6409753 -> 6403702 (-0.09%)  60609070 -> 60604414 (-0.01%)
BDW: 8043467 -> 7976364 (-0.83%)  68427730 -> 68483042 (0.08%)
CHV: 8045019 -> 7977916 (-0.83%)  68297426 -> 68352756 (0.08%)
SKL: 8204037 -> 7939086 (-3.23%)  66583900 -> 65624378 (-1.44%)

      Lost->Gained   Total spills            Total fills
BWR:   5 ->   5      1488 -> 1488 (0.00%)    1957 -> 1957 (0.00%)
ELK:   5 ->   5      1489 -> 1489 (0.00%)    1958 -> 1958 (0.00%)
ILK:   1 ->   4      1449 -> 1449 (0.00%)    1921 -> 1921 (0.00%)
SNB:   0 ->   0       549 -> 549 (0.00%)       52 -> 52 (0.00%)
IVB:  13 ->   3      1271 -> 1271 (0.00%)    1162 -> 1162 (0.00%)
HSW:  11 ->   0      1271 -> 1271 (0.00%)    1162 -> 1162 (0.00%)
BDW:  12 ->   0      1340 -> 1340 (0.00%)    1452 -> 1452 (0.00%)
CHV:  12 ->   0      1340 -> 1340 (0.00%)    1452 -> 1452 (0.00%)
SKL:   0 -> 120      1269 -> 375 (-70.45%)   1563 -> 690 (-55.85%)

v3: Non-trivial rebase.
Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_eu_emit.c|  5 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 42 +++---
 src/mesa/drivers/dri/i965/brw_fs.h |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 78 +-
 4 files changed, 36 insertions(+), 91 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 72b6df6..341f543 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2266,7 +2266,7 @@ gen7_block_read_scratch(struct brw_codegen *p,
 }
 
 /**
- * Read a float[4] vector from the data port Data Cache (const buffer).
+ * Read a float[4] vector from the data port constant cache.
  * Location (in buffer) should be a multiple of 16.
  * Used for fetching shader constants.
  */
@@ -2278,8 +2278,7 @@ void brw_oword_block_read(struct brw_codegen *p,
 {
const struct gen_device_info *devinfo = p->de

Mesa (master): i965/fs: Drop useless access mode override from pull constant generator code.

2016-12-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e014058195540a3e54085903821beca70f8f2ec5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e014058195540a3e54085903821beca70f8f2ec5

Author: Francisco Jerez 
Date:   Thu Dec  8 19:08:33 2016 -0800

i965/fs: Drop useless access mode override from pull constant generator code.

Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 93f4c41..db61d8e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1175,7 +1175,6 @@ 
fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
 
   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-  brw_set_default_access_mode(p, BRW_ALIGN_1);
 
   /* a0.0 = surf_index & 0xff */
   brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
@@ -1311,7 +1310,6 @@ 
fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
 
   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-  brw_set_default_access_mode(p, BRW_ALIGN_1);
 
   /* a0.0 = surf_index & 0xff */
   brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965/disasm: Decode dataport constant cache control fields.

2016-12-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: fd3120d85c295eeeb3b6c9a60372506ae48f5fdb
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=fd3120d85c295eeeb3b6c9a60372506ae48f5fdb

Author: Francisco Jerez 
Date:   Thu Dec  8 22:14:59 2016 -0800

i965/disasm: Decode dataport constant cache control fields.

Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_disasm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c 
b/src/mesa/drivers/dri/i965/brw_disasm.c
index 5e51be7..5930e44 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -1410,6 +1410,7 @@ brw_disassemble_inst(FILE *file, const struct 
gen_device_info *devinfo,
 }
 break;
  case GEN6_SFID_DATAPORT_SAMPLER_CACHE:
+ case GEN6_SFID_DATAPORT_CONSTANT_CACHE:
 /* aka BRW_SFID_DATAPORT_READ on Gen4-5 */
 if (devinfo->gen >= 6) {
format(file, " (%"PRIu64", %"PRIu64", %"PRIu64", %"PRIu64")",

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965/gen6+: Invalidate constant cache on brw_emit_mi_flush().

2016-12-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 591e14ec08b13e8d50636feb1afa578257175b9d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=591e14ec08b13e8d50636feb1afa578257175b9d

Author: Francisco Jerez 
Date:   Thu Dec  8 18:00:17 2016 -0800

i965/gen6+: Invalidate constant cache on brw_emit_mi_flush().

In order to make sure that the constant cache is coherent with
previous rendering when we start using it for pull constant loads.

Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_pipe_control.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index dd426bf..b8f7406 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -351,6 +351,7 @@ brw_emit_mi_flush(struct brw_context *brw)
   int flags = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_RENDER_TARGET_FLUSH;
   if (brw->gen >= 6) {
  flags |= PIPE_CONTROL_INSTRUCTION_INVALIDATE |
+  PIPE_CONTROL_CONST_CACHE_INVALIDATE |
   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
   PIPE_CONTROL_VF_CACHE_INVALIDATE |
   PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965: Let the caller of brw_set_dp_write/read_message control the target cache.

2016-12-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 3c78d31374422b028b19afa5799689c404a5b73e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c78d31374422b028b19afa5799689c404a5b73e

Author: Francisco Jerez 
Date:   Thu Apr 23 14:36:16 2015 +0300

i965: Let the caller of brw_set_dp_write/read_message control the target cache.

brw_set_dp_read_message already had a target_cache argument, but its
interpretation was rather convoluted (on Gen6 the render cache was
used if the caller asked for it, otherwise it was ignored using the
sampler cache instead), and the constant cache wasn't representable at
all.  brw_set_dp_write_message used the data cache on Gen7+ except for
RENDER_TARGET_WRITE messages, in which case it would use the render
cache.  On Gen6 the render cache was always used.

Instead of the above, provide the shared unit SFID that the caller
expects will be used.  Makes no functional changes.

v3: Non-trivial rebase.

Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_eu.h   |  1 +
 src/mesa/drivers/dri/i965/brw_eu_emit.c  | 69 +++-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 15 --
 3 files changed, 43 insertions(+), 42 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index 737a335..c44896b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -233,6 +233,7 @@ void brw_set_dp_write_message(struct brw_codegen *p,
  unsigned binding_table_index,
  unsigned msg_control,
  unsigned msg_type,
+  unsigned target_cache,
  unsigned msg_length,
  bool header_present,
  unsigned last_render_target,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index ca04221..72b6df6 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -706,6 +706,7 @@ brw_set_dp_write_message(struct brw_codegen *p,
 unsigned binding_table_index,
 unsigned msg_control,
 unsigned msg_type,
+ unsigned target_cache,
 unsigned msg_length,
 bool header_present,
 unsigned last_render_target,
@@ -714,20 +715,8 @@ brw_set_dp_write_message(struct brw_codegen *p,
 unsigned send_commit_msg)
 {
const struct gen_device_info *devinfo = p->devinfo;
-   unsigned sfid;
-
-   if (devinfo->gen >= 7) {
-  /* Use the Render Cache for RT writes; otherwise use the Data Cache */
-  if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
-sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
-  else
-sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
-   } else if (devinfo->gen == 6) {
-  /* Use the render cache for all write messages. */
-  sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
-   } else {
-  sfid = BRW_SFID_DATAPORT_WRITE;
-   }
+   const unsigned sfid = (devinfo->gen >= 6 ? target_cache :
+  BRW_SFID_DATAPORT_WRITE);
 
brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
  header_present, end_of_thread);
@@ -753,26 +742,8 @@ brw_set_dp_read_message(struct brw_codegen *p,
unsigned response_length)
 {
const struct gen_device_info *devinfo = p->devinfo;
-   unsigned sfid;
-
-   if (devinfo->gen >= 7) {
-  if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
- sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
-  else if (target_cache == BRW_DATAPORT_READ_TARGET_DATA_CACHE)
- sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
-  else if (target_cache == BRW_DATAPORT_READ_TARGET_SAMPLER_CACHE)
- sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
-  else
- unreachable("Invalid target cache");
-
-   } else if (devinfo->gen == 6) {
-  if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
-sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
-  else
-sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
-   } else {
-  sfid = BRW_SFID_DATAPORT_READ;
-   }
+   const unsigned sfid = (devinfo->gen >= 6 ? target_cache :
+  BRW_SFID_DATAPORT_READ);
 
brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
  header_present, false);
@@ -2073,6 +2044,10 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
   unsigned offset)
 {
const struct gen_device_info *devinfo = p->devinfo;
+   const unsigned target_cache =
+  (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
+   devinfo->gen >= 6 ? GEN6_SFID_DAT

Mesa (master): i965/fs: Fetch one cacheline of pull constants at a time.

2016-12-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: b56fa830c6095f8226456b2aeb62f2dfad804be5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b56fa830c6095f8226456b2aeb62f2dfad804be5

Author: Francisco Jerez 
Date:   Thu Dec  8 19:18:00 2016 -0800

i965/fs: Fetch one cacheline of pull constants at a time.

Asking the DC for less than one cacheline (4 owords) of data for
uniform pull constants is suboptimal because the DC cannot request
less than that from L3, resulting in wasted bandwidth and unnecessary
message dispatch overhead, and exacerbating the IVB L3 serialization
bug.  The following table summarizes the overall framerate improvement
(with statistical significance of 5% and sample size ~10) from the
whole series up to this patch for several benchmarks and hardware
generations:

 | SKL   | BDW  | HSW
SynMark2 OglShMapPcf | 24.63% ±0.45% | 4.01% ±0.70% | 10.31% ±0.38%
GfxBench4 gl_manhattan31 |  5.93% ±0.35% | 3.92% ±0.31% |  6.62% ±0.22%
GfxBench4 gl_4   |  2.52% ±0.44% | 1.23% ±0.10% |  N/A
Unigine Valley   |  0.83% ±0.17% | 0.23% ±0.05% |  0.74% ±0.45%

Note that there are two versions of the Manhattan demo shipped with
GfxBench4, one of them is the original gl_manhattan demo which doesn't
use UBOs, so this patch will have no effect on it, and another one is
the gl_manhattan31 demo based on GL 4.3/GLES 3.1, which this patch
benefits as shown above.

I haven't observed any statistically significant regressions in the
benchmarks I have at hand.  Note that the comparatively huge
improvement on SKL in the OglShMapPcf test case is due to the combined
effect of this patch and the register pressure benefit on SKL+ of
"i965/fs: Switch to the constant cache for uniform pull constants.",
part of the same series.

Going up to 8 oword blocks would improve performance of pull constants
even more, but at the cost of some additional bandwidth and register
pressure, so it would have to be done on-demand based on the number of
constants actually used by the shader.

v2: Fix for Gen4 and 5.
v3: Non-trivial rebase.  Rework to allow the visitor to specify
arbitrary pull constant block sizes.

Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_fs.cpp | 21 +
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 16 +---
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 977fd8c..671b44b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2111,25 +2111,22 @@ fs_visitor::lower_constant_loads()
  if (pull_index == -1)
continue;
 
- const unsigned index = 
stage_prog_data->binding_table.pull_constants_start;
- fs_reg dst;
-
- if (type_sz(inst->src[i].type) <= 4)
-dst = vgrf(glsl_type::float_type);
- else
-dst = vgrf(glsl_type::double_type);
-
  assert(inst->src[i].stride == 0);
 
- const fs_builder ubld = ibld.exec_all().group(4, 0);
- struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
+ const unsigned index = 
stage_prog_data->binding_table.pull_constants_start;
+ const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
+ const fs_builder ubld = ibld.exec_all().group(block_sz / 4, 0);
+ const fs_reg dst = ubld.vgrf(BRW_REGISTER_TYPE_UD);
+ const unsigned base = pull_index * 4;
+
  ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
-   dst, brw_imm_ud(index), offset);
+   dst, brw_imm_ud(index), brw_imm_ud(base & ~(block_sz - 1)));
 
  /* Rewrite the instruction to use the temporary VGRF. */
  inst->src[i].file = VGRF;
  inst->src[i].nr = dst.nr;
- inst->src[i].offset = (pull_index & 3) * 4 + inst->src[i].offset % 4;
+ inst->src[i].offset = (base & (block_sz - 1)) +
+   inst->src[i].offset % 4;
 
  brw_mark_surface_used(prog_data, index);
   }
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 7df7423..9f2729a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -4059,21 +4059,23 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, 
nir_intrinsic_instr *instr
   * and we have to split it if necessary.
   */
  const unsigned type_size = type_sz(dest.type);
- const fs_builder ubld = bld.exec_all().group(4, 0);
- const fs_reg packed_consts = ubld.vgrf(BRW_REGISTER_TYPE_F);
+ const unsigned block_sz = 64; /* Fetch one cacheline at a time. */
+ const fs_builder ubld = bld.exec_all().group(block_sz / 4, 0);
+ const fs_reg packed_consts = ubld.vgrf(BRW_REGI

Mesa (master): i965: Factor out oword block read and write message control calculation.

2016-12-14 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 7a6aadb76ff3f6ef73216b53b0dc5edda5bae978
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7a6aadb76ff3f6ef73216b53b0dc5edda5bae978

Author: Francisco Jerez 
Date:   Thu Dec  8 19:58:25 2016 -0800

i965: Factor out oword block read and write message control calculation.

We'll need roughly the same logic in other places and it would be
annoying to duplicate it.  Instead factor it out into a function-like
macro that takes the number of dwords per block (which will prove more
convenient than taking the same value in owords or some other unit).

Reviewed-by: Kenneth Graunke 

---

 src/mesa/drivers/dri/i965/brw_defines.h |  6 ++
 src/mesa/drivers/dri/i965/brw_eu_emit.c | 14 ++
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index b1b6248..1875380 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1669,6 +1669,12 @@ enum brw_message_target {
 #define BRW_DATAPORT_OWORD_BLOCK_2_OWORDS 2
 #define BRW_DATAPORT_OWORD_BLOCK_4_OWORDS 3
 #define BRW_DATAPORT_OWORD_BLOCK_8_OWORDS 4
+#define BRW_DATAPORT_OWORD_BLOCK_DWORDS(n)  \
+   ((n) == 4 ? BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW :\
+(n) == 8 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS :  \
+(n) == 16 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS : \
+(n) == 32 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : \
+(abort(), ~0))
 
 #define BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0
 #define BRW_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS2
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 341f543..6141bfb 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2056,11 +2056,6 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
 
const unsigned mlen = 1 + num_regs;
-   const unsigned msg_control =
-  (num_regs == 1 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS :
-   num_regs == 2 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS :
-   num_regs == 4 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : 0);
-   assert(msg_control);
 
/* Set up the message header.  This is g0, with g0.2 filled with
 * the offset.  We don't want to leave our offset around in g0 or
@@ -2134,7 +2129,7 @@ void brw_oword_block_write_scratch(struct brw_codegen *p,
   brw_set_dp_write_message(p,
   insn,
brw_scratch_surface_idx(p),
-  msg_control,
+  BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
   msg_type,
target_cache,
   mlen,
@@ -2181,11 +2176,6 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
dest = retype(dest, BRW_REGISTER_TYPE_UW);
 
const unsigned rlen = num_regs;
-   const unsigned msg_control =
-  (num_regs == 1 ? BRW_DATAPORT_OWORD_BLOCK_2_OWORDS :
-   num_regs == 2 ? BRW_DATAPORT_OWORD_BLOCK_4_OWORDS :
-   num_regs == 4 ? BRW_DATAPORT_OWORD_BLOCK_8_OWORDS : 0);
-   assert(msg_control);
const unsigned target_cache =
   (devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
@@ -2222,7 +2212,7 @@ brw_oword_block_read_scratch(struct brw_codegen *p,
   brw_set_dp_read_message(p,
  insn,
   brw_scratch_surface_idx(p),
- msg_control,
+ BRW_DATAPORT_OWORD_BLOCK_DWORDS(num_regs * 8),
  BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* 
msg_type */
  target_cache,
  1, /* msg_length */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): clover: Restore support for LLVM <= 3.9.

2016-11-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 95ddb37708ca16ccbd0f607d17a82be2de0d07b6
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=95ddb37708ca16ccbd0f607d17a82be2de0d07b6

Author: Vedran Miletić 
Date:   Tue Nov 22 20:25:34 2016 +0100

clover: Restore support for LLVM <= 3.9.

The commit 8e430ff8b060b4e8e922bae24b3c57837da6ea77 broke support for
LLVM 3.9 and older versions in Clover. This patch restores it and
refactors the support using Clover compatibility layer for LLVM.

v2: merged #ifdef blocks
v3: added support for LLVM 3.6-3.8
v4: add missing #ifdef around 
v5: simplify using templates and lambda

Signed-off-by: Vedran Miletić 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=98740
Tested-by[v4]: Pierre Moreau 
Tested-by: Vinson Lee 
Reviewed-by: Francisco Jerez 
Reviewed-by: Jan Vesely 

---

 .../state_trackers/clover/llvm/codegen/bitcode.cpp |  9 +++--
 src/gallium/state_trackers/clover/llvm/compat.hpp  | 18 ++
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp 
b/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp
index 5dcc4f8..d09207b 100644
--- a/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp
+++ b/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp
@@ -32,6 +32,7 @@
 ///
 
 #include "llvm/codegen.hpp"
+#include "llvm/compat.hpp"
 #include "llvm/metadata.hpp"
 #include "core/error.hpp"
 #include "util/algorithm.hpp"
@@ -99,13 +100,9 @@ clover::llvm::parse_module_library(const module &m, 
::llvm::LLVMContext &ctx,
auto mod = ::llvm::parseBitcodeFile(::llvm::MemoryBufferRef(
 as_string(m.secs[0].data), " "), ctx);
 
-   if (::llvm::Error err = mod.takeError()) {
-  std::string msg;
-  ::llvm::handleAllErrors(std::move(err), [&](::llvm::ErrorInfoBase &EIB) {
- msg = EIB.message();
- fail(r_log, error(CL_INVALID_PROGRAM), msg.c_str());
+   compat::handle_module_error(mod, [&](const std::string &s) {
+ fail(r_log, error(CL_INVALID_PROGRAM), s);
   });
-   }
 
return std::unique_ptr<::llvm::Module>(std::move(*mod));
 }
diff --git a/src/gallium/state_trackers/clover/llvm/compat.hpp 
b/src/gallium/state_trackers/clover/llvm/compat.hpp
index a963cff..81592ce 100644
--- a/src/gallium/state_trackers/clover/llvm/compat.hpp
+++ b/src/gallium/state_trackers/clover/llvm/compat.hpp
@@ -39,6 +39,11 @@
 #include 
 #include 
 #include 
+#if HAVE_LLVM >= 0x0400
+#include 
+#else
+#include 
+#endif
 
 #if HAVE_LLVM >= 0x0307
 #include 
@@ -158,6 +163,19 @@ namespace clover {
 #else
  const auto default_reloc_model = ::llvm::Reloc::Default;
 #endif
+
+ template void
+ handle_module_error(M &mod, const F &f) {
+#if HAVE_LLVM >= 0x0400
+if (::llvm::Error err = mod.takeError())
+   ::llvm::handleAllErrors(std::move(err), 
[&](::llvm::ErrorInfoBase &eib) {
+ f(eib.message());
+  });
+#else
+if (!mod)
+   f(mod.getError().message());
+#endif
+ }
   }
}
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform().

2016-11-03 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: f3d387867f74ae758b41168f23992671f7dce254
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3d387867f74ae758b41168f23992671f7dce254

Author: Francisco Jerez 
Date:   Tue Nov  1 11:56:13 2016 -0700

nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform().

Assuming the hardware is set up to use a screen coordinate system
flipped vertically with respect to the GL's window coordinate system,
the SYSTEM_VALUE_SAMPLE_POS vector will also be flipped vertically
with respect to the value expected by the GL, so we need to give it
the same treatment as gl_FragCoord.  Fixes the following CTS tests on
i965:

 
ES31-CTS.functional.shaders.multisample_interpolation.interpolate_at_offset.at_sample_position.default_framebuffer
 
ES31-CTS.functional.shaders.sample_variables.sample_pos.correctness.default_framebuffer

when run with any multisample configuration, e.g. rgbad24s8ms4.

Cc: 
Reviewed-by: Kenneth Graunke 
Reviewed-by: Anuj Phogat 

---

 src/compiler/nir/nir_lower_wpos_ytransform.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/src/compiler/nir/nir_lower_wpos_ytransform.c 
b/src/compiler/nir/nir_lower_wpos_ytransform.c
index 173f058..f211c73 100644
--- a/src/compiler/nir/nir_lower_wpos_ytransform.c
+++ b/src/compiler/nir/nir_lower_wpos_ytransform.c
@@ -273,6 +273,26 @@ lower_interp_var_at_offset(lower_wpos_ytransform_state 
*state,
 }
 
 static void
+lower_load_sample_pos(lower_wpos_ytransform_state *state,
+  nir_intrinsic_instr *intr)
+{
+   nir_builder *b = &state->b;
+   b->cursor = nir_after_instr(&intr->instr);
+
+   nir_ssa_def *pos = &intr->dest.ssa;
+   nir_ssa_def *scale = nir_channel(b, get_transform(state), 0);
+   nir_ssa_def *neg_scale = nir_channel(b, get_transform(state), 2);
+   /* Either y or 1-y for scale equal to 1 or -1 respectively. */
+   nir_ssa_def *flipped_y =
+   nir_fadd(b, nir_fmax(b, neg_scale, nir_imm_float(b, 0.0)),
+nir_fmul(b, nir_channel(b, pos, 1), scale));
+   nir_ssa_def *flipped_pos = nir_vec2(b, nir_channel(b, pos, 0), flipped_y);
+
+   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, 
nir_src_for_ssa(flipped_pos),
+  flipped_pos->parent_instr);
+}
+
+static void
 lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block 
*block)
 {
nir_foreach_instr_safe(instr, block) {
@@ -287,6 +307,10 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state 
*state, nir_block *block
/* gl_FragCoord should not have array/struct deref's: */
assert(dvar->deref.child == NULL);
lower_fragcoord(state, intr);
+} else if (var->data.mode == nir_var_system_value &&
+   var->data.location == SYSTEM_VALUE_SAMPLE_POS) {
+   assert(dvar->deref.child == NULL);
+   lower_load_sample_pos(state, intr);
 }
  } else if (intr->intrinsic == nir_intrinsic_interp_var_at_offset) {
 lower_interp_var_at_offset(state, intr);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): clover: Implement clGetExtensionFunctionAddressForPlatform.

2016-10-30 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: cb0879985a40bcde1516e5341c5a3e5ea0968b87
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb0879985a40bcde1516e5341c5a3e5ea0968b87

Author: Serge Martin 
Date:   Sun Sep 27 11:15:14 2015 +0200

clover: Implement clGetExtensionFunctionAddressForPlatform.

Add clGetExtensionFunctionAddressForPlatform (CL 1.2).

Reviewed-by: Francisco Jerez 

---

 src/gallium/state_trackers/clover/api/dispatch.cpp |  2 +-
 src/gallium/state_trackers/clover/api/dispatch.hpp |  4 
 src/gallium/state_trackers/clover/api/platform.cpp | 16 
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/clover/api/dispatch.cpp 
b/src/gallium/state_trackers/clover/api/dispatch.cpp
index f10babe..8f4cfdc 100644
--- a/src/gallium/state_trackers/clover/api/dispatch.cpp
+++ b/src/gallium/state_trackers/clover/api/dispatch.cpp
@@ -131,7 +131,7 @@ namespace clover {
   clEnqueueMigrateMemObjects,
   clEnqueueMarkerWithWaitList,
   clEnqueueBarrierWithWaitList,
-  NULL, // clGetExtensionFunctionAddressForPlatform
+  GetExtensionFunctionAddressForPlatform,
   NULL, // clCreateFromGLTexture
   NULL, // clGetDeviceIDsFromD3D11KHR
   NULL, // clCreateFromD3D11BufferKHR
diff --git a/src/gallium/state_trackers/clover/api/dispatch.hpp 
b/src/gallium/state_trackers/clover/api/dispatch.hpp
index 7f62282..0ec1b51 100644
--- a/src/gallium/state_trackers/clover/api/dispatch.hpp
+++ b/src/gallium/state_trackers/clover/api/dispatch.hpp
@@ -777,6 +777,10 @@ namespace clover {
void *
GetExtensionFunctionAddress(const char *p_name);
 
+   void *
+   GetExtensionFunctionAddressForPlatform(cl_platform_id d_platform,
+  const char *p_name);
+
cl_int
IcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id *rd_platforms,
 cl_uint *rnum_platforms);
diff --git a/src/gallium/state_trackers/clover/api/platform.cpp 
b/src/gallium/state_trackers/clover/api/platform.cpp
index b1b1fdf..ed86163 100644
--- a/src/gallium/state_trackers/clover/api/platform.cpp
+++ b/src/gallium/state_trackers/clover/api/platform.cpp
@@ -92,6 +92,16 @@ clover::GetPlatformInfo(cl_platform_id d_platform, 
cl_platform_info param,
 }
 
 void *
+clover::GetExtensionFunctionAddressForPlatform(cl_platform_id d_platform,
+   const char *p_name) try {
+   obj(d_platform);
+   return GetExtensionFunctionAddress(p_name);
+
+} catch (error &e) {
+   return NULL;
+}
+
+void *
 clover::GetExtensionFunctionAddress(const char *p_name) {
std::string name { p_name };
 
@@ -118,6 +128,12 @@ clGetExtensionFunctionAddress(const char *p_name) {
return GetExtensionFunctionAddress(p_name);
 }
 
+CLOVER_ICD_API void *
+clGetExtensionFunctionAddressForPlatform(cl_platform_id d_platform,
+ const char *p_name) {
+   return GetExtensionFunctionAddressForPlatform(d_platform, p_name);
+}
+
 CLOVER_ICD_API cl_int
 clIcdGetPlatformIDsKHR(cl_uint num_entries, cl_platform_id *rd_platforms,
cl_uint *rnum_platforms) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): clover: Introduce CLOVER_EXTRA_*_OPTIONS environment variables

2016-10-30 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 2fba72046da09dd28f54df02794b358773899d13
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2fba72046da09dd28f54df02794b358773899d13

Author: Vedran Miletić 
Date:   Wed Sep 28 16:18:24 2016 +0200

clover: Introduce CLOVER_EXTRA_*_OPTIONS environment variables

The options specified in the CLOVER_EXTRA_BUILD_OPTIONS shell
variable are appended to the options specified by the OpenCL program
in the clBuildProgram function call, if any.
Analogously, the options specified in the CLOVER_EXTRA_COMPILE_OPTIONS
and CLOVER_EXTRA_LINK_OPTIONS variables are appended to the options
specified in clCompileProgram and clLinkProgram function calls,
respectively.

v2:
 * rename to CLOVER_EXTRA_COMPILER_OPTIONS
 * use debug_get_option
 * append to linker options as well

v3: code cleanups

v4: separate CLOVER_EXTRA_LINKER_OPTIONS options

v5:
 * fix documentation typo
 * use CLOVER_EXTRA_COMPILER_OPTIONS in link stage

v6:
 * separate in CLOVER_EXTRA_{BUILD,COMPILE,LINK}_OPTIONS
 * append options in cl{Build,Compile,Link}Program

Signed-off-by: Vedran Miletić 
Reviewed-by[v1]: Edward O'Callaghan 

v7 [Francisco Jerez]: Slight simplification.

Reviewed-by: Francisco Jerez 

---

 docs/envvars.html | 15 +++
 src/gallium/state_trackers/clover/api/program.cpp | 10 +++---
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/docs/envvars.html b/docs/envvars.html
index cf57ca5..af1a30c 100644
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -235,6 +235,21 @@ Setting to "tgsi", for example, will print all the TGSI 
shaders.
 See src/mesa/state_tracker/st_debug.c for other options.
 
 
+Clover state tracker environment variables
+
+
+CLOVER_EXTRA_BUILD_OPTIONS - allows specifying additional compiler and 
linker
+options. Specified options are appended after the options set by the OpenCL
+program in clBuildProgram.
+CLOVER_EXTRA_COMPILE_OPTIONS - allows specifying additional compiler
+options. Specified options are appended after the options set by the OpenCL
+program in clCompileProgram.
+CLOVER_EXTRA_LINK_OPTIONS - allows specifying additional linker
+options. Specified options are appended after the options set by the OpenCL
+program in clLinkProgram.
+
+
+
 Softpipe driver environment variables
 
 SOFTPIPE_DUMP_FS - if set, the softpipe driver will print fragment shaders
diff --git a/src/gallium/state_trackers/clover/api/program.cpp 
b/src/gallium/state_trackers/clover/api/program.cpp
index c3f9cb9..ba4ce7a 100644
--- a/src/gallium/state_trackers/clover/api/program.cpp
+++ b/src/gallium/state_trackers/clover/api/program.cpp
@@ -22,6 +22,7 @@
 
 #include "api/util.hpp"
 #include "core/program.hpp"
+#include "util/u_debug.h"
 
 #include 
 
@@ -177,7 +178,8 @@ clBuildProgram(cl_program d_prog, cl_uint num_devs,
auto &prog = obj(d_prog);
auto devs = (d_devs ? objs(d_devs, num_devs) :
 ref_vector(prog.context().devices()));
-   auto opts = (p_opts ? p_opts : "");
+   const auto opts = std::string(p_opts ? p_opts : "") + " " +
+ debug_get_option("CLOVER_EXTRA_BUILD_OPTIONS", "");
 
validate_build_common(prog, num_devs, d_devs, pfn_notify, user_data);
 
@@ -202,7 +204,8 @@ clCompileProgram(cl_program d_prog, cl_uint num_devs,
auto &prog = obj(d_prog);
auto devs = (d_devs ? objs(d_devs, num_devs) :
 ref_vector(prog.context().devices()));
-   auto opts = (p_opts ? p_opts : "");
+   const auto opts = std::string(p_opts ? p_opts : "") + " " +
+ debug_get_option("CLOVER_EXTRA_COMPILE_OPTIONS", "");
header_map headers;
 
validate_build_common(prog, num_devs, d_devs, pfn_notify, user_data);
@@ -271,7 +274,8 @@ clLinkProgram(cl_context d_ctx, cl_uint num_devs, const 
cl_device_id *d_devs,
   void (*pfn_notify) (cl_program, void *), void *user_data,
   cl_int *r_errcode) try {
auto &ctx = obj(d_ctx);
-   auto opts = (p_opts ? p_opts : "");
+   const auto opts = std::string(p_opts ? p_opts : "") + " " +
+ debug_get_option("CLOVER_EXTRA_LINK_OPTIONS", "");
auto progs = objs(d_progs, num_progs);
auto prog = create(ctx);
auto devs = validate_link_devices(progs,

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): clover: Pass unquoted compiler arguments to Clang

2016-10-30 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e3272865c216933168e6c08766d266a33d0e1497
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e3272865c216933168e6c08766d266a33d0e1497

Author: Vedran Miletić 
Date:   Wed Sep 28 17:11:43 2016 +0200

clover: Pass unquoted compiler arguments to Clang

OpenCL apps can quote arguments they pass to the OpenCL compiler, most
commonly include paths containing spaces.

If the Clang OpenCL compiler was called via a shell, the shell would
split the arguments with respect to quotes and then remove quotes
before passing the arguments to the compiler. Since we call Clang as a
library, we have to split the argument with respect to quotes and then
remove quotes before passing the arguments.

v2: move to tokenize(), remove throwing of CL_INVALID_COMPILER_OPTIONS

v3: simplify parsing logic, use more C++11

v4: restore error throwing, clarify a comment

Signed-off-by: Vedran Miletić 
Reviewed-by: Francisco Jerez 

---

 src/gallium/state_trackers/clover/llvm/util.hpp | 40 ++---
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/src/gallium/state_trackers/clover/llvm/util.hpp 
b/src/gallium/state_trackers/clover/llvm/util.hpp
index 8db6f20..222becd 100644
--- a/src/gallium/state_trackers/clover/llvm/util.hpp
+++ b/src/gallium/state_trackers/clover/llvm/util.hpp
@@ -24,6 +24,7 @@
 #ifndef CLOVER_LLVM_UTIL_HPP
 #define CLOVER_LLVM_UTIL_HPP
 
+#include "core/error.hpp"
 #include "util/u_debug.h"
 
 #include 
@@ -42,11 +43,42 @@ namespace clover {
   inline std::vector<std::string>
   tokenize(const std::string &s) {
  std::vector<std::string> ss;
- std::istringstream iss(s);
- std::string t;
+ std::ostringstream oss;
 
- while (getline(iss, t, ' '))
-ss.push_back(t);
+ // OpenCL programs can pass a quoted argument, most frequently the
+ // include path. This is useful so that path containing spaces is
+ // treated as a single argument instead of being split by the spaces.
+ // Additionally, the argument should also be unquoted before being
+ // passed to the compiler. We avoid using std::string::replace here to
+ // remove quotes, as the single and double quote characters can be a
+ // part of the file name.
+ bool escape_next = false;
+ bool in_quote_double = false;
+ bool in_quote_single = false;
+
+ for (auto c : s) {
+if (escape_next) {
+   oss.put(c);
+   escape_next = false;
+} else if (c == '\\') {
+   escape_next = true;
+} else if (c == '"' && !in_quote_single) {
+   in_quote_double = !in_quote_double;
+} else if (c == '\'' && !in_quote_double) {
+   in_quote_single = !in_quote_single;
+} else if (c != ' ' || in_quote_single || in_quote_double) {
+   oss.put(c);
+} else if (oss.tellp() > 0) {
+   ss.emplace_back(oss.str());
+   oss.str("");
+}
+ }
+
+ if (oss.tellp() > 0)
+ss.emplace_back(oss.str());
+
+ if (in_quote_double || in_quote_single)
+throw invalid_build_options_error();
 
  return ss;
   }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glapi: Move PrimitiveBoundingBox and BlendBarrier definitions into ES3.2 category.

2016-10-20 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 15a084a03998c5c86206137fdaf6f43b5f98485a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=15a084a03998c5c86206137fdaf6f43b5f98485a

Author: Francisco Jerez 
Date:   Tue Oct 18 14:53:20 2016 -0700

glapi: Move PrimitiveBoundingBox and BlendBarrier definitions into ES3.2 
category.

These two GLES 3.2 entry points were being defined in the category of
the ARB_ES3_2_compatibility and KHR_blend_equation_advanced extensions
respectively instead of in the ES3.2 category.  Defining them in the
ES3.2 category makes sure that the gl_procs.py generator emits
declarations in the glprocs.h header file for the unsuffixed GLES-only
entry points that PrimitiveBoundingBoxARB and BlendBarrierKHR
respectively alias.  This should avoid a compilation failure during
scons builds in combination with "mapi: export all GLES 3.2 functions
in libGLESv2.so".

Cc: mesa-sta...@lists.freedesktop.org
Reviewed-by: Dylan Baker 

---

 src/mapi/glapi/gen/gl_API.xml | 30 +-
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
index 5998ccf..00c9bb7 100644
--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -8296,6 +8296,23 @@
 
 http://www.w3.org/2001/XInclude"/>
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
 
 
@@ -8316,7 +8333,6 @@
 
 
 
-
 
 
 
@@ -8332,18 +8348,6 @@
 
 
 
-
-
-
-
-
-
-
-
-
-
-
 
 
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): Revert "Revert "mapi: export all GLES 3.2 functions in libGLESv2.so""

2016-10-20 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 811eb7f178b8b85ac299121ac09a3180b9b55da2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=811eb7f178b8b85ac299121ac09a3180b9b55da2

Author: Francisco Jerez 
Date:   Tue Oct 18 20:44:10 2016 -0700

Revert "Revert "mapi: export all GLES 3.2 functions in libGLESv2.so""

This reverts commit 85e9bbc14d93fa7166c9ae075ee7ae29a8313e3f.  The
previous commit should help with the scons build failure caused by the
original commit.

Cc: mesa-sta...@lists.freedesktop.org
Reviewed-by: Dylan Baker 

---

 src/mapi/glapi/gen/static_data.py | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/mapi/glapi/gen/static_data.py 
b/src/mapi/glapi/gen/static_data.py
index 2f403e9..25e78bf 100644
--- a/src/mapi/glapi/gen/static_data.py
+++ b/src/mapi/glapi/gen/static_data.py
@@ -484,17 +484,22 @@ functions = [
 "BindVertexBuffer",
 "BindVertexBuffers",
 "Bitmap",
+"BlendBarrier",
 "BlendColor",
 "BlendColorEXT",
 "BlendEquation",
 "BlendEquationEXT",
+"BlendEquationi",
 "BlendEquationiARB",
 "BlendEquationSeparate",
+"BlendEquationSeparatei",
 "BlendEquationSeparateiARB",
 "BlendFunc",
+"BlendFunci",
 "BlendFunciARB",
 "BlendFuncSeparate",
 "BlendFuncSeparateEXT",
+"BlendFuncSeparatei",
 "BlendFuncSeparateiARB",
 "BlitFramebuffer",
 "BufferData",
@@ -825,6 +830,7 @@ functions = [
 "GetFramebufferAttachmentParameteriv",
 "GetFramebufferAttachmentParameterivEXT",
 "GetFramebufferParameteriv",
+"GetGraphicsResetStatus",
 "GetGraphicsResetStatusARB",
 "GetHandleARB",
 "GetHistogram",
@@ -864,8 +870,11 @@ functions = [
 "GetnSeparableFilterARB",
 "GetnTexImageARB",
 "GetnUniformdvARB",
+"GetnUniformfv",
 "GetnUniformfvARB",
+"GetnUniformiv",
 "GetnUniformivARB",
+"GetnUniformuiv",
 "GetnUniformuivARB",
 "GetObjectLabel",
 "GetObjectParameterfvARB",
@@ -1160,6 +1169,7 @@ functions = [
 "Orthof",
 "Orthox",
 "PassThrough",
+"PatchParameteri",
 "PauseTransformFeedback",
 "PixelMapfv",
 "PixelMapuiv",
@@ -1191,6 +1201,7 @@ functions = [
 "PopDebugGroup",
 "PopMatrix",
 "PopName",
+"PrimitiveBoundingBox",
 "PrimitiveRestartIndex",
 "PrimitiveRestartIndexNV",
 "PrimitiveRestartNV",
@@ -1273,6 +1284,7 @@ functions = [
 "RasterPos4s",
 "RasterPos4sv",
 "ReadBuffer",
+"ReadnPixels",
 "ReadnPixelsARB",
 "ReadPixels",
 "Rectd",

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965/reg: Make brw_sr0_reg take a subnr and return a vec1 reg

2016-09-21 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: a2392cee48076f1fe6feab7d49214990cfa6a551
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2392cee48076f1fe6feab7d49214990cfa6a551

Author: Jason Ekstrand 
Date:   Wed Sep 14 15:09:32 2016 -0700

i965/reg: Make brw_sr0_reg take a subnr and return a vec1 reg

The state register sr0 is really a collection of dwords not a SIMD8
anything.  It's much more convenient for brw_sr0_reg to return the
particular dword you're looking for rather than a giant blob you have to
massage into what you want.

Signed-off-by: Jason Ekstrand 
[ Francisco Jerez: Trivial simplification of brw_ud1_reg(). ]
Reviewed-by: Francisco Jerez 

---

 src/mesa/drivers/dri/i965/brw_fs.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_reg.h  | 20 
 2 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index d026bbd..5c44007 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6185,7 +6185,7 @@ fs_visitor::run_cs()
if (devinfo->is_haswell && prog_data->total_shared > 0) {
   /* Move SLM index from g0.0[27:24] to sr0.1[11:8] */
   const fs_builder abld = bld.exec_all().group(1, 0);
-  abld.MOV(retype(suboffset(brw_sr0_reg(), 1), BRW_REGISTER_TYPE_UW),
+  abld.MOV(retype(brw_sr0_reg(1), BRW_REGISTER_TYPE_UW),
suboffset(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW), 1));
}
 
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h 
b/src/mesa/drivers/dri/i965/brw_reg.h
index d6f22ed..b71c63b 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -567,6 +567,12 @@ brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned 
subnr)
 }
 
 static inline struct brw_reg
+brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr)
+{
+   return retype(brw_vec1_reg(file, nr, subnr), BRW_REGISTER_TYPE_UD);
+}
+
+static inline struct brw_reg
 brw_imm_reg(enum brw_reg_type type)
 {
return brw_reg(BRW_IMMEDIATE_VALUE,
@@ -789,19 +795,9 @@ brw_notification_reg(void)
 }
 
 static inline struct brw_reg
-brw_sr0_reg(void)
+brw_sr0_reg(unsigned subnr)
 {
-   return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
-  BRW_ARF_STATE,
-  0,
-  0,
-  0,
-  BRW_REGISTER_TYPE_UD,
-  BRW_VERTICAL_STRIDE_8,
-  BRW_WIDTH_8,
-  BRW_HORIZONTAL_STRIDE_1,
-  BRW_SWIZZLE_XYZW,
-  WRITEMASK_XYZW);
+   return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_STATE, subnr);
 }
 
 static inline struct brw_reg

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965/ir: Skip eliminate_find_live_channel() for stages with sparse thread dispatch.

2016-09-21 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: f57f526fc5cfaedf26b2becf8f1899d5de0d0461
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f57f526fc5cfaedf26b2becf8f1899d5de0d0461

Author: Francisco Jerez 
Date:   Thu Sep 15 17:20:23 2016 -0700

i965/ir: Skip eliminate_find_live_channel() for stages with sparse thread 
dispatch.

The eliminate_find_live_channel optimization eliminates
FIND_LIVE_CHANNEL instructions in cases where control flow is known to
be uniform, and replaces them with 'MOV 0', which in turn unblocks
subsequent elimination of the BROADCAST instruction frequently used on
the result of FIND_LIVE_CHANNEL.  This is however not correct in
per-sample fragment shader dispatch because the PSD can dispatch a
fully unlit sample under certain conditions.  Disable the optimization
in that case.

Reviewed-by: Jason Ekstrand 

v2: Add devinfo argument to brw_stage_has_packed_dispatch() to
implement hardware generation check.

---

 src/mesa/drivers/dri/i965/brw_compiler.h | 49 
 src/mesa/drivers/dri/i965/brw_fs.cpp |  8 ++
 src/mesa/drivers/dri/i965/brw_vec4.cpp   |  8 ++
 3 files changed, 65 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
b/src/mesa/drivers/dri/i965/brw_compiler.h
index 84d3dde..445c166 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -868,6 +868,55 @@ encode_slm_size(unsigned gen, uint32_t bytes)
return slm_size;
 }
 
+/**
+ * Return true if the given shader stage is dispatched contiguously by the
+ * relevant fixed function starting from channel 0 of the SIMD thread, which
+ * implies that the dispatch mask of a thread can be assumed to have the form
+ * '2^n - 1' for some n.
+ */
+static inline bool
+brw_stage_has_packed_dispatch(const struct gen_device_info *devinfo,
+  gl_shader_stage stage,
+  const struct brw_stage_prog_data *prog_data)
+{
+   /* The code below makes assumptions about the hardware's thread dispatch
+* behavior that could be proven wrong in future generations -- Make sure
+* to do a full test run with brw_fs_test_dispatch_packing() hooked up to
+* the NIR front-end before changing this assertion.
+*/
+   assert(devinfo->gen <= 9);
+
+   switch (stage) {
+   case MESA_SHADER_FRAGMENT: {
+  /* The PSD discards subspans coming in with no lit samples, which in the
+   * per-pixel shading case implies that each subspan will either be fully
+   * lit (due to the VMask being used to allow derivative computations),
+   * or not dispatched at all.  In per-sample dispatch mode individual
+   * samples from the same subspan have a fixed relative location within
+   * the SIMD thread, so dispatch of unlit samples cannot be avoided in
+   * general and we should return false.
+   */
+  const struct brw_wm_prog_data *wm_prog_data =
+ (const struct brw_wm_prog_data *)prog_data;
+  return !wm_prog_data->persample_dispatch;
+   }
+   case MESA_SHADER_COMPUTE:
+  /* Compute shaders will be spawned with either a fully enabled dispatch
+   * mask or with whatever bottom/right execution mask was given to the
+   * GPGPU walker command to be used along the workgroup edges -- In both
+   * cases the dispatch mask is required to be tightly packed for our
+   * invocation index calculations to work.
+   */
+  return true;
+   default:
+  /* Most remaining fixed functions are limited to use a packed dispatch
+   * mask due to the hardware representation of the dispatch mask as a
+   * single counter representing the number of enabled channels.
+   */
+  return true;
+   }
+}
+
 #ifdef __cplusplus
 } /* extern "C" */
 #endif
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 5c44007..b60ec71 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2835,6 +2835,14 @@ fs_visitor::eliminate_find_live_channel()
bool progress = false;
unsigned depth = 0;
 
+   if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data)) {
+  /* The optimization below assumes that channel zero is live on thread
+   * dispatch, which may not be the case if the fixed function dispatches
+   * threads sparsely.
+   */
+  return false;
+   }
+
foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
   switch (inst->opcode) {
   case BRW_OPCODE_IF:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 58c8a8a..6aa9102 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1291,6 +1291,14 @@ vec4_visitor::eliminate_find_live_channel()
bool progress = false;
unsigned depth = 0;
 
+   if (!brw_stage_has_packed_dispatch(devinfo, stage, stage_prog_data))

Mesa (master): i965/ir: Test thread dispatch packing assumptions.

2016-09-21 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: e5311ba1acba738346a18ef661b0f8bbc33bba8e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e5311ba1acba738346a18ef661b0f8bbc33bba8e

Author: Francisco Jerez 
Date:   Thu Sep 15 21:43:18 2016 -0700

i965/ir: Test thread dispatch packing assumptions.

Not [originally] intended for upstream.  Should cause a GPU hang if
some thread is executed with a non-contiguous dispatch mask breaking
assumptions of brw_stage_has_packed_dispatch().  Doesn't cause any
CTS, DEQP or Piglit regressions, while replacing
brw_stage_has_packed_dispatch() with a dummy implementation that
unconditionally returns true on top of this patch causes multiple GPU
hangs.

v2: Refactor into a separate function instead of emitting the test
code directly from emit_nir_code(), drop VEC4 test and clean up
slightly for upstream. (Jason)

Reviewed-by: Jason Ekstrand 

---

 src/mesa/drivers/dri/i965/brw_fs.cpp | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b60ec71..1483f41 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6786,3 +6786,33 @@ brw_compile_cs(const struct brw_compiler *compiler, void 
*log_data,
 
return g.get_assembly(final_assembly_size);
 }
+
+/**
+ * Test the dispatch mask packing assumptions of
+ * brw_stage_has_packed_dispatch().  Call this from e.g. the top of
+ * fs_visitor::emit_nir_code() to cause a GPU hang if any shader invocation is
+ * executed with an unexpected dispatch mask.
+ */
+static UNUSED void
+brw_fs_test_dispatch_packing(const fs_builder &bld)
+{
+   const gl_shader_stage stage = bld.shader->stage;
+
+   if (brw_stage_has_packed_dispatch(bld.shader->devinfo, stage,
+ bld.shader->stage_prog_data)) {
+  const fs_builder ubld = bld.exec_all().group(1, 0);
+  const fs_reg tmp = component(bld.vgrf(BRW_REGISTER_TYPE_UD), 0);
+  const fs_reg mask = (stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() :
+   brw_dmask_reg());
+
+  ubld.ADD(tmp, mask, brw_imm_ud(1));
+  ubld.AND(tmp, mask, tmp);
+
+  /* This will loop forever if the dispatch mask doesn't have the expected
+   * form '2^n-1', in which case tmp will be non-zero.
+   */
+  bld.emit(BRW_OPCODE_DO);
+  bld.CMP(bld.null_reg_ud(), tmp, brw_imm_ud(0), BRW_CONDITIONAL_NZ);
+  set_predicate(BRW_PREDICATE_NORMAL, bld.emit(BRW_OPCODE_WHILE));
+   }
+}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965/ir: Pass identity mask to brw_find_live_channel() in the packed dispatch case.

2016-09-21 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: c05a4f11a03dd5614a9462b5cb28e8b630bfddc0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c05a4f11a03dd5614a9462b5cb28e8b630bfddc0

Author: Francisco Jerez 
Date:   Thu Sep 15 17:24:10 2016 -0700

i965/ir: Pass identity mask to brw_find_live_channel() in the packed dispatch 
case.

This avoids emitting a few extra instructions required to take the
dispatch mask into account when it's known to be tightly packed.

Reviewed-by: Jason Ekstrand 

---

 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 5 -
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 9 +++--
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index c510f42..842e125 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -2045,7 +2045,10 @@ fs_generator::generate_code(const cfg_t *cfg, int 
dispatch_width)
 
   case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
  const struct brw_reg mask =
-stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() : brw_dmask_reg();
+brw_stage_has_packed_dispatch(devinfo, stage,
+  prog_data) ? brw_imm_ud(~0u) :
+stage == MESA_SHADER_FRAGMENT ? brw_vmask_reg() :
+brw_dmask_reg();
  brw_find_live_channel(p, dst, mask);
  break;
   }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index f9e6d1c..163cf9d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1862,9 +1862,14 @@ generate_code(struct brw_codegen *p,
  brw_memory_fence(p, dst);
  break;
 
-  case SHADER_OPCODE_FIND_LIVE_CHANNEL:
- brw_find_live_channel(p, dst, brw_dmask_reg());
+  case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
+ const struct brw_reg mask =
+brw_stage_has_packed_dispatch(devinfo, nir->stage,
+  &prog_data->base) ? brw_imm_ud(~0u) :
+brw_dmask_reg();
+ brw_find_live_channel(p, dst, mask);
  break;
+  }
 
   case SHADER_OPCODE_BROADCAST:
  assert(inst->force_writemask_all);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965/fs: Take Dispatch/Vector mask into account in FIND_LIVE_CHANNEL

2016-09-21 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 8a468d186e6fc27c26dd12ba989192e7596f667a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8a468d186e6fc27c26dd12ba989192e7596f667a

Author: Jason Ekstrand 
Date:   Wed Sep 14 15:09:33 2016 -0700

i965/fs: Take Dispatch/Vector mask into account in FIND_LIVE_CHANNEL

On at least Sky Lake, ce0 does not contain the full story as far as enabled
channels goes.  It is possible to have completely disabled channels where
the corresponding bits in ce0 are 1.  In order to get the correct execution
mask, you have to mask off those channels which were disabled from the
beginning by taking the AND of ce0 with either sr0.2 or sr0.3 depending on
the shader stage.  Failure to do so can result in FIND_LIVE_CHANNEL
returning a completely dead channel.

Signed-off-by: Jason Ekstrand 
Cc: Francisco Jerez 
[ Francisco Jerez: Fix a couple of typos, add mask register type
  assertion, clarify reason why ce0 can have bits set for disabled
  channels, clarify that this may only be a problem when thread
  dispatch doesn't pack channels tightly in the SIMD thread.  Apply
  same treatment to Align16 path. ]
Reviewed-by: Francisco Jerez 

---

 src/mesa/drivers/dri/i965/brw_eu.h   |  3 +-
 src/mesa/drivers/dri/i965/brw_eu_emit.c  | 39 ++--
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |  7 +++--
 src/mesa/drivers/dri/i965/brw_reg.h  | 12 
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  2 +-
 5 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index 3e52764..737a335 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -488,7 +488,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
 
 void
 brw_find_live_channel(struct brw_codegen *p,
-  struct brw_reg dst);
+  struct brw_reg dst,
+  struct brw_reg mask);
 
 void
 brw_broadcast(struct brw_codegen *p,
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 3b12030..c98867a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -3361,7 +3361,8 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
 }
 
 void
-brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst)
+brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
+  struct brw_reg mask)
 {
const struct gen_device_info *devinfo = p->devinfo;
const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
@@ -3369,6 +3370,7 @@ brw_find_live_channel(struct brw_codegen *p, struct 
brw_reg dst)
brw_inst *inst;
 
assert(devinfo->gen >= 7);
+   assert(mask.type == BRW_REGISTER_TYPE_UD);
 
brw_push_insn_state(p);
 
@@ -3377,18 +3379,32 @@ brw_find_live_channel(struct brw_codegen *p, struct 
brw_reg dst)
 
   if (devinfo->gen >= 8) {
  /* Getting the first active channel index is easy on Gen8: Just find
-  * the first bit set in the mask register.  The same register exists
-  * on HSW already but it reads back as all ones when the current
+  * the first bit set in the execution mask.  The register exists on
+  * HSW already but it reads back as all ones when the current
   * instruction has execution masking disabled, so it's kind of
   * useless.
   */
- inst = brw_FBL(p, vec1(dst),
-retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD));
+ struct brw_reg exec_mask =
+retype(brw_mask_reg(0), BRW_REGISTER_TYPE_UD);
+
+ if (mask.file != BRW_IMMEDIATE_VALUE || mask.ud != 0xffffffff) {
+/* Unfortunately, ce0 does not take into account the thread
+ * dispatch mask, which may be a problem in cases where it's not
+ * tightly packed (i.e. it doesn't have the form '2^n - 1' for
+ * some n).  Combine ce0 with the given dispatch (or vector) mask
+ * to mask off those channels which were never dispatched by the
+ * hardware.
+ */
+brw_SHR(p, vec1(dst), mask, brw_imm_ud(qtr_control * 8));
+brw_AND(p, vec1(dst), exec_mask, vec1(dst));
+exec_mask = vec1(dst);
+ }
 
  /* Quarter control has the effect of magically shifting the value of
-  * this register so you'll get the first active channel relative to
-  * the specified quarter control as result.
+  * ce0 so you'll get the first active channel relative to the
+  * specified quarter control as result.
   */
+ inst = brw_FBL(p, vec1(dst), exec_mask);
   } else {
  const struct brw_reg flag = brw_flag_reg(1, 0);
 
@@ -3422,9 +3438,14 @@ brw_find_liv

Mesa (master): 57 new commits

2016-09-14 Thread Francisco Jerez

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6d861968ca2f3e196ce4bcad4f2b91b5a63ce0f1
Author: Francisco Jerez 
Date:   Thu Sep 1 22:37:57 2016 -0700

i965/vec4: Assert that pull constant load offsets are 16B-aligned.

Non-16B-aligned pull constant loads are unlikely to be particularly
useful given that you can get roughly the same effect by using
swizzles on the result.

Reviewed-by: Iago Toral Quiroga 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5ca35c63673dad28854c00ce34ec6f085ba4ec5e
Author: Francisco Jerez 
Date:   Thu Sep 1 22:39:00 2016 -0700

i965/vec4: Assert that ATTR regions are register-aligned.

It might be useful to actually handle this once copy propagation
becomes smarter about register-misaligned offsets.

Reviewed-by: Iago Toral Quiroga 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f33a8f8fcfb6ce3baa8813b32d5eff20506f3df1
Author: Francisco Jerez 
Date:   Thu Sep 1 22:36:15 2016 -0700

i965/vec4: Don't spill non-GRF-aligned register regions.

A better fix would be to do something along the lines of the FS
back-end spilling code and emit a scratch read before any instruction
that overwrites the register to spill partially due to a non-zero
sub-register offset.  In the meantime mark registers used with a
non-zero sub-register offset as no-spill to prevent the spilling code
from miscompiling the program.

Reviewed-by: Iago Toral Quiroga 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8531f943d9aac13489a02e5a5b4bfa381c465a44
Author: Francisco Jerez 
Date:   Thu Sep 1 22:31:43 2016 -0700

i965/vec4: Fix copy propagation for non-register-aligned regions.

This prevents it from trying to propagate a copy through a
register-misaligned region.  MOV instructions with a misaligned
destination shouldn't be treated as a direct GRF copy, because they
only define the destination GRFs partially.  Also fix the interference
check implemented with is_channel_updated() to consider overlapping
regions with different register offset to interfere, since the
writemask check implemented in the function is only valid under the
assumption that the source and destination regions are aligned
component by component.

Reviewed-by: Iago Toral Quiroga 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0e657b7b55bc7c83c8eb5258cd9522b0e5e581b7
Author: Francisco Jerez 
Date:   Thu Sep 1 22:26:59 2016 -0700

i965/vec4: Compare full register offsets in cmod propagation.

Cmod propagation would misoptimize the program if the destination
offset of the generating instruction wasn't exactly the same as the
source region offset of the copy instruction.  In preparation for
adding support for sub-GRF offsets to the VEC4 IR.

Reviewed-by: Iago Toral Quiroga 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8bed1adfc144d9ae8d55ccb9b277942da8a78064
Author: Francisco Jerez 
Date:   Thu Sep 1 22:12:04 2016 -0700

i965/vec4: Assign correct destination offset to rewritten instruction in 
register coalesce.

Because the pass already checks that the destination offset of each
'scan_inst' that needs to be rewritten matches 'inst->src[0].offset'
exactly, the final offset of the rewritten instruction is just the
original destination offset of the copy.  This is in preparation for
adding support for sub-GRF offsets to the VEC4 IR.

Reviewed-by: Iago Toral Quiroga 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3a74e437fdec02c28749c94bc1bcf21c3c4b48d7
Author: Francisco Jerez 
Date:   Thu Sep 1 22:08:29 2016 -0700

i965/vec4: Don't coalesce registers with overlapping writes not matching 
the MOV source.

In preparation for adding support for sub-GRF offsets to the VEC4 IR.

Reviewed-by: Iago Toral Quiroga 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1bb5074474445ea9f54d0f52383f99ac0fa6128f
Author: Francisco Jerez 
Date:   Thu Sep 1 22:04:02 2016 -0700

i965/vec4: Compare full register offsets in opt_register_coalesce nop move 
check.

In preparation for adding support for sub-GRF offsets to the VEC4 IR.

Reviewed-by: Iago Toral Quiroga 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3be0d6d040753c62b25077fb6b85ad1f0808b258
Author: Francisco Jerez 
Date:   Thu Sep 1 22:02:00 2016 -0700

i965/vec4: Check that the write offsets match when setting dependency 
controls.

For simplicity just assume that two writes to the same GRF with
different sub-GRF offsets will potentially interfere and break the
dependency control chain.  This is in preparation for adding sub-GRF
offset support to the VEC4 IR.

Reviewed-by: Iago Toral Quiroga 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b52fefc4d55a4627bf0d59c78ac531603f

Mesa (master): st/clover: Define __OPENCL_VERSION__ on the device side

2016-09-10 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: cfa914a1b4e20e7ef416171f5212f21e8224befc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cfa914a1b4e20e7ef416171f5212f21e8224befc

Author: Niels Ole Salscheider 
Date:   Sun Aug 28 16:42:34 2016 +0200

st/clover: Define __OPENCL_VERSION__ on the device side

This is required by the OpenCL standard.

Signed-off-by: Niels Ole Salscheider 
Reviewed-by: Edward O'Callaghan 
Reviewed-by: Vedran Miletić 

---

 src/gallium/state_trackers/clover/llvm/invocation.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp 
b/src/gallium/state_trackers/clover/llvm/invocation.cpp
index 5490d72..b5e8b52 100644
--- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
+++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
@@ -153,6 +153,9 @@ namespace {
   // Add libclc include
   c.getPreprocessorOpts().Includes.push_back("clc/clc.h");
 
+  // Add definition for the OpenCL version
+  c.getPreprocessorOpts().addMacroDef("__OPENCL_VERSION__=110");
+
   // clc.h requires that this macro be defined:
   c.getPreprocessorOpts().addMacroDef("cl_clang_storage_class_specifiers");
   c.getPreprocessorOpts().addRemappedFile(

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Fix gl_program::OutputsWritten computation for dual-source blending.

2016-08-30 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: fd04d048aec8f850d77f6908c0d13f88195df0da
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=fd04d048aec8f850d77f6908c0d13f88195df0da

Author: Francisco Jerez 
Date:   Sat Aug 20 14:55:19 2016 -0700

glsl: Fix gl_program::OutputsWritten computation for dual-source blending.

In the fragment shader OutputsWritten is a bitset of FRAG_RESULT_*
enumerants, which represent the location of each color output written
by the shader.  The secondary and primary color outputs of a given
render target using dual-source blending have the same location, so
the 'idx' computation below will give the wrong bit as result if the
'var->data.index' term is non-zero -- E.g. if the shader writes the
primary and secondary colors of the FRAG_RESULT_COLOR output,
ir_set_program_inouts will think that the shader writes both
FRAG_RESULT_COLOR and FRAG_RESULT_SAMPLE_MASK, which is just bogus.

That would cause the brw_wm_prog_key::nr_color_regions computation
done in the i965 driver during fragment shader precompilation to be
wrong, which currently leads to unnecessary recompilation of shaders
that use dual-source blending, and triggers an assertion failure in
fs_visitor::emit_fb_writes() on my i965-fb-fetch branch.

Reviewed-by: Ilia Mirkin 

---

 src/compiler/glsl/ir_set_program_inouts.cpp | 2 +-
 src/mesa/state_tracker/st_program.c | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp 
b/src/compiler/glsl/ir_set_program_inouts.cpp
index 06d9973..4f6c886 100644
--- a/src/compiler/glsl/ir_set_program_inouts.cpp
+++ b/src/compiler/glsl/ir_set_program_inouts.cpp
@@ -96,7 +96,7 @@ mark(struct gl_program *prog, ir_variable *var, int offset, 
int len,
for (int i = 0; i < len; i++) {
   assert(var->data.location != -1);
 
-  int idx = var->data.location + var->data.index + offset + i;
+  int idx = var->data.location + offset + i;
   bool is_patch_generic = var->data.patch &&
   idx != VARYING_SLOT_TESS_LEVEL_INNER &&
   idx != VARYING_SLOT_TESS_LEVEL_OUTER;
diff --git a/src/mesa/state_tracker/st_program.c 
b/src/mesa/state_tracker/st_program.c
index 429d0c9..2a4edfa 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -586,9 +586,7 @@ bool
 st_translate_fragment_program(struct st_context *st,
   struct st_fragment_program *stfp)
 {
-   GLuint outputMapping[2 * FRAG_RESULT_MAX] =
-  { 0 /* XXX - Avoid temporary regression due to bogus OutputsWritten
-   *   bitset. */ };
+   GLuint outputMapping[2 * FRAG_RESULT_MAX];
GLuint inputMapping[VARYING_SLOT_MAX];
GLuint inputSlotToAttr[VARYING_SLOT_MAX];
GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Fix incorrect hard-coded location of the gl_SecondaryFragColorEXT built-in.

2016-08-30 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 965934f38ab36b77672b70693b5b7b9c983f852b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=965934f38ab36b77672b70693b5b7b9c983f852b

Author: Francisco Jerez 
Date:   Thu Jun 23 00:05:37 2016 -0700

glsl: Fix incorrect hard-coded location of the gl_SecondaryFragColorEXT built-in.

gl_SecondaryFragColorEXT should have the same location as gl_FragColor
for the secondary fragment color to be replicated to all fragment
outputs.  The incorrect location of gl_SecondaryFragColorEXT would
cause the linker to mark both FRAG_RESULT_COLOR and FRAG_RESULT_DATA0
as being written to, which isn't allowed by the spec and would
ultimately lead to an assertion failure in
fs_visitor::emit_fb_writes() on my i965-fb-fetch branch.

This should also fix the code below for multiple dual-source-blended
render targets, which no driver currently supports but we have plans
to enable eventually in the i965 driver (the comment saying that no
hardware will ever support it seems rather hilarious).

Reviewed-by: Ilia Mirkin 

---

 src/compiler/glsl/builtin_variables.cpp | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/compiler/glsl/builtin_variables.cpp 
b/src/compiler/glsl/builtin_variables.cpp
index d379de6..f4ddeb9 100644
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -1147,13 +1147,8 @@ builtin_variable_generator::generate_fs_special_vars()
}
 
if (state->es_shader && state->language_version == 100 && 
state->EXT_blend_func_extended_enable) {
-  /* We make an assumption here that there will only ever be one 
dual-source draw buffer
-   * In case this assumption is ever proven to be false, make sure to 
assert here
-   * since we don't handle this case.
-   * In practice, this issue will never arise since no hardware will 
support it.
-   */
-  assert(state->Const.MaxDualSourceDrawBuffers <= 1);
-  add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, 
"gl_SecondaryFragColorEXT");
+  add_index_output(FRAG_RESULT_COLOR, 1, vec4_t,
+   "gl_SecondaryFragColorEXT");
   add_index_output(FRAG_RESULT_DATA0, 1,
array(vec4_t, state->Const.MaxDualSourceDrawBuffers),
"gl_SecondaryFragDataEXT");

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Calculate bitset of secondary outputs written in ir_set_program_inouts.

2016-08-30 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: cb4b38af41952c2e5ee77253592f0d0833aefd28
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cb4b38af41952c2e5ee77253592f0d0833aefd28

Author: Francisco Jerez 
Date:   Tue Aug 23 11:15:57 2016 -0700

glsl: Calculate bitset of secondary outputs written in ir_set_program_inouts.

Reviewed-by: Ilia Mirkin 

---

 src/compiler/glsl/ir_set_program_inouts.cpp | 9 +++--
 src/mesa/main/mtypes.h  | 1 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp 
b/src/compiler/glsl/ir_set_program_inouts.cpp
index fcfbcd4..06d9973 100644
--- a/src/compiler/glsl/ir_set_program_inouts.cpp
+++ b/src/compiler/glsl/ir_set_program_inouts.cpp
@@ -135,10 +135,14 @@ mark(struct gl_program *prog, ir_variable *var, int 
offset, int len,
  prog->SystemValuesRead |= bitfield;
   } else {
  assert(var->data.mode == ir_var_shader_out);
- if (is_patch_generic)
+ if (is_patch_generic) {
 prog->PatchOutputsWritten |= bitfield;
- else if (!var->data.read_only)
+ } else if (!var->data.read_only) {
 prog->OutputsWritten |= bitfield;
+if (var->data.index > 0)
+   prog->SecondaryOutputsWritten |= bitfield;
+ }
+
  if (var->data.fb_fetch_output)
 prog->OutputsRead |= bitfield;
   }
@@ -446,6 +450,7 @@ do_set_program_inouts(exec_list *instructions, struct 
gl_program *prog,
 
prog->InputsRead = 0;
prog->OutputsWritten = 0;
+   prog->SecondaryOutputsWritten = 0;
prog->OutputsRead = 0;
prog->PatchInputsRead = 0;
prog->PatchOutputsWritten = 0;
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 4013ca7..09b84f1 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -1919,6 +1919,7 @@ struct gl_program
GLbitfield64 InputsRead; /**< Bitmask of which input regs are read */
GLbitfield64 DoubleInputsRead; /**< Bitmask of which input regs are 
read  and are doubles */
GLbitfield64 OutputsWritten; /**< Bitmask of which output regs are written 
*/
+   GLbitfield64 SecondaryOutputsWritten; /**< Subset of OutputsWritten outputs 
written with non-zero index. */
GLbitfield64 OutputsRead; /**< Bitmask of which output regs are read */
GLbitfield PatchInputsRead;  /**< VAR[0..31] usage for patch inputs 
(user-defined only) */
GLbitfield PatchOutputsWritten; /**< VAR[0..31] usage for patch outputs 
(user-defined only) */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): st/glsl_to_tgsi: Use SecondaryOutputsWritten to determine dual-source fragment outputs.

2016-08-30 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 342f945b1320d588e61e4efe1ccc7852a3c8ad9f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=342f945b1320d588e61e4efe1ccc7852a3c8ad9f

Author: Francisco Jerez 
Date:   Tue Aug 23 11:18:19 2016 -0700

st/glsl_to_tgsi: Use SecondaryOutputsWritten to determine dual-source fragment outputs.

Currently the mesa state tracker relies on there being two bits set
per dual-source output in the gl_program::OutputsWritten bitset, but
that only worked due to a GLSL front-end bug that caused it to set the
OutputsWritten bit for both location and location+1 even though at the
GLSL level the primary and secondary color outputs used for
dual-source blending have the same location.  Fix it by extending
outputMapping[] to 2*FRAG_RESULT_MAX elements in order to represent a
mapping from a (location, index) pair to its TGSI output, which should
also make it slightly easier to add support for dual-source blending
in combination with multiple render targets in the long run.

No Piglit regressions on llvmpipe.

Reviewed-by: Ilia Mirkin 

---

 src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  5 +++--
 src/mesa/state_tracker/st_program.c| 18 --
 2 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp 
b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
index b7e47db..507a782 100644
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -2419,7 +2419,8 @@ glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
 entry = new(mem_ctx) variable_storage(var,
   PROGRAM_OUTPUT,
   var->data.location
-  + var->data.index);
+  + FRAG_RESULT_MAX *
+var->data.index);
  }
  this->variables.push_tail(entry);
  break;
@@ -5367,7 +5368,7 @@ dst_register(struct st_translate *t, gl_register_file 
file, unsigned index,
case PROGRAM_OUTPUT:
   if (!array_id) {
  if (t->procType == PIPE_SHADER_FRAGMENT)
-assert(index < FRAG_RESULT_MAX);
+assert(index < 2 * FRAG_RESULT_MAX);
  else if (t->procType == PIPE_SHADER_TESS_CTRL ||
   t->procType == PIPE_SHADER_TESS_EVAL)
 assert(index < VARYING_SLOT_TESS_MAX);
diff --git a/src/mesa/state_tracker/st_program.c 
b/src/mesa/state_tracker/st_program.c
index 03a685c..429d0c9 100644
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -586,7 +586,9 @@ bool
 st_translate_fragment_program(struct st_context *st,
   struct st_fragment_program *stfp)
 {
-   GLuint outputMapping[FRAG_RESULT_MAX];
+   GLuint outputMapping[2 * FRAG_RESULT_MAX] =
+  { 0 /* XXX - Avoid temporary regression due to bogus OutputsWritten
+   *   bitset. */ };
GLuint inputMapping[VARYING_SLOT_MAX];
GLuint inputSlotToAttr[VARYING_SLOT_MAX];
GLuint interpMode[PIPE_MAX_SHADER_INPUTS];  /* XXX size? */
@@ -810,9 +812,13 @@ st_translate_fragment_program(struct st_context *st,
   }
 
   /* handle remaining outputs (color) */
-  for (attr = 0; attr < FRAG_RESULT_MAX; attr++) {
- if (outputsWritten & BITFIELD64_BIT(attr)) {
-switch (attr) {
+  for (attr = 0; attr < ARRAY_SIZE(outputMapping); attr++) {
+ const GLbitfield64 written = attr < FRAG_RESULT_MAX ? outputsWritten :
+stfp->Base.Base.SecondaryOutputsWritten;
+ const unsigned loc = attr % FRAG_RESULT_MAX;
+
+ if (written & BITFIELD64_BIT(loc)) {
+switch (loc) {
 case FRAG_RESULT_DEPTH:
 case FRAG_RESULT_STENCIL:
 case FRAG_RESULT_SAMPLE_MASK:
@@ -822,8 +828,8 @@ st_translate_fragment_program(struct st_context *st,
 case FRAG_RESULT_COLOR:
write_all = GL_TRUE; /* fallthrough */
 default:
-   assert(attr == FRAG_RESULT_COLOR ||
-  (FRAG_RESULT_DATA0 <= attr && attr < FRAG_RESULT_MAX));
+   assert(loc == FRAG_RESULT_COLOR ||
+  (FRAG_RESULT_DATA0 <= loc && loc < FRAG_RESULT_MAX));
fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR;
fs_output_semantic_index[fs_num_outputs] = numColors;
outputMapping[attr] = fs_num_outputs;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): i965/fs: Assert that the number of color targets is one when dual-source blend is enabled.

2016-08-30 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 6df215d97eab6e18a8c70c9966014f6ab2bbc20a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6df215d97eab6e18a8c70c9966014f6ab2bbc20a

Author: Francisco Jerez 
Date:   Thu Aug 25 18:35:06 2016 -0700

i965/fs: Assert that the number of color targets is one when dual-source blend is enabled.

Requested by Anuj during review of
4a87e4ade778e56d4c65a58752b15a00ce69, adding as follow-up since it
led to assertion failures due to various GLSL bugs that should be
fixed now.

---

 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index cfb5bb6..48b5f40 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -487,6 +487,7 @@ fs_visitor::emit_fb_writes()
}
 
prog_data->dual_src_blend = (this->dual_src_output.file != BAD_FILE);
+   assert(!prog_data->dual_src_blend || key->nr_color_regions == 1);
 
if (inst == NULL) {
   /* Even if there's no color buffers enabled, we still need to send

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): 32 new commits

2016-08-25 Thread Francisco Jerez

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=da85b5a9f1b22a8f6cae1a3b335dc5f31011bcb1
Author: Francisco Jerez 
Date:   Fri Jul 22 15:52:49 2016 -0700

i965: Expose shader framebuffer fetch extensions on Gen9+.

Reviewed-by: Kenneth Graunke 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4135fc22ff735a40c36fcf051c1735fe23d154f2
Author: Francisco Jerez 
Date:   Thu Aug 18 22:12:37 2016 -0700

i965/fs: Hook up coherent framebuffer reads to the NIR front-end.

Reviewed-by: Kenneth Graunke 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=be12a1f36efcdd4628f199d4e11b01cc06787e8a
Author: Francisco Jerez 
Date:   Thu Jul 21 16:56:05 2016 -0700

i965/fs: Remove special casing of framebuffer writes in scheduler code.

The reason why it was safe for the scheduler to ignore the side
effects of framebuffer write instructions was that its side effects
couldn't have had any influence on any other instruction in the
program, because we weren't doing framebuffer reads, and framebuffer
writes were always non-overlapping.  We need actual memory dependency
analysis in order to determine whether a side-effectful instruction
can be reordered with respect to other instructions in the program.

Reviewed-by: Kenneth Graunke 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3daa0fae4b39a271f50f473edbe44712b6c8f040
Author: Francisco Jerez 
Date:   Wed Jul 6 20:49:58 2016 -0700

i965/fs: Don't CSE render target messages with different target index.

We weren't checking the fs_inst::target field when comparing whether
two instructions are equal.  For FB writes it doesn't matter because
they aren't CSE-able anyway, but this would have become a problem with
FB reads which are expression-like instructions.

Reviewed-by: Kenneth Graunke 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=db123df74773f458e573a9c034ee783570a3ed0f
Author: Francisco Jerez 
Date:   Thu Jul 21 16:55:45 2016 -0700

i965/fs: Define logical framebuffer read opcode and lower it to physical 
reads.

Reviewed-by: Kenneth Graunke 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f2f75b0cf05d2519d618c71b19d2187b8ed0d545
Author: Francisco Jerez 
Date:   Thu Jul 21 16:52:33 2016 -0700

i965/fs: Define framebuffer read virtual opcode.

Reviewed-by: Kenneth Graunke 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=71d639f69ee868fbeadd0a1b8bbdd76e17398b43
Author: Francisco Jerez 
Date:   Tue Jul 19 11:52:23 2016 -0700

i965/disasm: Fix RC message type strings on Gen7+.

Reviewed-by: Kenneth Graunke 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=26ac16fe2f73507041062f63646286dea60053da
Author: Francisco Jerez 
Date:   Thu Jul 21 19:13:55 2016 -0700

i965/eu: Add codegen support for the Gen9+ render target read message.

Reviewed-by: Kenneth Graunke 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=29eb8059fd7906d2595ea99bc65a27691b9fbe53
Author: Francisco Jerez 
Date:   Thu Jul 21 18:49:36 2016 -0700

i965/eu: Take into account the target cache argument in 
brw_set_dp_read_message.

brw_set_dp_read_message() was setting the data cache as send message
SFID on Gen7+ hardware, ignoring the target cache specified by the
caller.  Some of the callers were passing a bogus target cache value
as argument relying on brw_set_dp_read_message not to take it into
account.  Fix them too.

Reviewed-by: Iago Toral Quiroga 
Reviewed-by: Kenneth Graunke 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8a2f19a7772c80fcac85d6bdfa8e588d6cea1beb
Author: Francisco Jerez 
Date:   Tue Jul 19 15:23:30 2016 -0700

i965: Flip the non-coherent framebuffer fetch extension bit on G45-Gen8 
hardware.

This is not enabled on the original Gen4 part because it lacks surface
state tile offsets so it may not be possible to sample from arbitrary
non-zero layers of the framebuffer depending on the miptree layout (it
should be possible to work around this by allocating a scratch surface
and doing the same hack currently used for render targets, but meh...).

On Gen9+ even though it should mostly work (feel free to force-enable
it in order to compare the coherent and non-coherent paths in terms of
performance), there are some corner cases like 1D array layered
framebuffers that cannot be handled easily by the non-coherent path
because of the incompatible layout in memory of 1D and 2D miptrees (it
should be possible to work around this too by doing state-dependent
recompiles, but it's hard to care enough since Gen9 has native support
for coherent render target reads...)

Reviewed-by: Kenneth Graunke 

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ecc4800383fb67cd274154469d933c6050782208
Author: Francisco Jerez

Mesa (master): mesa: Add support for querying GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT.

2016-08-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 642aa58577bb0064c86fdd1a261a76a131886f06
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=642aa58577bb0064c86fdd1a261a76a131886f06

Author: Francisco Jerez 
Date:   Tue Jul  5 21:28:11 2016 -0700

mesa: Add support for querying GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT.

This can currently only give true as result since the only way you can
expose EXT_shader_framebuffer_fetch right now is by flipping the
MESA_shader_framebuffer_fetch bit, but that could potentially change
in the future, see [1] for an explanation.

[1] https://lists.freedesktop.org/archives/mesa-dev/2016-July/124028.html

Reviewed-by: Kenneth Graunke 

---

 src/mesa/main/get.c  | 7 +++
 src/mesa/main/get_hash_params.py | 4 
 src/mesa/main/glheader.h | 3 +++
 3 files changed, 14 insertions(+)

diff --git a/src/mesa/main/get.c b/src/mesa/main/get.c
index 8cb0cc7..b017827 100644
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -530,6 +530,13 @@ static const int 
extra_core_ARB_color_buffer_float_and_new_buffers[] = {
EXTRA_END
 };
 
+static const int extra_EXT_shader_framebuffer_fetch[] = {
+   EXTRA_API_ES2,
+   EXTRA_API_ES3,
+   EXT(MESA_shader_framebuffer_fetch),
+   EXTRA_END
+};
+
 /* This is the big table describing all the enums we accept in
  * glGet*v().  The table is partitioned into six parts: enums
  * understood by all GL APIs (OpenGL, GLES and GLES2), enums shared
diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py
index cd8e47f..89d164d 100644
--- a/src/mesa/main/get_hash_params.py
+++ b/src/mesa/main/get_hash_params.py
@@ -424,6 +424,10 @@ descriptor=[
 ]},
 
 { "apis": ["GLES", "GLES2"], "params": [
+# GL_EXT_shader_framebuffer_fetch.  Should be true if the MESA framebuffer
+# fetch extension is supported since the latter imposes no restrictions on
+# non-uniform per-sample discard.
+  [ "FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT", 
"CONTEXT_BOOL(Extensions.MESA_shader_framebuffer_fetch), 
extra_EXT_shader_framebuffer_fetch" ],
 # GL_OES_EGL_image_external
   [ "TEXTURE_BINDING_EXTERNAL_OES", "LOC_CUSTOM, TYPE_INT, 
TEXTURE_EXTERNAL_INDEX, extra_OES_EGL_image_external" ],
   [ "TEXTURE_EXTERNAL_OES", "LOC_CUSTOM, TYPE_BOOLEAN, 0, 
extra_OES_EGL_image_external" ],
diff --git a/src/mesa/main/glheader.h b/src/mesa/main/glheader.h
index 40fada1..3f2a923 100644
--- a/src/mesa/main/glheader.h
+++ b/src/mesa/main/glheader.h
@@ -140,6 +140,9 @@ typedef void *GLeglImageOES;
 #define GL_ETC1_RGB8_OES0x8D64
 #endif
 
+#ifndef GL_EXT_shader_framebuffer_fetch
+#define GL_FRAGMENT_SHADER_DISCARDS_SAMPLES_EXT 0x8A52
+#endif
 
 /* Inexplicably, GL_HALF_FLOAT_OES has a different value than GL_HALF_FLOAT.
  */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Add support for representing framebuffer fetch in the GLSL IR.

2016-08-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: b49d8f20f43ec429e6c17e7d92c7c2d3f926ee5e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b49d8f20f43ec429e6c17e7d92c7c2d3f926ee5e

Author: Francisco Jerez 
Date:   Tue Jul 19 20:07:47 2016 -0700

glsl: Add support for representing framebuffer fetch in the GLSL IR.

The GLSL IR representation of framebuffer fetch amounts to a single
bit in the ir_variable object applicable to fragment shader outputs.
The flag indicates that the variable will be implicitly initialized to
the previous contents of the render buffer at the same fragment
coordinates and sample index.

Reviewed-by: Kenneth Graunke 

---

 src/compiler/glsl/ir.cpp | 1 +
 src/compiler/glsl/ir.h   | 8 
 2 files changed, 9 insertions(+)

diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp
index 2aa4aff..4dadfd2 100644
--- a/src/compiler/glsl/ir.cpp
+++ b/src/compiler/glsl/ir.cpp
@@ -1686,6 +1686,7 @@ ir_variable::ir_variable(const struct glsl_type *type, 
const char *name,
this->data.image_volatile = false;
this->data.image_restrict = false;
this->data.from_ssbo_unsized_array = false;
+   this->data.fb_fetch_output = false;
 
if (type != NULL) {
   if (type->base_type == GLSL_TYPE_SAMPLER)
diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
index 68e774c..5e1e9bf 100644
--- a/src/compiler/glsl/ir.h
+++ b/src/compiler/glsl/ir.h
@@ -831,6 +831,14 @@ public:
   unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. 
*/
 
   unsigned implicit_sized_array:1;
+
+  /**
+   * Whether this is a fragment shader output implicitly initialized with
+   * the previous contents of the specified render target at the
+   * framebuffer location corresponding to this shader invocation.
+   */
+  unsigned fb_fetch_output:1;
+
   /**
* Emit a warning if this variable is accessed.
*/

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl/linker: Allow fragment output overlap for gl_LastFragData.

2016-08-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 913ae618c6bdb42366f4d87265a6e35a88656e70
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=913ae618c6bdb42366f4d87265a6e35a88656e70

Author: Francisco Jerez 
Date:   Thu Jul 14 12:57:14 2016 -0700

glsl/linker: Allow fragment output overlap for gl_LastFragData.

gl_LastFragData overlaps gl_FragData by definition.

Reviewed-by: Kenneth Graunke 

---

 src/compiler/glsl/linker.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
index a1a65ef..4b404ff 100644
--- a/src/compiler/glsl/linker.cpp
+++ b/src/compiler/glsl/linker.cpp
@@ -2673,6 +2673,9 @@ assign_attribute_or_color_locations(void *mem_ctx,
  }
   }
 
+  if (strcmp(var->name, "gl_LastFragData") == 0)
+ continue;
+
   /* From GL4.5 core spec, section 15.2 (Shader Execution):
*
* "Output binding assignments will cause LinkProgram to fail:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Handle the inout qualifier in fragment shader output declarations.

2016-08-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 19e929a1774938cb826f68592dc87c520d048597
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=19e929a1774938cb826f68592dc87c520d048597

Author: Francisco Jerez 
Date:   Tue Jul 19 20:10:21 2016 -0700

glsl: Handle the inout qualifier in fragment shader output declarations.

According to the EXT_shader_framebuffer_fetch extension the inout
qualifier can be used on ESSL 3.0+ shaders to declare a special kind
of fragment output that gets implicitly initialized with the previous
framebuffer contents at the current fragment coordinates.  In addition
we allow using the same language to define FB fetch outputs in GLSL
1.3+ shaders in preparation for the desktop MESA_shader_framebuffer_fetch
extensions.

Reviewed-by: Kenneth Graunke 

---

 src/compiler/glsl/ast_to_hir.cpp |  5 -
 src/compiler/glsl/glsl_parser.yy | 12 
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp
index c05fb17..c91ed53 100644
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -3713,7 +3713,7 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
 */
assert(var->data.mode != ir_var_temporary);
if (qual->flags.q.in && qual->flags.q.out)
-  var->data.mode = ir_var_function_inout;
+  var->data.mode = is_parameter ? ir_var_function_inout : 
ir_var_shader_out;
else if (qual->flags.q.in)
   var->data.mode = is_parameter ? ir_var_function_in : ir_var_shader_in;
else if (qual->flags.q.attribute
@@ -3730,6 +3730,9 @@ apply_type_qualifier_to_variable(const struct 
ast_type_qualifier *qual,
else if (qual->flags.q.shared_storage)
   var->data.mode = ir_var_shader_shared;
 
+   var->data.fb_fetch_output = state->stage == MESA_SHADER_FRAGMENT &&
+   qual->flags.q.in && qual->flags.q.out;
+
if (!is_parameter && is_varying_var(var, state->stage)) {
   /* User-defined ins/outs are not permitted in compute shaders. */
   if (state->stage == MESA_SHADER_COMPUTE) {
diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy
index 5b65861..f2853da 100644
--- a/src/compiler/glsl/glsl_parser.yy
+++ b/src/compiler/glsl/glsl_parser.yy
@@ -1944,6 +1944,18 @@ storage_qualifier:
   $$.xfb_buffer = state->out_qualifier->xfb_buffer;
   }
}
+   | INOUT_TOK
+   {
+  memset(& $$, 0, sizeof($$));
+  $$.flags.q.in = 1;
+  $$.flags.q.out = 1;
+
+  if (!state->has_framebuffer_fetch() ||
+  !state->is_version(130, 300) ||
+  state->stage != MESA_SHADER_FRAGMENT)
+ _mesa_glsl_error(&@1, state, "A single interface variable cannot be "
+  "declared as both input and output");
+   }
| UNIFORM
{
   memset(& $$, 0, sizeof($$));

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): mesa: Move shader memory barrier functions into barrier.c.

2016-08-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 6a976bbf84c9c8790fa61bbeb5eb24a2e646c76c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6a976bbf84c9c8790fa61bbeb5eb24a2e646c76c

Author: Francisco Jerez 
Date:   Tue Jul  5 23:18:18 2016 -0700

mesa: Move shader memory barrier functions into barrier.c.

Reviewed-by: Kenneth Graunke 

---

 src/mesa/main/barrier.c | 51 +
 src/mesa/main/barrier.h |  6 ++
 src/mesa/main/shaderimage.c | 51 -
 src/mesa/main/shaderimage.h |  6 --
 4 files changed, 57 insertions(+), 57 deletions(-)

diff --git a/src/mesa/main/barrier.c b/src/mesa/main/barrier.c
index beb48fb..7ae8fc6 100644
--- a/src/mesa/main/barrier.c
+++ b/src/mesa/main/barrier.c
@@ -57,3 +57,54 @@ _mesa_TextureBarrierNV(void)
 
ctx->Driver.TextureBarrier(ctx);
 }
+
+void GLAPIENTRY
+_mesa_MemoryBarrier(GLbitfield barriers)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   if (ctx->Driver.MemoryBarrier)
+  ctx->Driver.MemoryBarrier(ctx, barriers);
+}
+
+void GLAPIENTRY
+_mesa_MemoryBarrierByRegion(GLbitfield barriers)
+{
+   GET_CURRENT_CONTEXT(ctx);
+
+   GLbitfield all_allowed_bits = GL_ATOMIC_COUNTER_BARRIER_BIT |
+ GL_FRAMEBUFFER_BARRIER_BIT |
+ GL_SHADER_IMAGE_ACCESS_BARRIER_BIT |
+ GL_SHADER_STORAGE_BARRIER_BIT |
+ GL_TEXTURE_FETCH_BARRIER_BIT |
+ GL_UNIFORM_BARRIER_BIT;
+
+   if (ctx->Driver.MemoryBarrier) {
+  /* From section 7.11.2 of the OpenGL ES 3.1 specification:
+   *
+   *"When barriers is ALL_BARRIER_BITS, shader memory accesses will be
+   * synchronized relative to all these barrier bits, but not to other
+   * barrier bits specific to MemoryBarrier."
+   *
+   * That is, if barriers is the special value GL_ALL_BARRIER_BITS, then 
all
+   * barriers allowed by glMemoryBarrierByRegion should be activated."
+   */
+  if (barriers == GL_ALL_BARRIER_BITS) {
+ ctx->Driver.MemoryBarrier(ctx, all_allowed_bits);
+ return;
+  }
+
+  /* From section 7.11.2 of the OpenGL ES 3.1 specification:
+   *
+   *"An INVALID_VALUE error is generated if barriers is not the special
+   * value ALL_BARRIER_BITS, and has any bits set other than those
+   * described above."
+   */
+  if ((barriers & ~all_allowed_bits) != 0) {
+ _mesa_error(ctx, GL_INVALID_VALUE,
+ "glMemoryBarrierByRegion(unsupported barrier bit");
+  }
+
+  ctx->Driver.MemoryBarrier(ctx, barriers);
+   }
+}
diff --git a/src/mesa/main/barrier.h b/src/mesa/main/barrier.h
index 0652d14..8eee583 100644
--- a/src/mesa/main/barrier.h
+++ b/src/mesa/main/barrier.h
@@ -41,4 +41,10 @@ _mesa_init_barrier_functions(struct dd_function_table 
*driver);
 extern void GLAPIENTRY
 _mesa_TextureBarrierNV(void);
 
+void GLAPIENTRY
+_mesa_MemoryBarrier(GLbitfield barriers);
+
+void GLAPIENTRY
+_mesa_MemoryBarrierByRegion(GLbitfield barriers);
+
 #endif /* BARRIER_H */
diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
index 90643c4..db36e3b 100644
--- a/src/mesa/main/shaderimage.c
+++ b/src/mesa/main/shaderimage.c
@@ -753,54 +753,3 @@ _mesa_BindImageTextures(GLuint first, GLsizei count, const 
GLuint *textures)
 
_mesa_end_texture_lookups(ctx);
 }
-
-void GLAPIENTRY
-_mesa_MemoryBarrier(GLbitfield barriers)
-{
-   GET_CURRENT_CONTEXT(ctx);
-
-   if (ctx->Driver.MemoryBarrier)
-  ctx->Driver.MemoryBarrier(ctx, barriers);
-}
-
-void GLAPIENTRY
-_mesa_MemoryBarrierByRegion(GLbitfield barriers)
-{
-   GET_CURRENT_CONTEXT(ctx);
-
-   GLbitfield all_allowed_bits = GL_ATOMIC_COUNTER_BARRIER_BIT |
- GL_FRAMEBUFFER_BARRIER_BIT |
- GL_SHADER_IMAGE_ACCESS_BARRIER_BIT |
- GL_SHADER_STORAGE_BARRIER_BIT |
- GL_TEXTURE_FETCH_BARRIER_BIT |
- GL_UNIFORM_BARRIER_BIT;
-
-   if (ctx->Driver.MemoryBarrier) {
-  /* From section 7.11.2 of the OpenGL ES 3.1 specification:
-   *
-   *"When barriers is ALL_BARRIER_BITS, shader memory accesses will be
-   * synchronized relative to all these barrier bits, but not to other
-   * barrier bits specific to MemoryBarrier."
-   *
-   * That is, if barriers is the special value GL_ALL_BARRIER_BITS, then 
all
-   * barriers allowed by glMemoryBarrierByRegion should be activated."
-   */
-  if (barriers == GL_ALL_BARRIER_BITS) {
- ctx->Driver.MemoryBarrier(ctx, all_allowed_bits);
- return;
-  }
-
-  /* From section 7.11.2 of the OpenGL ES 3.1 specification:
-   *
-   *"An INVALID_VALUE error is g

Mesa (master): glsl: Don't consider read-only fragment outputs to be written to.

2016-08-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 711213fb7226f25a7da4962aa7526d7265d38356
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=711213fb7226f25a7da4962aa7526d7265d38356

Author: Francisco Jerez 
Date:   Tue Jul 19 20:29:55 2016 -0700

glsl: Don't consider read-only fragment outputs to be written to.

Since they cannot be written.  This prevents adding fragment outputs
to the OutputsWritten set that are only read from via the
gl_LastFragData array but never written to.

Reviewed-by: Kenneth Graunke 

---

 src/compiler/glsl/ir_set_program_inouts.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp 
b/src/compiler/glsl/ir_set_program_inouts.cpp
index 060bea8..a6475b5 100644
--- a/src/compiler/glsl/ir_set_program_inouts.cpp
+++ b/src/compiler/glsl/ir_set_program_inouts.cpp
@@ -137,7 +137,7 @@ mark(struct gl_program *prog, ir_variable *var, int offset, 
int len,
  assert(var->data.mode == ir_var_shader_out);
  if (is_patch_generic)
 prog->PatchOutputsWritten |= bitfield;
- else
+ else if (!var->data.read_only)
 prog->OutputsWritten |= bitfield;
   }
}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Define a gl_LastFragData built-in for older GLSL versions.

2016-08-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: 6b33eab959433fdcb4f3fce7c571a83e8050cdf0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6b33eab959433fdcb4f3fce7c571a83e8050cdf0

Author: Francisco Jerez 
Date:   Tue Jul 19 20:11:53 2016 -0700

glsl: Define a gl_LastFragData built-in for older GLSL versions.

The EXT_shader_framebuffer_fetch extension defines alternative
language for GLES2 shaders where user-defined fragment outputs are not
allowed.  Instead of using inout user-defined fragment outputs the
shader is expected to read from the gl_LastFragData built-in array.
In addition this allows using the same language on desktop GLSL
versions prior to 4.2 that support the deprecated gl_FragData built-in
in preparation for the MESA_shader_framebuffer_fetch desktop GL
extension.

Both legacy and user-defined inout outputs have a common
representation at the GLSL IR level, so it shouldn't make any
difference for optimization passes and back-ends whether the
application is using gl_LastFragData or user-defined outputs, all
they'll see is a variable dereference of a fragment output at a
certain interface location with the fb_fetch_output bit set to one.

v2: Don't define the built-in variable on GLSL versions for which
gl_FragData exists but is deprecated. (Ken)

Reviewed-by: Kenneth Graunke 

---

 src/compiler/glsl/builtin_variables.cpp | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/src/compiler/glsl/builtin_variables.cpp 
b/src/compiler/glsl/builtin_variables.cpp
index c9d8b1c..cb5f730 100644
--- a/src/compiler/glsl/builtin_variables.cpp
+++ b/src/compiler/glsl/builtin_variables.cpp
@@ -1134,6 +1134,16 @@ builtin_variable_generator::generate_fs_special_vars()
  array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData");
}
 
+   if (state->has_framebuffer_fetch() && !state->is_version(130, 300)) {
+  ir_variable *const var =
+ add_output(FRAG_RESULT_DATA0,
+array(vec4_t, state->Const.MaxDrawBuffers),
+"gl_LastFragData");
+  var->data.precision = GLSL_PRECISION_MEDIUM;
+  var->data.read_only = 1;
+  var->data.fb_fetch_output = 1;
+   }
+
if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) {
   /* We make an assumption here that there will only ever be one dual-source draw buffer
* In case this assumption is ever proven to be false, make sure to assert here

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): glsl: Add parser state enables for the framebuffer fetch extensions.

2016-08-24 Thread Francisco Jerez

Module: Mesa
Branch: master
Commit: d7cd7b9c49ab01b954702783493fe22cd2bb38f1
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d7cd7b9c49ab01b954702783493fe22cd2bb38f1

Author: Francisco Jerez 
Date:   Mon Jul 25 17:24:52 2016 -0700

glsl: Add parser state enables for the framebuffer fetch extensions.

Reviewed-by: Kenneth Graunke 

---

 src/compiler/glsl/glsl_parser_extras.cpp |  1 +
 src/compiler/glsl/glsl_parser_extras.h   | 13 +
 2 files changed, 14 insertions(+)

diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp
index 14a5540..a185759 100644
--- a/src/compiler/glsl/glsl_parser_extras.cpp
+++ b/src/compiler/glsl/glsl_parser_extras.cpp
@@ -652,6 +652,7 @@ static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = {
EXT(EXT_clip_cull_distance),
EXT(EXT_gpu_shader5),
EXT(EXT_separate_shader_objects),
+   EXT(EXT_shader_framebuffer_fetch),
EXT(EXT_shader_integer_mix),
EXT(EXT_shader_io_blocks),
EXT(EXT_shader_samples_identical),
diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h
index 0294ef7..3311688 100644
--- a/src/compiler/glsl/glsl_parser_extras.h
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -308,6 +308,13 @@ struct _mesa_glsl_parse_state {
  is_version(450, 0);
}
 
+   bool has_framebuffer_fetch() const
+   {
+  return EXT_shader_framebuffer_fetch_enable ||
+ MESA_shader_framebuffer_fetch_enable ||
+ MESA_shader_framebuffer_fetch_non_coherent_enable;
+   }
+
void process_version_directive(YYLTYPE *locp, int version,
   const char *ident);
 
@@ -696,6 +703,8 @@ struct _mesa_glsl_parse_state {
bool EXT_gpu_shader5_warn;
bool EXT_separate_shader_objects_enable;
bool EXT_separate_shader_objects_warn;
+   bool EXT_shader_framebuffer_fetch_enable;
+   bool EXT_shader_framebuffer_fetch_warn;
bool EXT_shader_integer_mix_enable;
bool EXT_shader_integer_mix_warn;
bool EXT_shader_io_blocks_enable;
@@ -710,6 +719,10 @@ struct _mesa_glsl_parse_state {
bool EXT_texture_array_warn;
bool EXT_texture_buffer_enable;
bool EXT_texture_buffer_warn;
+   bool MESA_shader_framebuffer_fetch_enable;
+   bool MESA_shader_framebuffer_fetch_warn;
+   bool MESA_shader_framebuffer_fetch_non_coherent_enable;
+   bool MESA_shader_framebuffer_fetch_non_coherent_warn;
bool MESA_shader_integer_functions_enable;
bool MESA_shader_integer_functions_warn;
/*@}*/

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

1 2 3 4 5 6 7 >

1 - 100 of 619 matches

Mail list logo