[Mesa-dev] [PATCH 3/3] mesa: Verify memory allocations success in _mesa_PushAttrib

2013-12-02 Thread Juha-Pekka Heikkila
Check if any of the callocs fail and report it with _mesa_error
if needed.

Signed-off-by: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com
---
 src/mesa/main/attrib.c | 106 -
 1 file changed, 104 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index 2418fb0..037cd64 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -222,6 +222,11 @@ _mesa_PushAttrib(GLbitfield mask)
if (mask  GL_ACCUM_BUFFER_BIT) {
   struct gl_accum_attrib *attr;
   attr = MALLOC_STRUCT( gl_accum_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+  }
+
   memcpy( attr, ctx-Accum, sizeof(struct gl_accum_attrib) );
   save_attrib_data(head, GL_ACCUM_BUFFER_BIT, attr);
}
@@ -230,6 +235,11 @@ _mesa_PushAttrib(GLbitfield mask)
   GLuint i;
   struct gl_colorbuffer_attrib *attr;
   attr = MALLOC_STRUCT( gl_colorbuffer_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+  }
+
   memcpy( attr, ctx-Color, sizeof(struct gl_colorbuffer_attrib) );
   /* push the Draw FBO's DrawBuffer[] state, not ctx-Color.DrawBuffer[] */
   for (i = 0; i  ctx-Const.MaxDrawBuffers; i ++)
@@ -241,6 +251,11 @@ _mesa_PushAttrib(GLbitfield mask)
   struct gl_current_attrib *attr;
   FLUSH_CURRENT( ctx, 0 );
   attr = MALLOC_STRUCT( gl_current_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+  }
+
   memcpy( attr, ctx-Current, sizeof(struct gl_current_attrib) );
   save_attrib_data(head, GL_CURRENT_BIT, attr);
}
@@ -248,6 +263,11 @@ _mesa_PushAttrib(GLbitfield mask)
if (mask  GL_DEPTH_BUFFER_BIT) {
   struct gl_depthbuffer_attrib *attr;
   attr = MALLOC_STRUCT( gl_depthbuffer_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+  }
+
   memcpy( attr, ctx-Depth, sizeof(struct gl_depthbuffer_attrib) );
   save_attrib_data(head, GL_DEPTH_BUFFER_BIT, attr);
}
@@ -256,6 +276,11 @@ _mesa_PushAttrib(GLbitfield mask)
   struct gl_enable_attrib *attr;
   GLuint i;
   attr = MALLOC_STRUCT( gl_enable_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+  }
+
   /* Copy enable flags from all other attributes into the enable struct. */
   attr-AlphaTest = ctx-Color.AlphaEnabled;
   attr-AutoNormal = ctx-Eval.AutoNormal;
@@ -331,6 +356,11 @@ _mesa_PushAttrib(GLbitfield mask)
if (mask  GL_EVAL_BIT) {
   struct gl_eval_attrib *attr;
   attr = MALLOC_STRUCT( gl_eval_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+  }
+
   memcpy( attr, ctx-Eval, sizeof(struct gl_eval_attrib) );
   save_attrib_data(head, GL_EVAL_BIT, attr);
}
@@ -338,6 +368,11 @@ _mesa_PushAttrib(GLbitfield mask)
if (mask  GL_FOG_BIT) {
   struct gl_fog_attrib *attr;
   attr = MALLOC_STRUCT( gl_fog_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+  }
+
   memcpy( attr, ctx-Fog, sizeof(struct gl_fog_attrib) );
   save_attrib_data(head, GL_FOG_BIT, attr);
}
@@ -345,6 +380,11 @@ _mesa_PushAttrib(GLbitfield mask)
if (mask  GL_HINT_BIT) {
   struct gl_hint_attrib *attr;
   attr = MALLOC_STRUCT( gl_hint_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+  }
+
   memcpy( attr, ctx-Hint, sizeof(struct gl_hint_attrib) );
   save_attrib_data(head, GL_HINT_BIT, attr);
}
@@ -353,6 +393,11 @@ _mesa_PushAttrib(GLbitfield mask)
   struct gl_light_attrib *attr;
   FLUSH_CURRENT(ctx, 0);   /* flush material changes */
   attr = MALLOC_STRUCT( gl_light_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+  }
+
   memcpy( attr, ctx-Light, sizeof(struct gl_light_attrib) );
   save_attrib_data(head, GL_LIGHTING_BIT, attr);
}
@@ -360,6 +405,11 @@ _mesa_PushAttrib(GLbitfield mask)
if (mask  GL_LINE_BIT) {
   struct gl_line_attrib *attr;
   attr = MALLOC_STRUCT( gl_line_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+  }
+
   memcpy( attr, ctx-Line, sizeof(struct gl_line_attrib) );
   save_attrib_data(head, GL_LINE_BIT, attr);
}
@@ -367,6 +417,11 @@ _mesa_PushAttrib(GLbitfield mask)
if (mask  GL_LIST_BIT) {
   struct gl_list_attrib *attr;
   attr = MALLOC_STRUCT( gl_list_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushAttrib);
+ goto end;
+

[Mesa-dev] [PATCH 0/3] Fix Klocwork found issues

2013-12-02 Thread Juha-Pekka Heikkila
Check memory allocations before using them.
I am a bit iffy with this one though in the patches;
--
end:
   if (head != NULL) {
...
}
--
When everything works, it works, but I could not be certain that it works
perfectly when something fails. If memory allocations start to return NULL,
this attribute stack probably will not be causing the majority of problems in
any case.


Juha-Pekka Heikkila (3):
  glx: Check malloc return value before accessing memory in
glx/clientattrib.c
  mesa: Verify memory allocations success in _mesa_PushClientAttrib
  mesa: Verify memory allocations success in _mesa_PushAttrib

 src/glx/clientattrib.c |   5 ++
 src/mesa/main/attrib.c | 140 ++---
 2 files changed, 139 insertions(+), 6 deletions(-)

-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] glx: Check malloc return value before accessing memory in glx/clientattrib.c

2013-12-02 Thread Juha-Pekka Heikkila
Signed-off-by: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com
---
 src/glx/clientattrib.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/glx/clientattrib.c b/src/glx/clientattrib.c
index 1b306ea..a26906f 100644
--- a/src/glx/clientattrib.c
+++ b/src/glx/clientattrib.c
@@ -76,6 +76,11 @@ __indirect_glPushClientAttrib(GLuint mask)
if (spp  gc-attributes.stack[__GL_CLIENT_ATTRIB_STACK_DEPTH]) {
   if (!(sp = *spp)) {
  sp = malloc(sizeof(__GLXattribute));
+
+ if (sp == NULL) {
+__glXSetError(gc, GL_OUT_OF_MEMORY);
+return;
+ }
  *spp = sp;
   }
   sp-mask = mask;
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] mesa: Verify memory allocations success in _mesa_PushClientAttrib

2013-12-02 Thread Juha-Pekka Heikkila
Check if any of the callocs fail and report it with _mesa_error
if needed.

Signed-off-by: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com
---
 src/mesa/main/attrib.c | 34 ++
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c
index c9332bd..2418fb0 100644
--- a/src/mesa/main/attrib.c
+++ b/src/mesa/main/attrib.c
@@ -1488,6 +1488,12 @@ init_array_attrib_data(struct gl_context *ctx,
 {
/* Get a non driver gl_array_object. */
attrib-ArrayObj = CALLOC_STRUCT( gl_array_object );
+
+   if (attrib-ArrayObj == NULL) {
+  _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushClientAttrib);
+  return;
+   }
+
_mesa_initialize_array_object(ctx, attrib-ArrayObj, 0);
 }
 
@@ -1516,7 +1522,7 @@ _mesa_PushClientAttrib(GLbitfield mask)
GET_CURRENT_CONTEXT(ctx);
 
if (ctx-ClientAttribStackDepth = MAX_CLIENT_ATTRIB_STACK_DEPTH) {
-  _mesa_error( ctx, GL_STACK_OVERFLOW, glPushClientAttrib );
+  _mesa_error(ctx, GL_STACK_OVERFLOW, glPushClientAttrib);
   return;
}
 
@@ -1529,10 +1535,19 @@ _mesa_PushClientAttrib(GLbitfield mask)
   struct gl_pixelstore_attrib *attr;
   /* packing attribs */
   attr = CALLOC_STRUCT( gl_pixelstore_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushClientAttrib);
+ goto end;
+  }
   copy_pixelstore(ctx, attr, ctx-Pack);
   save_attrib_data(head, GL_CLIENT_PACK_BIT, attr);
   /* unpacking attribs */
   attr = CALLOC_STRUCT( gl_pixelstore_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushClientAttrib);
+ goto end;
+  }
+
   copy_pixelstore(ctx, attr, ctx-Unpack);
   save_attrib_data(head, GL_CLIENT_UNPACK_BIT, attr);
}
@@ -1540,13 +1555,24 @@ _mesa_PushClientAttrib(GLbitfield mask)
if (mask  GL_CLIENT_VERTEX_ARRAY_BIT) {
   struct gl_array_attrib *attr;
   attr = CALLOC_STRUCT( gl_array_attrib );
+  if (attr == NULL) {
+ _mesa_error(ctx, GL_OUT_OF_MEMORY, glPushClientAttrib);
+ goto end;
+  }
+
   init_array_attrib_data(ctx, attr);
+  if (attr-ArrayObj == NULL) {
+  goto end;
+  }
+
   save_array_attrib(ctx, attr, ctx-Array);
   save_attrib_data(head, GL_CLIENT_VERTEX_ARRAY_BIT, attr);
}
-
-   ctx-ClientAttribStack[ctx-ClientAttribStackDepth] = head;
-   ctx-ClientAttribStackDepth++;
+end:
+   if (head != NULL) {
+   ctx-ClientAttribStack[ctx-ClientAttribStackDepth] = head;
+   ctx-ClientAttribStackDepth++;
+   }
 }
 
 
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 00/10] i965: Enable CMS layout for multisample textures

2013-12-02 Thread Kenneth Graunke
On 11/30/2013 03:29 PM, Chris Forbes wrote:
 This series enables the compressed multisample layout for multisample 
 textures.
 Previously we would only use CMS for renderbuffers, since our texelFetch()
 implementation didn't understand it.
 
 Changes from V1:
 - Include fix for Haswell's second copy of the sample mask
 - Actually allocate an MCS surface for textures with this layout
 - Emit the MCS info in surface state for textures
 - Fix swizzling of MCS data in VS

Patches 1-5, 7, and 10 are:
Reviewed-by: Kenneth Graunke kenn...@whitecape.org

The others are probably okay too, but I'd have to read them more
carefully.  It'd be great if someone more familiar with multisampling
could look at this series too.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glx: Add missing null check in glx/dri2_glx.c

2013-12-02 Thread Juha-Pekka Heikkila
Signed-off-by: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com
---
 src/glx/dri2_glx.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/glx/dri2_glx.c b/src/glx/dri2_glx.c
index 3b33312..bfeebed 100644
--- a/src/glx/dri2_glx.c
+++ b/src/glx/dri2_glx.c
@@ -676,6 +676,10 @@ dri2FlushFrontBuffer(__DRIdrawable *driDrawable, void 
*loaderPrivate)
psc = (struct dri2_screen *) pdraw-base.psc;
 
priv = __glXInitialize(psc-base.dpy);
+
+   if (priv == NULL)
+   return;
+
pdp = (struct dri2_display *) priv-dri2Display;
gc = __glXGetCurrentContext();
 
-- 
1.8.1.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 03/10] i965/wm: Set copy of sample mask in 3DSTATE_PS correctly for Haswell

2013-12-02 Thread Paul Berry
On 30 November 2013 15:30, Chris Forbes chr...@ijw.co.nz wrote:

 The bspec says:

 SW must program the sample mask value in this field so that it matches
 with 3DSTATE_SAMPLE_MASK

 I haven't observed this to actually fix anything, but stumbled across it
 while adding the rest of the support for CMS layout for multisample
textures.

 Signed-off-by: Chris Forbes chr...@ijw.co.nz
 ---
  src/mesa/drivers/dri/i965/gen7_wm_state.c | 9 +++--
  1 file changed, 7 insertions(+), 2 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c
 b/src/mesa/drivers/dri/i965/gen7_wm_state.c
 index 65c9bbf..92e880c 100644
 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
 +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
 @@ -173,8 +173,11 @@ upload_ps_state(struct brw_context *brw)
 if (ctx-Shader.CurrentFragmentProgram == NULL)
dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;

 +   /* Haswell requires the sample mask to be set in this packet as well as
 +* in 3DSTATE_SAMPLE_MASK; the values must match, or strange things
 happen. */


In the commit message you say you haven't observed this to fix anything,
but here you say that strange things happen if you don't set it.  Which is
it?

With that discrepancy resolved, this patch is:

Reviewed-by: Paul Berry stereotype...@gmail.com


 +   /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
 if (brw-is_haswell)
 -  dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */
 +  dw4 |= SET_FIELD(gen6_determine_sample_mask(brw),
 HSW_PS_SAMPLE_MASK);

 dw4 |= (brw-max_wm_threads - 1)  max_threads_shift;

 @@ -274,7 +277,9 @@ upload_ps_state(struct brw_context *brw)
  const struct brw_tracked_state gen7_ps_state = {
 .dirty = {
.mesa  = (_NEW_PROGRAM_CONSTANTS |
 -   _NEW_COLOR),
 +   _NEW_COLOR |
 +_NEW_BUFFERS |
 +_NEW_MULTISAMPLE),
.brw   = (BRW_NEW_FRAGMENT_PROGRAM |
 BRW_NEW_PS_BINDING_TABLE |
 BRW_NEW_BATCH |
 --
 1.8.4.2

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 72230] New: Unable to extract MesaLib-10.0.0.tar.{gz, bz2} with bsdtar

2013-12-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=72230

  Priority: medium
Bug ID: 72230
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: Unable to extract MesaLib-10.0.0.tar.{gz,bz2} with
bsdtar
  Severity: major
Classification: Unclassified
OS: Linux (All)
  Reporter: p...@pks.im
  Hardware: All
Status: NEW
   Version: unspecified
 Component: Other
   Product: Mesa

It is currently not possible to extract the archive provided on [1] with bsdtar
due to duplicated files in the archive. This could be caused by misuse of the
-T or -I flags on archive creation probably resulting in corrupted archives
(see [2]). GNU tar provides some logic that handles such corruptions and as a
result doesn't fail extracting the archive.

[1]: ftp://ftp.freedesktop.org/pub/mesa/10.0/
[2]: https://groups.google.com/forum/#!topic/libarchive-discuss/M86idLujRgw

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V3 06/10] i965/Gen7: Include bitfield in the sampler key for CMS layout

2013-12-02 Thread Paul Berry
On 1 December 2013 00:24, Chris Forbes chr...@ijw.co.nz wrote:

 We need to emit extra shader code in this case to sample the
 MCS surface first; we can't just blindly do this all the time
 since IVB will sometimes try to access the MCS surface even if
 disabled.

 V3: Use actual MSAA layout from the texture's mt, rather
 than computing what would have been used based on the format.
 This is simpler and less fragile - there's at least one case where
 we might want to have a texture's MSAA layout change based on what
 the app does (CMS SINT falling back to UMS if the app ever attempts
 to render to it with a channel disabled.)

 This also obsoletes V2's 1/10 -- compute_msaa_layout can now remain
 an implementation detail of the miptree code.

 Signed-off-by: Chris Forbes chr...@ijw.co.nz
 ---
  src/mesa/drivers/dri/i965/brw_program.h | 5 +
  src/mesa/drivers/dri/i965/brw_wm.c  | 8 
  2 files changed, 13 insertions(+)

 diff --git a/src/mesa/drivers/dri/i965/brw_program.h
 b/src/mesa/drivers/dri/i965/brw_program.h
 index 07be4a0..51182ea 100644
 --- a/src/mesa/drivers/dri/i965/brw_program.h
 +++ b/src/mesa/drivers/dri/i965/brw_program.h
 @@ -45,6 +45,11 @@ struct brw_sampler_prog_key_data {
  * For RG32F, gather4's channel select is broken.
  */
 uint16_t gather_channel_quirk_mask;
 +
 +   /**
 +* Whether this sampler uses the compressed multisample surface layout.
 +*/
 +   uint16_t compressed_multisample_layout_mask;
  };

  #ifdef __cplusplus
 diff --git a/src/mesa/drivers/dri/i965/brw_wm.c
 b/src/mesa/drivers/dri/i965/brw_wm.c
 index bc1480c..3977395 100644
 --- a/src/mesa/drivers/dri/i965/brw_wm.c
 +++ b/src/mesa/drivers/dri/i965/brw_wm.c
 @@ -38,6 +38,7 @@
  #include main/samplerobj.h
  #include program/prog_parameter.h
  #include program/program.h
 +#include intel_mipmap_tree.h

  #include glsl/ralloc.h

 @@ -356,6 +357,13 @@ brw_populate_sampler_prog_key_data(struct gl_context
 *ctx,
  if (img-InternalFormat == GL_RG32F)
 key-gather_channel_quirk_mask |= 1  s;
   }
 +
 + /* If this is a multisample sampler, and uses the CMS MSAA
 layout, then
 +  * we need to emit slightly different code to first sample the
 MCS surface.
 +  */
 + if (brw-gen = 7  intel_texture_object((struct
 gl_texture_object *)t)-mt-msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 +key-compressed_multisample_layout_mask |= 1  s;
 + }


We prefer to keep source lines limited to 80 columns when possible.  Maybe
rewrite like this?

 /* If this is a multisample sampler, and uses the CMS MSAA layout,
  * then we need to emit slightly different code to first sample the
  * MCS surface.
  */
 struct intel_texture_object *intel_tex =
intel_texture_object((struct gl_texture_object *)t);
 if (brw-gen = 7 
 intel_tex-mt-msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
key-compressed_multisample_layout_mask |= 1  s;
 }

With that fixed, this patch is:

Reviewed-by: Paul Berry stereotype...@gmail.com


}
 }
  }
 --
 1.8.4.2

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 09/10] i965/vs: Sample from MCS surface when required

2013-12-02 Thread Paul Berry
On 30 November 2013 15:30, Chris Forbes chr...@ijw.co.nz wrote:

 Signed-off-by: Chris Forbes chr...@ijw.co.nz
 ---
  src/mesa/drivers/dri/i965/brw_vec4.h   |  1 +
  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 42
 +-
  2 files changed, 36 insertions(+), 7 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h
 b/src/mesa/drivers/dri/i965/brw_vec4.h
 index 5cec9f9..d4029d8 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.h
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
 @@ -477,6 +477,7 @@ public:
 void emit_unpack_half_2x16(dst_reg dst, src_reg src0);

 uint32_t gather_channel(ir_texture *ir, int sampler);
 +   src_reg emit_mcs_fetch(ir_texture *ir, src_reg coordinate, int
 sampler);
 void swizzle_result(ir_texture *ir, src_reg orig_val, int sampler);

 void emit_ndc_computation();
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 index a13eafb..619b386 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 @@ -2215,6 +2215,31 @@ vec4_visitor::visit(ir_call *ir)
 }
  }

 +src_reg
 +vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, int
 sampler)
 +{
 +   vec4_instruction *inst = new(mem_ctx) vec4_instruction(this,
 SHADER_OPCODE_TXF_MCS);
 +   inst-base_mrf = 2;
 +   inst-mlen = 1;
 +   inst-sampler = sampler;
 +   inst-dst = dst_reg(this, glsl_type::uvec4_type);
 +   inst-dst.writemask = WRITEMASK_XYZW;
 +
 +   /* parameters are: u, v, r, lod; lod will always be zero due to api
 restrictions */
 +   int param_base = inst-base_mrf;
 +   int coord_mask = (1  ir-coordinate-type-vector_elements) - 1;
 +   int zero_mask = 0xf  ~coord_mask;
 +
 +   emit(MOV(dst_reg(MRF, param_base, ir-coordinate-type, coord_mask),
 +coordinate));
 +
 +   emit(MOV(dst_reg(MRF, param_base, ir-coordinate-type, zero_mask),
 +src_reg(0)));
 +
 +   emit(inst);
 +   return src_reg(inst-dst);
 +}
 +
  void
  vec4_visitor::visit(ir_texture *ir)
  {
 @@ -2265,7 +2290,7 @@ vec4_visitor::visit(ir_texture *ir)
 }

 const glsl_type *lod_type = NULL, *sample_index_type = NULL;
 -   src_reg lod, dPdx, dPdy, sample_index;
 +   src_reg lod, dPdx, dPdy, sample_index, mcs;
 switch (ir-op) {
 case ir_tex:
lod = src_reg(0.0f);
 @@ -2286,6 +2311,11 @@ vec4_visitor::visit(ir_texture *ir)
ir-lod_info.sample_index-accept(this);
sample_index = this-result;
sample_index_type = ir-lod_info.sample_index-type;
 +
 +  if (brw-gen = 7  key-tex.compressed_multisample_layout_mask 
 (1sampler))
 + mcs = emit_mcs_fetch(ir, coordinate, sampler);
 +  else
 + mcs = src_reg(0u);
break;
 case ir_txd:
ir-lod_info.grad.dPdx-accept(this);
 @@ -2406,13 +2436,11 @@ vec4_visitor::visit(ir_texture *ir)
} else if (ir-op == ir_txf_ms) {
   emit(MOV(dst_reg(MRF, param_base + 1, sample_index_type,
 WRITEMASK_X),
sample_index));
 + if (brw-gen = 7)
 +mcs.swizzle =
 BRW_SWIZZLE4(SWIZZLE_X,SWIZZLE_X,SWIZZLE_X,SWIZZLE_X);
 +emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::uint_type,
 WRITEMASK_Y),
 + mcs));


It would be nice to have a quick comment here explaining that the swizzle
and writemask are needed because ld2dms expects mcs to be in position 5,
which corresponds to the y component of the 2nd vec4.

With that added, this patch is:

Reviewed-by: Paul Berry stereotype...@gmail.com



   inst-mlen++;
 -
 - /* on Gen7, there is an additional MCS parameter here after SI,
 -  * but we don't bother to emit it since it's always zero. If
 -  * we start supporting texturing from CMS surfaces, this will
 have
 -  * to change
 -  */
} else if (ir-op == ir_txd) {
  const glsl_type *type = lod_type;

 --
 1.8.4.2

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 00/10] i965: Enable CMS layout for multisample textures

2013-12-02 Thread Paul Berry
On 30 November 2013 15:29, Chris Forbes chr...@ijw.co.nz wrote:

 This series enables the compressed multisample layout for multisample
 textures.
 Previously we would only use CMS for renderbuffers, since our texelFetch()
 implementation didn't understand it.

 Changes from V1:
 - Include fix for Haswell's second copy of the sample mask
 - Actually allocate an MCS surface for textures with this layout
 - Emit the MCS info in surface state for textures
 - Fix swizzling of MCS data in VS

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev


I sent comments on patches 3, 6, and 9.  The rest are:

Reviewed-by: Paul Berry stereotype...@gmail.com

Thanks for working on this, Chris!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 72230] Unable to extract MesaLib-10.0.0.tar.{gz, bz2} with bsdtar

2013-12-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=72230

a.ra...@arcor.de changed:

   What|Removed |Added

 CC||a.ra...@arcor.de

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 72230] Unable to extract MesaLib-10.0.0.tar.{gz, bz2} with bsdtar

2013-12-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=72230

Patrick Steinhardt p...@pks.im changed:

   What|Removed |Added

 CC||p...@pks.im

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 03/10] i965/wm: Set copy of sample mask in 3DSTATE_PS correctly for Haswell

2013-12-02 Thread Chris Forbes
Oops -- when I wrote the comment I was still hoping this would have
some visible effect on the problem I was hunting; never updated it.

On Tue, Dec 3, 2013 at 4:51 AM, Paul Berry stereotype...@gmail.com wrote:
 On 30 November 2013 15:30, Chris Forbes chr...@ijw.co.nz wrote:

 The bspec says:

 SW must program the sample mask value in this field so that it matches
 with 3DSTATE_SAMPLE_MASK

 I haven't observed this to actually fix anything, but stumbled across it
 while adding the rest of the support for CMS layout for multisample
textures.

 Signed-off-by: Chris Forbes chr...@ijw.co.nz
 ---
  src/mesa/drivers/dri/i965/gen7_wm_state.c | 9 +++--
  1 file changed, 7 insertions(+), 2 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c
 b/src/mesa/drivers/dri/i965/gen7_wm_state.c
 index 65c9bbf..92e880c 100644
 --- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
 +++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
 @@ -173,8 +173,11 @@ upload_ps_state(struct brw_context *brw)
 if (ctx-Shader.CurrentFragmentProgram == NULL)
dw2 |= GEN7_PS_FLOATING_POINT_MODE_ALT;

 +   /* Haswell requires the sample mask to be set in this packet as well
 as
 +* in 3DSTATE_SAMPLE_MASK; the values must match, or strange things
 happen. */


 In the commit message you say you haven't observed this to fix anything, but
 here you say that strange things happen if you don't set it.  Which is it?

 With that discrepancy resolved, this patch is:

 Reviewed-by: Paul Berry stereotype...@gmail.com


 +   /* _NEW_BUFFERS, _NEW_MULTISAMPLE */
 if (brw-is_haswell)
 -  dw4 |= SET_FIELD(1, HSW_PS_SAMPLE_MASK); /* 1 sample for now */
 +  dw4 |= SET_FIELD(gen6_determine_sample_mask(brw),
 HSW_PS_SAMPLE_MASK);

 dw4 |= (brw-max_wm_threads - 1)  max_threads_shift;

 @@ -274,7 +277,9 @@ upload_ps_state(struct brw_context *brw)
  const struct brw_tracked_state gen7_ps_state = {
 .dirty = {
.mesa  = (_NEW_PROGRAM_CONSTANTS |
 -   _NEW_COLOR),
 +   _NEW_COLOR |
 +_NEW_BUFFERS |
 +_NEW_MULTISAMPLE),
.brw   = (BRW_NEW_FRAGMENT_PROGRAM |
 BRW_NEW_PS_BINDING_TABLE |
 BRW_NEW_BATCH |
 --
 1.8.4.2

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V3 06/10] i965/Gen7: Include bitfield in the sampler key for CMS layout

2013-12-02 Thread Chris Forbes
Yes, that's nicer. Will fix.

On Tue, Dec 3, 2013 at 5:04 AM, Paul Berry stereotype...@gmail.com wrote:
 On 1 December 2013 00:24, Chris Forbes chr...@ijw.co.nz wrote:

 We need to emit extra shader code in this case to sample the
 MCS surface first; we can't just blindly do this all the time
 since IVB will sometimes try to access the MCS surface even if
 disabled.

 V3: Use actual MSAA layout from the texture's mt, rather
 than computing what would have been used based on the format.
 This is simpler and less fragile - there's at least one case where
 we might want to have a texture's MSAA layout change based on what
 the app does (CMS SINT falling back to UMS if the app ever attempts
 to render to it with a channel disabled.)

 This also obsoletes V2's 1/10 -- compute_msaa_layout can now remain
 an implementation detail of the miptree code.

 Signed-off-by: Chris Forbes chr...@ijw.co.nz
 ---
  src/mesa/drivers/dri/i965/brw_program.h | 5 +
  src/mesa/drivers/dri/i965/brw_wm.c  | 8 
  2 files changed, 13 insertions(+)

 diff --git a/src/mesa/drivers/dri/i965/brw_program.h
 b/src/mesa/drivers/dri/i965/brw_program.h
 index 07be4a0..51182ea 100644
 --- a/src/mesa/drivers/dri/i965/brw_program.h
 +++ b/src/mesa/drivers/dri/i965/brw_program.h
 @@ -45,6 +45,11 @@ struct brw_sampler_prog_key_data {
  * For RG32F, gather4's channel select is broken.
  */
 uint16_t gather_channel_quirk_mask;
 +
 +   /**
 +* Whether this sampler uses the compressed multisample surface
 layout.
 +*/
 +   uint16_t compressed_multisample_layout_mask;
  };

  #ifdef __cplusplus
 diff --git a/src/mesa/drivers/dri/i965/brw_wm.c
 b/src/mesa/drivers/dri/i965/brw_wm.c
 index bc1480c..3977395 100644
 --- a/src/mesa/drivers/dri/i965/brw_wm.c
 +++ b/src/mesa/drivers/dri/i965/brw_wm.c
 @@ -38,6 +38,7 @@
  #include main/samplerobj.h
  #include program/prog_parameter.h
  #include program/program.h
 +#include intel_mipmap_tree.h

  #include glsl/ralloc.h

 @@ -356,6 +357,13 @@ brw_populate_sampler_prog_key_data(struct gl_context
 *ctx,
  if (img-InternalFormat == GL_RG32F)
 key-gather_channel_quirk_mask |= 1  s;
   }
 +
 + /* If this is a multisample sampler, and uses the CMS MSAA
 layout, then
 +  * we need to emit slightly different code to first sample the
 MCS surface.
 +  */
 + if (brw-gen = 7  intel_texture_object((struct
 gl_texture_object *)t)-mt-msaa_layout == INTEL_MSAA_LAYOUT_CMS) {
 +key-compressed_multisample_layout_mask |= 1  s;
 + }


 We prefer to keep source lines limited to 80 columns when possible.  Maybe
 rewrite like this?


  /* If this is a multisample sampler, and uses the CMS MSAA layout,
   * then we need to emit slightly different code to first sample the
   * MCS surface.
   */
  struct intel_texture_object *intel_tex =
 intel_texture_object((struct gl_texture_object *)t);

  if (brw-gen = 7 
  intel_tex-mt-msaa_layout == INTEL_MSAA_LAYOUT_CMS) {

 key-compressed_multisample_layout_mask |= 1  s;
  }

 With that fixed, this patch is:

 Reviewed-by: Paul Berry stereotype...@gmail.com


}
 }
  }
 --
 1.8.4.2

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 00/10] i965: Enable CMS layout for multisample textures

2013-12-02 Thread Chris Forbes
Paul,

You had some patches a while ago that did CMS -> UMS conversion, but
ended up not being needed -- if we wanted to resurrect the conversion
code itself, it would now be straightforward to get CMS for signed
integer surfaces as well, until we see the app try to mask off
channels.

Do you think this is worthwhile?

-- Chris

On Tue, Dec 3, 2013 at 5:23 AM, Paul Berry stereotype...@gmail.com wrote:
 On 30 November 2013 15:29, Chris Forbes chr...@ijw.co.nz wrote:

 This series enables the compressed multisample layout for multisample
 textures.
 Previously we would only use CMS for renderbuffers, since our texelFetch()
 implementation didn't understand it.

 Changes from V1:
 - Include fix for Haswell's second copy of the sample mask
 - Actually allocate an MCS surface for textures with this layout
 - Emit the MCS info in surface state for textures
 - Fix swizzling of MCS data in VS

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev


 I sent comments on patches 3, 6, and 9.  The rest are:

 Reviewed-by: Paul Berry stereotype...@gmail.com

 Thanks for working on this, Chris!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH V2 00/10] i965: Enable CMS layout for multisample textures

2013-12-02 Thread Paul Berry
On 2 December 2013 10:01, Chris Forbes chr...@ijw.co.nz wrote:

 Paul,

 You had some patches a while ago that did CMS -> UMS conversion, but
 ended up not being needed -- if we wanted to resurrect the conversion
 code itself, it would now be straightforward to get CMS for signed
 integer surfaces as well, until we see the app try to mask off
 channels.

 Do you think this is worthwhile?


For now, I don't think it's worthwhile.  My guess is that real-world apps
using signed integer multisample surfaces are very rare (perhaps
non-existent), so I'd prefer not to resurrect that hack until we find an
app that needs it.



 -- Chris

 On Tue, Dec 3, 2013 at 5:23 AM, Paul Berry stereotype...@gmail.com
 wrote:
  On 30 November 2013 15:29, Chris Forbes chr...@ijw.co.nz wrote:
 
  This series enables the compressed multisample layout for multisample
  textures.
  Previously we would only use CMS for renderbuffers, since our
 texelFetch()
  implementation didn't understand it.
 
  Changes from V1:
  - Include fix for Haswell's second copy of the sample mask
  - Actually allocate an MCS surface for textures with this layout
  - Emit the MCS info in surface state for textures
  - Fix swizzling of MCS data in VS
 
  ___
  mesa-dev mailing list
  mesa-dev@lists.freedesktop.org
  http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
 
  I sent comments on patches 3, 6, and 9.  The rest are:
 
  Reviewed-by: Paul Berry stereotype...@gmail.com
 
  Thanks for working on this, Chris!

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/15] i965/cfg: Add code to dump blocks and cfg.

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp | 34 ++
 src/mesa/drivers/dri/i965/brw_cfg.h   |  3 +++
 2 files changed, 37 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index e9d2bb8..cfe43d2 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -67,6 +67,19 @@ bblock_t::make_list(void *mem_ctx)
return new(mem_ctx) bblock_link(this);
 }
 
+void
+bblock_t::dump(backend_visitor *v)
+{
+   int ip = this-start_ip;
+   for (backend_instruction *inst = (backend_instruction *)this-start;
+   inst != this-end-next;
+   inst = (backend_instruction *) inst-next) {
+  printf(%5d: , ip);
+  v-dump_instruction(inst);
+  ip++;
+   }
+}
+
 cfg_t::cfg_t(backend_visitor *v)
 {
create(v-mem_ctx, v-instructions);
@@ -261,3 +274,24 @@ cfg_t::make_block_array()
}
assert(i == num_blocks);
 }
+
+void
+cfg_t::dump(backend_visitor *v)
+{
+   for (int b = 0; b  this-num_blocks; b++) {
+bblock_t *block = this-blocks[b];
+  printf(START B%d, b);
+  foreach_list(node, block-parents) {
+ bblock_link *link = (bblock_link *)node;
+ printf( -B%d, link-block-block_num);
+  }
+  printf(\n);
+  block-dump(v);
+  printf(END B%d, b);
+  foreach_list(node, block-children) {
+ bblock_link *link = (bblock_link *)node;
+ printf( -B%d, link-block-block_num);
+  }
+  printf(\n);
+   }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
b/src/mesa/drivers/dri/i965/brw_cfg.h
index ec5a3a0..e667d22 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -46,6 +46,7 @@ public:
bblock_t();
 
void add_successor(void *mem_ctx, bblock_t *successor);
+   void dump(backend_visitor *v);
 
backend_instruction *start;
backend_instruction *end;
@@ -72,6 +73,8 @@ public:
void set_next_block(bblock_t *block);
void make_block_array();
 
+   void dump(backend_visitor *v);
+
/** @{
 *
 * Used while generating the block list.
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 03/15] i965/cfg: Rework to make IF & ELSE blocks flow into ENDIF.

2013-12-02 Thread Matt Turner
Previously we made the basic block following an ENDIF instruction a
successor of the basic blocks ending with IF and ELSE. The PRM says that
IF and ELSE instructions jump *to* the ENDIF, rather than over it.

This should be immaterial to dataflow analysis, except for if, break,
endif sequences:

   START B1 <-B0 <-B9
0x0100: cmp.g.f0(8) nullg158,8,1F g40,1,0F
0x0110: (+f0) if(8) 0 0 null0xUD
   END B1 ->B2 ->B4
   START B2 <-B1
   break
0x0120: break(8) 0 0null0D
   END B2 ->B10
   START B3
0x0130: endif(8) 2  null0x0002UD
   END B3 ->B4

The ENDIF block would have no parents, so dataflow analysis would
generate incorrect results, preventing copy propagation from eliminating
some instructions.

This patch changes the CFG to make ENDIF start rather than end basic
blocks, so that it can be the jump target of the IF and ELSE
instructions.

It helps three programs (including two fs8/fs16 pairs) and hurts a
single fs8/fs16 pair.

total instructions in shared programs: 1561126 -> 1561066 (-0.00%)
instructions in affected programs: 983 -> 923 (-6.10%)

More importantly, it allows copy propagation to handle more cases.
Disabling the register_coalesce() pass before this patch hurts 58
programs, while afterward it only hurts 11 programs.
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp | 46 +--
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 548b458..83c3c34 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -133,10 +133,7 @@ cfg_t::create(void *parent_mem_ctx, exec_list 
*instructions)
 
 cur_if = cur;
 cur_else = NULL;
-/* Set up the block just after the endif.  Don't know when exactly
- * it will start, yet.
- */
-cur_endif = new_block();
+ cur_endif = NULL;
 
 /* Set up our immediately following block, full of then
  * instructions.
@@ -149,26 +146,49 @@ cfg_t::create(void *parent_mem_ctx, exec_list 
*instructions)
 break;
 
   case BRW_OPCODE_ELSE:
-cur-add_successor(mem_ctx, cur_endif);
+ cur_else = cur;
 
 next = new_block();
 next-start = (backend_instruction *)inst-next;
 cur_if-add_successor(mem_ctx, next);
-cur_else = next;
 
 set_next_block(next);
 break;
 
   case BRW_OPCODE_ENDIF: {
-cur_endif-start = (backend_instruction *)inst-next;
-cur-add_successor(mem_ctx, cur_endif);
-set_next_block(cur_endif);
+ backend_instruction *prev_inst = (backend_instruction *)inst-prev;
+ switch (prev_inst-opcode) {
+ case BRW_OPCODE_IF:
+ case BRW_OPCODE_ELSE:
+ case BRW_OPCODE_WHILE:
+ case BRW_OPCODE_DO:
+ case BRW_OPCODE_BREAK:
+ case BRW_OPCODE_CONTINUE:
+/* New block was just created; use it. */
+cur_endif = cur;
+break;
+ case BRW_OPCODE_ENDIF:
+ default:
+cur_endif = new_block();
+cur_endif-start = inst;
+
+cur-end = prev_inst;
+cur-add_successor(mem_ctx, cur_endif);
+
+ip--;
+set_next_block(cur_endif);
+ip++;
+break;
+ }
 
-if (!cur_else)
-   cur_if-add_successor(mem_ctx, cur_endif);
+ backend_instruction *else_inst = NULL;
+ if (cur_else) {
+else_inst = (backend_instruction *)cur_else-end;
 
- backend_instruction *else_inst = cur_else ?
-(backend_instruction *) cur_else-start-prev : NULL;
+cur_else-add_successor(mem_ctx, cur_endif);
+ } else {
+cur_if-add_successor(mem_ctx, cur_endif);
+ }
 
  assert(cur_if-end-opcode == BRW_OPCODE_IF);
  assert(!else_inst || else_inst-opcode == BRW_OPCODE_ELSE);
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/15] i965/cfg: Throw out confusing make_list method.

2013-12-02 Thread Matt Turner
make_list is just a one-line wrapper and was confusingly called on
NULL objects. E.g., cur_if == NULL; cur_if->make_list(mem_ctx).
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp | 20 +++-
 src/mesa/drivers/dri/i965/brw_cfg.h   |  2 --
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 613aa80..aa4f1d0 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -60,14 +60,8 @@ bblock_t::bblock_t() :
 void
 bblock_t::add_successor(void *mem_ctx, bblock_t *successor)
 {
-   successor-parents.push_tail(this-make_list(mem_ctx));
-   children.push_tail(successor-make_list(mem_ctx));
-}
-
-bblock_link *
-bblock_t::make_list(void *mem_ctx)
-{
-   return new(mem_ctx) bblock_link(this);
+   successor-parents.push_tail(new(mem_ctx) bblock_link(this));
+   children.push_tail(new(mem_ctx) bblock_link(successor));
 }
 
 void
@@ -126,8 +120,8 @@ cfg_t::create(void *parent_mem_ctx, exec_list *instructions)
 /* Push our information onto a stack so we can recover from
  * nested ifs.
  */
-if_stack.push_tail(cur_if-make_list(mem_ctx));
-else_stack.push_tail(cur_else-make_list(mem_ctx));
+if_stack.push_tail(new(mem_ctx) bblock_link(cur_if));
+else_stack.push_tail(new(mem_ctx) bblock_link(cur_else));
 
 cur_if = cur;
 cur_else = NULL;
@@ -215,8 +209,8 @@ cfg_t::create(void *parent_mem_ctx, exec_list *instructions)
 /* Push our information onto a stack so we can recover from
  * nested loops.
  */
-do_stack.push_tail(cur_do-make_list(mem_ctx));
-while_stack.push_tail(cur_while-make_list(mem_ctx));
+do_stack.push_tail(new(mem_ctx) bblock_link(cur_do));
+while_stack.push_tail(new(mem_ctx) bblock_link(cur_while));
 
 /* Set up the block just after the while.  Don't know when exactly
  * it will start, yet.
@@ -300,7 +294,7 @@ cfg_t::set_next_block(bblock_t *block)
 
block-start_ip = ip;
block-block_num = num_blocks++;
-   block_list.push_tail(block-make_list(mem_ctx));
+   block_list.push_tail(new(mem_ctx) bblock_link(block));
cur = block;
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
b/src/mesa/drivers/dri/i965/brw_cfg.h
index f6b7934..4b87089 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -43,8 +43,6 @@ class bblock_t {
 public:
DECLARE_RALLOC_CXX_OPERATORS(bblock_t)
 
-   bblock_link *make_list(void *mem_ctx);
-
bblock_t();
 
void add_successor(void *mem_ctx, bblock_t *successor);
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/15] i965/cfg: Remove unnecessary endif_stack.

2013-12-02 Thread Matt Turner
Unnecessary since last commit.
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 83c3c34..c89767b 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -107,7 +107,7 @@ cfg_t::create(void *parent_mem_ctx, exec_list *instructions)
bblock_t *entry = new_block();
bblock_t *cur_if = NULL, *cur_else = NULL, *cur_endif = NULL;
bblock_t *cur_do = NULL, *cur_while = NULL;
-   exec_list if_stack, else_stack, endif_stack, do_stack, while_stack;
+   exec_list if_stack, else_stack, do_stack, while_stack;
bblock_t *next;
 
set_next_block(entry);
@@ -129,7 +129,6 @@ cfg_t::create(void *parent_mem_ctx, exec_list *instructions)
  */
 if_stack.push_tail(cur_if-make_list(mem_ctx));
 else_stack.push_tail(cur_else-make_list(mem_ctx));
-endif_stack.push_tail(cur_endif-make_list(mem_ctx));
 
 cur_if = cur;
 cur_else = NULL;
@@ -211,7 +210,6 @@ cfg_t::create(void *parent_mem_ctx, exec_list *instructions)
 /* Pop the stack so we're in the previous if/else/endif */
 cur_if = pop_stack(if_stack);
 cur_else = pop_stack(else_stack);
-cur_endif = pop_stack(endif_stack);
 break;
   }
   case BRW_OPCODE_DO:
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/15] i965/cfg: Keep pointers to IF/ELSE/ENDIF instructions in the cfg.

2013-12-02 Thread Matt Turner
Useful for finding the associated control flow instructions, given a
block ending in one.
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp | 31 ---
 src/mesa/drivers/dri/i965/brw_cfg.h   | 10 ++
 2 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index cfe43d2..548b458 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -28,7 +28,7 @@
 #include brw_fs.h
 #include brw_cfg.h
 
-/** @file brw_cfg_t.cpp
+/** @file brw_cfg.cpp
  *
  * Walks the shader instructions generated and creates a set of basic
  * blocks with successor/predecessor edges connecting them.
@@ -52,6 +52,10 @@ bblock_t::bblock_t() :
 
parents.make_empty();
children.make_empty();
+
+   if_inst = NULL;
+   else_inst = NULL;
+   endif_inst = NULL;
 }
 
 void
@@ -155,7 +159,7 @@ cfg_t::create(void *parent_mem_ctx, exec_list *instructions)
 set_next_block(next);
 break;
 
-  case BRW_OPCODE_ENDIF:
+  case BRW_OPCODE_ENDIF: {
 cur_endif-start = (backend_instruction *)inst-next;
 cur-add_successor(mem_ctx, cur_endif);
 set_next_block(cur_endif);
@@ -163,12 +167,33 @@ cfg_t::create(void *parent_mem_ctx, exec_list 
*instructions)
 if (!cur_else)
cur_if-add_successor(mem_ctx, cur_endif);
 
+ backend_instruction *else_inst = cur_else ?
+(backend_instruction *) cur_else-start-prev : NULL;
+
+ assert(cur_if-end-opcode == BRW_OPCODE_IF);
+ assert(!else_inst || else_inst-opcode == BRW_OPCODE_ELSE);
+ assert(inst-opcode == BRW_OPCODE_ENDIF);
+
+ cur_if-if_inst = cur_if-end;
+ cur_if-else_inst = else_inst;
+ cur_if-endif_inst = inst;
+
+if (cur_else) {
+cur_else-if_inst = cur_if-end;
+cur_else-else_inst = else_inst;
+cur_else-endif_inst = inst;
+ }
+
+ cur-if_inst = cur_if-end;
+ cur-else_inst = else_inst;
+ cur-endif_inst = inst;
+
 /* Pop the stack so we're in the previous if/else/endif */
 cur_if = pop_stack(if_stack);
 cur_else = pop_stack(else_stack);
 cur_endif = pop_stack(endif_stack);
 break;
-
+  }
   case BRW_OPCODE_DO:
 /* Push our information onto a stack so we can recover from
  * nested loops.
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
b/src/mesa/drivers/dri/i965/brw_cfg.h
index e667d22..ad54f86 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -57,6 +57,16 @@ public:
exec_list parents;
exec_list children;
int block_num;
+
+   /* If the current basic block ends in an IF, ELSE, or ENDIF instruction,
+* these pointers will hold the locations of the other associated control
+* flow instructions.
+*
+* Otherwise they are NULL.
+*/
+   backend_instruction *if_inst;
+   backend_instruction *else_inst;
+   backend_instruction *endif_inst;
 };
 
 class cfg_t {
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/15] i965/cfg: Include only needed headers.

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp | 1 -
 src/mesa/drivers/dri/i965/brw_cfg.h   | 4 +++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index c89767b..613aa80 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -25,7 +25,6 @@
  *
  */
 
-#include brw_fs.h
 #include brw_cfg.h
 
 /** @file brw_cfg.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
b/src/mesa/drivers/dri/i965/brw_cfg.h
index ad54f86..f6b7934 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -25,7 +25,9 @@
  *
  */
 
-#include brw_fs.h
+#include brw_shader.h
+
+class bblock_t;
 
 class bblock_link : public exec_node {
 public:
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/15] i965/cfg: Remove ip & cur from brw_cfg.

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp | 33 -
 src/mesa/drivers/dri/i965/brw_cfg.h   | 10 +-
 2 files changed, 17 insertions(+), 26 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index b87a6be..9391b36 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -83,8 +83,9 @@ cfg_t::cfg_t(exec_list *instructions)
block_list.make_empty();
blocks = NULL;
num_blocks = 0;
-   ip = 0;
-   cur = NULL;
+
+   bblock_t *cur = NULL;
+   int ip = 0;
 
bblock_t *entry = new_block();
bblock_t *cur_if = NULL, *cur_else = NULL, *cur_endif = NULL;
@@ -92,7 +93,7 @@ cfg_t::cfg_t(exec_list *instructions)
exec_list if_stack, else_stack, do_stack, while_stack;
bblock_t *next;
 
-   set_next_block(entry);
+   set_next_block(cur, entry, ip);
 
entry-start = (backend_instruction *) instructions-get_head();
 
@@ -123,7 +124,7 @@ cfg_t::cfg_t(exec_list *instructions)
 next-start = (backend_instruction *)inst-next;
 cur_if-add_successor(mem_ctx, next);
 
-set_next_block(next);
+set_next_block(cur, next, ip);
 break;
 
   case BRW_OPCODE_ELSE:
@@ -133,7 +134,7 @@ cfg_t::cfg_t(exec_list *instructions)
 next-start = (backend_instruction *)inst-next;
 cur_if-add_successor(mem_ctx, next);
 
-set_next_block(next);
+set_next_block(cur, next, ip);
 break;
 
   case BRW_OPCODE_ENDIF: {
@@ -156,9 +157,7 @@ cfg_t::cfg_t(exec_list *instructions)
 cur-end = prev_inst;
 cur-add_successor(mem_ctx, cur_endif);
 
-ip--;
-set_next_block(cur_endif);
-ip++;
+set_next_block(cur, cur_endif, ip - 1);
 break;
  }
 
@@ -214,7 +213,7 @@ cfg_t::cfg_t(exec_list *instructions)
 cur-add_successor(mem_ctx, next);
 cur_do = next;
 
-set_next_block(next);
+set_next_block(cur, next, ip);
 break;
 
   case BRW_OPCODE_CONTINUE:
@@ -225,7 +224,7 @@ cfg_t::cfg_t(exec_list *instructions)
 if (inst-predicate)
cur-add_successor(mem_ctx, next);
 
-set_next_block(next);
+set_next_block(cur, next, ip);
 break;
 
   case BRW_OPCODE_BREAK:
@@ -236,14 +235,14 @@ cfg_t::cfg_t(exec_list *instructions)
 if (inst-predicate)
cur-add_successor(mem_ctx, next);
 
-set_next_block(next);
+set_next_block(cur, next, ip);
 break;
 
   case BRW_OPCODE_WHILE:
 cur_while-start = (backend_instruction *)inst-next;
 
 cur-add_successor(mem_ctx, cur_do);
-set_next_block(cur_while);
+set_next_block(cur, cur_while, ip);
 
 /* Pop the stack so we're in the previous loop */
 cur_do = pop_stack(do_stack);
@@ -274,17 +273,17 @@ cfg_t::new_block()
 }
 
 void
-cfg_t::set_next_block(bblock_t *block)
+cfg_t::set_next_block(bblock_t **cur, bblock_t *block, int ip)
 {
-   if (cur) {
-  assert(cur-end-next == block-start);
-  cur-end_ip = ip - 1;
+   if (*cur) {
+  assert((*cur)-end-next == block-start);
+  (*cur)-end_ip = ip - 1;
}
 
block-start_ip = ip;
block-block_num = num_blocks++;
block_list.push_tail(new(mem_ctx) bblock_link(block));
-   cur = block;
+   *cur = block;
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
b/src/mesa/drivers/dri/i965/brw_cfg.h
index d0f091f..7bd3e24 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -77,19 +77,11 @@ public:
~cfg_t();
 
bblock_t *new_block();
-   void set_next_block(bblock_t *block);
+   void set_next_block(bblock_t **cur, bblock_t *block, int ip);
void make_block_array();
 
void dump(backend_visitor *v);
 
-   /** @{
-*
-* Used while generating the block list.
-*/
-   bblock_t *cur;
-   int ip;
-   /** @} */
-
void *mem_ctx;
 
/** Ordered list (by ip) of basic blocks */
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/15] i965/fs: Let register_coalesce_2() eliminate self-moves.

2013-12-02 Thread Matt Turner
This is the last thing that register_coalesce() still handled.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index eecde62..168411c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2271,7 +2271,8 @@ fs_visitor::register_coalesce_2()
   int var_from = live_intervals-var_from_reg(inst-src[0]);
   int var_to = live_intervals-var_from_reg(inst-dst);
 
-  if (live_intervals-vars_interfere(var_from, var_to))
+  if (live_intervals-vars_interfere(var_from, var_to) 
+  !inst-dst.equals(inst-src[0]))
  continue;
 
   int reg_from = inst-src[0].reg;
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/15] i965: Allow constant propagation into ASR and BFI1.

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp   | 2 ++
 src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index accd9bd..a1720cd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -344,6 +344,8 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry 
*entry)
  progress = true;
  break;
 
+  case BRW_OPCODE_BFI1:
+  case BRW_OPCODE_ASR:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
   case BRW_OPCODE_ADDC:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index 033c642..e11b07f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -95,6 +95,8 @@ try_constant_propagation(vec4_instruction *inst, int arg, 
src_reg *values[4])
   inst-src[arg] = value;
   return true;
 
+   case BRW_OPCODE_BFI1:
+   case BRW_OPCODE_ASR:
case BRW_OPCODE_SHL:
case BRW_OPCODE_SHR:
case BRW_OPCODE_ADDC:
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/15] i965/fs: Remove now useless register_coalesce() pass.

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 147 ---
 src/mesa/drivers/dri/i965/brw_fs.h   |   1 -
 2 files changed, 148 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 168411c..ad1323c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2309,152 +2309,6 @@ fs_visitor::register_coalesce_2()
 }
 
 bool
-fs_visitor::register_coalesce()
-{
-   bool progress = false;
-   int if_depth = 0;
-   int loop_depth = 0;
-
-   foreach_list_safe(node, this-instructions) {
-  fs_inst *inst = (fs_inst *)node;
-
-  /* Make sure that we dominate the instructions we're going to
-   * scan for interfering with our coalescing, or we won't have
-   * scanned enough to see if anything interferes with our
-   * coalescing.  We don't dominate the following instructions if
-   * we're in a loop or an if block.
-   */
-  switch (inst-opcode) {
-  case BRW_OPCODE_DO:
-loop_depth++;
-break;
-  case BRW_OPCODE_WHILE:
-loop_depth--;
-break;
-  case BRW_OPCODE_IF:
-if_depth++;
-break;
-  case BRW_OPCODE_ENDIF:
-if_depth--;
-break;
-  default:
-break;
-  }
-  if (loop_depth || if_depth)
-continue;
-
-  if (inst-opcode != BRW_OPCODE_MOV ||
- inst-is_partial_write() ||
- inst-saturate ||
- inst-dst.file != GRF || (inst-src[0].file != GRF 
-   inst-src[0].file != UNIFORM)||
- inst-dst.type != inst-src[0].type)
-continue;
-
-  bool has_source_modifiers = (inst-src[0].abs ||
-   inst-src[0].negate ||
-   inst-src[0].smear != -1 ||
-   inst-src[0].file == UNIFORM);
-
-  /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
-   * them: check for no writes to either one until the exit of the
-   * program.
-   */
-  bool interfered = false;
-
-  for (fs_inst *scan_inst = (fs_inst *)inst-next;
-  !scan_inst-is_tail_sentinel();
-  scan_inst = (fs_inst *)scan_inst-next) {
-if (scan_inst-dst.file == GRF) {
-   if (scan_inst-overwrites_reg(inst-dst) ||
-scan_inst-overwrites_reg(inst-src[0])) {
-  interfered = true;
-  break;
-   }
-}
-
- if (has_source_modifiers) {
-for (int i = 0; i  3; i++) {
-   if (scan_inst-src[i].file == GRF 
-   scan_inst-src[i].reg == inst-dst.reg 
-   scan_inst-src[i].reg_offset == inst-dst.reg_offset 
-   inst-dst.type != scan_inst-src[i].type)
-   {
- interfered = true;
- break;
-   }
-}
- }
-
-
-/* The gen6 MATH instruction can't handle source modifiers or
- * unusual register regions, so avoid coalescing those for
- * now.  We should do something more specific.
- */
-if (has_source_modifiers  !can_do_source_mods(scan_inst)) {
-interfered = true;
-   break;
-}
-
-if (scan_inst-mlen  0  scan_inst-base_mrf == -1 
-scan_inst-src[0].file == GRF 
-scan_inst-src[0].reg == inst-dst.reg) {
-   interfered = true;
-   break;
-}
-
-/* The accumulator result appears to get used for the
- * conditional modifier generation.  When negating a UD
- * value, there is a 33rd bit generated for the sign in the
- * accumulator value, so now you can't check, for example,
- * equality with a 32-bit value.  See piglit fs-op-neg-uint.
- */
-if (scan_inst-conditional_mod 
-inst-src[0].negate 
-inst-src[0].type == BRW_REGISTER_TYPE_UD) {
-   interfered = true;
-   break;
-}
-  }
-  if (interfered) {
-continue;
-  }
-
-  /* Rewrite the later usage to point at the source of the move to
-   * be removed.
-   */
-  for (fs_inst *scan_inst = inst;
-  !scan_inst-is_tail_sentinel();
-  scan_inst = (fs_inst *)scan_inst-next) {
-for (int i = 0; i  3; i++) {
-   if (scan_inst-src[i].file == GRF 
-   scan_inst-src[i].reg == inst-dst.reg 
-   scan_inst-src[i].reg_offset == inst-dst.reg_offset) {
-  fs_reg new_src = inst-src[0];
-   new_src.type = scan_inst-src[i].type;
-   if (scan_inst-src[i].abs) {
-  new_src.negate = 0;
-  new_src.abs = 1;
-   }
-  new_src.negate ^= scan_inst-src[i].negate;
-  new_src.sechalf = scan_inst-src[i].sechalf;
-  scan_inst-src[i] = new_src;
-   }
-}
-  }
-
-  

[Mesa-dev] [PATCH 09/15] i965/cfg: Document cur_* variables.

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index 9391b36..eb84640 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -88,8 +88,11 @@ cfg_t::cfg_t(exec_list *instructions)
int ip = 0;
 
bblock_t *entry = new_block();
-   bblock_t *cur_if = NULL, *cur_else = NULL, *cur_endif = NULL;
-   bblock_t *cur_do = NULL, *cur_while = NULL;
+   bblock_t *cur_if = NULL;/** BB ending with IF. */
+   bblock_t *cur_else = NULL;  /** BB ending with ELSE. */
+   bblock_t *cur_endif = NULL; /** BB starting with ENDIF. */
+   bblock_t *cur_do = NULL;/** BB ending with DO. */
+   bblock_t *cur_while = NULL; /** BB immediately following WHILE. */
exec_list if_stack, else_stack, do_stack, while_stack;
bblock_t *next;
 
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/15] i965/fs: Rename register_coalesce_2() -> register_coalesce().

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 10 +-
 src/mesa/drivers/dri/i965/brw_fs.h   |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index ad1323c..2d41c3f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2241,12 +2241,12 @@ fs_visitor::dead_code_eliminate_local()
 }
 
 /**
- * Implements a second type of register coalescing: This one checks if
- * the two regs involved in a raw move don't interfere, in which case
- * they can both by stored in the same place and the MOV removed.
+ * Implements register coalescing: Checks if the two registers involved in a
+ * raw move don't interfere, in which case they can both be stored in the same
+ * place and the MOV removed.
  */
 bool
-fs_visitor::register_coalesce_2()
+fs_visitor::register_coalesce()
 {
bool progress = false;
 
@@ -3125,7 +3125,7 @@ fs_visitor::run()
 progress = dead_code_eliminate() || progress;
 progress = dead_code_eliminate_local() || progress;
  progress = dead_control_flow_eliminate(this) || progress;
-progress = register_coalesce_2() || progress;
+ progress = register_coalesce() || progress;
 progress = compute_to_mrf() || progress;
   } while (progress);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 12a7576..be87e59 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -314,7 +314,7 @@ public:
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
  exec_list *acp);
-   bool register_coalesce_2();
+   bool register_coalesce();
bool compute_to_mrf();
bool dead_code_eliminate();
bool dead_code_eliminate_local();
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/15] i965: Allow commuting the operands of ADDC for const propagation.

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp   | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index a1720cd..fc47d22 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -348,7 +348,6 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry 
*entry)
   case BRW_OPCODE_ASR:
   case BRW_OPCODE_SHL:
   case BRW_OPCODE_SHR:
-  case BRW_OPCODE_ADDC:
   case BRW_OPCODE_SUBB:
  if (i == 1) {
 inst-src[i] = entry-src;
@@ -362,6 +361,7 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry 
*entry)
   case BRW_OPCODE_OR:
   case BRW_OPCODE_AND:
   case BRW_OPCODE_XOR:
+  case BRW_OPCODE_ADDC:
  if (i == 1) {
 inst-src[i] = entry-src;
 progress = true;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
index e11b07f..abb9101 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -99,7 +99,6 @@ try_constant_propagation(vec4_instruction *inst, int arg, 
src_reg *values[4])
case BRW_OPCODE_ASR:
case BRW_OPCODE_SHL:
case BRW_OPCODE_SHR:
-   case BRW_OPCODE_ADDC:
case BRW_OPCODE_SUBB:
   if (arg == 1) {
  inst-src[arg] = value;
@@ -113,6 +112,7 @@ try_constant_propagation(vec4_instruction *inst, int arg, 
src_reg *values[4])
case BRW_OPCODE_OR:
case BRW_OPCODE_AND:
case BRW_OPCODE_XOR:
+   case BRW_OPCODE_ADDC:
   if (arg == 1) {
 inst-src[arg] = value;
 return true;
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/15] glsl: Remove silly OR(..., 0x0) from ldexp() lowering.

2013-12-02 Thread Matt Turner
I translated copysign(0.0f, x) a little too literally.
---
 src/glsl/lower_instructions.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index d01879c..8f8d448 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -383,7 +383,6 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression 
*ir)
 
/* Constants */
ir_constant *zeroi = ir_constant::zero(ir, ivec);
-   ir_constant *zerof = ir_constant::zero(ir, ir-type);
 
ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x807fu, 
vec_elem);
ir_constant *sign_mask = new(ir) ir_constant(0x8000u, vec_elem);
@@ -429,8 +428,7 @@ lower_instructions_visitor::ldexp_to_arith(ir_expression 
*ir)
 */
i.insert_before(zero_sign_x);
i.insert_before(assign(zero_sign_x,
-  bitcast_u2f(bit_or(bit_and(bitcast_f2u(x), 
sign_mask),
- bitcast_f2u(zerof);
+  bitcast_u2f(bit_and(bitcast_f2u(x), sign_mask;
 
i.insert_before(is_not_zero_or_underflow);
i.insert_before(assign(is_not_zero_or_underflow,
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/15] i965/cfg: Clean up cfg_t constructors.

2013-12-02 Thread Matt Turner
parent_mem_ctx was unused since db47074a, so remove the two wrappers
around create() and make create() the constructor.
---
 src/mesa/drivers/dri/i965/brw_cfg.cpp | 13 +
 src/mesa/drivers/dri/i965/brw_cfg.h   |  5 +
 src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp   |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_cse.cpp  |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp|  2 +-
 src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp   |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp|  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_live_variables.cpp |  2 +-
 9 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp 
b/src/mesa/drivers/dri/i965/brw_cfg.cpp
index aa4f1d0..b87a6be 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp
@@ -77,18 +77,7 @@ bblock_t::dump(backend_visitor *v)
}
 }
 
-cfg_t::cfg_t(backend_visitor *v)
-{
-   create(v-mem_ctx, v-instructions);
-}
-
-cfg_t::cfg_t(void *mem_ctx, exec_list *instructions)
-{
-   create(mem_ctx, instructions);
-}
-
-void
-cfg_t::create(void *parent_mem_ctx, exec_list *instructions)
+cfg_t::cfg_t(exec_list *instructions)
 {
mem_ctx = ralloc_context(NULL);
block_list.make_empty();
diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
b/src/mesa/drivers/dri/i965/brw_cfg.h
index 4b87089..d0f091f 100644
--- a/src/mesa/drivers/dri/i965/brw_cfg.h
+++ b/src/mesa/drivers/dri/i965/brw_cfg.h
@@ -73,12 +73,9 @@ class cfg_t {
 public:
DECLARE_RALLOC_CXX_OPERATORS(cfg_t)
 
-   cfg_t(backend_visitor *v);
-   cfg_t(void *mem_ctx, exec_list *instructions);
+   cfg_t(exec_list *instructions);
~cfg_t();
 
-   void create(void *mem_ctx, exec_list *instructions);
-
bblock_t *new_block();
void set_next_block(bblock_t *block);
void make_block_array();
diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp 
b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
index 8bdf094..ad8ed82 100644
--- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
+++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
@@ -39,7 +39,7 @@ dead_control_flow_eliminate(backend_visitor *v)
 {
bool progress = false;
 
-   cfg_t cfg(v);
+   cfg_t cfg(v-instructions);
 
for (int b = 0; b  cfg.num_blocks; b++) {
   bblock_t *block = cfg.blocks[b];
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 26bac94..accd9bd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -525,7 +525,7 @@ fs_visitor::opt_copy_propagate()
 {
bool progress = false;
void *mem_ctx = ralloc_context(this-mem_ctx);
-   cfg_t cfg(this);
+   cfg_t cfg(instructions);
exec_list *out_acp[cfg.num_blocks];
for (int i = 0; i  cfg.num_blocks; i++)
   out_acp[i] = new exec_list [ACP_HASH_SIZE];
diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
index 27541db..d8a5434 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp
@@ -269,7 +269,7 @@ fs_visitor::opt_cse()
 
calculate_live_intervals();
 
-   cfg_t cfg(this);
+   cfg_t cfg(instructions);
 
for (int b = 0; b  cfg.num_blocks; b++) {
   bblock_t *block = cfg.blocks[b];
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 6626a8c..8bb6184 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1308,7 +1308,7 @@ fs_generator::generate_code(exec_list *instructions)
 
cfg_t *cfg = NULL;
if (unlikely(INTEL_DEBUG  DEBUG_WM))
-  cfg = new(mem_ctx) cfg_t(mem_ctx, instructions);
+  cfg = new(mem_ctx) cfg_t(instructions);
 
foreach_list(node, instructions) {
   fs_inst *inst = (fs_inst *)node;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 21b2618..fa84c55 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -320,7 +320,7 @@ fs_visitor::calculate_live_intervals()
   virtual_grf_end[i] = -1;
}
 
-   cfg_t cfg(this);
+   cfg_t cfg(instructions);
this-live_intervals = new(mem_ctx) fs_live_variables(this, cfg);
 
/* Merge the per-component live ranges to whole VGRF live ranges. */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 73f91a0..5222a67 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -668,7 +668,7 @@ vec4_visitor::opt_set_dependency_control()
vec4_instruction *last_mrf_write[BRW_MAX_GRF];
uint8_t mrf_channels_written[BRW_MAX_GRF];
 

Re: [Mesa-dev] [PATCH 2/2] i965/hsw: Apply non-msrt fast color clear w/a to all HSW GTs

2013-12-02 Thread Chad Versace

On 11/26/2013 07:13 PM, Paul Berry wrote:

On 26 November 2013 17:34, Chad Versace chad.vers...@linux.intel.com wrote:


Pre-patch, the workaround was applied to only HSW GT3. However, the
workaround also fixes render corruption on the HSW GT1 Chromebook,
codenamed Falco.

CC: Anuj Phogat anuj.pho...@gmail.com
CC: Paul Berry stereotype...@gmail.com
OTC-Tracker: CHRMOS-812
Signed-off-by: Chad Versace chad.vers...@linux.intel.com
---
  src/mesa/drivers/dri/i965/brw_blorp_clear.cpp | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
index 63d83d7..2620ce6 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_clear.cpp
@@ -265,7 +265,7 @@ brw_blorp_clear_params::brw_blorp_clear_params(struct
brw_context *brw,
x_align *= 16;
y_align *= 32;

-  if (brw-is_haswell  brw-gt == 3) {
+  if (brw-is_haswell) {



Ok, I'll ask the obvious question: if the bspec says that this extra
rectangle alignment code is needed for IVB, VLVT, and HSW, why are we doing
it for just HSW?

I suspect that in truth, the extra rectangle alignment is only needed for
HSW (This is based in part on the fact that fast clears have been working
fine on IVB for a long time without this bug fix), so the patch will
probably work fine as written.  But the performance cost of applying the
extra alignment to IVB is minuscule, and if it saves us from having to
track down and re-fix this bug one more time, it will be worth it.

On the other hand, there's some appeal to limiting the scope of the bug fix
to just the hardware that's been experiencing problems.


I applied it only to Haswell because...

  1. I did not validate the workaround on Ivybridge. I don't like committing 
code
 that affects older gens unless I've validated it on that gen. Especially
 for a patch in train for the stable branch.

  2. I suspect, as you, that the alignment is needed only on Haswell. 
Accordingly,
 I didn't want to waste time validating it on Ivybridge.


I'll leave it up to you.  Either way, the series is:

Reviewed-by: Paul Berry stereotype...@gmail.com

Note: Personally I'd prefer to see the two patches squashed together, but I
won't be a stickler about that.


I'll squash them.


Oh, one other question: was it a deliberate decision not to mark this as a
candidate for cherry-picking back to the 10.0 and 9.2 branches?  At first
blush it seems worth cherry-picking to me.


I forgot. I'll CC it to stable.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Spec interpretation question: layered framebuffers with mismatched layer counts

2013-12-02 Thread Paul Berry
On 25 November 2013 11:08, Ian Romanick i...@freedesktop.org wrote:

 On 11/22/2013 03:08 PM, Paul Berry wrote:
  The ARB_geometry_shader4 spec says, in the list of conditions necessary
  for framebuffer completeness:
 
* If any framebuffer attachment is layered, all attachments must
 have
  the same layer count.  For three-dimensional textures, the layer
  count
  is the depth of the attached volume.  For cube map textures, the
  layer
  count is always six.  For one- and two-dimensional array
  textures, the
  layer count is simply the number of layers in the array texture.
  { FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB }
 
 
  When geometry shaders were adopted into desktop GL, this condition was
  dropped.  The constant FRAMEBUFFER_INCOMPLETE_LAYER_COUNT doesn't appear
  at all in the desktop GL spec.  Instead, GL 3.2 says (in section 4.4.7
  (Layered Framebuffers)):
 
  When fragments are written to a layered framebuffer, the fragment’s
  layer number selects an image from the array of images at each
  attachment point to use for the stencil test (see section 4.1.5), depth
  buffer test (see section 4.1.6), and for blending and color buffer
  writes (see section 4.1.8). If the fragment’s layer number is negative,
  or greater than the minimum number of layers of any attachment, the
  effects of the fragment on the framebuffer contents are undefined.
 
  (note: presumably where the spec says "greater", "greater than or equal" is
  actually intended).
 
  In other words, a framebuffer is allowed to have layers with mismatched
  layer counts, but if so then the client may only reliably render to
  layer numbers that are present in all attachments.
 
  Mesa currently implements the rule in ARB_geometry_shader4.
  Technically, this is a bug, since Mesa is trying to implement geometry
  shaders as specified in GL 3.2, not as specified in ARB_geometry_shader4.
 
  However, there are two mitigating factors:
 
  1. If we follow the GL 3.2 spec, then it's not clear what should happen
  if the client calls glClear() on a framebuffer with mismatched layer
  counts.  For example, if I have a color attachment with 4 layers and a
  color attachment with 2 layers, should all 4 layers of the color
  attachment with 4 layers be cleared, or just the first 2?  Or is it
  undefined?  If we're required to clear all 4 layers, that's going to
  make the Meta implementation of glClear() a lot more clumsy.

 Principle of least surprise suggests that we should do whatever we
 already do for mismatches in other dimensions.  I seem to recall there
 being a fairly recent Khronos bug about that very issue.  I'll try to
 track down the resolution.

  2. The Nvidia proprietary drivers for Linux (version 313.18) follow the
  ARB_geometry_shader4 rule (returning
  FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB from glCheckFramebufferStatus()),
  even when an OpenGL 3.2+ context is used.

 That is surprising.  If you have a test case, maybe Ken could try it on
 AMD.


Ok, I just tried it on AMD, and what I found was:

- Unlike Nvidia, AMD allows a framebuffer to have different layer counts in
different attachments.

- Calling glClear() on such a framebuffer causes all layers of all
attachments to be cleared, even those layers which aren't present in all
attachments.



  Currently, I'm inclined to leave Mesa as is, and file a spec bug with
  Khronos encouraging them to adopt the ARB_geometry_shader4 text into
  OpenGL.  I'm curious to hear other people's opinions.

 Looking at the spec, I think the change was intentional, but I don't
 recall why.  I'd have to do some archaeology to find the reason.  If we
 poke at some other implementations and they all generate
 FRAMEBUFFER_INCOMPLETE_LAYER_COUNT_ARB, then we should try to get the
 spec changed.  If some implementations don't generate the error, then
 the next step is not as clear to me.


Here's my current plan:

- Change fbo_incomplete() to allow different layer counts in different
attachments.

- Replace gl_framebuffer::NumLayers with MinNumLayers and MaxNumLayers.

- In meta clears, loop from 0 to MaxNumLayers-1, so that all layers of all
attachments get cleared, even if not all attachments have the same layer
count.*

*Note: strictly speaking, this makes Meta depend on some undefined
behaviour--it depends on the hardware handling writes to undefined layers
independently on different attachments.  But I believe that's what i965
does, and since i965 is the only user of Meta that supports layered
attachments, we should be fine.

Does that seem good to you?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/23] i965: Add non-mutating helper functions to modify the register offset.

2013-12-02 Thread Francisco Jerez
Yes, we could avoid having four copies of essentially the same code by
using templates here.
---
 src/mesa/drivers/dri/i965/brw_fs.h |  8 
 src/mesa/drivers/dri/i965/brw_shader.h |  8 
 src/mesa/drivers/dri/i965/brw_vec4.h   | 16 
 3 files changed, 32 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index f918f7e..c011758 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -93,6 +93,14 @@ public:
 };
 
 static inline fs_reg
+offset(fs_reg reg, unsigned delta)
+{
+   assert(delta == 0 || (reg.file != HW_REG  reg.file != IMM));
+   reg.reg_offset += delta;
+   return reg;
+}
+
+static inline fs_reg
 byte_offset(fs_reg reg, unsigned delta)
 {
assert(delta == 0 || (reg.file != HW_REG  reg.file != IMM));
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
b/src/mesa/drivers/dri/i965/brw_shader.h
index d1357ce..322ef1f 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -80,6 +80,14 @@ public:
} imm;
 };
 
+static inline backend_reg
+offset(backend_reg reg, unsigned delta)
+{
+   assert(delta == 0 || (reg.file != HW_REG  reg.file != IMM));
+   reg.reg_offset += delta;
+   return reg;
+}
+
 class backend_instruction : public exec_node {
 public:
bool is_tex();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 720cc6e..db1604a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -117,6 +117,14 @@ public:
src_reg *reladdr;
 };
 
+static inline src_reg
+offset(src_reg reg, unsigned delta)
+{
+   assert(delta == 0 || (reg.file != HW_REG  reg.file != IMM));
+   reg.reg_offset += delta;
+   return reg;
+}
+
 class dst_reg : public backend_reg
 {
 public:
@@ -138,6 +146,14 @@ public:
src_reg *reladdr;
 };
 
+static inline dst_reg
+offset(dst_reg reg, unsigned delta)
+{
+   assert(delta == 0 || (reg.file != HW_REG  reg.file != IMM));
+   reg.reg_offset += delta;
+   return reg;
+}
+
 dst_reg
 with_writemask(dst_reg const r, int mask);
 
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/23] i965/fs: Allocate the param_size array dynamically.

2013-12-02 Thread Francisco Jerez
Useful because the total number of uniform components might exceed
MAX_UNIFORMS * 4 in some cases because of the image metadata we'll be
passing as push constants.
---
 src/mesa/drivers/dri/i965/brw_fs.h   | 2 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 2 +-
 src/mesa/drivers/dri/i965/brw_wm.c   | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 9f4a1be..ad165f0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -432,7 +432,7 @@ public:
struct brw_wm_compile *c;
unsigned int sanity_param_count;
 
-   int param_size[MAX_UNIFORMS * 4];
+   int *param_size;
 
int *virtual_grf_sizes;
int virtual_grf_count;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index a045100..a66fe38 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2925,7 +2925,7 @@ fs_visitor::fs_visitor(struct brw_context *brw,
 
this-spilled_any_registers = false;
 
-   memset(this-param_size, 0, sizeof(this-param_size));
+   this-param_size = rzalloc_array(mem_ctx, int, c-prog_data.nr_params);
 }
 
 fs_visitor::~fs_visitor()
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index bc1480c..b745d8f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -165,6 +165,7 @@ bool do_wm_prog(struct brw_context *brw,
param_count += 2 * BRW_MAX_TEX_UNIT;
c-prog_data.param = rzalloc_array(NULL, const float *, param_count);
c-prog_data.pull_param = rzalloc_array(NULL, const float *, param_count);
+   c-prog_data.nr_params = param_count;
 
memcpy(c-key, key, sizeof(*key));
 
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/23] i965/fs: Remove fs_reg::smear.

2013-12-02 Thread Francisco Jerez
The same effect can be achieved using a combination of ::stride and
::subreg_offset.  Remove the less flexible ::smear to keep the data
members of fs_reg orthogonal.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 23 +-
 src/mesa/drivers/dri/i965/brw_fs.h |  4 +++-
 .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  4 +---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  4 ++--
 .../drivers/dri/i965/brw_fs_live_variables.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |  4 ++--
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 14 ++---
 7 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e4cee33..e6e32fc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -379,7 +379,6 @@ void
 fs_reg::init()
 {
memset(this, 0, sizeof(*this));
-   this-smear = -1;
stride = 1;
 }
 
@@ -445,7 +444,6 @@ fs_reg::equals(const fs_reg r) const
!reladdr  !r.reladdr 
memcmp(fixed_hw_reg, r.fixed_hw_reg,
   sizeof(fixed_hw_reg)) == 0 
-   smear == r.smear 
stride == r.stride 
imm.u == r.imm.u);
 }
@@ -468,6 +466,15 @@ fs_reg::apply_stride(unsigned stride)
return *this;
 }
 
+fs_reg 
+fs_reg::set_smear(unsigned subreg)
+{
+   assert(file != HW_REG  file != IMM);
+   subreg_offset = subreg * type_sz(type);
+   stride = 0;
+   return *this;
+}
+
 bool
 fs_reg::is_contiguous() const
 {
@@ -545,7 +552,7 @@ fs_visitor::get_timestamp()
 * else that might disrupt timing) by setting smear to 2 and checking if
 * that field is != 0.
 */
-   dst.smear = 0;
+   dst.set_smear(0);
 
return dst;
 }
@@ -580,7 +587,7 @@ fs_visitor::emit_shader_time_end()
 * were the only two timestamp reads that happened).
 */
fs_reg reset = shader_end_time;
-   reset.smear = 2;
+   reset.set_smear(2);
fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u)));
test-conditional_mod = BRW_CONDITIONAL_Z;
emit(IF(BRW_PREDICATE_NORMAL));
@@ -1934,7 +1941,7 @@ fs_visitor::setup_pull_constants()
 inst-src[i].file = GRF;
 inst-src[i].reg = dst.reg;
 inst-src[i].reg_offset = 0;
-inst-src[i].smear = pull_index  3;
+inst-src[i].set_smear(pull_index  3);
   }
}
 }
@@ -2263,8 +2270,7 @@ fs_visitor::register_coalesce_2()
  inst-src[0].file != GRF ||
  inst-src[0].negate ||
  inst-src[0].abs ||
- inst-src[0].smear != -1 ||
-  !inst-src[0].is_contiguous() ||
+ !inst-src[0].is_contiguous() ||
  inst-dst.file != GRF ||
  inst-dst.type != inst-src[0].type ||
  virtual_grf_sizes[inst-src[0].reg] != 1) {
@@ -2356,7 +2362,6 @@ fs_visitor::register_coalesce()
 
   bool has_source_modifiers = (inst-src[0].abs ||
inst-src[0].negate ||
-   inst-src[0].smear != -1 ||
!inst-src[0].is_contiguous() ||
inst-src[0].file == UNIFORM);
 
@@ -2480,7 +2485,7 @@ fs_visitor::compute_to_mrf()
  inst-dst.file != MRF || inst-src[0].file != GRF ||
  inst-dst.type != inst-src[0].type ||
  inst-src[0].abs || inst-src[0].negate ||
-  inst-src[0].smear != -1 || !inst-src[0].is_contiguous() ||
+  !inst-src[0].is_contiguous() ||
   inst-src[0].subreg_offset)
 continue;
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index b0ce812..e36943c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -81,10 +81,12 @@ public:
fs_reg retype(uint32_t type);
fs_reg apply_stride(unsigned stride);
 
+   /** Smear a channel of the reg to all channels. */
+   fs_reg set_smear(unsigned subreg);
+
bool negate;
bool abs;
bool sechalf;
-   int smear; /* -1, or a channel of the reg to smear to all channels. */
int subreg_offset; /** Offset in bytes from the start of the register. */
int stride; /** Register region horizontal stride */
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index 2f2d6b6..6b67d39 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -298,7 +298,7 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, 
acp_entry *entry)
bool has_source_modifiers = entry-src.abs || entry-src.negate;
 
if ((has_source_modifiers || entry-src.file == UNIFORM ||
-entry-src.smear != -1 || !entry-src.is_contiguous()) 
+!entry-src.is_contiguous()) 
!can_do_source_mods(inst))
   return false;
 
@@ -311,8 +311,6 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, 
acp_entry *entry)

[Mesa-dev] [PATCH 18/23] i965/fs: Remove fs_reg::retype.

2013-12-02 Thread Francisco Jerez
There doesn't seem to be any reason for it to be a method, and it's
surprising that the expression 'reg.retype(t)' doesn't retype its
object but rather it creates a temporary with the new type.  Use
'retype(reg, t)' instead.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp |  8 
 src/mesa/drivers/dri/i965/brw_fs.h   |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 18 +-
 3 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 6b9c9b5..4408cbe 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -448,14 +448,6 @@ fs_reg::equals(const fs_reg r) const
imm.u == r.imm.u);
 }
 
-fs_reg
-fs_reg::retype(uint32_t type)
-{
-   fs_reg result = *this;
-   result.type = type;
-   return result;
-}
-
 fs_reg 
 fs_reg::apply_stride(unsigned stride)
 {
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index f62da44..4f9767c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -78,7 +78,7 @@ public:
bool equals(const fs_reg r) const;
bool is_valid_3src() const;
bool is_contiguous() const;
-   fs_reg retype(uint32_t type);
+
fs_reg apply_stride(unsigned stride);
 
/** Smear a channel of the reg to all channels. */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 5693dbc..f45f089 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1295,24 +1295,24 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg 
dst, fs_reg coordinate,
   break;
}
case ir_txs:
-  emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), lod));
+  emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), lod));
   next.reg_offset++;
   break;
case ir_query_levels:
-  emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+  emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
   next.reg_offset++;
   break;
case ir_txf:
   /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
-  emit(MOV(next.retype(BRW_REGISTER_TYPE_D), coordinate));
+  emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
   coordinate.reg_offset++;
   next.reg_offset++;
 
-  emit(MOV(next.retype(BRW_REGISTER_TYPE_D), lod));
+  emit(MOV(retype(next, BRW_REGISTER_TYPE_D), lod));
   next.reg_offset++;
 
   for (int i = 1; i  ir-coordinate-type-vector_elements; i++) {
-emit(MOV(next.retype(BRW_REGISTER_TYPE_D), coordinate));
+emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
 coordinate.reg_offset++;
 next.reg_offset++;
   }
@@ -1320,21 +1320,21 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg 
dst, fs_reg coordinate,
   coordinate_done = true;
   break;
case ir_txf_ms:
-  emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), sample_index));
+  emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), sample_index));
   next.reg_offset++;
 
   /* constant zero MCS; we arrange to never actually have a compressed
* multisample surface here for now. TODO: issue ld_mcs to get this 
first,
* if we ever support texturing from compressed multisample surfaces
*/
-  emit(MOV(next.retype(BRW_REGISTER_TYPE_UD), fs_reg(0u)));
+  emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
   next.reg_offset++;
 
   /* there is no offsetting for this message; just copy in the integer
* texture coordinates
*/
   for (int i = 0; i  ir-coordinate-type-vector_elements; i++) {
- emit(MOV(next.retype(BRW_REGISTER_TYPE_D), coordinate));
+ emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
  coordinate.reg_offset++;
  next.reg_offset++;
   }
@@ -1357,7 +1357,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, 
fs_reg coordinate,
  }
 
  for (int i = 0; i  2; i++) { /* offu, offv */
-emit(MOV(next.retype(BRW_REGISTER_TYPE_D), offset_value));
+emit(MOV(retype(next, BRW_REGISTER_TYPE_D), offset_value));
 offset_value.reg_offset++;
 next.reg_offset++;
  }
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/23] i965/fs: Add support for specifying register horizontal strides.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 29 +++---
 src/mesa/drivers/dri/i965/brw_fs.h |  3 +++
 .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  7 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  8 +++---
 .../drivers/dri/i965/brw_fs_live_variables.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  |  4 +--
 6 files changed, 43 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 0caae2d..e4cee33 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -380,6 +380,7 @@ fs_reg::init()
 {
memset(this, 0, sizeof(*this));
this-smear = -1;
+   stride = 1;
 }
 
 /** Generic unset register constructor. */
@@ -445,6 +446,7 @@ fs_reg::equals(const fs_reg r) const
memcmp(fixed_hw_reg, r.fixed_hw_reg,
   sizeof(fixed_hw_reg)) == 0 
smear == r.smear 
+   stride == r.stride 
imm.u == r.imm.u);
 }
 
@@ -456,6 +458,22 @@ fs_reg::retype(uint32_t type)
return result;
 }
 
+fs_reg 
+fs_reg::apply_stride(unsigned stride)
+{
+   assert((this-stride * stride) = 4 
+  is_power_of_two(stride) 
+  file != HW_REG  file != IMM);
+   this-stride *= stride;
+   return *this;
+}
+
+bool
+fs_reg::is_contiguous() const
+{
+   return stride == 1;
+}
+
 bool
 fs_reg::is_valid_3src() const
 {
@@ -686,7 +704,7 @@ fs_inst::is_partial_write()
 {
return ((this-predicate  this-opcode != BRW_OPCODE_SEL) ||
this-force_uncompressed ||
-   this-force_sechalf);
+   this-force_sechalf || !this-dst.is_contiguous());
 }
 
 int
@@ -2246,6 +2264,7 @@ fs_visitor::register_coalesce_2()
  inst-src[0].negate ||
  inst-src[0].abs ||
  inst-src[0].smear != -1 ||
+  !inst-src[0].is_contiguous() ||
  inst-dst.file != GRF ||
  inst-dst.type != inst-src[0].type ||
  virtual_grf_sizes[inst-src[0].reg] != 1) {
@@ -2338,6 +2357,7 @@ fs_visitor::register_coalesce()
   bool has_source_modifiers = (inst-src[0].abs ||
inst-src[0].negate ||
inst-src[0].smear != -1 ||
+   !inst-src[0].is_contiguous() ||
inst-src[0].file == UNIFORM);
 
   /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
@@ -2422,7 +2442,9 @@ fs_visitor::register_coalesce()
}
   new_src.negate ^= scan_inst-src[i].negate;
   new_src.sechalf = scan_inst-src[i].sechalf;
-   new_src.subreg_offset += scan_inst-src[i].subreg_offset;
+   new_src.subreg_offset +=
+  scan_inst-src[i].subreg_offset * new_src.stride;
+   new_src.stride *= scan_inst-src[i].stride;
   scan_inst-src[i] = new_src;
}
 }
@@ -2458,7 +2480,8 @@ fs_visitor::compute_to_mrf()
  inst-dst.file != MRF || inst-src[0].file != GRF ||
  inst-dst.type != inst-src[0].type ||
  inst-src[0].abs || inst-src[0].negate ||
-  inst-src[0].smear != -1 || inst-src[0].subreg_offset)
+  inst-src[0].smear != -1 || !inst-src[0].is_contiguous() ||
+  inst-src[0].subreg_offset)
 continue;
 
   /* Work out which hardware MRF registers are written by this
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 93a393d..b0ce812 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -77,13 +77,16 @@ public:
 
bool equals(const fs_reg r) const;
bool is_valid_3src() const;
+   bool is_contiguous() const;
fs_reg retype(uint32_t type);
+   fs_reg apply_stride(unsigned stride);
 
bool negate;
bool abs;
bool sechalf;
int smear; /* -1, or a channel of the reg to smear to all channels. */
int subreg_offset; /** Offset in bytes from the start of the register. */
+   int stride; /** Register region horizontal stride */
 
fs_reg *reladdr;
 };
diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
index f3f44c6..2f2d6b6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -298,7 +298,11 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, 
acp_entry *entry)
bool has_source_modifiers = entry-src.abs || entry-src.negate;
 
if ((has_source_modifiers || entry-src.file == UNIFORM ||
-entry-src.smear != -1)  !can_do_source_mods(inst))
+entry-src.smear != -1 || !entry-src.is_contiguous()) 
+   !can_do_source_mods(inst))
+  return false;
+
+   if (entry-src.stride * inst-src[arg].stride  4)
   return false;
 
if (has_source_modifiers  entry-dst.type != inst-src[arg].type)
@@ -310,6 +314,7 

[Mesa-dev] [PATCH 05/23] i965: Unify fs_generator:: and vec4_generator::mark_surface_used as a free function.

2013-12-02 Thread Francisco Jerez
This way it can be used anywhere.  I need it from the visitor.
---
 src/mesa/drivers/dri/i965/brw_fs.h   |  2 --
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 28 +---
 src/mesa/drivers/dri/i965/brw_program.c  | 10 +
 src/mesa/drivers/dri/i965/brw_program.h  |  4 
 src/mesa/drivers/dri/i965/brw_shader.cpp |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4.h |  2 --
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 22 ++-
 7 files changed, 32 insertions(+), 38 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index ad165f0..f8fb1c5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -596,8 +596,6 @@ private:
   struct brw_reg dst,
   struct brw_reg surf_index);
 
-   void mark_surface_used(unsigned surf_index);
-
void patch_discard_jumps_to_fb_writes();
 
struct brw_context *brw;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 6626a8c..b5882a8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -59,15 +59,6 @@ fs_generator::~fs_generator()
 }
 
 void
-fs_generator::mark_surface_used(unsigned surf_index)
-{
-   assert(surf_index  BRW_MAX_SURFACES);
-
-   c-prog_data.base.binding_table.size_bytes =
-  MAX2(c-prog_data.base.binding_table.size_bytes, (surf_index + 1) * 4);
-}
-
-void
 fs_generator::patch_discard_jumps_to_fb_writes()
 {
if (brw-gen  6 || this-discard_halt_patches.is_empty())
@@ -187,7 +178,7 @@ fs_generator::generate_fb_write(fs_inst *inst)
eot,
inst-header_present);
 
-   mark_surface_used(surf_index);
+   brw_mark_surface_used(c-prog_data.base, surf_index);
 }
 
 /* Computes the integer pixel x,y values from the origin.
@@ -582,7 +573,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
  simd_mode,
  return_format);
 
-   mark_surface_used(surface_index);
+   brw_mark_surface_used(c-prog_data.base, surface_index);
 }
 
 
@@ -794,7 +785,7 @@ fs_generator::generate_uniform_pull_constant_load(fs_inst 
*inst,
brw_oword_block_read(p, dst, brw_message_reg(inst-base_mrf),
read_offset, surf_index);
 
-   mark_surface_used(surf_index);
+   brw_mark_surface_used(c-prog_data.base, surf_index);
 }
 
 void
@@ -836,7 +827,7 @@ 
fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
 
-   mark_surface_used(surf_index);
+   brw_mark_surface_used(c-prog_data.base, surf_index);
 }
 
 void
@@ -903,7 +894,7 @@ fs_generator::generate_varying_pull_constant_load(fs_inst 
*inst,
simd_mode,
return_format);
 
-   mark_surface_used(surf_index);
+   brw_mark_surface_used(c-prog_data.base, surf_index);
 }
 
 void
@@ -947,7 +938,7 @@ 
fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst,
simd_mode,
0);
 
-   mark_surface_used(surf_index);
+   brw_mark_surface_used(c-prog_data.base, surf_index);
 }
 
 /**
@@ -1255,7 +1246,8 @@ fs_generator::generate_shader_time_add(fs_inst *inst,
c-prog_data.base.binding_table.shader_time_start);
brw_pop_insn_state(p);
 
-   mark_surface_used(c-prog_data.base.binding_table.shader_time_start);
+   brw_mark_surface_used(c-prog_data.base,
+ c-prog_data.base.binding_table.shader_time_start);
 }
 
 void
@@ -1272,7 +1264,7 @@ fs_generator::generate_untyped_atomic(fs_inst *inst, 
struct brw_reg dst,
   atomic_op.dw1.ud, surf_index.dw1.ud,
   inst-mlen, dispatch_width / 8);
 
-   mark_surface_used(surf_index.dw1.ud);
+   brw_mark_surface_used(c-prog_data.base, surf_index.dw1.ud);
 }
 
 void
@@ -1286,7 +1278,7 @@ fs_generator::generate_untyped_surface_read(fs_inst 
*inst, struct brw_reg dst,
 surf_index.dw1.ud,
 inst-mlen, dispatch_width / 8);
 
-   mark_surface_used(surf_index.dw1.ud);
+   brw_mark_surface_used(c-prog_data.base, surf_index.dw1.ud);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index a494bc2..908782b 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -526,6 +526,16 @@ brw_destroy_shader_time(struct brw_context *brw)
brw-shader_time.bo = NULL;
 }
 
+void
+brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
+  unsigned surf_index)
+{
+   assert(surf_index < BRW_MAX_SURFACES);
+
+   prog_data-binding_table.size_bytes =
+  

[Mesa-dev] [PATCH 10/23] i965/fs: Remove fs_reg::sechalf.

2013-12-02 Thread Francisco Jerez
The same effect can be achieved using ::subreg_offset.  Remove the
less flexible alternative and define a convenience function to keep
the fs_reg interface sane.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   |  9 ++---
 src/mesa/drivers/dri/i965/brw_fs.h | 11 ++-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  2 --
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   |  4 +---
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e6e32fc..6b9c9b5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1186,22 +1186,18 @@ fs_visitor::emit_samplepos_setup(ir_variable *ir)
 
emit(MOV(int_sample_x, fs_reg(sample_pos_reg)));
if (dispatch_width == 16) {
-  int_sample_x.sechalf = true;
-  fs_inst *inst = emit(MOV(int_sample_x,
+  fs_inst *inst = emit(MOV(half(int_sample_x, 1),
fs_reg(suboffset(sample_pos_reg, 16))));
   inst-force_sechalf = true;
-  int_sample_x.sechalf = false;
}
/* Compute gl_SamplePosition.x */
compute_sample_position(pos, int_sample_x);
pos.reg_offset++;
emit(MOV(int_sample_y, fs_reg(suboffset(sample_pos_reg, 1))));
if (dispatch_width == 16) {
-  int_sample_y.sechalf = true;
-  fs_inst *inst = emit(MOV(int_sample_y,
+  fs_inst *inst = emit(MOV(half(int_sample_y, 1),
fs_reg(suboffset(sample_pos_reg, 17))));
   inst-force_sechalf = true;
-  int_sample_y.sechalf = false;
}
/* Compute gl_SamplePosition.y */
compute_sample_position(pos, int_sample_y);
@@ -2446,7 +2442,6 @@ fs_visitor::register_coalesce()
   new_src.abs = 1;
}
   new_src.negate ^= scan_inst-src[i].negate;
-  new_src.sechalf = scan_inst-src[i].sechalf;
new_src.subreg_offset +=
   scan_inst-src[i].subreg_offset * new_src.stride;
new_src.stride *= scan_inst-src[i].stride;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index e36943c..2c36d9f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -86,7 +86,6 @@ public:
 
bool negate;
bool abs;
-   bool sechalf;
int subreg_offset; /** Offset in bytes from the start of the register. */
int stride; /** Register region horizontal stride */
 
@@ -101,6 +100,16 @@ byte_offset(fs_reg reg, unsigned delta)
return reg;
 }
 
+/**
+ * Get either of the 8-component halves of a 16-component register.
+ */
+static inline fs_reg
+half(const fs_reg reg, unsigned idx)
+{
+   assert(idx == 0 || (reg.file != HW_REG && reg.file != IMM));
+   return byte_offset(reg, 8 * idx * reg.stride * type_sz(reg.type));
+}
+
 static const fs_reg reg_undef;
 static const fs_reg reg_null_f(retype(brw_null_reg(), BRW_REGISTER_TYPE_F));
 static const fs_reg reg_null_d(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 3972fda..8d310a1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -996,8 +996,6 @@ brw_reg_from_fs_reg(fs_reg *reg)
   }
 
   brw_reg = retype(brw_reg, reg-type);
-  if (reg-sechalf)
-brw_reg = sechalf(brw_reg);
   brw_reg = byte_offset(brw_reg, reg-subreg_offset);
   break;
case IMM:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8a76094..5693dbc 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2598,12 +2598,10 @@ fs_visitor::emit_color_write(int target, int index, int 
first_color_mrf)
 inst-saturate = c-key.clamp_fragment_color;
 pop_force_uncompressed();
 
-color.sechalf = true;
 inst = emit(MOV(fs_reg(MRF, first_color_mrf + index + 4, color.type),
- color));
+ half(color, 1)));
 inst-force_sechalf = true;
 inst-saturate = c-key.clamp_fragment_color;
-color.sechalf = false;
   }
}
 }
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/23] i965/fs: Use a separate variable to keep track of the last uniform index seen.

2013-12-02 Thread Francisco Jerez
Like the VEC4 back-end does.  It will make dynamic allocation of the
param_size array easier in a future commit.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 51 +---
 src/mesa/drivers/dri/i965/brw_fs.h   |  3 ++
 src/mesa/drivers/dri/i965/brw_fs_fp.cpp  |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 11 +++---
 4 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index eecde62..bc66e4a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -883,7 +883,7 @@ fs_visitor::setup_uniform_values(ir_variable *ir)
 * order we'd walk the type, so walk the list of storage and find anything
 * with our name, or the prefix of a component that starts with our name.
 */
-   unsigned params_before = c-prog_data.nr_params;
+   unsigned params_before = uniforms;
for (unsigned u = 0; u  shader_prog-NumUserUniformStorage; u++) {
   struct gl_uniform_storage *storage = shader_prog-UniformStorage[u];
 
@@ -899,14 +899,13 @@ fs_visitor::setup_uniform_values(ir_variable *ir)
  slots *= storage-array_elements;
 
   for (unsigned i = 0; i  slots; i++) {
- c-prog_data.param[c-prog_data.nr_params++] =
+ c-prog_data.param[uniforms++] =
 storage-storage[i].f;
   }
}
 
/* Make sure we actually initialized the right amount of stuff here. */
-   assert(params_before + ir-type-component_slots() ==
-  c-prog_data.nr_params);
+   assert(params_before + ir-type-component_slots() == uniforms);
(void)params_before;
 }
 
@@ -939,7 +938,7 @@ fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
break;
 last_swiz = swiz;
 
-c-prog_data.param[c-prog_data.nr_params++] =
+c-prog_data.param[uniforms++] =
 fp-Base.Parameters-ParameterValues[index][swiz].f;
   }
}
@@ -1368,13 +1367,17 @@ fs_visitor::emit_math(enum opcode opcode, fs_reg dst, 
fs_reg src0, fs_reg src1)
 void
 fs_visitor::assign_curb_setup()
 {
-   c-prog_data.curb_read_length = ALIGN(c-prog_data.nr_params, 8) / 8;
if (dispatch_width == 8) {
   c-prog_data.first_curbe_grf = c-nr_payload_regs;
+  c-prog_data.nr_params = uniforms;
} else {
   c-prog_data.first_curbe_grf_16 = c-nr_payload_regs;
+  /* Make sure we didn't try to sneak in an extra uniform */
+  assert(uniforms == 0);
}
 
+   c-prog_data.curb_read_length = ALIGN(c-prog_data.nr_params, 8) / 8;
+
/* Map the offsets in the UNIFORM file to fixed HW regs. */
foreach_list(node, this-instructions) {
   fs_inst *inst = (fs_inst *)node;
@@ -1695,10 +1698,10 @@ bool
 fs_visitor::remove_dead_constants()
 {
if (dispatch_width == 8) {
-  this-params_remap = ralloc_array(mem_ctx, int, c-prog_data.nr_params);
-  this-nr_params_remap = c-prog_data.nr_params;
+  this-params_remap = ralloc_array(mem_ctx, int, uniforms);
+  this-nr_params_remap = uniforms;
 
-  for (unsigned int i = 0; i  c-prog_data.nr_params; i++)
+  for (unsigned int i = 0; i  uniforms; i++)
 this-params_remap[i] = -1;
 
   /* Find which params are still in use. */
@@ -1716,7 +1719,7 @@ fs_visitor::remove_dead_constants()
 * Out-of-bounds reads return undefined values, which include
 * values from other variables of the active program or zero.
 */
-   if (constant_nr < 0 || constant_nr >= (int)c->prog_data.nr_params) {
+   if (constant_nr < 0 || constant_nr >= (int)uniforms) {
   constant_nr = 0;
}
 
@@ -1734,14 +1737,14 @@ fs_visitor::remove_dead_constants()
* now we don't care.
*/
   unsigned int new_nr_params = 0;
-  for (unsigned int i = 0; i  c-prog_data.nr_params; i++) {
+  for (unsigned int i = 0; i  uniforms; i++) {
 if (this-params_remap[i] != -1) {
this-params_remap[i] = new_nr_params++;
 }
   }
 
   /* Update the list of params to be uploaded to match our new numbering. 
*/
-  for (unsigned int i = 0; i  c-prog_data.nr_params; i++) {
+  for (unsigned int i = 0; i  uniforms; i++) {
 int remapped = this-params_remap[i];
 
 if (remapped == -1)
@@ -1750,7 +1753,7 @@ fs_visitor::remove_dead_constants()
 c-prog_data.param[remapped] = c-prog_data.param[i];
   }
 
-  c-prog_data.nr_params = new_nr_params;
+  uniforms = new_nr_params;
} else {
   /* This should have been generated in the 8-wide pass already. */
   assert(this-params_remap);
@@ -1794,9 +1797,9 @@ fs_visitor::remove_dead_constants()
 void
 fs_visitor::move_uniform_array_access_to_pull_constants()
 {
-   int pull_constant_loc[c-prog_data.nr_params];
+   int pull_constant_loc[uniforms];
 
-   for (unsigned int i = 0; i  c-prog_data.nr_params; i++) {
+   for (unsigned int i = 0; i  uniforms; i++) {
   

[Mesa-dev] [PATCH 07/23] i965/fs: Add support for sub-register byte offsets to the FS back-end IR.

2013-12-02 Thread Francisco Jerez
It would be nice if we could have a single 'reg_offset' field
expressed in bytes that would serve the purpose of both, but the
semantics of 'reg_offset' are quite complex currently (it's measured
in units of one, eight or sixteen dwords depending on the register
file and the dispatch width) and changing it to bytes would be a very
intrusive change at this stage.  Add a separate 'subreg_offset' field
for now.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 26 ++
 src/mesa/drivers/dri/i965/brw_fs.h |  9 
 .../drivers/dri/i965/brw_fs_copy_propagation.cpp   |  7 --
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  1 +
 src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp  | 20 -
 5 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 761e7e6..0caae2d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -437,6 +437,7 @@ fs_reg::equals(const fs_reg r) const
return (file == r.file &&
reg == r.reg &&
reg_offset == r.reg_offset &&
+   subreg_offset == r.subreg_offset &&
type == r.type &&
negate == r.negate &&
abs == r.abs &&
@@ -1368,7 +1369,9 @@ fs_visitor::assign_curb_setup()
  constant_nr % 8);
 
inst-src[i].file = HW_REG;
-   inst-src[i].fixed_hw_reg = retype(brw_reg, inst-src[i].type);
+   inst-src[i].fixed_hw_reg = byte_offset(
+   retype(brw_reg, inst-src[i].type),
+   inst-src[i].subreg_offset);
 }
   }
}
@@ -2328,7 +2331,8 @@ fs_visitor::register_coalesce()
  inst-saturate ||
  inst-dst.file != GRF || (inst-src[0].file != GRF 
inst-src[0].file != UNIFORM)||
- inst-dst.type != inst-src[0].type)
+ inst-dst.type != inst-src[0].type ||
+  inst-dst.subreg_offset != 0)
 continue;
 
   bool has_source_modifiers = (inst-src[0].abs ||
@@ -2418,6 +2422,7 @@ fs_visitor::register_coalesce()
}
   new_src.negate ^= scan_inst-src[i].negate;
   new_src.sechalf = scan_inst-src[i].sechalf;
+   new_src.subreg_offset += scan_inst-src[i].subreg_offset;
   scan_inst-src[i] = new_src;
}
 }
@@ -2452,7 +2457,8 @@ fs_visitor::compute_to_mrf()
  inst-is_partial_write() ||
  inst-dst.file != MRF || inst-src[0].file != GRF ||
  inst-dst.type != inst-src[0].type ||
- inst-src[0].abs || inst-src[0].negate || inst-src[0].smear != -1)
+ inst-src[0].abs || inst-src[0].negate ||
+  inst-src[0].smear != -1 || inst-src[0].subreg_offset)
 continue;
 
   /* Work out which hardware MRF registers are written by this
@@ -2971,8 +2977,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst)
switch (inst-dst.file) {
case GRF:
   printf(vgrf%d, inst-dst.reg);
-  if (inst-dst.reg_offset)
- printf(+%d, inst-dst.reg_offset);
+  if (inst-dst.reg_offset || inst-dst.subreg_offset)
+ printf(+%d.%d, inst-dst.reg_offset, inst-dst.subreg_offset);
   break;
case MRF:
   printf(m%d, inst-dst.reg);
@@ -3002,16 +3008,18 @@ fs_visitor::dump_instruction(backend_instruction 
*be_inst)
   switch (inst-src[i].file) {
   case GRF:
  printf(vgrf%d, inst-src[i].reg);
- if (inst-src[i].reg_offset)
-printf(+%d, inst-src[i].reg_offset);
+ if (inst-src[i].reg_offset || inst-src[i].subreg_offset)
+printf(+%d.%d, inst-src[i].reg_offset,
+   inst-src[i].subreg_offset);
  break;
   case MRF:
  printf(***m%d***, inst-src[i].reg);
  break;
   case UNIFORM:
  printf(u%d, inst-src[i].reg);
- if (inst-src[i].reg_offset)
-printf(.%d, inst-src[i].reg_offset);
+ if (inst-src[i].reg_offset || inst-src[i].subreg_offset)
+printf(+%d.%d, inst-src[i].reg_offset,
+   inst-src[i].subreg_offset);
  break;
   case BAD_FILE:
  printf((null));
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index dfd3b07..93a393d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -83,10 +83,19 @@ public:
bool abs;
bool sechalf;
int smear; /* -1, or a channel of the reg to smear to all channels. */
+   int subreg_offset; /** Offset in bytes from the start of the register. */
 
fs_reg *reladdr;
 };
 
+static inline fs_reg
+byte_offset(fs_reg reg, unsigned delta)
+{
+   assert(delta == 0 || (reg.file != HW_REG && reg.file != IMM));
+   reg.subreg_offset += delta;
+   return reg;
+}
+
 static const fs_reg reg_undef;
 static const fs_reg reg_null_f(retype(brw_null_reg(), 

[Mesa-dev] [PATCH 03/23] i965/vec4: Add constructor of src_reg from a fixed hardware reg.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 8 
 src/mesa/drivers/dri/i965/brw_vec4.h   | 1 +
 2 files changed, 9 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 73f91a0..4458e76 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -112,6 +112,14 @@ src_reg::src_reg(int32_t i)
this-imm.i = i;
 }
 
+src_reg::src_reg(struct brw_reg reg)
+{
+   init();
+
+   this-file = HW_REG;
+   this-fixed_hw_reg = reg;
+}
+
 src_reg::src_reg(dst_reg reg)
 {
init();
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5cec9f9..4b1e86a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -125,6 +125,7 @@ public:
src_reg(float f);
src_reg(uint32_t u);
src_reg(int32_t i);
+   src_reg(struct brw_reg reg);
 
bool equals(src_reg *r);
bool is_zero() const;
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/23] i965/vec4: Fix confusion between SWIZZLE and BRW_SWIZZLE macros.

2013-12-02 Thread Francisco Jerez
Most of the VEC4 back-end agrees on src_reg::swizzle being one of the
BRW_SWIZZLE macros defined in brw_reg.h, except in two places where we
use Mesa's SWIZZLE macros.  There is even a doxygen comment saying
that Mesa's macros are the right ones.  They are incompatible swizzle
representations (3 bits vs. 2 bits per component), and the code using
Mesa's macros works by pure luck.  Fix it.
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4.h   | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index e149f39..ab01b119 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -76,7 +76,7 @@ src_reg::src_reg(register_file file, int reg, const glsl_type 
*type)
if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
   this-swizzle = swizzle_for_size(type-vector_elements);
else
-  this-swizzle = SWIZZLE_XYZW;
+  this-swizzle = BRW_SWIZZLE_XYZW;
 }
 
 /** Generic unset register constructor. */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index a718333..720cc6e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -110,7 +110,7 @@ public:
 
explicit src_reg(dst_reg reg);
 
-   GLuint swizzle; /** SWIZZLE_XYZW swizzles from Mesa. */
+   GLuint swizzle; /** BRW_SWIZZLE_XYZW macros from brw_reg.h. */
bool negate;
bool abs;
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 180a138..c5990ee 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -473,14 +473,14 @@ vec4_visitor::emit_pack_half_2x16(dst_reg dst, src_reg 
src0)
/* Give the write-channels of dst the form:
 *   0xhhhh0000
 */
-   tmp_src.swizzle = SWIZZLE_Y;
+   tmp_src.swizzle = BRW_SWIZZLE_YYYY;
emit(SHL(dst, tmp_src, src_reg(16u)));
 
/* Finally, give the write-channels of dst the form of packHalf2x16's
 * output:
 *   0xhhhhllll
 */
-   tmp_src.swizzle = SWIZZLE_X;
+   tmp_src.swizzle = BRW_SWIZZLE_XXXX;
emit(OR(dst, src_reg(dst), tmp_src));
 }
 
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/23] i965/fs: Use offset() in the ARB_fragment_program code.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 131 +++-
 1 file changed, 62 insertions(+), 69 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
index b537a5a..5433023 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
@@ -30,13 +30,6 @@
 #include brw_context.h
 #include brw_fs.h
 
-static fs_reg
-regoffset(fs_reg reg, int i)
-{
-   reg.reg_offset += i;
-   return reg;
-}
-
 void
 fs_visitor::emit_fp_alu1(enum opcode opcode,
  const struct prog_instruction *fpi,
@@ -44,7 +37,7 @@ fs_visitor::emit_fp_alu1(enum opcode opcode,
 {
for (int i = 0; i  4; i++) {
   if (fpi-DstReg.WriteMask  (1  i))
- emit(opcode, regoffset(dst, i), regoffset(src, i));
+ emit(opcode, offset(dst, i), offset(src, i));
}
 }
 
@@ -55,8 +48,8 @@ fs_visitor::emit_fp_alu2(enum opcode opcode,
 {
for (int i = 0; i  4; i++) {
   if (fpi-DstReg.WriteMask  (1  i))
- emit(opcode, regoffset(dst, i),
-  regoffset(src0, i), regoffset(src1, i));
+ emit(opcode, offset(dst, i),
+  offset(src0, i), offset(src1, i));
}
 }
 
@@ -72,8 +65,8 @@ fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
 
for (int i = 0; i  4; i++) {
   if (fpi-DstReg.WriteMask  (1  i)) {
- emit_minmax(conditionalmod, regoffset(dst, i),
- regoffset(src0, i), regoffset(src1, i));
+ emit_minmax(conditionalmod, offset(dst, i),
+ offset(src0, i), offset(src1, i));
   }
}
 }
@@ -88,10 +81,10 @@ fs_visitor::emit_fp_sop(uint32_t conditional_mod,
   if (fpi-DstReg.WriteMask  (1  i)) {
  fs_inst *inst;
 
- emit(CMP(reg_null_d, regoffset(src0, i), regoffset(src1, i),
+ emit(CMP(reg_null_d, offset(src0, i), offset(src1, i),
   conditional_mod));
 
- inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f));
+ inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f));
  inst-predicate = BRW_PREDICATE_NORMAL;
   }
}
@@ -103,7 +96,7 @@ fs_visitor::emit_fp_scalar_write(const struct 
prog_instruction *fpi,
 {
for (int i = 0; i  4; i++) {
   if (fpi-DstReg.WriteMask  (1  i))
- emit(MOV(regoffset(dst, i), src));
+ emit(MOV(offset(dst, i), src));
}
 }
 
@@ -170,11 +163,11 @@ fs_visitor::emit_fragment_program_code()
 if (fpi-DstReg.WriteMask  (1  i)) {
fs_inst *inst;
 
-   emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
+   emit(CMP(null, offset(src[0], i), fs_reg(0.0f),
 BRW_CONDITIONAL_L));
 
-   inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
-   regoffset(src[1], i), regoffset(src[2], i));
+   inst = emit(BRW_OPCODE_SEL, offset(dst, i),
+   offset(src[1], i), offset(src[2], i));
inst-predicate = BRW_PREDICATE_NORMAL;
 }
  }
@@ -200,14 +193,14 @@ fs_visitor::emit_fragment_program_code()
  default: assert(!not reached); count = 0; break;
  }
 
- emit(MUL(acc, regoffset(src[0], 0), regoffset(src[1], 0)));
+ emit(MUL(acc, offset(src[0], 0), offset(src[1], 0)));
  for (int i = 1; i  count; i++) {
-emit(MUL(mul, regoffset(src[0], i), regoffset(src[1], i)));
+emit(MUL(mul, offset(src[0], i), offset(src[1], i)));
 emit(ADD(acc, acc, mul));
  }
 
  if (fpi-Opcode == OPCODE_DPH)
-emit(ADD(acc, acc, regoffset(src[1], 3)));
+emit(ADD(acc, acc, offset(src[1], 3)));
 
  emit_fp_scalar_write(fpi, dst, acc);
  break;
@@ -217,13 +210,13 @@ fs_visitor::emit_fragment_program_code()
  if (fpi-DstReg.WriteMask  WRITEMASK_X)
 emit(MOV(dst, fs_reg(1.0f)));
  if (fpi-DstReg.WriteMask  WRITEMASK_Y) {
-emit(MUL(regoffset(dst, 1),
- regoffset(src[0], 1), regoffset(src[1], 1)));
+emit(MUL(offset(dst, 1),
+ offset(src[0], 1), offset(src[1], 1)));
  }
  if (fpi-DstReg.WriteMask  WRITEMASK_Z)
-emit(MOV(regoffset(dst, 2), regoffset(src[0], 2)));
+emit(MOV(offset(dst, 2), offset(src[0], 2)));
  if (fpi-DstReg.WriteMask  WRITEMASK_W)
-emit(MOV(regoffset(dst, 3), regoffset(src[1], 3)));
+emit(MOV(offset(dst, 3), offset(src[1], 3)));
  break;
 
   case OPCODE_EX2:
@@ -257,7 +250,7 @@ fs_visitor::emit_fragment_program_code()
  * undiscarded pixels, and updates just those pixels to be
  * turned off.
  */
-fs_inst *cmp = emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
+fs_inst *cmp = emit(CMP(null, offset(src[0], i), fs_reg(0.0f),
   

[Mesa-dev] [PATCH 13/23] i965/fs: Take into account reg_offset consistently for MRF regs.

2013-12-02 Thread Francisco Jerez
Until now it was only being taken into account in the VEC4 back-end
but not in the FS back-end.  Do it in both cases.
---
 src/mesa/drivers/dri/i965/brw_fs.h |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 10 ++
 src/mesa/drivers/dri/i965/brw_shader.h |  7 ---
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 2c36d9f..f918f7e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -615,4 +615,4 @@ bool brw_do_channel_expressions(struct exec_list 
*instructions);
 bool brw_do_vector_splitting(struct exec_list *instructions);
 bool brw_fs_precompile(struct gl_context *ctx, struct gl_shader_program *prog);
 
-struct brw_reg brw_reg_from_fs_reg(fs_reg *reg);
+struct brw_reg brw_reg_from_fs_reg(fs_reg *reg, unsigned dispatch_width);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 8d310a1..1de59eb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -981,8 +981,9 @@ static uint32_t brw_file_from_reg(fs_reg *reg)
 }
 
 struct brw_reg
-brw_reg_from_fs_reg(fs_reg *reg)
+brw_reg_from_fs_reg(fs_reg *reg, unsigned dispatch_width)
 {
+   const int reg_size = 4 * dispatch_width;
struct brw_reg brw_reg;
 
switch (reg-file) {
@@ -996,7 +997,8 @@ brw_reg_from_fs_reg(fs_reg *reg)
   }
 
   brw_reg = retype(brw_reg, reg-type);
-  brw_reg = byte_offset(brw_reg, reg-subreg_offset);
+  brw_reg = byte_offset(brw_reg, (reg-subreg_offset +
+  reg-reg_offset * reg_size));
   break;
case IMM:
   switch (reg-type) {
@@ -1349,7 +1351,7 @@ fs_generator::generate_code(exec_list *instructions)
   }
 
   for (unsigned int i = 0; i  3; i++) {
-src[i] = brw_reg_from_fs_reg(inst-src[i]);
+src[i] = brw_reg_from_fs_reg(inst-src[i], dispatch_width);
 
 /* The accumulator result appears to get used for the
  * conditional modifier generation.  When negating a UD
@@ -1361,7 +1363,7 @@ fs_generator::generate_code(exec_list *instructions)
inst-src[i].type != BRW_REGISTER_TYPE_UD ||
!inst-src[i].negate);
   }
-  dst = brw_reg_from_fs_reg(inst-dst);
+  dst = brw_reg_from_fs_reg(inst-dst, dispatch_width);
 
   brw_set_conditionalmod(p, inst-conditional_mod);
   brw_set_predicate_control(p, inst-predicate);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
b/src/mesa/drivers/dri/i965/brw_shader.h
index f284389..d1357ce 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -61,9 +61,10 @@ public:
/**
 * Offset from the start of the contiguous register block.
 *
-* For pre-register-allocation GRFs, this is in units of a float per pixel
-* (1 hardware register for SIMD8 mode, or 2 registers for SIMD16 mode).
-* For uniforms, this is in units of 1 float.
+* For pre-register-allocation GRFs and MRFs, this is in units of a
+* float per pixel (1 hardware register for SIMD8 mode, or 2
+* registers for SIMD16 mode).  For uniforms, this is in units of 1
+* float.
 */
int reg_offset;
 
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/23] i965/vec4: Add non-mutating helper functions to modify src_reg::swizzle and ::negate.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_vec4.h | 24 
 1 file changed, 24 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index db1604a..8ce3a5e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -125,6 +125,30 @@ offset(src_reg reg, unsigned delta)
return reg;
 }
 
+/**
+ * Reswizzle a given source register.
+ * \sa brw_swizzle().
+ */
+static inline src_reg
+swizzle(src_reg reg, unsigned swizzle)
+{
+   assert(reg.file != HW_REG);
+   reg.swizzle = BRW_SWIZZLE4(
+  BRW_GET_SWZ(reg.swizzle, BRW_GET_SWZ(swizzle, 0)),
+  BRW_GET_SWZ(reg.swizzle, BRW_GET_SWZ(swizzle, 1)),
+  BRW_GET_SWZ(reg.swizzle, BRW_GET_SWZ(swizzle, 2)),
+  BRW_GET_SWZ(reg.swizzle, BRW_GET_SWZ(swizzle, 3)));
+   return reg;
+}
+
+static inline src_reg
+negate(src_reg reg)
+{
+   assert(reg.file != HW_REG && reg.file != IMM);
+   reg.negate = !reg.negate;
+   return reg;
+}
+
 class dst_reg : public backend_reg
 {
 public:
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/23] i965/vec4: Fix off-by-one register class overallocation.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
index 951560b..b19b40d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -121,7 +121,7 @@ brw_vec4_alloc_reg_set(struct brw_context *brw)
if (brw->gen >= 6)
   ra_set_allocate_round_robin(brw-vec4.regs);
ralloc_free(brw-vec4.classes);
-   brw-vec4.classes = ralloc_array(brw, int, class_count + 1);
+   brw-vec4.classes = ralloc_array(brw, int, class_count);
 
/* Now, add the registers to their classes, and add the conflicts
 * between them and the base GRF registers (and also each other).
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 20/23] i965/vec4: Use swizzle() in the ARB_vertex_program code.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_reg.h   |  2 ++
 src/mesa/drivers/dri/i965/brw_vec4_vp.cpp | 33 +--
 2 files changed, 11 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_reg.h 
b/src/mesa/drivers/dri/i965/brw_reg.h
index 66f6aad..37a2ca9 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -78,6 +78,8 @@ extern C {
 #define BRW_SWIZZLE_ZZZZ  BRW_SWIZZLE4(2,2,2,2)
 #define BRW_SWIZZLE_WWWW  BRW_SWIZZLE4(3,3,3,3)
 #define BRW_SWIZZLE_XYXY  BRW_SWIZZLE4(0,1,0,1)
+#define BRW_SWIZZLE_YZXW  BRW_SWIZZLE4(1,2,0,3)
+#define BRW_SWIZZLE_ZXYW  BRW_SWIZZLE4(2,0,1,3)
 #define BRW_SWIZZLE_ZWZW  BRW_SWIZZLE4(2,3,2,3)
 
 static inline bool
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
index 0ec37c1..f1000f2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
@@ -50,21 +50,6 @@ vec4_visitor::emit_vp_sop(uint32_t conditional_mod,
inst-predicate = BRW_PREDICATE_NORMAL;
 }
 
-/**
- * Reswizzle a given source register.
- * \sa brw_swizzle().
- */
-static inline src_reg
-reswizzle(src_reg orig, unsigned x, unsigned y, unsigned z, unsigned w)
-{
-   src_reg t = orig;
-   t.swizzle = BRW_SWIZZLE4(BRW_GET_SWZ(orig.swizzle, x),
-BRW_GET_SWZ(orig.swizzle, y),
-BRW_GET_SWZ(orig.swizzle, z),
-BRW_GET_SWZ(orig.swizzle, w));
-   return t;
-}
-
 void
 vec4_vs_visitor::emit_program_code()
 {
@@ -161,7 +146,7 @@ vec4_vs_visitor::emit_program_code()
 /* tmp_d = floor(src[0].x) */
 src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
 assert(tmp_d.type == BRW_REGISTER_TYPE_D);
-emit(RNDD(dst_reg(tmp_d), reswizzle(src[0], 0, 0, 0, 0)));
+emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)));
 
 /* result[0] = 2.0 ^ tmp */
 /* Adjust exponent for floating point: exp += 127 */
@@ -228,7 +213,7 @@ vec4_vs_visitor::emit_program_code()
 result.writemask = WRITEMASK_YZ;
 emit(MOV(result, src_reg(0.0f)));
 
-src_reg tmp_x = reswizzle(src[0], 0, 0, 0, 0);
+src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX);
 
 emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
 emit(IF(BRW_PREDICATE_NORMAL));
@@ -240,14 +225,14 @@ vec4_vs_visitor::emit_program_code()
 
 if (vpi-DstReg.WriteMask  WRITEMASK_Z) {
/* if (tmp.y  0) tmp.y = 0; */
-   src_reg tmp_y = reswizzle(src[0], 1, 1, 1, 1);
+   src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY);
result.writemask = WRITEMASK_Z;
emit_minmax(BRW_CONDITIONAL_G, result, tmp_y, src_reg(0.0f));
 
src_reg clamped_y(result);
clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
 
-   src_reg tmp_w = reswizzle(src[0], 3, 3, 3, 3);
+   src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW);
 
emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
 }
@@ -261,7 +246,7 @@ vec4_vs_visitor::emit_program_code()
  result.type = BRW_REGISTER_TYPE_UD;
  src_reg result_src = src_reg(result);
 
- src_reg arg0_ud = reswizzle(src[0], 0, 0, 0, 0);
+ src_reg arg0_ud = swizzle(src[0], BRW_SWIZZLE_XXXX);
  arg0_ud.type = BRW_REGISTER_TYPE_UD;
 
  /* Perform mant = frexpf(fabsf(x), exp), adjust exp and mnt
@@ -383,11 +368,11 @@ vec4_vs_visitor::emit_program_code()
  src_reg t2 = src_reg(this, glsl_type::vec4_type);
 
  emit(MUL(dst_reg(t1),
-  reswizzle(src[0], 1, 2, 0, 3),
-  reswizzle(src[1], 2, 0, 1, 3)));
+  swizzle(src[0], BRW_SWIZZLE_YZXW),
+  swizzle(src[1], BRW_SWIZZLE_ZXYW)));
  emit(MUL(dst_reg(t2),
-  reswizzle(src[0], 2, 0, 1, 3),
-  reswizzle(src[1], 1, 2, 0, 3)));
+  swizzle(src[0], BRW_SWIZZLE_ZXYW),
+  swizzle(src[1], BRW_SWIZZLE_YZXW)));
  t2.negate = true;
  emit(ADD(dst, t1, t2));
  break;
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 23/23] i965: Add polymorphic backend_visitor method to extract the result of a visit.

2013-12-02 Thread Francisco Jerez
This will be used by the generic implementation of the image and
atomic counter built-ins to extract the register location of its
arguments without having to be aware of the actual visitor type.
---
 src/mesa/drivers/dri/i965/brw_fs.h | 3 +++
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 7 +++
 src/mesa/drivers/dri/i965/brw_shader.h | 3 +++
 src/mesa/drivers/dri/i965/brw_vec4.h   | 3 +++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 +++
 5 files changed, 23 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 4f9767c..4ada075 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -237,6 +237,9 @@ public:
void visit(ir_emit_vertex *);
void visit(ir_end_primitive *);
 
+   /** Visit an instruction \p ir and extract the result. */
+   virtual fs_reg visit_result(ir_instruction *ir);
+
uint32_t gather_channel(ir_texture *ir, int sampler);
void swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler);
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index f45f089..b5957c6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -2874,6 +2874,13 @@ fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, 
fs_reg *reg)
*reg = temp;
 }
 
+fs_reg 
+fs_visitor::visit_result(ir_instruction *ir)
+{
+   ir->accept(this);
+   return result;
+}
+
 fs_visitor::fs_visitor(struct brw_context *brw,
struct brw_wm_compile *c,
struct gl_shader_program *shader_prog,
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
b/src/mesa/drivers/dri/i965/brw_shader.h
index c31f76d..48382af 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -141,6 +141,9 @@ public:
 */
exec_list instructions;
 
+   /** Visit an instruction \p ir and extract the result. */
+   virtual backend_reg visit_result(ir_instruction *ir) = 0;
+
virtual void dump_instruction(backend_instruction *inst) = 0;
void dump_instructions();
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 50e4794..355d497 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -354,6 +354,9 @@ public:
virtual void visit(ir_end_primitive *);
/*@}*/
 
+   /** Visit an instruction \p ir and extract the result. */
+   virtual src_reg visit_result(ir_instruction *ir);
+
src_reg result;
 
/* Regs for vertex results.  Generated at ir_variable visiting time
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 5b0ceea..97bd26d 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -3230,6 +3230,13 @@ vec4_visitor::resolve_ud_negate(src_reg *reg)
*reg = temp;
 }
 
+src_reg 
+vec4_visitor::visit_result(ir_instruction *ir)
+{
+   ir->accept(this);
+   return result;
+}
+
 vec4_visitor::vec4_visitor(struct brw_context *brw,
struct brw_vec4_compile *c,
struct gl_program *prog,
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 21/23] i965: Add helper function to find out the signedness of a register type.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_reg.h | 21 +
 1 file changed, 21 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_reg.h 
b/src/mesa/drivers/dri/i965/brw_reg.h
index 37a2ca9..2591cbf 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -154,6 +154,27 @@ type_sz(unsigned type)
}
 }
 
+static inline bool
+type_is_signed(unsigned type)
+{
+   switch(type) {
+   case BRW_REGISTER_TYPE_D:
+   case BRW_REGISTER_TYPE_F:
+   case BRW_REGISTER_TYPE_HF:
+   case BRW_REGISTER_TYPE_W:
+   case BRW_REGISTER_TYPE_B:
+  return true;
+
+   case BRW_REGISTER_TYPE_UD:
+   case BRW_REGISTER_TYPE_UW:
+   case BRW_REGISTER_TYPE_UB:
+  return false;
+
+   default:
+  unreachable();
+   }
+}
+
 /**
  * Construct a brw_reg.
  * \param file  one of the BRW_x_REGISTER_FILE values
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/23] i965: Have brw_imm_vf4() take the vector components as integer values.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_clip_util.c |  2 +-
 src/mesa/drivers/dri/i965/brw_reg.h   | 40 +++
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_clip_util.c 
b/src/mesa/drivers/dri/i965/brw_clip_util.c
index 24d053e..6533a29 100644
--- a/src/mesa/drivers/dri/i965/brw_clip_util.c
+++ b/src/mesa/drivers/dri/i965/brw_clip_util.c
@@ -224,7 +224,7 @@ void brw_clip_interp_vertex( struct brw_clip_compile *c,
   vec1(t_nopersp),
   brw_imm_f(0));
   brw_IF(p, BRW_EXECUTE_1);
-  brw_MOV(p, t_nopersp, brw_imm_vf4(VF_ONE, VF_ZERO, VF_ZERO, VF_ZERO));
+  brw_MOV(p, t_nopersp, brw_imm_vf4(1, 0, 0, 0));
   brw_ENDIF(p);
 
   /* Now compute t_nopersp = t_nopersp.y/t_nopersp.x and broadcast it. */
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h 
b/src/mesa/drivers/dri/i965/brw_reg.h
index 2591cbf..a1d25cf 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -43,6 +43,7 @@
 #define BRW_REG_H
 
 #include <stdbool.h>
+#include "main/imports.h"
 #include "program/prog_instruction.h"
 #include "main/compiler.h"
 #include "brw_defines.h"
@@ -475,19 +476,38 @@ brw_imm_vf(unsigned v)
return imm;
 }
 
-#define VF_ZERO 0x0
-#define VF_ONE  0x30
-#define VF_NEG  (17)
+/**
+ * Convert an integer into a restricted 8-bit float, used in vector
+ * immediates.  The 8-bit floating point format has a sign bit, an
+ * excess-3 3-bit exponent, and a 4-bit mantissa.  All integer values
+ * from -31 to 31 can be represented exactly.
+ */
+static inline uint8_t
+int_to_float8(int x)
+{
+   if (x == 0) {
+  return 0;
+   } else if (x < 0) {
+      return 1 << 7 | int_to_float8(-x);
+   } else {
+      const unsigned exponent = _mesa_logbase2(x);
+      const unsigned mantissa = (x - (1 << exponent)) << (4 - exponent);
+      assert(exponent <= 4);
+      return (exponent + 3) << 4 | mantissa;
+   }
+}
 
+/**
+ * Construct a floating-point packed vector immediate from its integer
+ * values. \sa int_to_float8()
+ */
 static inline struct brw_reg
-brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
+brw_imm_vf4(int v0, int v1, int v2, int v3)
 {
-   struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF);
-   imm.vstride = BRW_VERTICAL_STRIDE_0;
-   imm.width = BRW_WIDTH_4;
-   imm.hstride = BRW_HORIZONTAL_STRIDE_1;
-   imm.dw1.ud = ((v0  0) | (v1  8) | (v2  16) | (v3  24));
-   return imm;
+   return brw_imm_vf((int_to_float8(v0) << 0) |
+ (int_to_float8(v1) << 8) |
+ (int_to_float8(v2) << 16) |
+ (int_to_float8(v3) << 24));
 }
 
 
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/23] i965/vec4: Trivial improvements to the with_writemask() function.

2013-12-02 Thread Francisco Jerez
Add assertion that the register is not in the HW_REG or IMM file,
calculate the conjunction of the old and new mask instead of replacing
the old [consistent with the behavior of brw_writemask(), causes no
functional changes right now], make it static inline to let the
compiler do a slightly better job at optimizing things, and shorten
its name.
---
 src/mesa/drivers/dri/i965/brw_vec4.h  |  9 +++--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp| 11 +--
 src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 12 ++--
 3 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 19de4c6..50e4794 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -192,8 +192,13 @@ offset(dst_reg reg, unsigned delta)
return reg;
 }
 
-dst_reg
-with_writemask(dst_reg const r, int mask);
+static inline dst_reg
+writemask(dst_reg reg, unsigned mask)
+{
+   assert(reg.file != HW_REG && reg.file != IMM);
+   reg.writemask &= mask;
+   return reg;
+}
 
 class vec4_instruction : public backend_instruction {
 public:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c5990ee..5b0ceea 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -928,15 +928,6 @@ vec4_visitor::emit_if_gen6(ir_if *ir)
emit(IF(this-result, src_reg(0), BRW_CONDITIONAL_NZ));
 }
 
-dst_reg
-with_writemask(dst_reg const  r, int mask)
-{
-   dst_reg result = r;
-   result.writemask = mask;
-   return result;
-}
-
-
 void
 vec4_visitor::visit(ir_variable *ir)
 {
@@ -2463,7 +2454,7 @@ vec4_visitor::visit(ir_texture *ir)
   if (type-sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE 
   type-sampler_array) {
  emit_math(SHADER_OPCODE_INT_QUOTIENT,
-   with_writemask(inst-dst, WRITEMASK_Z),
+   writemask(inst-dst, WRITEMASK_Z),
src_reg(inst-dst), src_reg(6));
   }
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index 31c42c4..4ab75c1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -59,8 +59,8 @@ vec4_vs_visitor::emit_prolog()
 if (sign_recovery_shift.file == BAD_FILE) {
/* shift constant: 22,22,22,30 */
sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
-   emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_XYZ), 
src_reg(22u)));
-   emit(MOV(with_writemask(sign_recovery_shift, WRITEMASK_W), 
src_reg(30u)));
+   emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), 
src_reg(22u)));
+   emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), 
src_reg(30u)));
 }
 
 emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
@@ -87,9 +87,9 @@ vec4_vs_visitor::emit_prolog()
if (es3_normalize_factor.file == BAD_FILE) {
   /* mul constant: 1 / (2^(b-1) - 1) */
   es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
-  emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_XYZ),
+  emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ),
src_reg(1.0f / ((1<<9) - 1))));
-  emit(MOV(with_writemask(es3_normalize_factor, WRITEMASK_W),
+  emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W),
src_reg(1.0f / ((1<<1) - 1))));
}
 
@@ -113,9 +113,9 @@ vec4_vs_visitor::emit_prolog()
if (normalize_factor.file == BAD_FILE) {
   /* 1 / (2^b - 1) for b=10,10,10,2 */
   normalize_factor = dst_reg(this, glsl_type::vec4_type);
-  emit(MOV(with_writemask(normalize_factor, WRITEMASK_XYZ),
+  emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ),
src_reg(1.0f / ((1<<10) - 1))));
-  emit(MOV(with_writemask(normalize_factor, WRITEMASK_W),
+  emit(MOV(writemask(normalize_factor, WRITEMASK_W),
src_reg(1.0f / ((1<<2) - 1))));
}
 
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/25] i965: Allocate binding table space for shader images.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_context.h  | 5 +
 src/mesa/drivers/dri/i965/brw_shader.cpp | 7 +++
 2 files changed, 12 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 1387aa9..3f2edcf 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -345,6 +345,7 @@ struct brw_stage_prog_data {
   uint32_t gather_texture_start;
   uint32_t ubo_start;
   uint32_t abo_start;
+  uint32_t image_start;
   uint32_t shader_time_start;
   /** @} */
} binding_table;
@@ -656,6 +657,9 @@ struct brw_gs_prog_data
 /** Max number of atomic counter buffer objects in a shader */
 #define BRW_MAX_ABO 16
 
+/** Max number of image units in a shader */
+#define BRW_MAX_IMAGES 16
+
 /**
  * Max number of binding table entries used for stream output.
  *
@@ -688,6 +692,7 @@ struct brw_gs_prog_data
 BRW_MAX_TEX_UNIT * 2 + /* normal, gather */ \
 12 + /* ubo */  \
 BRW_MAX_ABO +   \
+BRW_MAX_IMAGES +\
 2 /* shader time, pull constants */)
 
 #define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 8e41160..128354a 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -722,6 +722,13 @@ 
backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table
   stage_prog_data-binding_table.abo_start = 0xd0d0d0d0;
}
 
+   if (shader && shader->base.NumImages) {
+      stage_prog_data->binding_table.image_start = next_binding_table_offset;
+      next_binding_table_offset += shader->base.NumImages;
+   } else {
+      stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
+   }
+
/* This may or may not be used depending on how the compile goes. */
stage_prog_data-binding_table.pull_constants_start = 
next_binding_table_offset;
next_binding_table_offset++;
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/23] i965: Define common register base class shared between both back-ends.

2013-12-02 Thread Francisco Jerez
This defines a backend_reg class that contains most of the common
member variables present in the fs and vec4 register classes.  It's
simply a location in the register file and doesn't know about indirect
addressing or funky align1/align16 access modes.

Some of the image lowering code coming up is going to be shared
between fs and vec4 so we need some common register representation to
pass values around.  The code is complicated enough that I'd hate to
duplicate it [or see someone else duplicate it] just because both
back-ends disagree on the register types.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 32 +++-
 src/mesa/drivers/dri/i965/brw_fs.h | 31 +---
 src/mesa/drivers/dri/i965/brw_reg.h|  1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp   | 53 
 src/mesa/drivers/dri/i965/brw_shader.h | 40 +++
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 58 +++---
 src/mesa/drivers/dri/i965/brw_vec4.h   | 29 ++-
 .../drivers/dri/i965/brw_vec4_reg_allocate.cpp |  2 +-
 8 files changed, 125 insertions(+), 121 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 37e531d..761e7e6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -425,6 +425,12 @@ fs_reg::fs_reg(struct brw_reg fixed_hw_reg)
this-type = fixed_hw_reg.type;
 }
 
+fs_reg::fs_reg(const backend_reg &reg)
+{
+   init();
+   *static_cast<backend_reg *>(this) = reg;
+}
+
 bool
 fs_reg::equals(const fs_reg r) const
 {
@@ -450,32 +456,6 @@ fs_reg::retype(uint32_t type)
 }
 
 bool
-fs_reg::is_zero() const
-{
-   if (file != IMM)
-  return false;
-
-   return type == BRW_REGISTER_TYPE_F ? imm.f == 0.0 : imm.i == 0;
-}
-
-bool
-fs_reg::is_one() const
-{
-   if (file != IMM)
-  return false;
-
-   return type == BRW_REGISTER_TYPE_F ? imm.f == 1.0 : imm.i == 1;
-}
-
-bool
-fs_reg::is_null() const
-{
-   return file == HW_REG 
-  fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE 
-  fixed_hw_reg.nr == BRW_ARF_NULL;
-}
-
-bool
 fs_reg::is_valid_3src() const
 {
return file == GRF || file == UNIFORM;
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index f8fb1c5..dfd3b07 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -59,7 +59,7 @@ namespace brw {
class fs_live_variables;
 }
 
-class fs_reg {
+class fs_reg : public backend_reg {
 public:
DECLARE_RALLOC_CXX_OPERATORS(fs_reg)
 
@@ -70,47 +70,20 @@ public:
fs_reg(int32_t i);
fs_reg(uint32_t u);
fs_reg(struct brw_reg fixed_hw_reg);
+   fs_reg(const backend_reg reg);
fs_reg(enum register_file file, int reg);
fs_reg(enum register_file file, int reg, uint32_t type);
fs_reg(class fs_visitor *v, const struct glsl_type *type);
 
bool equals(const fs_reg r) const;
-   bool is_zero() const;
-   bool is_one() const;
-   bool is_null() const;
bool is_valid_3src() const;
fs_reg retype(uint32_t type);
 
-   /** Register file: GRF, MRF, IMM. */
-   enum register_file file;
-   /**
-* Register number.  For MRF, it's the hardware register.  For
-* GRF, it's a virtual register number until register allocation
-*/
-   int reg;
-   /**
-* Offset from the start of the contiguous register block.
-*
-* For pre-register-allocation GRFs, this is in units of a float per pixel
-* (1 hardware register for SIMD8 mode, or 2 registers for SIMD16 mode).
-* For uniforms, this is in units of 1 float.
-*/
-   int reg_offset;
-   /** Register type.  BRW_REGISTER_TYPE_* */
-   int type;
bool negate;
bool abs;
bool sechalf;
-   struct brw_reg fixed_hw_reg;
int smear; /* -1, or a channel of the reg to smear to all channels. */
 
-   /** Value for file == IMM */
-   union {
-  int32_t i;
-  uint32_t u;
-  float f;
-   } imm;
-
fs_reg *reladdr;
 };
 
diff --git a/src/mesa/drivers/dri/i965/brw_reg.h 
b/src/mesa/drivers/dri/i965/brw_reg.h
index 548d677..66f6aad 100644
--- a/src/mesa/drivers/dri/i965/brw_reg.h
+++ b/src/mesa/drivers/dri/i965/brw_reg.h
@@ -44,6 +44,7 @@
 
 #include <stdbool.h>
 #include "program/prog_instruction.h"
+#include "main/compiler.h"
 #include "brw_defines.h"
 
 #ifdef __cplusplus
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 6ce7653..8e41160 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -536,6 +536,59 @@ brw_instruction_name(enum opcode op)
}
 }
 
+backend_reg::backend_reg() :
+   file(BAD_FILE),
+   reg(0), reg_offset(0),
+   type(BRW_REGISTER_TYPE_UD),
+   fixed_hw_reg(),
+   imm()
+{
+
+}
+
+backend_reg::backend_reg(struct brw_reg fixed_hw_reg) :
+   file(HW_REG),
+   reg(0), reg_offset(0),
+   type(BRW_REGISTER_TYPE_UD),
+   fixed_hw_reg(fixed_hw_reg),

[Mesa-dev] [PATCH 00/23] i965 compiler fixes in preparation for ARB_shader_image_load_store.

2013-12-02 Thread Francisco Jerez
This is a series of assorted clean-up and refactoring changes in
preparation for the actual series implementing shader images.  The
most important changes in this series are PATCH 06, that defines the
register representation class used by the image built-in lowering
code, and PATCH 07-08, that add support for specifying register region
strides and sub-register offsets in the FS IR.  The latter make a
somewhat more efficient form of pixel packing and unpacking possible
for unsupported formats by using register regions instead of binary
shifting and masking.

I'll send out the actual implementation of ARB_shader_image_load_store
in a few minutes.

Thanks.

[PATCH 01/23] i965/fs: Use a separate variable to keep track of the last 
uniform index seen.
[PATCH 02/23] i965/fs: Allocate the param_size array dynamically.
[PATCH 03/23] i965/vec4: Add constructor of src_reg from a fixed hardware reg.
[PATCH 04/23] i965: Move up duplicated fields from stage-specific prog_data to 
brw_stage_prog_data.
[PATCH 05/23] i965: Unify fs_generator:: and vec4_generator::mark_surface_used 
as a free function.
[PATCH 06/23] i965: Define common register base class shared between both 
back-ends.
[PATCH 07/23] i965/fs: Add support for sub-register byte offsets to the FS 
back-end IR.
[PATCH 08/23] i965/fs: Add support for specifying register horizontal strides.
[PATCH 09/23] i965/fs: Remove fs_reg::smear.
[PATCH 10/23] i965/fs: Remove fs_reg::sechalf.
[PATCH 11/23] i965/vec4: Fix confusion between SWIZZLE and BRW_SWIZZLE macros.
[PATCH 12/23] i965/vec4: Fix off-by-one register class overallocation.
[PATCH 13/23] i965/fs: Take into account reg_offset consistently for MRF regs.
[PATCH 14/23] i965: Add non-mutating helper functions to modify the register 
offset.
[PATCH 15/23] i965/vec4: Add non-mutating helper functions to modify 
src_reg::swizzle and ::negate.
[PATCH 16/23] i965: Make sure that backend_reg::type and brw_reg::type are 
consistent for fixed regs.
[PATCH 17/23] i965/vec4: Trivial improvements to the with_writemask() function.
[PATCH 18/23] i965/fs: Remove fs_reg::retype.
[PATCH 19/23] i965/fs: Use offset() in the ARB_fragment_program code.
[PATCH 20/23] i965/vec4: Use swizzle() in the ARB_vertex_program code.
[PATCH 21/23] i965: Add helper function to find out the signedness of a 
register type.
[PATCH 22/23] i965: Have brw_imm_vf4() take the vector components as integer 
values.
[PATCH 23/23] i965: Add polymorphic backend_visitor method to extract the 
result of a visit.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/23] i965: Move up duplicated fields from stage-specific prog_data to brw_stage_prog_data.

2013-12-02 Thread Francisco Jerez
There doesn't seem to be any reason for nr_params, nr_pull_params,
param, and pull_param to be duplicated in the stage-specific
subclasses of brw_stage_prog_data.  Moving their definition to the
common base class will allow some code sharing in a future commit, the
removal of brw_vec4_prog_data_compare and brw_*_prog_data_free, and
the simplification of the stage-specific brw_*_prog_data_compare.
---
 src/mesa/drivers/dri/i965/brw_context.h  | 32 ++-
 src/mesa/drivers/dri/i965/brw_curbe.c| 12 +++---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 38 +-
 src/mesa/drivers/dri/i965/brw_fs_fp.cpp  |  2 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp |  6 +--
 src/mesa/drivers/dri/i965/brw_program.c  | 28 +
 src/mesa/drivers/dri/i965/brw_program.h  |  6 +++
 src/mesa/drivers/dri/i965/brw_state_cache.c  |  6 +--
 src/mesa/drivers/dri/i965/brw_vec4.cpp   | 51 ++--
 src/mesa/drivers/dri/i965/brw_vec4.h |  3 --
 src/mesa/drivers/dri/i965/brw_vec4_gs.c  | 21 --
 src/mesa/drivers/dri/i965/brw_vec4_gs.h  |  1 -
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 14 +++
 src/mesa/drivers/dri/i965/brw_vec4_vp.cpp|  6 +--
 src/mesa/drivers/dri/i965/brw_vs.c   | 20 +++---
 src/mesa/drivers/dri/i965/brw_vs.h   |  1 -
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 10 ++---
 src/mesa/drivers/dri/i965/brw_wm.c   | 28 +
 src/mesa/drivers/dri/i965/brw_wm.h   |  1 -
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |  8 ++--
 src/mesa/drivers/dri/i965/gen6_vs_state.c| 10 ++---
 src/mesa/drivers/dri/i965/gen6_wm_state.c| 14 +++
 src/mesa/drivers/dri/i965/gen7_wm_state.c|  2 +-
 23 files changed, 146 insertions(+), 174 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 97f8906..1387aa9 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -330,8 +330,7 @@ struct brw_shader {
 };
 
 /* Note: If adding fields that need anything besides a normal memcmp() for
- * comparing them, be sure to go fix the the stage-specific
- * prog_data_compare().
+ * comparing them, be sure to go fix brw_stage_prog_data_compare().
  */
 struct brw_stage_prog_data {
struct {
@@ -349,6 +348,18 @@ struct brw_stage_prog_data {
   uint32_t shader_time_start;
   /** @} */
} binding_table;
+
+   GLuint nr_params;   /** number of float params/constants */
+   GLuint nr_pull_params;
+
+   /* Pointers to tracked values (only valid once
+* _mesa_load_state_parameters has been called at runtime).
+*
+* These must be the last fields of the struct (see
+* brw_stage_prog_data_compare()).
+*/
+   const float **param;
+   const float **pull_param;
 };
 
 /* Data about a particular attempt to compile a program.  Note that
@@ -379,8 +390,6 @@ struct brw_wm_prog_data {
   /** @} */
} binding_table;
 
-   GLuint nr_params;   /** number of float params/constants */
-   GLuint nr_pull_params;
bool dual_src_blend;
bool uses_pos_offset;
bool uses_omask;
@@ -398,15 +407,6 @@ struct brw_wm_prog_data {
 * For varying slots that are not used by the FS, the value is -1.
 */
int urb_setup[VARYING_SLOT_MAX];
-
-   /* Pointers to tracked values (only valid once
-* _mesa_load_state_parameters has been called at runtime).
-*
-* These must be the last fields of the struct (see
-* brw_wm_prog_data_compare()).
-*/
-   const float **param;
-   const float **pull_param;
 };
 
 /**
@@ -588,8 +588,6 @@ struct brw_vec4_prog_data {
GLuint curb_read_length;
GLuint urb_read_length;
GLuint total_grf;
-   GLuint nr_params;   /** number of float params/constants */
-   GLuint nr_pull_params; /** number of dwords referenced by pull_param[] */
GLuint total_scratch;
 
/* Used for calculating urb partitions.  In the VS, this is the size of the
@@ -597,10 +595,6 @@ struct brw_vec4_prog_data {
 * is the size of the URB entry used for output.
 */
GLuint urb_entry_size;
-
-   /* These pointers must appear last.  See brw_vec4_prog_data_compare(). */
-   const float **param;
-   const float **pull_param;
 };
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c 
b/src/mesa/drivers/dri/i965/brw_curbe.c
index e0b4083..2644cb6 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -57,10 +57,10 @@ static void calculate_curbe_offsets( struct brw_context 
*brw )
 {
struct gl_context *ctx = brw-ctx;
/* CACHE_NEW_WM_PROG */
-   const GLuint nr_fp_regs = (brw-wm.prog_data-nr_params + 15) / 16;
+   const GLuint nr_fp_regs = (brw-wm.prog_data-base.nr_params + 15) / 16;

/* BRW_NEW_VERTEX_PROGRAM */
-   const GLuint nr_vp_regs = 

[Mesa-dev] [PATCH 02/25] i965/gen7: Factor out texture surface state set-up from gen7_update_texture_surface().

2013-12-02 Thread Francisco Jerez
This moves most of the surface state set-up logic that can be shared
between textures and shader images to a separate function.
---
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 102 +-
 1 file changed, 62 insertions(+), 40 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index c3bd96d..3dc0cb7 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -268,46 +268,35 @@ gen7_emit_buffer_surface_state(struct brw_context *brw,
 }
 
 static void
-gen7_update_texture_surface(struct gl_context *ctx,
-unsigned unit,
-uint32_t *surf_offset,
-bool for_gather)
+gen7_emit_texture_surface_state(struct brw_context *brw,
+struct gl_texture_object *obj,
+unsigned min_array_element,
+unsigned max_array_element,
+unsigned min_level,
+unsigned max_level,
+unsigned format,
+uint32_t *surf_offset,
+bool rw, bool for_gather)
 {
-   struct brw_context *brw = brw_context(ctx);
-   struct gl_texture_object *tObj = ctx-Texture.Unit[unit]._Current;
-   struct intel_texture_object *intelObj = intel_texture_object(tObj);
+   struct intel_texture_object *intelObj = intel_texture_object(obj);
struct intel_mipmap_tree *mt = intelObj-mt;
-   struct gl_texture_image *firstImage = tObj-Image[0][tObj-BaseLevel];
-   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
-
-   if (tObj-Target == GL_TEXTURE_BUFFER) {
-  brw_update_buffer_texture_surface(ctx, unit, surf_offset);
-  return;
-   }
-
+   const unsigned depth = max_array_element - min_array_element;
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
 8 * 4, 32, surf_offset);
-   memset(surf, 0, 8 * 4);
-
-   uint32_t tex_format = translate_tex_format(brw,
-  mt-format,
-  tObj-DepthMode,
-  sampler-sRGBDecode);
 
-   if (for_gather  tex_format == BRW_SURFACEFORMAT_R32G32_FLOAT)
-  tex_format = BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
+   memset(surf, 0, 8 * 4);
 
-   surf[0] = translate_tex_target(tObj-Target)  BRW_SURFACE_TYPE_SHIFT |
- tex_format  BRW_SURFACE_FORMAT_SHIFT |
- gen7_surface_tiling_mode(mt-region-tiling) |
- BRW_SURFACE_CUBEFACE_ENABLES;
+   surf[0] = translate_tex_target(obj-Target)  BRW_SURFACE_TYPE_SHIFT |
+  format  BRW_SURFACE_FORMAT_SHIFT |
+  gen7_surface_tiling_mode(mt-region-tiling) |
+  BRW_SURFACE_CUBEFACE_ENABLES;
 
if (mt-align_h == 4)
   surf[0] |= GEN7_SURFACE_VALIGN_4;
if (mt-align_w == 8)
   surf[0] |= GEN7_SURFACE_HALIGN_8;
 
-   if (mt-logical_depth0  1  tObj-Target != GL_TEXTURE_3D)
+   if (mt-logical_depth0  1  obj-Target != GL_TEXTURE_3D)
   surf[0] |= GEN7_SURFACE_IS_ARRAY;
 
if (mt-array_spacing_lod0)
@@ -317,30 +306,32 @@ gen7_update_texture_surface(struct gl_context *ctx,
 
surf[2] = SET_FIELD(mt-logical_width0 - 1, GEN7_SURFACE_WIDTH) |
  SET_FIELD(mt-logical_height0 - 1, GEN7_SURFACE_HEIGHT);
-   surf[3] = SET_FIELD(mt-logical_depth0 - 1, BRW_SURFACE_DEPTH) |
- ((intelObj-mt-region-pitch) - 1);
+   surf[3] = SET_FIELD(depth - 1, BRW_SURFACE_DEPTH) |
+ ((mt-region-pitch) - 1);
 
-   surf[4] = gen7_surface_msaa_bits(mt-num_samples, mt-msaa_layout);
+   surf[4] = gen7_surface_msaa_bits(mt-num_samples, mt-msaa_layout) |
+ min_array_element  GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT |
+ (depth - 1)  GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT;
 
surf[5] = (SET_FIELD(GEN7_MOCS_L3, GEN7_SURFACE_MOCS) |
-  SET_FIELD(tObj-BaseLevel - mt-first_level,
+  SET_FIELD(min_level - mt-first_level,
 GEN7_SURFACE_MIN_LOD) |
-  /* mip count */
-  (intelObj-_MaxLevel - tObj-BaseLevel));
+  /* mip count */ (max_level - min_level));
 
if (brw-is_haswell) {
   /* Handling GL_ALPHA as a surface format override breaks 1.30+ style
* texturing functions that return a float, as our code generation always
* selects the .x channel (which would always be 0).
*/
-  const bool alpha_depth = tObj-DepthMode == GL_ALPHA 
+  struct gl_texture_image *firstImage = obj-Image[0][obj-BaseLevel];
+  const bool alpha_depth = obj-DepthMode == GL_ALPHA 
  (firstImage-_BaseFormat == GL_DEPTH_COMPONENT ||
   firstImage-_BaseFormat == GL_DEPTH_STENCIL);
 
   const int swizzle = unlikely(alpha_depth)
- 

[Mesa-dev] [PATCH 11/25] i965: Add the untyped surface write opcode.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_defines.h|  1 +
 src/mesa/drivers/dri/i965/brw_eu.h |  8 
 src/mesa/drivers/dri/i965/brw_eu_emit.c| 49 ++
 src/mesa/drivers/dri/i965/brw_fs.cpp   |  1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  6 +++
 .../drivers/dri/i965/brw_schedule_instructions.cpp |  1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp   |  1 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp |  1 +
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp   |  6 +++
 9 files changed, 74 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 67a2aaa..988b07e 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -778,6 +778,7 @@ enum opcode {
 
SHADER_OPCODE_UNTYPED_ATOMIC,
SHADER_OPCODE_UNTYPED_SURFACE_READ,
+   SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
 
SHADER_OPCODE_GEN4_SCRATCH_READ,
SHADER_OPCODE_GEN4_SCRATCH_WRITE,
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index 45b421b..e17dc49 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -375,6 +375,14 @@ brw_untyped_surface_read(struct brw_compile *p,
  unsigned msg_length,
  unsigned num_channels);
 
+void
+brw_untyped_surface_write(struct brw_compile *p,
+  struct brw_reg dst,
+  struct brw_reg mrf,
+  struct brw_reg surface,
+  unsigned msg_length,
+  unsigned num_channels);
+
 /*** 
  * brw_eu_util.c:
  */
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index b94a6d1..13dd59a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2704,6 +2704,55 @@ brw_untyped_surface_read(struct brw_compile *p,
brw_send_indirect_message(p, sfid, dst, mrf, desc);
 }
 
+static void
+brw_set_dp_untyped_surface_write_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ unsigned num_channels)
+{
+   insn-bits3.gen7_dp.msg_type = (p-brw-is_haswell ?
+   HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE 
:
+   GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE);
+
+   if (p->current->header.access_mode == BRW_ALIGN_1) {
+  if (p->compressed)
+ insn->bits3.ud |= 1 << 12; /* SIMD16 mode */
+  else
+ insn->bits3.ud |= 2 << 12; /* SIMD8 mode */
+   } else {
+  if (p->brw->is_haswell)
+ insn->bits3.ud |= 2 << 12; /* SIMD4x2 mode */
+  else
+ insn->bits3.ud |= 2 << 12; /* SIMD8 mode */
+   }
+
+   /* Set mask of 32-bit channels to drop. */
+   insn->bits3.ud |= (0xf & (0xf << num_channels)) << 8;
+}
+
+void
+brw_untyped_surface_write(struct brw_compile *p,
+  struct brw_reg dst,
+  struct brw_reg mrf,
+  struct brw_reg surface,
+  unsigned msg_length,
+  unsigned num_channels)
+{
+   const unsigned sfid = (p-brw-is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 :
+  GEN7_SFID_DATAPORT_DATA_CACHE);
+   const bool header_present = p-current-header.access_mode == BRW_ALIGN_1;
+   struct brw_reg desc = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
+   struct brw_instruction *insn;
+
+   insn = brw_load_indirect_message_descriptor(
+  p, desc, surface, msg_length, 0,
+  header_present);
+
+   brw_set_dp_untyped_surface_write_message(
+  p, insn, num_channels);
+
+   brw_send_indirect_message(p, sfid, dst, mrf, desc);
+}
+
 /**
  * This instruction is generated as a single-channel align1 instruction by
  * both the VS and FS stages when using INTEL_DEBUG=shader_time.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 4408cbe..721162f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -781,6 +781,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
   return 2;
case SHADER_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
   return 0;
default:
   assert(!not reached);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 0d50051..2ebb90a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1690,6 +1690,12 @@ fs_generator::generate_code(exec_list *instructions)
   src[0], inst-mlen, src[1].dw1.ud);
  break;
 
+  case 

[Mesa-dev] [PATCH 07/25] i965/gen7.5: Poke the 3DSTATE UAV access enable bits.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_defines.h   |  3 +++
 src/mesa/drivers/dri/i965/gen7_gs_state.c |  4 +++-
 src/mesa/drivers/dri/i965/gen7_vs_state.c | 13 -
 src/mesa/drivers/dri/i965/gen7_wm_state.c |  3 +++
 4 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 597d3b2..b73e8d0 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1427,6 +1427,7 @@ enum brw_message_target {
 # define GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT   18
 # define GEN6_VS_FLOATING_POINT_MODE_IEEE_754  (0  16)
 # define GEN6_VS_FLOATING_POINT_MODE_ALT   (1  16)
+# define HSW_VS_UAV_ACCESS_ENABLE   (1  12)
 /* DW4 */
 # define GEN6_VS_DISPATCH_START_GRF_SHIFT  20
 # define GEN6_VS_URB_READ_LENGTH_SHIFT 11
@@ -1446,6 +1447,7 @@ enum brw_message_target {
 # define GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT   18
 # define GEN6_GS_FLOATING_POINT_MODE_IEEE_754  (0  16)
 # define GEN6_GS_FLOATING_POINT_MODE_ALT   (1  16)
+# define HSW_GS_UAV_ACCESS_ENABLE  (1  12)
 /* DW4 */
 # define GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT  23
 # define GEN7_GS_OUTPUT_TOPOLOGY_SHIFT 17
@@ -1817,6 +1819,7 @@ enum brw_wm_barycentric_interp_mode {
 # define GEN7_PS_RENDER_TARGET_FAST_CLEAR_ENABLE   (1  8)
 # define GEN7_PS_DUAL_SOURCE_BLEND_ENABLE  (1  7)
 # define GEN7_PS_RENDER_TARGET_RESOLVE_ENABLE  (1  6)
+# define HSW_PS_UAV_ACCESS_ENABLE  (1  5)
 # define GEN7_PS_POSOFFSET_NONE(0  3)
 # define GEN7_PS_POSOFFSET_CENTROID(2  3)
 # define GEN7_PS_POSOFFSET_SAMPLE  (3  3)
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c 
b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index d2ba354..ea724f1 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -102,7 +102,9 @@ upload_gs_state(struct brw_context *brw)
   OUT_BATCH(((ALIGN(stage_state-sampler_count, 4)/4) 
  GEN6_GS_SAMPLER_COUNT_SHIFT) |
 ((brw-gs.prog_data-base.base.binding_table.size_bytes / 4) 
- GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+(brw-is_haswell  prog_data-base.nr_image_params ?
+ HSW_GS_UAV_ACCESS_ENABLE : 0));
 
   if (brw-gs.prog_data-base.total_scratch) {
  OUT_RELOC(stage_state-scratch_bo,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c 
b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 1e76eb1..b9ab0ea 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -71,6 +71,7 @@ upload_vs_state(struct brw_context *brw)
uint32_t floating_point_mode = 0;
const int max_threads_shift = brw-is_haswell ?
   HSW_VS_MAX_THREADS_SHIFT : GEN6_VS_MAX_THREADS_SHIFT;
+   const struct brw_vec4_prog_data *prog_data = brw-vs.prog_data-base;
 
gen7_emit_vs_workaround_flush(brw);
 
@@ -102,19 +103,21 @@ upload_vs_state(struct brw_context *brw)
 ((ALIGN(stage_state-sampler_count, 4)/4) 
   GEN6_VS_SAMPLER_COUNT_SHIFT) |
  ((brw-vs.prog_data-base.base.binding_table.size_bytes / 4) 
-  GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+  GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
+ (brw-is_haswell  prog_data-base.nr_image_params ?
+  HSW_VS_UAV_ACCESS_ENABLE : 0));
 
-   if (brw-vs.prog_data-base.total_scratch) {
+   if (prog_data-total_scratch) {
   OUT_RELOC(stage_state-scratch_bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-   ffs(brw-vs.prog_data-base.total_scratch) - 11);
+   ffs(prog_data-total_scratch) - 11);
} else {
   OUT_BATCH(0);
}
 
-   OUT_BATCH((brw-vs.prog_data-base.dispatch_grf_start_reg 
+   OUT_BATCH((prog_data-dispatch_grf_start_reg 
   GEN6_VS_DISPATCH_START_GRF_SHIFT) |
-(brw-vs.prog_data-base.urb_read_length  
GEN6_VS_URB_READ_LENGTH_SHIFT) |
+(prog_data-urb_read_length  GEN6_VS_URB_READ_LENGTH_SHIFT) |
 (0  GEN6_VS_URB_ENTRY_READ_OFFSET_SHIFT));
 
OUT_BATCH(((brw-max_vs_threads - 1)  max_threads_shift) |
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 50a87e2..5db5f69 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -232,6 +232,9 @@ upload_ps_state(struct brw_context *brw)
   _mesa_get_min_invocations_per_fragment(ctx, brw-fragment_program);
assert(min_inv_per_frag = 1);
 
+   if (brw-is_haswell  brw-wm.prog_data-base.nr_image_params)
+  dw4 |= HSW_PS_UAV_ACCESS_ENABLE;
+
if 

[Mesa-dev] [PATCH 05/25] i965: Define and initialize image meta-data structure.

2013-12-02 Thread Francisco Jerez
This will be used to pass image information to the shader when we
cannot use typed surface reads and writes.  When typed access is
available, all entries except surface_idx and size are unused and will
get eliminated by the uniform packing pass.  The size entry will be used
for bounds checking with some image formats and will be useful for
ARB_shader_image_size too.  The surface_idx entry is always used.
---
 src/mesa/drivers/dri/i965/brw_context.h   | 42 +
 src/mesa/drivers/dri/i965/brw_program.c   |  5 ++
 src/mesa/drivers/dri/i965/brw_vec4_gs.c   |  4 ++
 src/mesa/drivers/dri/i965/brw_vs.c|  7 ++-
 src/mesa/drivers/dri/i965/brw_wm.c|  7 ++-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 76 +++
 6 files changed, 138 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 0816912..dc606c0f 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -352,6 +352,7 @@ struct brw_stage_prog_data {
 
GLuint nr_params;   /** number of float params/constants */
GLuint nr_pull_params;
+   GLuint nr_image_params;
 
/* Pointers to tracked values (only valid once
 * _mesa_load_state_parameters has been called at runtime).
@@ -361,6 +362,47 @@ struct brw_stage_prog_data {
 */
const float **param;
const float **pull_param;
+   struct brw_image_param *image_param;
+};
+
+/*
+ * Image meta-data structure as laid out in the shader parameter
+ * buffer.  Entries have to be 16B-aligned for the vec4 back-end to be
+ * able to use them.  That's okay because the padding and any unused
+ * entries [most of them except when we're doing untyped surface
+ * access] will be removed by the uniform packing pass.
+ */
+#define BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET  0
+#define BRW_IMAGE_PARAM_OFFSET_OFFSET   4
+#define BRW_IMAGE_PARAM_SIZE_OFFSET 8
+#define BRW_IMAGE_PARAM_STRIDE_OFFSET   12
+#define BRW_IMAGE_PARAM_TILING_OFFSET   16
+#define BRW_IMAGE_PARAM_SWIZZLING_OFFSET20
+#define BRW_IMAGE_PARAM_SIZE24
+
+struct brw_image_param {
+   /** Surface binding table index. */
+   uint32_t surface_idx;
+
+   /** Surface X, Y and Z dimensions. */
+   uint32_t size[3];
+
+   /** Offset applied to the X and Y surface coordinates. */
+   uint32_t offset[2];
+
+   /** X-stride in bytes, Y-stride in bytes, horizontal Z-stride in
+* pixels, vertical Z-stride in pixels.
+*/
+   uint32_t stride[4];
+
+   /** Log2 of the tiling modulus in the X, Y and Z dimension. */
+   uint32_t tiling[3];
+
+   /** Right shift to apply for surface address swizzling.  Two
+* different swizzles can be specified and will be applied one
+* after the other.  Use \c 0xff if any of the swizzles is not
+* required. */
+   uint32_t swizzling[2];
 };
 
 /* Data about a particular attempt to compile a program.  Note that
diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 908782b..094deeb 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -552,6 +552,10 @@ brw_stage_prog_data_compare(const void *in_a, const void 
*in_b)
if (memcmp(a-pull_param, b-pull_param, a-nr_pull_params * sizeof(void 
*)))
   return false;
 
+   if (memcmp(a-image_param, b-image_param,
+  a-nr_image_params * sizeof(struct brw_image_param)))
+  return false;
+
return true;
 }
 
@@ -562,4 +566,5 @@ brw_stage_prog_data_free(const void *p)
 
ralloc_free(prog_data-param);
ralloc_free(prog_data-pull_param);
+   ralloc_free(prog_data-image_param);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
index 8dbd1e8..5583bfd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
@@ -61,11 +61,15 @@ do_gs_prog(struct brw_context *brw,
 
/* We also upload clip plane data as uniforms */
param_count += MAX_CLIP_PLANES * 4;
+   param_count += gs-NumImages * BRW_IMAGE_PARAM_SIZE;
 
c.prog_data.base.base.param =
   rzalloc_array(NULL, const float *, param_count);
c.prog_data.base.base.pull_param =
   rzalloc_array(NULL, const float *, param_count);
+   c.prog_data.base.base.image_param =
+  rzalloc_array(NULL, struct brw_image_param, gs-NumImages);
+   c.prog_data.base.base.nr_image_params = gs-NumImages;
 
if (gp-program.OutputType == GL_POINTS) {
   /* When the output type is points, the geometry shader may output data
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
b/src/mesa/drivers/dri/i965/brw_vs.c
index e9f92d4..8118b7e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -231,8 +231,9 @@ do_vs_prog(struct brw_context *brw,
* case being a float value that gets blown up to a vec4, so be
* conservative here.

[Mesa-dev] [PATCH 08/25] i965/gen7: Add missing defines for render cache messages.

2013-12-02 Thread Francisco Jerez
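Also remove the duplicated definition of OWORD_DUAL_BLOCK_WRITE
(GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE) and switch its only
user over to GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE.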
---
 src/mesa/drivers/dri/i965/brw_defines.h  | 8 +++-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 2 +-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index b73e8d0..67a2aaa 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -1201,7 +1201,13 @@ enum brw_message_target {
 #define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_UNORM_WRITE   14
 
 /* GEN7 */
-#define GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE  10
+#define GEN7_DATAPORT_RC_MEDIA_BLOCK_READ   4
+#define GEN7_DATAPORT_RC_TYPED_SURFACE_READ 5
+#define GEN7_DATAPORT_RC_TYPED_ATOMIC_OP6
+#define GEN7_DATAPORT_RC_MEMORY_FENCE   7
+#define GEN7_DATAPORT_RC_MEDIA_BLOCK_WRITE  10
+#define GEN7_DATAPORT_RC_RENDER_TARGET_WRITE12
+#define GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE13
 #define GEN7_DATAPORT_DC_OWORD_BLOCK_READ   0
 #define GEN7_DATAPORT_DC_UNALIGNED_OWORD_BLOCK_READ 1
 #define GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_READ  2
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index a0c44fe..83a2a27 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -728,7 +728,7 @@ vec4_generator::generate_scratch_write(vec4_instruction 
*inst,
uint32_t msg_type;
 
if (brw-gen = 7)
-  msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
+  msg_type = GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE;
else if (brw-gen == 6)
   msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
else
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/25] i965: Add helper functions to calculate the slice pitch of an array or 3D miptree.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_tex_layout.c| 51 +--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 10 ++
 2 files changed, 50 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 2c81eed..d13d855 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -216,6 +216,45 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
}
 }
 
+unsigned
+brw_miptree_get_horizontal_slice_pitch(struct brw_context *brw,
+   struct intel_mipmap_tree *mt,
+   unsigned level)
+{
+   if (mt-target == GL_TEXTURE_3D ||
+   (brw-gen == 4  mt-target == GL_TEXTURE_CUBE_MAP)) {
+  return ALIGN(minify(mt-physical_width0, level), mt-align_w);
+   } else {
+  return 0;
+   }
+}
+
+unsigned
+brw_miptree_get_vertical_slice_pitch(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ unsigned level)
+{
+   if (mt-target == GL_TEXTURE_3D ||
+   (brw-gen == 4  mt-target == GL_TEXTURE_CUBE_MAP)) {
+  return ALIGN(minify(mt-physical_height0, level), mt-align_h);
+
+   } else {
+  const unsigned h0 = ALIGN(mt-physical_height0, mt-align_h);
+  const unsigned h1 = ALIGN(minify(mt-physical_height0, 1), mt-align_h);
+  unsigned qpitch;
+
+  if (mt-array_spacing_lod0)
+ qpitch = h0;
+  else
+ qpitch = (h0 + h1 + (brw-gen = 7 ? 12 : 11) * mt-align_h);
+
+  if (mt-compressed)
+ return qpitch / 4;
+  else
+ return qpitch;
+   }
+}
+
 static void
 align_cube(struct intel_mipmap_tree *mt)
 {
@@ -232,17 +271,7 @@ static void
 brw_miptree_layout_texture_array(struct brw_context *brw,
 struct intel_mipmap_tree *mt)
 {
-   unsigned qpitch = 0;
-   int h0, h1;
-
-   h0 = ALIGN(mt-physical_height0, mt-align_h);
-   h1 = ALIGN(minify(mt-physical_height0, 1), mt-align_h);
-   if (mt-array_spacing_lod0)
-  qpitch = h0;
-   else
-  qpitch = (h0 + h1 + (brw-gen = 7 ? 12 : 11) * mt-align_h);
-   if (mt-compressed)
-  qpitch /= 4;
+   const unsigned qpitch = brw_miptree_get_vertical_slice_pitch(brw, mt, 0);
 
brw_miptree_layout_2d(mt);
 
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 8777a8c..34bf384 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -707,6 +707,16 @@ void
 intel_miptree_upsample(struct brw_context *brw,
struct intel_mipmap_tree *mt);
 
+unsigned
+brw_miptree_get_horizontal_slice_pitch(struct brw_context *brw,
+   struct intel_mipmap_tree *mt,
+   unsigned level);
+
+unsigned
+brw_miptree_get_vertical_slice_pitch(struct brw_context *brw,
+ struct intel_mipmap_tree *mt,
+ unsigned level);
+
 void brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt);
 
 void *intel_miptree_map_raw(struct brw_context *brw,
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 06/25] i965: Hook up image state upload.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_context.h  |  2 +
 src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 24 
 src/mesa/drivers/dri/i965/brw_state.h|  3 ++
 src/mesa/drivers/dri/i965/brw_state_upload.c |  6 +++
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 24 
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 50 
 6 files changed, 109 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index dc606c0f..4586005 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -181,6 +181,7 @@ enum brw_state_id {
BRW_STATE_STATS_WM,
BRW_STATE_UNIFORM_BUFFER,
BRW_STATE_ATOMIC_BUFFER,
+   BRW_STATE_IMAGE_UNITS,
BRW_STATE_META_IN_PROGRESS,
BRW_STATE_INTERPOLATION_MAP,
BRW_STATE_PUSH_CONSTANT_ALLOCATION,
@@ -220,6 +221,7 @@ enum brw_state_id {
 #define BRW_NEW_STATS_WM   (1  BRW_STATE_STATS_WM)
 #define BRW_NEW_UNIFORM_BUFFER  (1  BRW_STATE_UNIFORM_BUFFER)
 #define BRW_NEW_ATOMIC_BUFFER   (1  BRW_STATE_ATOMIC_BUFFER)
+#define BRW_NEW_IMAGE_UNITS (1  BRW_STATE_IMAGE_UNITS)
 #define BRW_NEW_META_IN_PROGRESS(1  BRW_STATE_META_IN_PROGRESS)
 #define BRW_NEW_INTERPOLATION_MAP   (1  BRW_STATE_INTERPOLATION_MAP)
 #define BRW_NEW_PUSH_CONSTANT_ALLOCATION (1  
BRW_STATE_PUSH_CONSTANT_ALLOCATION)
diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
index 5661941..6db061d 100644
--- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
@@ -110,3 +110,27 @@ const struct brw_tracked_state brw_gs_abo_surfaces = {
},
.emit = brw_upload_gs_abo_surfaces,
 };
+
+static void
+brw_upload_gs_image_surfaces(struct brw_context *brw)
+{
+   struct gl_context *ctx = brw-ctx;
+   /* _NEW_PROGRAM */
+   struct gl_shader_program *prog = ctx-Shader.CurrentGeometryProgram;
+
+   if (prog) {
+  /* CACHE_NEW_GS_PROG */
+  brw_upload_image_surfaces(brw, 
prog-_LinkedShaders[MESA_SHADER_GEOMETRY],
+brw-gs.base, brw-gs.prog_data-base.base,
+brw-gs.prog_data-base.base.image_param);
+   }
+}
+
+const struct brw_tracked_state brw_gs_image_surfaces = {
+   .dirty = {
+  .mesa = _NEW_PROGRAM,
+  .brw = BRW_NEW_BATCH | BRW_NEW_IMAGE_UNITS,
+  .cache = CACHE_NEW_GS_PROG,
+   },
+   .emit = brw_upload_gs_image_surfaces,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 2a4b23c..b8e0ca1 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -72,8 +72,10 @@ extern const struct brw_tracked_state brw_vs_samplers;
 extern const struct brw_tracked_state brw_gs_samplers;
 extern const struct brw_tracked_state brw_vs_ubo_surfaces;
 extern const struct brw_tracked_state brw_vs_abo_surfaces;
+extern const struct brw_tracked_state brw_vs_image_surfaces;
 extern const struct brw_tracked_state brw_gs_ubo_surfaces;
 extern const struct brw_tracked_state brw_gs_abo_surfaces;
+extern const struct brw_tracked_state brw_gs_image_surfaces;
 extern const struct brw_tracked_state brw_vs_unit;
 extern const struct brw_tracked_state brw_gs_prog;
 extern const struct brw_tracked_state brw_wm_prog;
@@ -84,6 +86,7 @@ extern const struct brw_tracked_state brw_gs_binding_table;
 extern const struct brw_tracked_state brw_vs_binding_table;
 extern const struct brw_tracked_state brw_wm_ubo_surfaces;
 extern const struct brw_tracked_state brw_wm_abo_surfaces;
+extern const struct brw_tracked_state brw_wm_image_surfaces;
 extern const struct brw_tracked_state brw_wm_unit;
 extern const struct brw_tracked_state brw_interpolation_map;
 
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 666af34..d11ab14 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -195,6 +195,10 @@ static const struct brw_tracked_state *gen7_atoms[] =
gen6_color_calc_state, /* must do before cc unit */
gen6_depth_stencil_state,  /* must do before cc unit */
 
+   brw_vs_image_surfaces, /* Before vs push/pull constants and binding table 
*/
+   brw_gs_image_surfaces, /* Before gs push/pull constants and binding table 
*/
+   brw_wm_image_surfaces, /* Before wm push/pull constants and binding table 
*/
+
gen6_vs_push_constants, /* Before vs_state */
gen7_gs_push_constants, /* Before gs_state */
gen6_wm_push_constants, /* Before wm_surfaces and constant_buffer */
@@ -308,6 +312,7 @@ void brw_init_state( struct brw_context *brw )
ctx-DriverFlags.NewRasterizerDiscard = BRW_NEW_RASTERIZER_DISCARD;
ctx-DriverFlags.NewUniformBuffer = BRW_NEW_UNIFORM_BUFFER;
ctx-DriverFlags.NewAtomicBuffer = BRW_NEW_ATOMIC_BUFFER;
+   

[Mesa-dev] [PATCH 20/25] i965/vec4: Hook up the surface visitor.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_vec4.h   |   9 --
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 109 +
 2 files changed, 19 insertions(+), 99 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 989c1a9..341bdbb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -531,13 +531,6 @@ public:
void emit_shader_time_write(enum shader_time_shader_type type,
src_reg value);
 
-   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-dst_reg dst, src_reg offset, src_reg src0,
-src_reg src1);
-
-   void emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
-  src_reg offset);
-
src_reg get_scratch_offset(vec4_instruction *inst,
  src_reg *reladdr, int reg_offset);
src_reg get_pull_constant_offset(vec4_instruction *inst,
@@ -563,8 +556,6 @@ public:
 
void dump_instruction(backend_instruction *inst);
 
-   void visit_atomic_counter_intrinsic(ir_call *ir);
-
 protected:
void emit_vertex();
void lower_attributes_to_hw_regs(const int *attribute_map,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index eceac91..e04fc69 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -22,6 +22,7 @@
  */
 
 #include brw_vec4.h
+#include brw_vec4_surface_visitor.h
 #include glsl/ir_uniform.h
 extern C {
 #include program/sampler.h
@@ -2188,45 +2189,6 @@ vec4_visitor::visit(ir_constant *ir)
 }
 
 void
-vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
-{
-   ir_dereference *deref = static_castir_dereference *(
-  ir-actual_parameters.get_head());
-   ir_variable *location = deref-variable_referenced();
-   unsigned surf_index = (prog_data-base.binding_table.abo_start +
-  location-atomic.buffer_index);
-
-   /* Calculate the surface offset */
-   src_reg offset(this, glsl_type::uint_type);
-   ir_dereference_array *deref_array = deref-as_dereference_array();
-   if (deref_array) {
-  deref_array-array_index-accept(this);
-
-  src_reg tmp(this, glsl_type::uint_type);
-  emit(MUL(dst_reg(tmp), this-result, ATOMIC_COUNTER_SIZE));
-  emit(ADD(dst_reg(offset), tmp, location-atomic.offset));
-   } else {
-  offset = location-atomic.offset;
-   }
-
-   /* Emit the appropriate machine instruction */
-   const char *callee = ir-callee-function_name();
-   dst_reg dst = get_assignment_lhs(ir-return_deref, this);
-
-   if (!strcmp(__intrinsic_atomic_read, callee)) {
-  emit_untyped_surface_read(surf_index, dst, offset);
-
-   } else if (!strcmp(__intrinsic_atomic_increment, callee)) {
-  emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
-  src_reg(), src_reg());
-
-   } else if (!strcmp(__intrinsic_atomic_predecrement, callee)) {
-  emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
-  src_reg(), src_reg());
-   }
-}
-
-void
 vec4_visitor::visit(ir_call *ir)
 {
const char *callee = ir-callee-function_name();
@@ -2234,8 +2196,24 @@ vec4_visitor::visit(ir_call *ir)
if (!strcmp(__intrinsic_atomic_read, callee) ||
!strcmp(__intrinsic_atomic_increment, callee) ||
!strcmp(__intrinsic_atomic_predecrement, callee)) {
-  visit_atomic_counter_intrinsic(ir);
-   } else {
+  brw_vec4_surface_visitor(this).visit_atomic_counter_intrinsic(ir);
+
+} else if (!strcmp(__intrinsic_image_load, callee) ||
+  !strcmp(__intrinsic_image_store, callee) ||
+  !strcmp(__intrinsic_image_atomic_add, callee) ||
+  !strcmp(__intrinsic_image_atomic_min, callee) ||
+  !strcmp(__intrinsic_image_atomic_max, callee) ||
+  !strcmp(__intrinsic_image_atomic_and, callee) ||
+  !strcmp(__intrinsic_image_atomic_or, callee) ||
+  !strcmp(__intrinsic_image_atomic_xor, callee) ||
+  !strcmp(__intrinsic_image_atomic_exchange, callee) ||
+  !strcmp(__intrinsic_image_atomic_comp_swap, callee)) {
+  brw_vec4_surface_visitor(this).visit_image_intrinsic(ir);
+
+   } else if (!strcmp(__intrinsic_memory_barrier, callee)) {
+  brw_vec4_surface_visitor(this).visit_barrier_intrinsic(ir);
+
+  } else {
   assert(!Unsupported intrinsic.);
}
 }
@@ -2632,55 +2610,6 @@ vec4_visitor::visit(ir_end_primitive *)
 }
 
 void
-vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-  dst_reg dst, src_reg offset,
-  src_reg src0, src_reg src1)
-{
-   unsigned mlen = 0;
-
-   /* Set the atomic operation offset. */
-   emit(MOV(brw_writemask(brw_uvec_mrf(8, mlen, 0), WRITEMASK_X), offset));
-   

[Mesa-dev] [PATCH 15/25] i965: Add visitor support for handling image uniforms.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 36 +++--
 src/mesa/drivers/dri/i965/brw_fs.h |  4 +++
 src/mesa/drivers/dri/i965/brw_shader.cpp   | 25 +++
 src/mesa/drivers/dri/i965/brw_shader.h |  4 +++
 src/mesa/drivers/dri/i965/brw_vec4.h   |  2 ++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 44 +-
 6 files changed, 90 insertions(+), 25 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index cce6ed0..ca152d1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -505,6 +505,8 @@ fs_visitor::type_size(const struct glsl_type *type)
   return 0;
case GLSL_TYPE_ATOMIC_UINT:
   return 0;
+   case GLSL_TYPE_IMAGE:
+  return BRW_IMAGE_PARAM_SIZE;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
@@ -871,6 +873,20 @@ fs_visitor::import_uniforms(fs_visitor *v)
this-nr_params_remap = v-nr_params_remap;
 }
 
+void
+fs_visitor::setup_vector_uniform_values(void *values, unsigned stride,
+unsigned size)
+{
+   static float zero = 0;
+
+   for (unsigned i = 0; i  size; ++i)
+  stage_prog_data-param[uniforms++] =
+ (float *)((char *)values + i * stride);
+
+   for (unsigned i = size; i  4; ++i)
+  stage_prog_data-param[uniforms++] = zero;
+}
+
 /* Our support for uniforms is piggy-backed on the struct
  * gl_fragment_program, because that's where the values actually
  * get stored, rather than in some global gl_shader_program uniform
@@ -887,7 +903,6 @@ fs_visitor::setup_uniform_values(ir_variable *ir)
 * order we'd walk the type, so walk the list of storage and find anything
 * with our name, or the prefix of a component that starts with our name.
 */
-   unsigned params_before = uniforms;
for (unsigned u = 0; u  shader_prog-NumUserUniformStorage; u++) {
   struct gl_uniform_storage *storage = shader_prog-UniformStorage[u];
 
@@ -898,17 +913,18 @@ fs_visitor::setup_uniform_values(ir_variable *ir)
  continue;
   }
 
-  unsigned slots = storage-type-component_slots();
-  if (storage-array_elements)
- slots *= storage-array_elements;
+  if (storage-type-is_image()) {
+ setup_image_uniform_values(storage);
 
-  for (unsigned i = 0; i  slots; i++)
- stage_prog_data-param[uniforms++] = storage-storage[i].f;
-   }
+  } else {
+ unsigned slots = storage-type-component_slots();
+ if (storage-array_elements)
+slots *= storage-array_elements;
 
-   /* Make sure we actually initialized the right amount of stuff here. */
-   assert(params_before + ir-type-component_slots() == uniforms);
-   (void)params_before;
+ for (unsigned i = 0; i  slots; i++)
+stage_prog_data-param[uniforms++] = storage-storage[i].f;
+  }
+   }
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 7bfa9fd..eb71676 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -434,8 +434,12 @@ public:
fs_reg get_timestamp();
 
struct brw_reg interp_reg(int location, int channel);
+
+   virtual void setup_vector_uniform_values(void *values, unsigned stride,
+unsigned size);
void setup_uniform_values(ir_variable *ir);
void setup_builtin_uniform_values(ir_variable *ir);
+
int implied_mrf_writes(fs_inst *inst);
 
void dump_instruction(backend_instruction *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index dc6d35e..d903bb5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -310,6 +310,8 @@ brw_type_for_base_type(const struct glsl_type *type)
* way to trip up if we don't.
*/
   return BRW_REGISTER_TYPE_UD;
+   case GLSL_TYPE_IMAGE:
+  return BRW_REGISTER_TYPE_UD;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
@@ -761,3 +763,26 @@ 
backend_visitor::assign_common_binding_table_offsets(uint32_t next_binding_table
 
/* prog_data-base.binding_table.size will be set by brw_mark_surface_used. 
*/
 }
+
+void
+backend_visitor::setup_image_uniform_values(const gl_uniform_storage *storage)
+{
+   const unsigned stage = _mesa_program_target_to_index(prog-Target);
+
+   for (unsigned i = 0; i  MAX2(storage-array_elements, 1); i++) {
+  const unsigned image_idx = storage-image[stage].index + i;
+  brw_image_param *param = stage_prog_data-image_param[image_idx];
+
+  param-surface_idx =
+ stage_prog_data-binding_table.image_start + image_idx;
+
+  setup_vector_uniform_values(param-surface_idx, sizeof(uint32_t), 1);
+  setup_vector_uniform_values(param-offset, sizeof(uint32_t), 2);
+  

[Mesa-dev] [PATCH 16/25] i965/fs: Obtain atomic counter locations by recursing through the visitor.

2013-12-02 Thread Francisco Jerez
Until now, atomic counter built-ins were handled in a way that
prevented the visitor from encountering atomic counter IR variables
and dereferences directly.  In the new surface lowering code, it is
going to be more convenient to call back into the visitor and let it
handle the ugly details of atomic counter array dereferences.  Doing
so will also make it easier to share the rest of the atomic intrinsic
handling code.
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 102 ---
 1 file changed, 60 insertions(+), 42 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index b5957c6..d65809f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -102,34 +102,40 @@ fs_visitor::visit(ir_variable *ir)
 }
   }
} else if (ir-mode == ir_var_uniform) {
-  int param_index = uniforms;
-
-  /* Thanks to the lower_ubo_reference pass, we will see only
-   * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
-   * variables, so no need for them to be in variable_ht.
-   *
-   * Atomic counters take no uniform storage, no need to do
-   * anything here.
-   */
-  if (ir-is_in_uniform_block() || ir-type-contains_atomic())
+  if (ir-is_in_uniform_block()) {
+ /* Thanks to the lower_ubo_reference pass, we will see only
+  * ir_binop_ubo_load expressions and not ir_dereference_variable for 
UBO
+  * variables, so no need for them to be in variable_ht.
+  */
  return;
 
-  if (dispatch_width == 16) {
-if (!variable_storage(ir)) {
-   fail(Failed to find uniform '%s' in 16-wide\n, ir-name);
-}
-return;
-  }
+  } else if (ir-type-contains_atomic()) {
+ reg = new(this-mem_ctx) fs_reg(ir-atomic.offset);
+
+ brw_mark_surface_used(stage_prog_data,
+   stage_prog_data-binding_table.abo_start +
+   ir-atomic.buffer_index);
 
-  param_size[param_index] = type_size(ir-type);
-  if (!strncmp(ir-name, gl_, 3)) {
-setup_builtin_uniform_values(ir);
   } else {
-setup_uniform_values(ir);
-  }
+ int param_index = uniforms;
 
-  reg = new(this-mem_ctx) fs_reg(UNIFORM, param_index);
-  reg-type = brw_type_for_base_type(ir-type);
+ if (dispatch_width == 16) {
+if (!variable_storage(ir)) {
+   fail(Failed to find uniform '%s' in 16-wide\n, ir-name);
+}
+return;
+ }
+
+ param_size[param_index] = type_size(ir-type);
+ if (!strncmp(ir-name, gl_, 3)) {
+setup_builtin_uniform_values(ir);
+ } else {
+setup_uniform_values(ir);
+ }
+
+ reg = new(this-mem_ctx) fs_reg(UNIFORM, param_index);
+ reg-type = brw_type_for_base_type(ir-type);
+  }
 
} else if (ir-mode == ir_var_system_value) {
   if (ir-location == SYSTEM_VALUE_SAMPLE_POS) {
@@ -182,31 +188,43 @@ fs_visitor::visit(ir_dereference_array *ir)
src = this-result;
src.type = brw_type_for_base_type(ir-type);
 
-   if (constant_index) {
-  assert(src.file == UNIFORM || src.file == GRF);
-  src.reg_offset += constant_index-value.i[0] * element_size;
-   } else {
-  /* Variable index array dereference.  We attach the variable index
-   * component to the reg as a pointer to a register containing the
-   * offset.  Currently only uniform arrays are supported in this patch,
-   * and that reladdr pointer is resolved by
-   * move_uniform_array_access_to_pull_constants().  All other array types
-   * are lowered by lower_variable_index_to_cond_assign().
-   */
+   if (ir-array-type-contains_atomic()) {
+  fs_reg tmp(this, glsl_type::uint_type);
+
   ir-array_index-accept(this);
 
-  fs_reg index_reg;
-  index_reg = fs_reg(this, glsl_type::int_type);
-  emit(BRW_OPCODE_MUL, index_reg, this-result, fs_reg(element_size));
+  emit(MUL(tmp, this-result, ATOMIC_COUNTER_SIZE));
+  emit(ADD(tmp, tmp, src));
+  this-result = tmp;
+
+   } else {
+  if (constant_index) {
+ assert(src.file == UNIFORM || src.file == GRF);
+ src.reg_offset += constant_index-value.i[0] * element_size;
+  } else {
+ /* Variable index array dereference.  We attach the variable index
+  * component to the reg as a pointer to a register containing the
+  * offset.  Currently only uniform arrays are supported in this patch,
+  * and that reladdr pointer is resolved by
+  * move_uniform_array_access_to_pull_constants().  All other array types
+  * are lowered by lower_variable_index_to_cond_assign().
+  */
+ ir-array_index-accept(this);
+
+ fs_reg index_reg;
+ index_reg = fs_reg(this, glsl_type::int_type);
+   

[Mesa-dev] [PATCH 13/25] i965: Add memory fence opcode.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_defines.h  |  2 +
 src/mesa/drivers/dri/i965/brw_eu.h   |  4 ++
 src/mesa/drivers/dri/i965/brw_eu_emit.c  | 69 
 src/mesa/drivers/dri/i965/brw_fs.cpp |  2 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |  4 ++
 src/mesa/drivers/dri/i965/brw_shader.cpp |  1 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp   |  2 +
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  4 ++
 8 files changed, 88 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 631473a..9e51e2c 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -784,6 +784,8 @@ enum opcode {
SHADER_OPCODE_TYPED_SURFACE_READ,
SHADER_OPCODE_TYPED_SURFACE_WRITE,
 
+   SHADER_OPCODE_MEMORY_FENCE,
+
SHADER_OPCODE_GEN4_SCRATCH_READ,
SHADER_OPCODE_GEN4_SCRATCH_WRITE,
SHADER_OPCODE_GEN7_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index 17822ce..a47c730 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -408,6 +408,10 @@ brw_typed_surface_write(struct brw_compile *p,
 unsigned msg_length,
 unsigned num_channels);
 
+void
+brw_memory_fence(struct brw_compile *p,
+ struct brw_reg mrf);
+
 /*** 
  * brw_eu_util.c:
  */
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 772be7a..3ee86c6 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2919,6 +2919,75 @@ brw_typed_surface_write(struct brw_compile *p,
brw_send_indirect_message(p, sfid, dst, mrf, desc);
 }
 
+static void
+brw_set_memory_fence_message(struct brw_compile *p,
+ struct brw_instruction *insn,
+ enum brw_message_target sfid,
+ bool commit_enable)
+{
+   brw_set_message_descriptor(p, insn, sfid,
+  1 /* message length */,
+  (commit_enable ? 1 : 0) /* response length */,
+  true /* header present */,
+  false);
+
+   switch (sfid) {
+   case GEN6_SFID_DATAPORT_RENDER_CACHE:
+  insn-bits3.gen7_dp.msg_type = GEN7_DATAPORT_RC_MEMORY_FENCE;
+  break;
+   case GEN7_SFID_DATAPORT_DATA_CACHE:
+  insn-bits3.gen7_dp.msg_type = GEN7_DATAPORT_DC_MEMORY_FENCE;
+  break;
+   default:
+  unreachable();
+   }
+
+   if (commit_enable)
+  insn-bits3.ud |= 1  13;
+}
+
+void
+brw_memory_fence(struct brw_compile *p,
+ struct brw_reg mrf)
+{
+   const bool commit_enable = !p-brw-is_haswell;
+   struct brw_instruction *insn;
+
+   /* Set mrf as destination for dependency tracking, the MEMORY_FENCE
+* message doesn't write anything back.
+*/
+   insn = next_insn(p, BRW_OPCODE_SEND);
+   mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
+   brw_set_dest(p, insn, mrf);
+   brw_set_src0(p, insn, mrf);
+   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
+commit_enable);
+
+   if (!p-brw-is_haswell) {
+  /* IVB does typed surface access through the render cache, so we
+   * need to flush that too.  Use a different register so both
+   * flushes can be pipelined by the hardware.
+   */
+  insn = next_insn(p, BRW_OPCODE_SEND);
+  brw_set_dest(p, insn, offset(mrf, 1));
+  brw_set_src0(p, insn, offset(mrf, 1));
+  brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
+   commit_enable);
+
+  /* Now write the response of the second message into the
+   * response of the first to trigger a pipeline stall -- This way
+   * future render and data cache messages will be properly
+   * ordered with respect to past data and render cache messages
+   * respectively.
+   */
+  brw_push_insn_state(p);
+  brw_set_compression_control(p, BRW_COMPRESSION_NONE);
+  brw_set_mask_control(p, BRW_MASK_DISABLE);
+  brw_MOV(p, mrf, offset(mrf, 1));
+  brw_pop_insn_state(p);
+   }
+}
+
 /**
  * This instruction is generated as a single-channel align1 instruction by
  * both the VS and FS stages when using INTEL_DEBUG=shader_time.
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 20cb4b9..cce6ed0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -786,6 +786,8 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
   return 0;
+   case SHADER_OPCODE_MEMORY_FENCE:
+  return 1;
default:
   assert(!not 

[Mesa-dev] [PATCH 10/25] i965: Simplify generator code for untyped surface messages.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_fs.h   |  9 --
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 38 --
 src/mesa/drivers/dri/i965/brw_vec4.h |  9 --
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 40 
 4 files changed, 12 insertions(+), 84 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 4ada075..7bfa9fd 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -601,15 +601,6 @@ private:
  struct brw_reg offset,
  struct brw_reg value);
 
-   void generate_untyped_atomic(fs_inst *inst,
-struct brw_reg dst,
-struct brw_reg atomic_op,
-struct brw_reg surf_index);
-
-   void generate_untyped_surface_read(fs_inst *inst,
-  struct brw_reg dst,
-  struct brw_reg surf_index);
-
void patch_discard_jumps_to_fb_writes();
 
struct brw_context *brw;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 4eb651f..0d50051 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1255,36 +1255,6 @@ fs_generator::generate_shader_time_add(fs_inst *inst,
 }
 
 void
-fs_generator::generate_untyped_atomic(fs_inst *inst, struct brw_reg dst,
-  struct brw_reg atomic_op,
-  struct brw_reg surf_index)
-{
-   assert(atomic_op.file == BRW_IMMEDIATE_VALUE 
-  atomic_op.type == BRW_REGISTER_TYPE_UD 
-  surf_index.file == BRW_IMMEDIATE_VALUE 
- surf_index.type == BRW_REGISTER_TYPE_UD);
-
-   brw_untyped_atomic(p, dst, brw_message_reg(inst-base_mrf),
-  surf_index, atomic_op.dw1.ud,
-  inst-mlen, true);
-
-   brw_mark_surface_used(c-prog_data.base, surf_index.dw1.ud);
-}
-
-void
-fs_generator::generate_untyped_surface_read(fs_inst *inst, struct brw_reg dst,
-struct brw_reg surf_index)
-{
-   assert(surf_index.file == BRW_IMMEDIATE_VALUE 
- surf_index.type == BRW_REGISTER_TYPE_UD);
-
-   brw_untyped_surface_read(p, dst, brw_message_reg(inst-base_mrf),
-surf_index, inst-mlen, 1);
-
-   brw_mark_surface_used(c-prog_data.base, surf_index.dw1.ud);
-}
-
-void
 fs_generator::generate_code(exec_list *instructions)
 {
int last_native_insn_offset = p-next_insn_offset;
@@ -1709,11 +1679,15 @@ fs_generator::generate_code(exec_list *instructions)
  break;
 
   case SHADER_OPCODE_UNTYPED_ATOMIC:
- generate_untyped_atomic(inst, dst, src[0], src[1]);
+ assert(src[1].file == BRW_IMMEDIATE_VALUE);
+ brw_untyped_atomic(p, dst, brw_message_reg(inst-base_mrf),
+src[0], src[1].dw1.ud, inst-mlen, true);
  break;
 
   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- generate_untyped_surface_read(inst, dst, src[0]);
+ assert(src[1].file == BRW_IMMEDIATE_VALUE);
+ brw_untyped_surface_read(p, dst, brw_message_reg(inst-base_mrf),
+  src[0], inst-mlen, src[1].dw1.ud);
  break;
 
   case FS_OPCODE_SET_SIMD4X2_OFFSET:
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
b/src/mesa/drivers/dri/i965/brw_vec4.h
index 355d497..7e07929 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -666,15 +666,6 @@ private:
void generate_unpack_flags(vec4_instruction *inst,
   struct brw_reg dst);
 
-   void generate_untyped_atomic(vec4_instruction *inst,
-struct brw_reg dst,
-struct brw_reg atomic_op,
-struct brw_reg surf_index);
-
-   void generate_untyped_surface_read(vec4_instruction *inst,
-  struct brw_reg dst,
-  struct brw_reg surf_index);
-
struct brw_context *brw;
 
struct brw_compile *p;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index 3ac45a9..d29c3dd 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -847,38 +847,6 @@ 
vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
brw_mark_surface_used(prog_data-base, surf_index.dw1.ud);
 }
 
-void
-vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
-struct brw_reg dst,
-struct brw_reg atomic_op,
-struct brw_reg 

[Mesa-dev] [PATCH 09/25] i965/gen7: Fix the untyped surface messages to deal with indirect surface access.

2013-12-02 Thread Francisco Jerez
Change brw_untyped_atomic() and brw_untyped_surface_read() to take the
surface index as a register instead of a constant, construct the
message descriptor dynamically by OR'ing the surface index and other
descriptor bits together and use the non-immediate variant of SEND to
submit the surface message.
---
 src/mesa/drivers/dri/i965/brw_eu.h   |  18 +-
 src/mesa/drivers/dri/i965/brw_eu_emit.c  | 200 +++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   |   7 +-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |   7 +-
 4 files changed, 147 insertions(+), 85 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index a6a65ca..45b421b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -360,20 +360,20 @@ void brw_CMP(struct brw_compile *p,
 
 void
 brw_untyped_atomic(struct brw_compile *p,
-   struct brw_reg dest,
+   struct brw_reg dst,
struct brw_reg mrf,
-   GLuint atomic_op,
-   GLuint bind_table_index,
-   GLuint msg_length,
-   GLuint response_length);
+   struct brw_reg surface,
+   unsigned atomic_op,
+   unsigned msg_length,
+   bool response_expected);
 
 void
 brw_untyped_surface_read(struct brw_compile *p,
- struct brw_reg dest,
+ struct brw_reg dst,
  struct brw_reg mrf,
- GLuint bind_table_index,
- GLuint msg_length,
- GLuint response_length);
+ struct brw_reg surface,
+ unsigned msg_length,
+ unsigned num_channels);
 
 /*** 
  * brw_eu_util.c:
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index cc093e0..b94a6d1 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2527,23 +2527,87 @@ brw_svb_write(struct brw_compile *p,
 send_commit_msg); /* send_commit_msg */
 }
 
+static struct brw_instruction *
+brw_load_indirect_message_descriptor(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg src,
+ unsigned msg_length,
+ unsigned response_length,
+ bool header_present)
+{
+   struct brw_instruction *insn;
+
+   brw_push_insn_state(p);
+   brw_set_access_mode(p, BRW_ALIGN_1);
+   brw_set_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+
+   if (src.file == BRW_IMMEDIATE_VALUE) {
+  insn = brw_MOV(p, dst, brw_imm_ud(src.dw1.ud));
+   } else {
+  struct brw_reg tmp = suboffset(vec1(retype(src, BRW_REGISTER_TYPE_UD)),
+ BRW_GET_SWZ(src.dw1.bits.swizzle, 0));
+  insn = brw_OR(p, dst, tmp, brw_imm_ud(0));
+   }
+
+   insn-bits3.generic_gen5.msg_length = msg_length;
+   insn-bits3.generic_gen5.response_length = response_length;
+   insn-bits3.generic_gen5.header_present = header_present;
+
+   brw_pop_insn_state(p);
+
+   return insn;
+}
+
+static struct brw_instruction *
+brw_send_indirect_message(struct brw_compile *p,
+  unsigned sfid,
+  struct brw_reg dst,
+  struct brw_reg mrf,
+  struct brw_reg desc)
+{
+   /* Due to a hardware limitation the message descriptor desc MUST be
+* stored in a0.0.  That means that there's only room for one
+* descriptor and the surface indices of different channels in the
+* same SIMD thread cannot diverge.  That's OK for the moment
+* because OpenGL requires image (and atomic counter) array
+* indexing to be dynamically uniform.
+*/
+   struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
+
+   brw_set_dest(p, insn, retype(dst, BRW_REGISTER_TYPE_UD));
+   brw_set_src0(p, insn, retype(mrf, BRW_REGISTER_TYPE_UD));
+   brw_set_src1(p, insn, retype(desc, BRW_REGISTER_TYPE_UD));
+
+   /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
+   insn-header.destreg__conditionalmod = sfid;
+
+   return insn;
+}
+
+static unsigned
+brw_surface_payload_size(struct brw_compile *p,
+ unsigned num_channels,
+ bool has_simd4x2,
+ bool has_simd16)
+{
+   if (has_simd4x2  p-current-header.access_mode == BRW_ALIGN_16)
+  return 1;
+   else if (has_simd16  p-compressed)
+  return 2 * num_channels;
+   else
+  return num_channels;
+}
+
 static void
 brw_set_dp_untyped_atomic_message(struct 

[Mesa-dev] [PATCH 12/25] i965: Add typed surface access messages.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_defines.h|   4 +
 src/mesa/drivers/dri/i965/brw_eu.h |  25 
 src/mesa/drivers/dri/i965/brw_eu_emit.c| 166 +
 src/mesa/drivers/dri/i965/brw_fs.cpp   |   3 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp |  18 +++
 .../drivers/dri/i965/brw_schedule_instructions.cpp |   3 +
 src/mesa/drivers/dri/i965/brw_shader.cpp   |   2 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp |   3 +
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp   |  18 +++
 9 files changed, 242 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 988b07e..631473a 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -780,6 +780,10 @@ enum opcode {
SHADER_OPCODE_UNTYPED_SURFACE_READ,
SHADER_OPCODE_UNTYPED_SURFACE_WRITE,
 
+   SHADER_OPCODE_TYPED_ATOMIC,
+   SHADER_OPCODE_TYPED_SURFACE_READ,
+   SHADER_OPCODE_TYPED_SURFACE_WRITE,
+
SHADER_OPCODE_GEN4_SCRATCH_READ,
SHADER_OPCODE_GEN4_SCRATCH_WRITE,
SHADER_OPCODE_GEN7_SCRATCH_READ,
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h 
b/src/mesa/drivers/dri/i965/brw_eu.h
index e17dc49..17822ce 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -383,6 +383,31 @@ brw_untyped_surface_write(struct brw_compile *p,
   unsigned msg_length,
   unsigned num_channels);
 
+void
+brw_typed_atomic(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg mrf,
+ struct brw_reg surface,
+ unsigned atomic_op,
+ unsigned msg_length,
+ bool response_expected);
+
+void
+brw_typed_surface_read(struct brw_compile *p,
+   struct brw_reg dst,
+   struct brw_reg mrf,
+   struct brw_reg surface,
+   unsigned msg_length,
+   unsigned num_channels);
+
+void
+brw_typed_surface_write(struct brw_compile *p,
+struct brw_reg dst,
+struct brw_reg mrf,
+struct brw_reg surface,
+unsigned msg_length,
+unsigned num_channels);
+
 /*** 
  * brw_eu_util.c:
  */
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c 
b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 13dd59a..772be7a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -2753,6 +2753,172 @@ brw_untyped_surface_write(struct brw_compile *p,
brw_send_indirect_message(p, sfid, dst, mrf, desc);
 }
 
+static void
+brw_set_dp_typed_atomic_message(struct brw_compile *p,
+struct brw_instruction *insn,
+unsigned atomic_op,
+bool response_expected)
+{
+   const unsigned access_mode = p-current-header.access_mode;
+   const unsigned compression_control = p-current-header.compression_control;
+
+   if (p-brw-is_haswell) {
+  if (access_mode == BRW_ALIGN_1) {
+ if (compression_control == GEN6_COMPRESSION_2Q)
+insn-bits3.ud |= 1  12; /* Use high 8 slots of the sample mask */
+
+ insn-bits3.gen7_dp.msg_type =
+HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP;
+  } else {
+ insn-bits3.gen7_dp.msg_type =
+HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2;
+  }
+
+   } else {
+  insn-bits3.gen7_dp.msg_type = GEN7_DATAPORT_RC_TYPED_ATOMIC_OP;
+
+  if (compression_control == GEN6_COMPRESSION_2Q)
+ insn-bits3.ud |= 1  12; /* Use high 8 slots of the sample mask */
+   }
+
+   if (response_expected)
+  insn-bits3.ud |= 1  13; /* Return data expected */
+
+   insn-bits3.ud |= atomic_op  8;
+}
+
+void
+brw_typed_atomic(struct brw_compile *p,
+ struct brw_reg dst,
+ struct brw_reg mrf,
+ struct brw_reg surface,
+ unsigned atomic_op,
+ unsigned msg_length,
+ bool response_expected) {
+   const unsigned sfid = (p-brw-is_haswell ? HSW_SFID_DATAPORT_DATA_CACHE_1 :
+  GEN6_SFID_DATAPORT_RENDER_CACHE);
+   struct brw_reg desc = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
+   struct brw_instruction *insn;
+
+   insn = brw_load_indirect_message_descriptor(
+  p, desc, surface, msg_length,
+  brw_surface_payload_size(p, response_expected, p-brw-is_haswell, 
false),
+  true);
+
+   brw_set_dp_typed_atomic_message(
+  p, insn, atomic_op, response_expected);
+
+   brw_send_indirect_message(p, sfid, dst, mrf, desc);
+}
+
+static void
+brw_set_dp_typed_surface_read_message(struct brw_compile *p,
+  

[Mesa-dev] [PATCH 19/25] i965/fs: Hook up the surface visitor.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_fs.h   |   9 --
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 138 ---
 2 files changed, 18 insertions(+), 129 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index eb71676..ae4a6f5 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -414,13 +414,6 @@ public:
void emit_shader_time_write(enum shader_time_shader_type type,
fs_reg value);
 
-   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-fs_reg dst, fs_reg offset, fs_reg src0,
-fs_reg src1);
-
-   void emit_untyped_surface_read(unsigned surf_index, fs_reg dst,
-  fs_reg offset);
-
bool try_rewrite_rhs_to_dst(ir_assignment *ir,
   fs_reg dst,
   fs_reg src,
@@ -444,8 +437,6 @@ public:
 
void dump_instruction(backend_instruction *inst);
 
-   void visit_atomic_counter_intrinsic(ir_call *ir);
-
struct gl_fragment_program *fp;
struct brw_wm_compile *c;
unsigned int sanity_param_count;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index d65809f..b674232 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -44,6 +44,7 @@ extern C {
 #include brw_wm.h
 }
 #include brw_fs.h
+#include brw_fs_surface_visitor.h
 #include main/uniforms.h
 #include glsl/glsl_types.h
 #include glsl/ir_optimization.h
@@ -2218,47 +2219,6 @@ fs_visitor::visit(ir_loop_jump *ir)
 }
 
 void
-fs_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
-{
-   ir_dereference *deref = static_castir_dereference *(
-  ir-actual_parameters.get_head());
-   ir_variable *location = deref-variable_referenced();
-   unsigned surf_index = (c-prog_data.base.binding_table.abo_start +
-  location-atomic.buffer_index);
-
-   /* Calculate the surface offset */
-   fs_reg offset(this, glsl_type::uint_type);
-   ir_dereference_array *deref_array = deref-as_dereference_array();
-
-   if (deref_array) {
-  deref_array-array_index-accept(this);
-
-  fs_reg tmp(this, glsl_type::uint_type);
-  emit(MUL(tmp, this-result, ATOMIC_COUNTER_SIZE));
-  emit(ADD(offset, tmp, location-atomic.offset));
-   } else {
-  offset = location-atomic.offset;
-   }
-
-   /* Emit the appropriate machine instruction */
-   const char *callee = ir-callee-function_name();
-   ir-return_deref-accept(this);
-   fs_reg dst = this-result;
-
-   if (!strcmp(__intrinsic_atomic_read, callee)) {
-  emit_untyped_surface_read(surf_index, dst, offset);
-
-   } else if (!strcmp(__intrinsic_atomic_increment, callee)) {
-  emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
-  fs_reg(), fs_reg());
-
-   } else if (!strcmp(__intrinsic_atomic_predecrement, callee)) {
-  emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
-  fs_reg(), fs_reg());
-   }
-}
-
-void
 fs_visitor::visit(ir_call *ir)
 {
const char *callee = ir-callee-function_name();
@@ -2266,7 +2226,23 @@ fs_visitor::visit(ir_call *ir)
if (!strcmp(__intrinsic_atomic_read, callee) ||
!strcmp(__intrinsic_atomic_increment, callee) ||
!strcmp(__intrinsic_atomic_predecrement, callee)) {
-  visit_atomic_counter_intrinsic(ir);
+  brw_fs_surface_visitor(this).visit_atomic_counter_intrinsic(ir);
+
+   } else if (!strcmp(__intrinsic_image_load, callee) ||
+  !strcmp(__intrinsic_image_store, callee) ||
+  !strcmp(__intrinsic_image_atomic_add, callee) ||
+  !strcmp(__intrinsic_image_atomic_min, callee) ||
+  !strcmp(__intrinsic_image_atomic_max, callee) ||
+  !strcmp(__intrinsic_image_atomic_and, callee) ||
+  !strcmp(__intrinsic_image_atomic_or, callee) ||
+  !strcmp(__intrinsic_image_atomic_xor, callee) ||
+  !strcmp(__intrinsic_image_atomic_exchange, callee) ||
+  !strcmp(__intrinsic_image_atomic_comp_swap, callee)) {
+  brw_fs_surface_visitor(this).visit_image_intrinsic(ir);
+
+   } else if (!strcmp(__intrinsic_memory_barrier, callee)) {
+  brw_fs_surface_visitor(this).visit_barrier_intrinsic(ir);
+
} else {
   assert(!Unsupported intrinsic.);
}
@@ -2320,84 +2296,6 @@ fs_visitor::visit(ir_end_primitive *)
assert(!not reached);
 }
 
-void
-fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
-fs_reg dst, fs_reg offset, fs_reg src0,
-fs_reg src1)
-{
-   const unsigned operand_len = dispatch_width / 8;
-   unsigned mlen = 0;
-
-   /* Initialize the sample mask in the message header. */
-   emit(MOV(brw_uvec_mrf(8, mlen, 0), brw_imm_ud(0)))
-  

[Mesa-dev] [PATCH 03/25] i965/gen7: Implement surface state set-up for shader images.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_context.h   |  10 ++
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 175 ++
 2 files changed, 185 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 3f2edcf..0816912 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -978,6 +978,11 @@ struct brw_context
  struct gl_renderbuffer *rb,
  bool layered,
  unsigned unit);
+  void (*update_image_surface)(struct brw_context *brw,
+   struct gl_image_unit *u,
+   GLenum access,
+   uint32_t *surf_offset,
+   struct brw_image_param *param);
   void (*update_null_renderbuffer_surface)(struct brw_context *brw,
   unsigned unit);
 
@@ -1643,6 +1648,11 @@ void brw_upload_abo_surfaces(struct brw_context *brw,
  struct gl_shader_program *prog,
  struct brw_stage_state *stage_state,
  struct brw_stage_prog_data *prog_data);
+void brw_upload_image_surfaces(struct brw_context *brw,
+   struct gl_shader *shader,
+   struct brw_stage_state *stage_state,
+   struct brw_stage_prog_data *prog_data,
+   struct brw_image_param *params);
 
 /* brw_surface_formats.c */
 bool brw_is_hiz_depth_format(struct brw_context *ctx, gl_format format);
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 3dc0cb7..532a18c 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -564,6 +564,180 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
gen7_check_surface_setup(surf, true /* is_render_target */);
 }
 
+static uint32_t
+get_image_format(struct brw_context *brw, gl_format format)
+{
+   switch (format) {
+   case MESA_FORMAT_RGBA_UINT32:
+   case MESA_FORMAT_RGBA_INT32:
+   case MESA_FORMAT_RGBA_FLOAT32:
+  /* Fail...  We need to fall back to untyped surface access for
+   * all 128 bpp formats.
+   */
+  return BRW_SURFACEFORMAT_RAW;
+
+   case MESA_FORMAT_RGBA_UINT16:
+   case MESA_FORMAT_RGBA_INT16:
+   case MESA_FORMAT_RGBA_FLOAT16:
+   case MESA_FORMAT_RGBA_16:
+   case MESA_FORMAT_SIGNED_RGBA_16:
+   case MESA_FORMAT_RG_UINT32:
+   case MESA_FORMAT_RG_INT32:
+   case MESA_FORMAT_RG_FLOAT32:
+  /* HSW supports the R16G16B16A16_UINT format natively and
+   * handles the pixel packing, unpacking and type conversion in
+   * the shader for other 64 bpp formats.  IVB falls back to
+   * untyped.
+   */
+  return (brw-is_haswell ? BRW_SURFACEFORMAT_R16G16B16A16_UINT :
+  BRW_SURFACEFORMAT_RAW);
+
+   case MESA_FORMAT_RGBA_UINT8:
+   case MESA_FORMAT_RGBA_INT8:
+   case MESA_FORMAT_RGBA_REV:
+   case MESA_FORMAT_SIGNED_RGBA_REV:
+  /* HSW supports the R8G8B8A8_UINT format natively, type
+   * conversion to other formats is handled in the shader.  IVB
+   * uses R32_UINT and handles the pixel packing, unpacking and
+   * type conversion in the shader.
+   */
+  return (brw-is_haswell ? BRW_SURFACEFORMAT_R8G8B8A8_UINT :
+  BRW_SURFACEFORMAT_R32_UINT);
+
+   case MESA_FORMAT_RG_UINT16:
+   case MESA_FORMAT_RG_INT16:
+   case MESA_FORMAT_RG_FLOAT16:
+   case MESA_FORMAT_GR1616:
+   case MESA_FORMAT_SIGNED_GR1616:
+  /* HSW supports the R16G16_UINT format natively, type conversion
+   * to other formats is handled in the shader.  IVB uses R32_UINT
+   * and handles the pixel packing, unpacking and type conversion
+   * in the shader.
+   */
+  return (brw-is_haswell ? BRW_SURFACEFORMAT_R16G16_UINT :
+  BRW_SURFACEFORMAT_R32_UINT);
+
+   case MESA_FORMAT_ABGR2101010_UINT:
+   case MESA_FORMAT_ABGR2101010:
+   case MESA_FORMAT_R11_G11_B10_FLOAT:
+   case MESA_FORMAT_R_UINT32:
+  /* Neither the 2/10/10/10 nor the 11/11/10 packed formats are
+   * supported by the hardware.  Use R32_UINT and handle the pixel
+   * packing, unpacking, and type conversion in the shader.
+   */
+  return BRW_SURFACEFORMAT_R32_UINT;
+
+   case MESA_FORMAT_R_INT32:
+  return BRW_SURFACEFORMAT_R32_SINT;
+
+   case MESA_FORMAT_R_FLOAT32:
+  return BRW_SURFACEFORMAT_R32_FLOAT;
+
+   case MESA_FORMAT_RG_UINT8:
+   case MESA_FORMAT_RG_INT8:
+   case MESA_FORMAT_GR88:
+   case MESA_FORMAT_SIGNED_RG88_REV:
+  /* HSW supports the R8G8_UINT format natively, type conversion
+   * to other formats is handled in the shader.  IVB uses R16_UINT
+   * 

[Mesa-dev] [PATCH 14/25] i965: Update brw_instruction_name() with some recently added opcodes.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_shader.cpp | 20 
 1 file changed, 20 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 26300a6..dc6d35e 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -527,6 +527,26 @@ brw_instruction_name(enum opcode op)
case GS_OPCODE_SET_CHANNEL_MASKS:
   return set_channel_masks;
 
+   case SHADER_OPCODE_SHADER_TIME_ADD:
+  return shader_time_add;
+
+   case SHADER_OPCODE_UNTYPED_ATOMIC:
+  return untyped_atomic;
+   case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+  return untyped_surface_read;
+   case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+  return untyped_surface_write;
+
+   case SHADER_OPCODE_TYPED_ATOMIC:
+  return typed_atomic;
+   case SHADER_OPCODE_TYPED_SURFACE_READ:
+  return typed_surface_read;
+   case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+  return typed_surface_write;
+
+   case SHADER_OPCODE_MEMORY_FENCE:
+  return memory_fence;
+
default:
   /* Yes, this leaks.  It's in debug code, it should never occur, and if
* it does, you should just add the case to the list above.
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/25] i965/vec4: Obtain atomic counter locations by recursing through the visitor.

2013-12-02 Thread Francisco Jerez
Until now atomic counter built-ins were handled in a way that
prevented the visitor from encountering atomic counter IR variables
and dereferences directly.  In the new surface lowering code it's
going to be more convenient to be able to call back into the visitor
to let it handle the ugly details of atomic counter array
dereferences, and it will make sharing the rest of the atomic
intrinsic handling code easier.
---
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 110 +++--
 1 file changed, 65 insertions(+), 45 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 3e8043f..eceac91 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -973,27 +973,35 @@ vec4_visitor::visit(ir_variable *ir)
   break;
 
case ir_var_uniform:
-  reg = new(this-mem_ctx) dst_reg(UNIFORM, this-uniforms);
-
-  /* Thanks to the lower_ubo_reference pass, we will see only
-   * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
-   * variables, so no need for them to be in variable_ht.
-   *
-   * Atomic counters take no uniform storage, no need to do
-   * anything here.
-   */
-  if (ir-is_in_uniform_block() || ir-type-contains_atomic())
+  if (ir-is_in_uniform_block()) {
+ /* Thanks to the lower_ubo_reference pass, we will see only
+  * ir_binop_ubo_load expressions and not
+  * ir_dereference_variable for UBO variables, so no need for
+  * them to be in variable_ht.
+  */
  return;
 
-  /* Track how big the whole uniform variable is, in case we need to put a
-   * copy of its data into pull constants for array access.
-   */
-  this-uniform_size[this-uniforms] = type_size(ir-type);
+  } else if (ir-type-contains_atomic()) {
+ reg = new(this-mem_ctx) dst_reg(ir-atomic.offset);
+
+ brw_mark_surface_used(stage_prog_data,
+   stage_prog_data-binding_table.abo_start +
+   ir-atomic.buffer_index);
 
-  if (!strncmp(ir-name, gl_, 3)) {
-setup_builtin_uniform_values(ir);
   } else {
-setup_uniform_values(ir);
+ reg = new(this-mem_ctx) dst_reg(UNIFORM, this-uniforms);
+
+ /* Track how big the whole uniform variable is, in case we
+  * need to put a copy of its data into pull constants for
+  * array access.
+  */
+ this-uniform_size[this-uniforms] = type_size(ir-type);
+
+ if (!strncmp(ir-name, gl_, 3)) {
+setup_builtin_uniform_values(ir);
+ } else {
+setup_uniform_values(ir);
+ }
   }
   break;
 
@@ -1793,45 +1801,57 @@ vec4_visitor::visit(ir_dereference_array *ir)
ir-array-accept(this);
src = this-result;
 
-   if (constant_index) {
-  src.reg_offset += constant_index-value.i[0] * array_stride;
-   } else {
-  /* Variable index array dereference.  It eats the vec4 of the
-   * base of the array and an index that offsets the Mesa register
-   * index.
-   */
+   if (ir-array-type-contains_atomic()) {
+  src_reg tmp(this, glsl_type::uint_type);
+
   ir-array_index-accept(this);
 
-  src_reg index_reg;
+  emit(MUL(tmp, this-result, ATOMIC_COUNTER_SIZE));
+  emit(ADD(tmp, tmp, src));
+  this-result = tmp;
 
-  if (array_stride == 1) {
-index_reg = this-result;
+   } else {
+  if (constant_index) {
+ src.reg_offset += constant_index-value.i[0] * array_stride;
   } else {
-index_reg = src_reg(this, glsl_type::int_type);
+ /* Variable index array dereference.  It eats the vec4 of the
+  * base of the array and an index that offsets the Mesa register
+  * index.
+  */
+ ir-array_index-accept(this);
 
-emit(MUL(dst_reg(index_reg), this-result, src_reg(array_stride)));
-  }
+ src_reg index_reg;
 
-  if (src.reladdr) {
-src_reg temp = src_reg(this, glsl_type::int_type);
+ if (array_stride == 1) {
+index_reg = this-result;
+ } else {
+index_reg = src_reg(this, glsl_type::int_type);
 
-emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
+emit(MUL(dst_reg(index_reg), this-result, src_reg(array_stride)));
+ }
 
-index_reg = temp;
-  }
+ if (src.reladdr) {
+src_reg temp = src_reg(this, glsl_type::int_type);
 
-  src.reladdr = ralloc(mem_ctx, src_reg);
-  memcpy(src.reladdr, index_reg, sizeof(index_reg));
-   }
+emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
 
-   /* If the type is smaller than a vec4, replicate the last channel out. */
-   if (ir-type-is_scalar() || ir-type-is_vector() || ir-type-is_matrix())
-  src.swizzle = swizzle_for_size(ir-type-vector_elements);
-   else
-  

[Mesa-dev] [PATCH 22/25] i965/gen6+: Factor out PIPE_CONTROL submission from intel_batchbuffer_emit_mi_flush.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/intel_batchbuffer.c | 54 ---
 src/mesa/drivers/dri/i965/intel_batchbuffer.h |  2 +
 2 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 25aacd9..d11de49 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -633,6 +633,30 @@ intel_emit_post_sync_nonzero_flush(struct brw_context *brw)
brw-batch.need_workaround_flush = false;
 }
 
+void
+intel_batchbuffer_emit_pipe_control(struct brw_context *brw,
+unsigned bits)
+{
+   assert(brw-gen = 6);
+
+   if (brw-gen == 6) {
+  /* Hardware workaround: SNB B-Spec says:
+   *
+   * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
+   * Enable =1, a PIPE_CONTROL with any non-zero post-sync-op is
+   * required.
+   */
+  intel_emit_post_sync_nonzero_flush(brw);
+   }
+
+   BEGIN_BATCH(4);
+   OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
+   OUT_BATCH(bits);
+   OUT_BATCH(0); /* write address */
+   OUT_BATCH(0); /* write data */
+   ADVANCE_BATCH();
+}
+
 /* Emit a pipelined flush to either flush render and texture cache for
  * reading from a FBO-drawn texture, or flush so that frontbuffer
  * render appears on the screen in DRI1.
@@ -651,28 +675,14 @@ intel_batchbuffer_emit_mi_flush(struct brw_context *brw)
 OUT_BATCH(0);
 ADVANCE_BATCH();
   } else {
-if (brw-gen == 6) {
-   /* Hardware workaround: SNB B-Spec says:
-*
-* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
-* Flush Enable =1, a PIPE_CONTROL with any non-zero
-* post-sync-op is required.
-*/
-   intel_emit_post_sync_nonzero_flush(brw);
-}
-
-BEGIN_BATCH(4);
-OUT_BATCH(_3DSTATE_PIPE_CONTROL | (4 - 2));
-OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
-  PIPE_CONTROL_WRITE_FLUSH |
-  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
-   PIPE_CONTROL_VF_CACHE_INVALIDATE |
-  PIPE_CONTROL_TC_FLUSH |
-  PIPE_CONTROL_NO_WRITE |
-   PIPE_CONTROL_CS_STALL);
-OUT_BATCH(0); /* write address */
-OUT_BATCH(0); /* write data */
-ADVANCE_BATCH();
+ intel_batchbuffer_emit_pipe_control(
+brw, (PIPE_CONTROL_INSTRUCTION_FLUSH |
+  PIPE_CONTROL_WRITE_FLUSH |
+  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+  PIPE_CONTROL_VF_CACHE_INVALIDATE |
+  PIPE_CONTROL_TC_FLUSH |
+  PIPE_CONTROL_NO_WRITE |
+  PIPE_CONTROL_CS_STALL));
   }
} else {
   BEGIN_BATCH(4);
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.h 
b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
index ac8eb7d..c561149 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.h
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.h
@@ -65,6 +65,8 @@ bool intel_batchbuffer_emit_reloc_fenced(struct brw_context 
*brw,
  uint32_t write_domain,
  uint32_t offset);
 void intel_batchbuffer_emit_mi_flush(struct brw_context *brw);
+void intel_batchbuffer_emit_pipe_control(struct brw_context *brw,
+ unsigned flush_bits);
 void intel_emit_post_sync_nonzero_flush(struct brw_context *brw);
 void intel_emit_depth_stall_flushes(struct brw_context *brw);
 void gen7_emit_vs_workaround_flush(struct brw_context *brw);
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 21/25] i965: Add missing PIPE_CONTROL invalidate flag to intel_reg.h.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/intel_reg.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/mesa/drivers/dri/i965/intel_reg.h 
b/src/mesa/drivers/dri/i965/intel_reg.h
index 13b68cc..23bf0ac 100644
--- a/src/mesa/drivers/dri/i965/intel_reg.h
+++ b/src/mesa/drivers/dri/i965/intel_reg.h
@@ -70,6 +70,7 @@
 #define PIPE_CONTROL_ISP_DIS   (1  9)
 #define PIPE_CONTROL_INTERRUPT_ENABLE  (1  8)
 /* GT */
+#define PIPE_CONTROL_DATA_CACHE_INVALIDATE (1  5)
 #define PIPE_CONTROL_VF_CACHE_INVALIDATE   (1  4)
 #define PIPE_CONTROL_CONST_CACHE_INVALIDATE(1  3)
 #define PIPE_CONTROL_STATE_CACHE_INVALIDATE(1  2)
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 24/25] i965/gen7: Define implementation constants for ARB_shader_image_load_store.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_context.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index 6de6759..ce8c311 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -429,6 +429,15 @@ brw_initialize_context_constants(struct brw_context *brw)
   ctx-Const.VertexProgram.MaxAtomicBuffers = BRW_MAX_ABO;
   ctx-Const.GeometryProgram.MaxAtomicBuffers = BRW_MAX_ABO;
   ctx-Const.MaxCombinedAtomicBuffers = 3 * BRW_MAX_ABO;
+
+  ctx-Const.FragmentProgram.MaxImageUniforms = MAX_UNIFORMS;
+  ctx-Const.VertexProgram.MaxImageUniforms = MAX_UNIFORMS;
+  ctx-Const.FragmentProgram.MaxImageUniforms = MAX_UNIFORMS;
+  ctx-Const.MaxImageUnits = BRW_MAX_IMAGES;
+  ctx-Const.MaxCombinedImageUnitsAndFragmentOutputs =
+ BRW_MAX_IMAGES + BRW_MAX_DRAW_BUFFERS;
+  ctx-Const.MaxImageSamples = 1;
+  ctx-Const.MaxCombinedImageUniforms = 3 * MAX_UNIFORMS;
}
 
/* Gen6 converts quads to polygon in beginning of 3D pipeline,
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 25/25] i965/gen7: Expose ARB_shader_image_load_store.

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index ab27d43..5145034 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -307,6 +307,8 @@ intelInitExtensions(struct gl_context *ctx)
 
ctx-Extensions.ANGLE_texture_compression_dxt = true;
 
-   if (brw-gen = 7)
+   if (brw-gen = 7) {
   ctx-Extensions.ARB_shader_atomic_counters = true;
+  ctx-Extensions.ARB_shader_image_load_store = true;
+   }
 }
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 23/25] i965/gen7: Implement glMemoryBarrier().

2013-12-02 Thread Francisco Jerez
---
 src/mesa/drivers/dri/i965/brw_program.c | 30 ++
 1 file changed, 30 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 094deeb..44af46f 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -41,6 +41,7 @@
 
 #include brw_context.h
 #include brw_wm.h
+#include intel_batchbuffer.h
 
 static unsigned
 get_new_program_id(struct intel_screen *screen)
@@ -188,6 +189,33 @@ brwProgramStringNotify(struct gl_context *ctx,
return true;
 }
 
+static void
+brwMemoryBarrier(struct gl_context *ctx, GLbitfield barriers)
+{
+   struct brw_context *brw = brw_context(ctx);
+   unsigned bits = (PIPE_CONTROL_DATA_CACHE_INVALIDATE |
+PIPE_CONTROL_NO_WRITE |
+PIPE_CONTROL_CS_STALL);
+
+   if (barriers  (GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT |
+   GL_ELEMENT_ARRAY_BARRIER_BIT |
+   GL_COMMAND_BARRIER_BIT))
+  bits |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
+
+   if (barriers  (GL_UNIFORM_BARRIER_BIT |
+   GL_TEXTURE_FETCH_BARRIER_BIT))
+  bits |= PIPE_CONTROL_TC_FLUSH;
+
+   if (barriers  GL_TEXTURE_UPDATE_BARRIER_BIT)
+  bits |= PIPE_CONTROL_WRITE_FLUSH;
+
+   if (barriers  GL_FRAMEBUFFER_BARRIER_BIT)
+  bits |= (PIPE_CONTROL_DEPTH_CACHE_FLUSH |
+   PIPE_CONTROL_WRITE_FLUSH);
+
+   intel_batchbuffer_emit_pipe_control(brw, bits);
+}
+
 void
 brw_add_texrect_params(struct gl_program *prog)
 {
@@ -248,6 +276,8 @@ void brwInitFragProgFuncs( struct dd_function_table 
*functions )
functions-NewShader = brw_new_shader;
functions-NewShaderProgram = brw_new_shader_program;
functions-LinkShader = brw_link_shader;
+
+   functions-MemoryBarrier = brwMemoryBarrier;
 }
 
 void
-- 
1.8.3.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 72230] Unable to extract MesaLib-10.0.0.tar.{gz, bz2} with bsdtar

2013-12-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=72230

--- Comment #1 from Emil Velikov emil.l.veli...@gmail.com ---
Created attachment 90116
  --> https://bugs.freedesktop.org/attachment.cgi?id=90116&action=edit
include only one copy of VERSION in tarball

Seems like adding VERSION to EXTRA_FILES was an overkill.

Can you guys confirm that the patch fixes the problem? It seems to work fine
here.

Note: you'll have to re-generate the tarballs with "make tarballs".

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] automake: include only one copy of VERSION in tarball

2013-12-02 Thread Emil Velikov
The VERSION file is tracked by git (git ls-files), thus
adding it to EXTRA_FILES will result in a duplicate copy
within the final tarball.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72230
Cc: 10.0 mesa-sta...@lists.freedesktop.org
Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
---
 Makefile.am | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Makefile.am b/Makefile.am
index 361fb51..600adad 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -36,7 +36,6 @@ PACKAGE_NAME = MesaLib-$(PACKAGE_VERSION)
 EXTRA_FILES = \
aclocal.m4  \
configure   \
-   VERSION \
bin/ar-lib  \
bin/compile \
bin/config.sub  \
-- 
1.8.4.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 72230] Unable to extract MesaLib-10.0.0.tar.{gz, bz2} with bsdtar

2013-12-02 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=72230

--- Comment #2 from Patrick Steinhardt p...@pks.im ---
I can confirm the attached patch fixes the problem. Thanks.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/23] i965: Define common register base class shared between both back-ends.

2013-12-02 Thread Chris Forbes
+fs_reg::fs_reg(const backend_reg reg)
+{
+   init();
+   *static_cast<backend_reg *>(this) = reg;

This is a bit gross...
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/23] i965: Define common register base class shared between both back-ends.

2013-12-02 Thread Francisco Jerez
Chris Forbes chr...@ijw.co.nz writes:

> +fs_reg::fs_reg(const backend_reg reg)
> +{
> +   init();
> +   *static_cast<backend_reg *>(this) = reg;
>
> This is a bit gross...

Would you prefer 'this->operator=(reg);'?

Meh... :)


pgphocK2Fll37.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/23] i965: Define common register base class shared between both back-ends.

2013-12-02 Thread Chris Forbes
Ideally, we wouldn't need init() and you could just have a
copy-constructor do the right thing, I think -- currently, all the
backend_reg fields get initialized 3 times:

- backend_reg ctor
- init() blasting everything to zero
- this assignment



On Tue, Dec 3, 2013 at 9:22 AM, Francisco Jerez curroje...@riseup.net wrote:
 Chris Forbes chr...@ijw.co.nz writes:

 +fs_reg::fs_reg(const backend_reg reg)
 +{
 +   init();
 +   *static_castbackend_reg *(this) = reg;

 This is a bit gross...

 Would you prefer 'this-operator=(reg);'?

 Meh... :)
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/23] i965: Define common register base class shared between both back-ends.

2013-12-02 Thread Francisco Jerez
Chris Forbes chr...@ijw.co.nz writes:

 Ideally, we wouldn't need init() and you could just have a
 copy-constructor do the right thing, I think -- currently, all the
 backend_reg fields get initialized 3 times:

 - backend_reg ctor
 - init() blasting everything to zero
 - this assignment

Yeah, I completely agree, but this is nothing new.  Changing that
probably belongs to a separate clean-up patch...



 On Tue, Dec 3, 2013 at 9:22 AM, Francisco Jerez curroje...@riseup.net wrote:
 Chris Forbes chr...@ijw.co.nz writes:

 +fs_reg::fs_reg(const backend_reg reg)
 +{
 +   init();
 +   *static_castbackend_reg *(this) = reg;

 This is a bit gross...

 Would you prefer 'this-operator=(reg);'?

 Meh... :)


pgpfPqIxbB0EH.pgp
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH v3] egl: add HAVE_LIBDRM define, fix EGL X11 platform

2013-12-02 Thread Chad Versace

On 12/01/2013 01:53 AM, Tapani Pälli wrote:

Commit a594cec broke EGL X11 backend by adding dependency between
X11 and DRM backends requiring HAVE_EGL_PLATFORM_DRM defined for X11.

This patch fixes the issue by adding additional define for libdrm
detection independent of which backend is being compiled. Tested by
compiling Mesa with '--with-egl-platforms=x11' and running es2gears_x11
+ glbenchmark2.7 successfully.

v2: return true for dri2_auth if running without libdrm (Samuel)
v3: check libdrm when building EGL drm platform + AM_CFLAGS fix (Emil)

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72062
Signed-off-by: Tapani Pälli tapani.pa...@intel.com
Reviewed-by: Emil Velikov emil.l.veli...@gmail.com
Cc: Samuel Thibault samuel.thiba...@ens-lyon.org
Cc: mesa-sta...@lists.freedesktop.org



I tested this patch with --with-egl-platforms=x11, and it works
for me.

But, why return true from dri2_authenticate() when building
without libdrm? I expected dri2_authenticate() to return false
in that case, because the function never authenticates with the
Xserver without drmGetMagic().





---
  configure.ac| 5 +
  src/egl/drivers/dri2/platform_x11.c | 9 +++--
  2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/configure.ac b/configure.ac
index 8c52535..91a5817 100644
--- a/configure.ac
+++ b/configure.ac
@@ -761,6 +761,9 @@ AC_SUBST([MESA_LLVM])
  # Check for libdrm
  PKG_CHECK_MODULES([LIBDRM], [libdrm = $LIBDRM_REQUIRED],
[have_libdrm=yes], [have_libdrm=no])
+if test x$have_libdrm = xyes; then
+   DEFINES=$DEFINES -DHAVE_LIBDRM
+fi

  PKG_CHECK_MODULES([LIBUDEV], [libudev = $LIBUDEV_REQUIRED],
have_libudev=yes, have_libudev=no)
@@ -1439,6 +1442,8 @@ for plat in $egl_platforms; do
drm)
test x$enable_gbm = xno 
AC_MSG_ERROR([EGL platform drm needs gbm])
+   test x$have_libdrm != xyes 
+   AC_MSG_ERROR([EGL platform drm requires libdrm = 
$LIBDRM_REQUIRED])
;;

android|fbdev|gdi|null)
diff --git a/src/egl/drivers/dri2/platform_x11.c 
b/src/egl/drivers/dri2/platform_x11.c
index c56a413..04cb62b 100644
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -33,7 +33,7 @@
  #include fcntl.h
  #include errno.h
  #include unistd.h
-#ifdef HAVE_DRM_PLATFORM
+#ifdef HAVE_LIBDRM
  #include xf86drm.h
  #endif
  #include sys/types.h
@@ -608,7 +608,7 @@ dri2_x11_authenticate(_EGLDisplay *disp, uint32_t id)
  static EGLBoolean
  dri2_authenticate(_EGLDisplay *disp)
  {
-#ifdef HAVE_DRM_PLATFORM
+#ifdef HAVE_LIBDRM
 struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
 drm_magic_t magic;

@@ -621,11 +621,8 @@ dri2_authenticate(_EGLDisplay *disp)
_eglLog(_EGL_WARNING, DRI2: failed to authenticate);
return EGL_FALSE;
 }
-
-   return EGL_TRUE;
-#else
-   return EGL_FALSE;
  #endif
+   return EGL_TRUE;
  }

  static EGLBoolean



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 9/9] i965: Print conditional mod in dump_instruction().

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 6 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f10f5c0..f3b121a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2831,7 +2831,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst)
if (inst-saturate)
   printf(.sat);
if (inst-conditional_mod) {
-  printf(.cmod);
+  printf(%s, conditional_modifier[inst-conditional_mod]);
   if (!inst-predicate 
   (brw-gen  5 || (inst-opcode != BRW_OPCODE_SEL 
   inst-opcode != BRW_OPCODE_IF 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 06ae052..962b4cf 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1109,7 +1109,11 @@ vec4_visitor::dump_instruction(backend_instruction 
*be_inst)
 {
vec4_instruction *inst = (vec4_instruction *)be_inst;
 
-   printf(%s , brw_instruction_name(inst-opcode));
+   printf(%s, brw_instruction_name(inst-opcode));
+   if (inst-conditional_mod) {
+  printf(%s, conditional_modifier[inst-conditional_mod]);
+   }
+   printf( );
 
switch (inst-dst.file) {
case GRF:
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/9] i965: Externalize reg_encoding for use in dump_instruction().

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_context.h | 1 +
 src/mesa/drivers/dri/i965/brw_disasm.c  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 97f8906..0a45f05 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1827,6 +1827,7 @@ struct opcode_desc {
 };
 
 extern const struct opcode_desc opcode_descs[128];
+extern const char * const reg_encoding[8];
 
 void
 brw_emit_depthbuffer(struct brw_context *brw);
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c 
b/src/mesa/drivers/dri/i965/brw_disasm.c
index 128e717..4de8485 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -244,7 +244,7 @@ static const char * const access_mode[2] = {
 [1] = align16,
 };
 
-static const char * const reg_encoding[8] = {
+const char * const reg_encoding[8] = {
 [0] = UD,
 [1] = D,
 [2] = UW,
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/9] i965/vec4: Print negate and absolute value for src args.

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 551f0a2..71f5307 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1166,6 +1166,10 @@ vec4_visitor::dump_instruction(backend_instruction 
*be_inst)
printf(, );
 
for (int i = 0; i  3  inst-src[i].file != BAD_FILE; i++) {
+  if (inst-src[i].negate)
+ printf(-);
+  if (inst-src[i].abs)
+ printf(|);
   switch (inst-src[i].file) {
   case GRF:
  printf(vgrf%d, inst-src[i].reg);
@@ -1242,6 +1246,9 @@ vec4_visitor::dump_instruction(backend_instruction 
*be_inst)
  printf(%s, chans[BRW_GET_SWZ(inst-src[i].swizzle, c)]);
   }
 
+  if (inst-src[i].abs)
+ printf(|);
+
   if (i  2  inst-src[i + 1].file != BAD_FILE)
  printf(, );
}
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/9] i965/fs: Print ARF registers properly in dump_instruction().

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 48 ++--
 1 file changed, 46 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index b59ab82..c21239f 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2858,7 +2858,29 @@ fs_visitor::dump_instruction(backend_instruction 
*be_inst)
   printf(***u%d***, inst-dst.reg);
   break;
case HW_REG:
-  printf(hw_reg%d, inst-dst.fixed_hw_reg.nr);
+  if (inst-dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
+ switch (inst-dst.fixed_hw_reg.nr) {
+ case BRW_ARF_NULL:
+printf(null);
+break;
+ case BRW_ARF_ADDRESS:
+printf(a0.%d, inst-dst.fixed_hw_reg.subnr);
+break;
+ case BRW_ARF_ACCUMULATOR:
+printf(acc%d, inst-dst.fixed_hw_reg.subnr);
+break;
+ case BRW_ARF_FLAG:
+printf(f%d.%d, inst-dst.fixed_hw_reg.nr  0xf,
+ inst-dst.fixed_hw_reg.subnr);
+break;
+ default:
+printf(arf%d.%d, inst-dst.fixed_hw_reg.nr  0xf,
+   inst-dst.fixed_hw_reg.subnr);
+break;
+ }
+  } else {
+ printf(hw_reg%d, inst-dst.fixed_hw_reg.nr);
+  }
   if (inst-dst.fixed_hw_reg.subnr)
  printf(+%d, inst-dst.fixed_hw_reg.subnr);
   break;
@@ -2911,7 +2933,29 @@ fs_visitor::dump_instruction(backend_instruction 
*be_inst)
 printf(-);
  if (inst-src[i].fixed_hw_reg.abs)
 printf(|);
- printf(hw_reg%d, inst-src[i].fixed_hw_reg.nr);
+ if (inst-src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) 
{
+switch (inst-src[i].fixed_hw_reg.nr) {
+case BRW_ARF_NULL:
+   printf(null);
+   break;
+case BRW_ARF_ADDRESS:
+   printf(a0.%d, inst-src[i].fixed_hw_reg.subnr);
+   break;
+case BRW_ARF_ACCUMULATOR:
+   printf(acc%d, inst-src[i].fixed_hw_reg.subnr);
+   break;
+case BRW_ARF_FLAG:
+   printf(f%d.%d, inst-src[i].fixed_hw_reg.nr  0xf,
+inst-src[i].fixed_hw_reg.subnr);
+   break;
+default:
+   printf(arf%d.%d, inst-src[i].fixed_hw_reg.nr  0xf,
+  inst-src[i].fixed_hw_reg.subnr);
+   break;
+}
+ } else {
+printf(hw_reg%d, inst-src[i].fixed_hw_reg.nr);
+ }
  if (inst-src[i].fixed_hw_reg.subnr)
 printf(+%d, inst-src[i].fixed_hw_reg.subnr);
  if (inst-src[i].fixed_hw_reg.abs)
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/9] i965/vec4: Don't print swizzles for immediate values.

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 71f5307..13f69f1 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1240,10 +1240,12 @@ vec4_visitor::dump_instruction(backend_instruction 
*be_inst)
   if (inst-src[i].reg_offset)
  printf(.%d, inst-src[i].reg_offset);
 
-  static const char *chans[4] = {x, y, z, w};
-  printf(.);
-  for (int c = 0; c  4; c++) {
- printf(%s, chans[BRW_GET_SWZ(inst-src[i].swizzle, c)]);
+  if (inst-src[i].file != IMM) {
+ static const char *chans[4] = {x, y, z, w};
+ printf(.);
+ for (int c = 0; c  4; c++) {
+printf(%s, chans[BRW_GET_SWZ(inst-src[i].swizzle, c)]);
+ }
   }
 
   if (inst-src[i].abs)
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/9] i965: Externalize conditional_modifier for use in dump_instruction().

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_context.h | 1 +
 src/mesa/drivers/dri/i965/brw_disasm.c  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 0a45f05..d25e145 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1827,6 +1827,7 @@ struct opcode_desc {
 };
 
 extern const struct opcode_desc opcode_descs[128];
+extern const char * const conditional_modifier[16];
 extern const char * const reg_encoding[8];
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_disasm.c 
b/src/mesa/drivers/dri/i965/brw_disasm.c
index 4de8485..5f6cd40 100644
--- a/src/mesa/drivers/dri/i965/brw_disasm.c
+++ b/src/mesa/drivers/dri/i965/brw_disasm.c
@@ -101,7 +101,7 @@ const struct opcode_desc opcode_descs[128] = {
 };
 static const struct opcode_desc *opcode = opcode_descs;
 
-static const char * const conditional_modifier[16] = {
+const char * const conditional_modifier[16] = {
 [BRW_CONDITIONAL_NONE] = ,
 [BRW_CONDITIONAL_Z] = .e,
 [BRW_CONDITIONAL_NZ] = .ne,
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/9] i965: Print argument types in dump_instruction().

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_fs.cpp   | 6 +-
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 6 +-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c21239f..f10f5c0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2888,7 +2888,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst)
   printf(???);
   break;
}
-   printf(, );
+   printf(:%s, , reg_encoding[inst-dst.type]);
 
for (int i = 0; i  3  inst-src[i].file != BAD_FILE; i++) {
   if (inst-src[i].negate)
@@ -2968,6 +2968,10 @@ fs_visitor::dump_instruction(backend_instruction 
*be_inst)
   if (inst-src[i].abs)
  printf(|);
 
+  if (inst-src[i].file != IMM) {
+ printf(:%s, reg_encoding[inst-src[i].type]);
+  }
+
   if (i  2  inst-src[i + 1].file != BAD_FILE)
  printf(, );
}
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index 13f69f1..06ae052 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1163,7 +1163,7 @@ vec4_visitor::dump_instruction(backend_instruction 
*be_inst)
   if (inst-dst.writemask  8)
  printf(w);
}
-   printf(, );
+   printf(:%s, , reg_encoding[inst-dst.type]);
 
for (int i = 0; i  3  inst-src[i].file != BAD_FILE; i++) {
   if (inst-src[i].negate)
@@ -1251,6 +1251,10 @@ vec4_visitor::dump_instruction(backend_instruction 
*be_inst)
   if (inst-src[i].abs)
  printf(|);
 
+  if (inst-src[i].file != IMM) {
+ printf(:%s, reg_encoding[inst-src[i].type]);
+  }
+
   if (i  2  inst-src[i + 1].file != BAD_FILE)
  printf(, );
}
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/9] i965/vec4: Add support for printing HW_REGs in dump_instruction().

2013-12-02 Thread Matt Turner
---
 src/mesa/drivers/dri/i965/brw_vec4.cpp | 60 ++
 1 file changed, 60 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index fed6e61..551f0a2 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1118,6 +1118,33 @@ vec4_visitor::dump_instruction(backend_instruction 
*be_inst)
case MRF:
   printf(m%d, inst-dst.reg);
   break;
+   case HW_REG:
+  if (inst-dst.fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) {
+ switch (inst-dst.fixed_hw_reg.nr) {
+ case BRW_ARF_NULL:
+printf(null);
+break;
+ case BRW_ARF_ADDRESS:
+printf(a0.%d, inst-dst.fixed_hw_reg.subnr);
+break;
+ case BRW_ARF_ACCUMULATOR:
+printf(acc%d, inst-dst.fixed_hw_reg.subnr);
+break;
+ case BRW_ARF_FLAG:
+printf(f%d.%d, inst-dst.fixed_hw_reg.nr  0xf,
+ inst-dst.fixed_hw_reg.subnr);
+break;
+ default:
+printf(arf%d.%d, inst-dst.fixed_hw_reg.nr  0xf,
+   inst-dst.fixed_hw_reg.subnr);
+break;
+ }
+  } else {
+ printf(hw_reg%d, inst-dst.fixed_hw_reg.nr);
+  }
+  if (inst-dst.fixed_hw_reg.subnr)
+ printf(+%d, inst-dst.fixed_hw_reg.subnr);
+  break;
case BAD_FILE:
   printf((null));
   break;
@@ -1165,6 +1192,39 @@ vec4_visitor::dump_instruction(backend_instruction 
*be_inst)
 break;
  }
  break;
+  case HW_REG:
+ if (inst-src[i].fixed_hw_reg.negate)
+printf(-);
+ if (inst-src[i].fixed_hw_reg.abs)
+printf(|);
+ if (inst-src[i].fixed_hw_reg.file == BRW_ARCHITECTURE_REGISTER_FILE) 
{
+switch (inst-src[i].fixed_hw_reg.nr) {
+case BRW_ARF_NULL:
+   printf(null);
+   break;
+case BRW_ARF_ADDRESS:
+   printf(a0.%d, inst-src[i].fixed_hw_reg.subnr);
+   break;
+case BRW_ARF_ACCUMULATOR:
+   printf(acc%d, inst-src[i].fixed_hw_reg.subnr);
+   break;
+case BRW_ARF_FLAG:
+   printf(f%d.%d, inst-src[i].fixed_hw_reg.nr  0xf,
+inst-src[i].fixed_hw_reg.subnr);
+   break;
+default:
+   printf(arf%d.%d, inst-src[i].fixed_hw_reg.nr  0xf,
+  inst-src[i].fixed_hw_reg.subnr);
+   break;
+}
+ } else {
+printf(hw_reg%d, inst-src[i].fixed_hw_reg.nr);
+ }
+ if (inst-src[i].fixed_hw_reg.subnr)
+printf(+%d, inst-src[i].fixed_hw_reg.subnr);
+ if (inst-src[i].fixed_hw_reg.abs)
+printf(|);
+ break;
   case BAD_FILE:
  printf((null));
  break;
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] automake: include only one copy of VERSION in tarball

2013-12-02 Thread Matt Turner
On Mon, Dec 2, 2013 at 11:53 AM, Emil Velikov emil.l.veli...@gmail.com wrote:
 The VERSION file is tracked by git (git ls-files), thus
 adding it to EXTRA_FILES will result in a duplicate copy
 within the final tarball.

Reviewed-by: Matt Turner matts...@gmail.com

Ugh. :(
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/15] i965/cfg: Rework to make IF ELSE blocks flow into ENDIF.

2013-12-02 Thread Matt Turner
And since this change makes ENDIF start blocks, I'll squash this
change in as well to make sure dead_control_flow_eliminate continues
working:

diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
index ad8ed82..63a3e5b 100644
--- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
+++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp
@@ -45,10 +45,10 @@ dead_control_flow_eliminate(backend_visitor *v)
   bblock_t *block = cfg.blocks[b];
   bool found = false;

-  /* ENDIF instructions, by definition, can only be found at the ends of
+  /* ENDIF instructions, by definition, can only be found at the start of
* basic blocks.
*/
-  backend_instruction *endif_inst = block-end;
+  backend_instruction *endif_inst = block-start;
   if (endif_inst-opcode != BRW_OPCODE_ENDIF)
  continue;
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Emit better code for ir_unop_sign.

2013-12-02 Thread Matt Turner
total instructions in shared programs: 1519751 -> 1519442 (-0.02%)
instructions in affected programs: 10154 -> 9845 (-3.04%)
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp   | 23 +++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 23 ---
 2 files changed, 31 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 9eb9a9d..a0e803b 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -382,18 +382,25 @@ fs_visitor::visit(ir_expression *ir)
   emit(MOV(this-result, op[0]));
   break;
case ir_unop_sign:
-  temp = fs_reg(this, ir-type);
+  if (ir-type-is_float()) {
+ emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
 
-  emit(MOV(this-result, fs_reg(0.0f)));
+ op[0].type = BRW_REGISTER_TYPE_UD;
+ this-result.type = BRW_REGISTER_TYPE_UD;
+ emit(AND(this-result, op[0], fs_reg(0x8000u)));
 
-  emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_G));
-  inst = emit(MOV(this-result, fs_reg(1.0f)));
-  inst-predicate = BRW_PREDICATE_NORMAL;
+ inst = emit(OR(this-result, this-result, fs_reg(0x3f80u)));
+ inst-predicate = BRW_PREDICATE_NORMAL;
 
-  emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_L));
-  inst = emit(MOV(this-result, fs_reg(-1.0f)));
-  inst-predicate = BRW_PREDICATE_NORMAL;
+ this-result.type = BRW_REGISTER_TYPE_F;
+  } else {
+ emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_G));
 
+ emit(ASR(this-result, op[0], fs_reg(31)));
+
+ inst = emit(OR(this-result, this-result, fs_reg(1)));
+ inst-predicate = BRW_PREDICATE_NORMAL;
+  }
   break;
case ir_unop_rcp:
   emit_math(SHADER_OPCODE_RCP, this-result, op[0]);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index a13eafb..986663b 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1259,16 +1259,25 @@ vec4_visitor::visit(ir_expression *ir)
   break;
 
case ir_unop_sign:
-  emit(MOV(result_dst, src_reg(0.0f)));
+  if (ir-type-is_float()) {
+ emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
 
-  emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_G));
-  inst = emit(MOV(result_dst, src_reg(1.0f)));
-  inst-predicate = BRW_PREDICATE_NORMAL;
+ op[0].type = BRW_REGISTER_TYPE_UD;
+ result_dst.type = BRW_REGISTER_TYPE_UD;
+ emit(AND(result_dst, op[0], src_reg(0x8000u)));
 
-  emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_L));
-  inst = emit(MOV(result_dst, src_reg(-1.0f)));
-  inst-predicate = BRW_PREDICATE_NORMAL;
+ inst = emit(OR(result_dst, src_reg(result_dst), 
src_reg(0x3f80u)));
+ inst-predicate = BRW_PREDICATE_NORMAL;
+
+ this-result.type = BRW_REGISTER_TYPE_F;
+  } else {
+ emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
+
+ emit(ASR(result_dst, op[0], src_reg(31)));
 
+ inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1)));
+ inst-predicate = BRW_PREDICATE_NORMAL;
+  }
   break;
 
case ir_unop_rcp:
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >