[Mesa-dev] [PATCH] i965/gen6: Fix HiZ hang in WebGL Google maps

2013-12-20 Thread Chad Versace
We need to emit depth stall flushes before depth and hiz resolves.
Placing them at the top of blorp's state emission fixes the hang.

Fixes HiZ hang in the new WebGL Google maps on Sandybridge Chrome OS.
Tested by zooming in and out continuously for 2 hours.

This patch is based on
https://chromium.googlesource.com/chromiumos/overlays/chromiumos-overlay/+/8bc07bb70163c3706fb4ba5f980e57dc942f56dd

CC: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70740
Signed-off-by: Stéphane Marchesin marc...@chromium.org
Signed-off-by: Chad Versace chad.vers...@linux.intel.com
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 12 
 1 file changed, 12 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 6a5841f..3a0e7ec 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -1012,6 +1012,16 @@ gen6_blorp_emit_primitive(struct brw_context *brw,
ADVANCE_BATCH();
 }
 
+static void
+gen6_emit_hiz_workaround(struct brw_context *brw, enum gen6_hiz_op hiz_op)
+{
+   if (hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE ||
+   hiz_op == GEN6_HIZ_OP_HIZ_RESOLVE) {
+  brw->batch.need_workaround_flush = true;
+  intel_emit_post_sync_nonzero_flush(brw);
+  intel_emit_depth_stall_flushes(brw);
+   }
+}
 
 /**
  * \brief Execute a blit or render pass operation.
@@ -1034,6 +1044,8 @@ gen6_blorp_exec(struct brw_context *brw,
uint32_t wm_bind_bo_offset = 0;
 
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
+
+   gen6_emit_hiz_workaround(brw, params->hiz_op);
gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
gen6_emit_3dstate_sample_mask(brw,
  params->dst.num_samples > 1 ?
-- 
1.8.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/42] i965/blorp: unit test compiling msaa-8 cms to cms

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp| 62 ++
 1 file changed, 62 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp 
b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index 29f815c..5d49ace 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -375,6 +375,67 @@ test_gen7_msaa_8_ums_to_cms(struct brw_context *brw)
return check(brw, key, expected, sizeof(expected) - 1);
 }
 
+static bool
+test_gen7_msaa_8_cms_to_cms(struct brw_context *brw)
+{
+   static const char expected[] =
+  0x: add(16) g441UWg1.42,4,0UW   0x10101010V 
{ align1 WE_normal 1H };\n
+  0x0010: add(16) g461UWg1.52,4,0UW   0x11001100V 
{ align1 WE_normal 1H };\n
+  0x0020: mov(16) g481UDg448,8,1UW
{ align1 WE_normal 1H };\n
+  0x0030: mov(16) g501UDg468,8,1UW
{ align1 WE_normal 1H };\n
+  0x0040: and(1)  g541UDg00,1,0UD 
0x00c0UD{ align1 WE_normal };\n
+  0x0050: shr(1)  g541UDg540,1,0UD
0x0005UD{ align1 WE_normal };\n
+  0x0060: mov(16) g561UW0x3210V 
{ align1 WE_normal 1H };\n
+  0x0070: add(16) g521UDg540,1,0UW
g561,4,0UW{ align1 WE_normal 1H };\n
+  0x0080: add(8)  g531UDg540,1,0UW
g56.21,4,0UW  { align1 WE_normal 1Q };\n
+  0x0090: mov(16) g441F g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x00a0: mov(16) g461F g508,8,1UD
{ align1 WE_normal 1H };\n
+  0x00b0: mul(16) g481F g448,8,1F 
g2.60,1,0F{ align1 WE_normal 1H };\n
+  0x00c0: mul(16) g501F g468,8,1F g30,1,0F  
{ align1 WE_normal 1H };\n
+  0x00d0: add(16) g481F g488,8,1F 
g2.70,1,0F{ align1 WE_normal 1H };\n
+  0x00e0: add(16) g501F g508,8,1F 
g3.10,1,0F{ align1 WE_normal 1H };\n
+  0x00f0: mov(16) g441UDg488,8,1F 
{ align1 WE_normal 1H };\n
+  0x0100: mov(16) g461UDg508,8,1F 
{ align1 WE_normal 1H };\n
+  0x0110: mov(16) g1141UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x0120: mov(16) g1161UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x0130: send(16)g361UWg1148,8,1F\n
+  sampler (0, 0, 29, 2) mlen 4 rlen 8 { 
align1 WE_normal 1H };\n
+  0x0140: mov(16) g1141UD   g528,8,1UD
{ align1 WE_normal 1H };\n
+  0x0150: mov(16) g1161UD   g368,8,1UD
{ align1 WE_normal 1H };\n
+  0x0160: mov(16) g1181UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x0170: mov(16) g1201UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x0180: send(16)g41UW g1148,8,1F\n
+  sampler (0, 0, 30, 2) mlen 8 rlen 8 { 
align1 WE_normal 1H };\n
+  0x0190: mov(16) g1141Fg48,8,1F  
{ align1 WE_normal 1H };\n
+  0x01a0: mov(16) g1161Fg68,8,1F  
{ align1 WE_normal 1H };\n
+  0x01b0: mov(16) g1181Fg88,8,1F  
{ align1 WE_normal 1H };\n
+  0x01c0: mov(16) g1201Fg108,8,1F 
{ align1 WE_normal 1H };\n
+  0x01d0: sendc(16)   nullg1148,8,1F\n
+  render ( RT write, 1, 0, 12) mlen 8 rlen 0  { 
align1 WE_normal 1H EOT };\n;
+   struct brw_blorp_blit_prog_key key;
+
+   key.tex_samples = 8;
+   key.tex_layout = INTEL_MSAA_LAYOUT_CMS;
+   key.src_samples = 8;
+   key.src_layout = INTEL_MSAA_LAYOUT_CMS;
+   key.rt_samples = 8;
+   key.rt_layout = INTEL_MSAA_LAYOUT_CMS;
+   key.dst_samples = 8;
+   key.dst_layout = INTEL_MSAA_LAYOUT_CMS;
+   key.texture_data_type = BRW_REGISTER_TYPE_F;
+   key.src_tiled_w = false;
+   key.dst_tiled_w = false;
+   key.blend = false;
+   key.use_kill = false;
+   key.persample_msaa_dispatch = true;
+   key.blit_scaled = false;
+   key.x_scale = 2.00;
+   key.y_scale = 4.00;
+   key.bilinear_filter = false;
+
+   return check(brw, key, expected, sizeof(expected) - 1);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -386,6 +447,7 @@ main(int argc, char **argv)
 
pass = test_gen7_blend_scaled_msaa_8(&brw) && pass;
pass = test_gen7_msaa_8_ums_to_cms(&brw) && pass;
+   pass = test_gen7_msaa_8_cms_to_cms(&brw) && pass;
 
   

[Mesa-dev] Blorp blit compiler to use FS LIR

2013-12-20 Thread Topi Pohjolainen
This is a rather long series but I suspect it will be eventually
squashed into fewer patches. The motivation for the work is to
prepare for gen8 blorp support.

Currently the compiler for the blorp blit programs is written to
emit EU-level instructions directly. This is not a big deal for
the time being as blorp is supported only from gen6 onwards meaning
that there are not that many hw-specific choices to be made. But
as one moves on to gen8 that will not be the case anymore.
The plan is to lift the core of the blit compiler one level higher,
i.e., to emit FS LIR which can be subsequently fed into a gen-specific
generator.

This series aims to keep the generated EU-stream exactly the same
as before. In order to increase the confidence that this is really
the case I wrote a series of unit tests. I identified some 20 odd
individual code blocks in the compiler that emit instructions for
some special purpose. Then I started running piglit tests and
hand-picked some dozen cases that together exercised each of the code
blocks in question at least once. I dumped the key used to drive the
compiler along with the original assembly in human-readable format.
Then I simply placed each such pair as its own unit test case.
I had to modify the existing assembly dumping infra a bit in order to
have it in designated files instead of just stdout. Some care had to
be taken to take the dump in the correct phase - after jump instruction
patching but before instruction stream compression. I suppose it
could have been done after the compression as well but I thought it
safer to do it just after the generator phase instead.

I also spent some time understanding how the execution size is
controlled in the compiler (also referred to as compression control).
I grouped instructions needing special care into their own
subroutines, which I hope will better explain what is going on when
one switches from direct control to a higher level.

Similar treatment was required for combining comparisons and for
conditional assignments (predicate control).

Message sending itself (texture fetch, render target write) is going
to be different in gen8 but the message contents look to be more or
less the same. Hence I chose to keep the message building in place
and let the generators handle only the issuing of the send
command itself.

Finally about the split in general. There is clearly the possibility
of doing all the logic replacement in the compiler core itself
instead of the split. However, that will need to be done more or less
in one go and I thought that the split makes it clearer, at least for
initial review, what sort of things are involved, making it easier to
discuss the changes.

I'll run tests on SNB also once I get my hands on one.

Topi Pohjolainen (42):
  i965/fs: generate fs programs also without any 8-width instructions
  i965/fs: allow fs-generator use without gl_fragment_program
  i965: dump the disassembly to the given file
  i965/blorp: allow unit tests to compile and dump assembly
  i965/blorp: unit test compiling blend and scaled
  i965/blorp: unit test compiling msaa-8 ums to cms
  i965/blorp: unit test compiling msaa-8 cms to cms
  i965/blorp: unit test compiling msaa-4 ums to cms
  i965/blorp: unit test compiling msaa-8 cms alpha blend
  i965/blorp: unit test compiling unaligned msaa-8
  i965/blorp: unit test compiling simple zero-src sampled
  i965/blorp: unit test compiling bilinear filtered
  i965/blorp: unit test compiling gen6 msaa-8 cms alpha blend
  i965/blorp: unit test compiling simple gen6 zero-src sampled
  i965/blorp: unit test compiling integer typed texture fetches
  i965/blorp: remove dependency to compression control state
  i965/blorp: reduce the scope of the explicit compression control
  i965/blorp: introduce separate eu-emitter for blit compiler
  i965/blorp: move emission of pixel kill into eu-emitter
  i965: rename tex_ms to tex_cms
  i965/fs: introduce non-compressed equivalent of tex_cms
  i965/blorp: move emission of texture lookup into eu-emitter
  i965/blorp: move emission of rt-write into eu-emitter
  i965/blorp: move emission of sample combining into eu-emitter
  i965/blorp: wrap emission of conditional assignment
  i965/blorp: wrap emission of if-equal-assignment
  i965/blorp: wrap LRP
  i965/blorp: wrap MOV (/brw_MOV(func, /emit_mov(/)
  i965/blorp: wrap AND (/brw_AND(func, /emit_and(/)
  i965/blorp: wrap ADD (/brw_ADD(func, /emit_add(/)
  i965/blorp: wrap SHR (/brw_SHR(func, /emit_shr(/)
  i965/blorp: wrap SHL (/brw_SHL(func, /emit_shl(/)
  i965/blorp: wrap OR (/brw_OR(func, /emit_or(/)
  i965/blorp: wrap MUL (/brw_MUL(func, /emit_mul(/)
  i965/blorp: wrap FRC (/brw_FRC(func, /emit_frc(/)
  i965/blorp: wrap RNDD (/brw_RNDD(func, /emit_rndd(/)
  i965/blorp: wrap brw_IF/ELSE/ENDIF() into eu-emitter
  i965/fs: allow unit tests to dump the final patched assembly
  i965/fs: introduce blorp specific rt-write for fs_generator
  i965/fs: add support for BRW_OPCODE_AVG in fs_generator
  i965/eu: 

[Mesa-dev] [PATCH 06/42] i965/blorp: unit test compiling msaa-8 ums to cms

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp| 61 ++
 1 file changed, 61 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp 
b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index d48c3db..29f815c 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -315,6 +315,66 @@ test_gen7_blend_scaled_msaa_8(struct brw_context *brw)
return check(brw, key, expected, sizeof(expected) - 1);
 }
 
+/**
+ * One of the flavours gotten when running piglit test:
+ * ext_framebuffer_multisample-blit-scaled 8
+ */
+static bool
+test_gen7_msaa_8_ums_to_cms(struct brw_context *brw)
+{
+   static const char expected[] =
+  0x: add(16) g441UWg1.42,4,0UW   0x10101010V 
{ align1 WE_normal 1H };\n
+  0x0010: add(16) g461UWg1.52,4,0UW   0x11001100V 
{ align1 WE_normal 1H };\n
+  0x0020: mov(16) g481UDg448,8,1UW
{ align1 WE_normal 1H };\n
+  0x0030: mov(16) g501UDg468,8,1UW
{ align1 WE_normal 1H };\n
+  0x0040: and(1)  g541UDg00,1,0UD 
0x00c0UD{ align1 WE_normal };\n
+  0x0050: shr(1)  g541UDg540,1,0UD
0x0005UD{ align1 WE_normal };\n
+  0x0060: mov(16) g561UW0x3210V 
{ align1 WE_normal 1H };\n
+  0x0070: add(16) g521UDg540,1,0UW
g561,4,0UW{ align1 WE_normal 1H };\n
+  0x0080: add(8)  g531UDg540,1,0UW
g56.21,4,0UW  { align1 WE_normal 1Q };\n
+  0x0090: mov(16) g441F g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x00a0: mov(16) g461F g508,8,1UD
{ align1 WE_normal 1H };\n
+  0x00b0: mul(16) g481F g448,8,1F 
g2.60,1,0F{ align1 WE_normal 1H };\n
+  0x00c0: mul(16) g501F g468,8,1F g30,1,0F  
{ align1 WE_normal 1H };\n
+  0x00d0: add(16) g481F g488,8,1F 
g2.70,1,0F{ align1 WE_normal 1H };\n
+  0x00e0: add(16) g501F g508,8,1F 
g3.10,1,0F{ align1 WE_normal 1H };\n
+  0x00f0: mov(16) g441UDg488,8,1F 
{ align1 WE_normal 1H };\n
+  0x0100: mov(16) g461UDg508,8,1F 
{ align1 WE_normal 1H };\n
+  0x0110: mov(16) g1141UD   g528,8,1UD
{ align1 WE_normal 1H };\n
+  0x0120: mov(16) g1161UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x0130: mov(16) g1181UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x0140: send(16)g41UW g1148,8,1F\n
+  sampler (0, 0, 31, 2) mlen 6 rlen 8 { 
align1 WE_normal 1H };\n
+  0x0150: mov(16) g1141Fg48,8,1F  
{ align1 WE_normal 1H };\n
+  0x0160: mov(16) g1161Fg68,8,1F  
{ align1 WE_normal 1H };\n
+  0x0170: mov(16) g1181Fg88,8,1F  
{ align1 WE_normal 1H };\n
+  0x0180: mov(16) g1201Fg108,8,1F 
{ align1 WE_normal 1H };\n
+  0x0190: sendc(16)   nullg1148,8,1F\n
+  render ( RT write, 1, 0, 12) mlen 8 rlen 0  { 
align1 WE_normal 1H EOT };\n;
+   struct brw_blorp_blit_prog_key key;
+
+   key.tex_samples = 8;
+   key.tex_layout = INTEL_MSAA_LAYOUT_UMS;
+   key.src_samples = 8;
+   key.src_layout = INTEL_MSAA_LAYOUT_UMS;
+   key.rt_samples = 8;
+   key.rt_layout = INTEL_MSAA_LAYOUT_CMS;
+   key.dst_samples = 8;
+   key.dst_layout = INTEL_MSAA_LAYOUT_CMS;
+   key.texture_data_type = BRW_REGISTER_TYPE_F;
+   key.src_tiled_w = false;
+   key.dst_tiled_w = false;
+   key.blend = false;
+   key.use_kill = false;
+   key.persample_msaa_dispatch = true;
+   key.blit_scaled = false;
+   key.x_scale = 2.00;
+   key.y_scale = 4.00;
+   key.bilinear_filter = false;
+
+   return check(brw, key, expected, sizeof(expected) - 1);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -325,6 +385,7 @@ main(int argc, char **argv)
brw.gen = 7;
 
pass = test_gen7_blend_scaled_msaa_8(&brw) && pass;
+   pass = test_gen7_msaa_8_ums_to_cms(&brw) && pass;
 
/* Test suite expects zero for success */
return !pass;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/42] i965/blorp: unit test compiling simple gen6 zero-src sampled

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp| 51 ++
 1 file changed, 51 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp 
b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index ec590ed..1545148 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -929,6 +929,56 @@ test_gen6_alpha_blend(struct brw_context *brw)
return check(brw, key, expected, sizeof(expected) - 1);
 }
 
+static bool
+test_gen6_simple_src_samples_zero(struct brw_context *brw)
+{
+   static const char expected[] =
+  0x: add(16) g441UWg1.42,4,0UW   0x10101010V 
{ align1 WE_normal 1H };\n
+  0x0010: add(16) g461UWg1.52,4,0UW   0x11001100V 
{ align1 WE_normal 1H };\n
+  0x0020: mov(16) g481UDg448,8,1UW
{ align1 WE_normal 1H };\n
+  0x0030: mov(16) g501UDg468,8,1UW
{ align1 WE_normal 1H };\n
+  0x0040: mov(16) g441F g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x0050: mov(16) g461F g508,8,1UD
{ align1 WE_normal 1H };\n
+  0x0060: mul(16) g481F g448,8,1F 
g2.60,1,0F{ align1 WE_normal 1H };\n
+  0x0070: mul(16) g501F g468,8,1F g30,1,0F  
{ align1 WE_normal 1H };\n
+  0x0080: add(16) g481F g488,8,1F 
g2.70,1,0F{ align1 WE_normal 1H };\n
+  0x0090: add(16) g501F g508,8,1F 
g3.10,1,0F{ align1 WE_normal 1H };\n
+  0x00a0: mov(16) g441UDg488,8,1F 
{ align1 WE_normal 1H };\n
+  0x00b0: mov(16) g461UDg508,8,1F 
{ align1 WE_normal 1H };\n
+  0x00c0: mov(16) m21UD g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x00d0: mov(16) m41UD g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x00e0: send(16)g41UW m28,8,1F\n
+  sampler (0, 0, 7, 2) mlen 4 rlen 8  { 
align1 WE_normal 1H };\n
+  0x00f0: mov(16) m21F  g48,8,1F  
{ align1 WE_normal 1H };\n
+  0x0100: mov(16) m41F  g68,8,1F  
{ align1 WE_normal 1H };\n
+  0x0110: mov(16) m61F  g88,8,1F  
{ align1 WE_normal 1H };\n
+  0x0120: mov(16) m81F  g108,8,1F 
{ align1 WE_normal 1H };\n
+  0x0130: sendc(16)   nullm28,8,1F\n
+  render ( RT write, 1, 0, 12, 0) mlen 8 rlen 0   { 
align1 WE_normal 1H EOT };\n;
+   struct brw_blorp_blit_prog_key key;
+
+   key.tex_samples = 0;
+   key.tex_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.src_samples = 0;
+   key.src_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.rt_samples = 0;
+   key.rt_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.dst_samples = 0;
+   key.dst_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.texture_data_type = BRW_REGISTER_TYPE_F;
+   key.src_tiled_w = false;
+   key.dst_tiled_w = false;
+   key.blend = false;
+   key.use_kill = false;
+   key.persample_msaa_dispatch = false;
+   key.blit_scaled = false;
+   key.x_scale = 2.00;
+   key.y_scale = 0.00;
+   key.bilinear_filter = false;
+
+   return check(brw, key, expected, sizeof(expected) - 1);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -949,6 +999,7 @@ main(int argc, char **argv)
 
brw.gen = 6;
pass = test_gen6_alpha_blend(&brw) && pass;
+   pass = test_gen6_simple_src_samples_zero(&brw) && pass;
 
/* Test suite expects zero for success */
return !pass;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/42] i965/blorp: unit test compiling gen6 msaa-8 cms alpha blend

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp| 57 ++
 1 file changed, 57 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp 
b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index fb9a60a..ec590ed 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -875,6 +875,60 @@ test_gen7_bilinear(struct brw_context *brw)
return check(brw, key, expected, sizeof(expected) - 1);
 }
 
+static bool
+test_gen6_alpha_blend(struct brw_context *brw)
+{
+   static const char expected[] =
+  0x: add(16) g441UWg1.42,4,0UW   0x10101010V 
{ align1 WE_normal 1H };\n
+  0x0010: add(16) g461UWg1.52,4,0UW   0x11001100V 
{ align1 WE_normal 1H };\n
+  0x0020: mov(16) g481UDg448,8,1UW
{ align1 WE_normal 1H };\n
+  0x0030: mov(16) g501UDg468,8,1UW
{ align1 WE_normal 1H };\n
+  0x0040: mov(16) g441F g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x0050: mov(16) g461F g508,8,1UD
{ align1 WE_normal 1H };\n
+  0x0060: mul(16) g481F g448,8,1F 
g2.60,1,0F{ align1 WE_normal 1H };\n
+  0x0070: mul(16) g501F g468,8,1F g30,1,0F  
{ align1 WE_normal 1H };\n
+  0x0080: add(16) g481F g488,8,1F 
g2.70,1,0F{ align1 WE_normal 1H };\n
+  0x0090: add(16) g501F g508,8,1F 
g3.10,1,0F{ align1 WE_normal 1H };\n
+  0x00a0: mov(16) g441UDg488,8,1F 
{ align1 WE_normal 1H };\n
+  0x00b0: mov(16) g461UDg508,8,1F 
{ align1 WE_normal 1H };\n
+  0x00c0: shl(16) g541UDg448,8,1UD1W  
{ align1 WE_normal 1H };\n
+  0x00d0: shl(16) g561UDg468,8,1UD1W  
{ align1 WE_normal 1H };\n
+  0x00e0: add(16) g481UDg548,8,1UD1W  
{ align1 WE_normal 1H };\n
+  0x00f0: add(16) g501UDg568,8,1UD1W  
{ align1 WE_normal 1H };\n
+  0x0100: mov(16) m21F  g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x0110: mov(16) m41F  g508,8,1UD
{ align1 WE_normal 1H };\n
+  0x0120: send(16)g41UW m28,8,1F\n
+  sampler (0, 0, 0, 2) mlen 4 rlen 8  { 
align1 WE_normal 1H };\n
+  0x0130: mov(16) m21F  g48,8,1F  
{ align1 WE_normal 1H };\n
+  0x0140: mov(16) m41F  g68,8,1F  
{ align1 WE_normal 1H };\n
+  0x0150: mov(16) m61F  g88,8,1F  
{ align1 WE_normal 1H };\n
+  0x0160: mov(16) m81F  g108,8,1F 
{ align1 WE_normal 1H };\n
+  0x0170: sendc(16)   nullm28,8,1F\n
+  render ( RT write, 1, 0, 12, 0) mlen 8 rlen 0   { 
align1 WE_normal 1H EOT };\n;
+   struct brw_blorp_blit_prog_key key;
+
+   key.tex_samples = 8;
+   key.tex_layout = INTEL_MSAA_LAYOUT_UMS;
+   key.src_samples = 8;
+   key.src_layout = INTEL_MSAA_LAYOUT_UMS;
+   key.rt_samples = 0;
+   key.rt_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.dst_samples = 0;
+   key.dst_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.texture_data_type = BRW_REGISTER_TYPE_F;
+   key.src_tiled_w = false;
+   key.dst_tiled_w = false;
+   key.blend = true;
+   key.use_kill = false;
+   key.persample_msaa_dispatch = false;
+   key.blit_scaled = false;
+   key.x_scale = 2.00;
+   key.y_scale = 4.00;
+   key.bilinear_filter = false;
+
+   return check(brw, key, expected, sizeof(expected) - 1);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -893,6 +947,9 @@ main(int argc, char **argv)
pass = test_gen7_simple_src_samples_zero(&brw) && pass;
pass = test_gen7_bilinear(&brw) && pass;
 
+   brw.gen = 6;
+   pass = test_gen6_alpha_blend(&brw) && pass;
+
/* Test suite expects zero for success */
return !pass;
 }
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 09/42] i965/blorp: unit test compiling msaa-8 cms alpha blend

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp| 145 +
 1 file changed, 145 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp 
b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index fa6b53d..9692ce4 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -494,6 +494,150 @@ test_gen7_msaa_4_ums_to_cms(struct brw_context *brw)
return check(brw, key, expected, sizeof(expected) - 1);
 }
 
+/**
+ * Gotten when running piglit test:
+ * ext_framebuffer_multisample-alpha-blending
+ */
+static bool
+test_gen7_alpha_blend(struct brw_context *brw)
+{
+   static const char expected[] =
+  0x: add(16) g441UWg1.42,4,0UW   0x10101010V 
{ align1 WE_normal 1H };\n
+  0x0010: add(16) g461UWg1.52,4,0UW   0x11001100V 
{ align1 WE_normal 1H };\n
+  0x0020: mov(16) g481UDg448,8,1UW
{ align1 WE_normal 1H };\n
+  0x0030: mov(16) g501UDg468,8,1UW
{ align1 WE_normal 1H };\n
+  0x0040: mov(16) g441F g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x0050: mov(16) g461F g508,8,1UD
{ align1 WE_normal 1H };\n
+  0x0060: mul(16) g481F g448,8,1F 
g2.60,1,0F{ align1 WE_normal 1H };\n
+  0x0070: mul(16) g501F g468,8,1F g30,1,0F  
{ align1 WE_normal 1H };\n
+  0x0080: add(16) g481F g488,8,1F 
g2.70,1,0F{ align1 WE_normal 1H };\n
+  0x0090: add(16) g501F g508,8,1F 
g3.10,1,0F{ align1 WE_normal 1H };\n
+  0x00a0: mov(16) g441UDg488,8,1F 
{ align1 WE_normal 1H };\n
+  0x00b0: mov(16) g461UDg508,8,1F 
{ align1 WE_normal 1H };\n
+  0x00c0: mov(16) g1141UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x00d0: mov(16) g1161UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x00e0: send(16)g361UWg1148,8,1F\n
+  sampler (0, 0, 29, 2) mlen 4 rlen 8 { 
align1 WE_normal 1H };\n
+  0x00f0: mov(16) g1141UD   0xUD
{ align1 WE_normal 1H };\n
+  0x0100: mov(16) g1161UD   g368,8,1UD
{ align1 WE_normal 1H };\n
+  0x0110: mov(16) g1181UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x0120: mov(16) g1201UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x0130: send(16)g41UW g1148,8,1F\n
+  sampler (0, 0, 30, 2) mlen 8 rlen 8 { 
align1 WE_normal 1H };\n
+  0x0140: cmp.ne.f0(16)   nullg368,8,1UD
0xUD{ align1 WE_normal 1H switch };\n
+  0x0150: (+f0) if(16) 150 150null
0x00960096UD{ align1 WE_normal 1H switch };\n
+  0x0160: mov(16) g521UD0x0001UD
{ align1 WE_normal 1H };\n
+  0x0170: mov(16) g1141UD   g528,8,1UD
{ align1 WE_normal 1H };\n
+  0x0180: mov(16) g1161UD   g368,8,1UD
{ align1 WE_normal 1H };\n
+  0x0190: mov(16) g1181UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x01a0: mov(16) g1201UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x01b0: send(16)g121UWg1148,8,1F\n
+  sampler (0, 0, 30, 2) mlen 8 rlen 8 { 
align1 WE_normal 1H };\n
+  0x01c0: add(16) g41F  g48,8,1F  g128,8,1F 
{ align1 WE_normal 1H };\n
+  0x01d0: add(16) g61F  g68,8,1F  g148,8,1F 
{ align1 WE_normal 1H };\n
+  0x01e0: add(16) g81F  g88,8,1F  g168,8,1F 
{ align1 WE_normal 1H };\n
+  0x01f0: add(16) g101F g108,8,1F g188,8,1F 
{ align1 WE_normal 1H };\n
+  0x0200: mov(16) g521UD0x0002UD
{ align1 WE_normal 1H };\n
+  0x0210: mov(16) g1141UD   g528,8,1UD
{ align1 WE_normal 1H };\n
+  0x0220: mov(16) g1161UD   g368,8,1UD
{ align1 WE_normal 1H };\n
+  0x0230: mov(16) g1181UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x0240: mov(16) g1201UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x0250: send(16)g121UWg1148,8,1F\n
+  

[Mesa-dev] [PATCH 20/42] i965: rename tex_ms to tex_cms

2013-12-20 Thread Topi Pohjolainen
Prepares for the introduction of non-compressed multi-sampled
lookup used in the blorp programs.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_defines.h  | 2 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +-
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp   | 4 ++--
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 4 ++--
 src/mesa/drivers/dri/i965/brw_shader.cpp | 6 +++---
 src/mesa/drivers/dri/i965/brw_vec4.cpp   | 2 +-
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 ++--
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   | 2 +-
 8 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 2121013..ecfcf72 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -769,7 +769,7 @@ enum opcode {
SHADER_OPCODE_TXL,
SHADER_OPCODE_TXS,
FS_OPCODE_TXB,
-   SHADER_OPCODE_TXF_MS,
+   SHADER_OPCODE_TXF_CMS,
SHADER_OPCODE_TXF_MCS,
SHADER_OPCODE_LOD,
SHADER_OPCODE_TG4,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index baf9220..b18a0a4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -766,7 +766,7 @@ fs_visitor::implied_mrf_writes(fs_inst *inst)
case FS_OPCODE_TXB:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
-   case SHADER_OPCODE_TXF_MS:
+   case SHADER_OPCODE_TXF_CMS:
case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 2bbf687..82d7255 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -425,7 +425,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
   case SHADER_OPCODE_TXF:
 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
 break;
-  case SHADER_OPCODE_TXF_MS:
+  case SHADER_OPCODE_TXF_CMS:
  if (brw->gen >= 7)
 msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
  else
@@ -1657,7 +1657,7 @@ fs_generator::generate_code(exec_list *instructions)
   case FS_OPCODE_TXB:
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
-  case SHADER_OPCODE_TXF_MS:
+  case SHADER_OPCODE_TXF_CMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8e516a5..be1b583 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1214,7 +1214,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, 
fs_reg coordinate,
   /* sample index */
   emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), 
sample_index));
   mlen += reg_width;
-  inst = emit(SHADER_OPCODE_TXF_MS, dst);
+  inst = emit(SHADER_OPCODE_TXF_CMS, dst);
   break;
case ir_lod:
   inst = emit(SHADER_OPCODE_LOD, dst);
@@ -1404,7 +1404,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, 
fs_reg coordinate,
case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break;
case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break;
case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break;
-   case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst, payload); break;
+   case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_CMS, dst, payload); break;
case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 6f0ace8..4eac3aa 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -447,8 +447,8 @@ brw_instruction_name(enum opcode op)
   return txs;
case FS_OPCODE_TXB:
   return txb;
-   case SHADER_OPCODE_TXF_MS:
-  return "txf_ms";
+   case SHADER_OPCODE_TXF_CMS:
+  return "txf_cms";
case SHADER_OPCODE_TXF_MCS:
   return txf_mcs;
case SHADER_OPCODE_TG4:
@@ -545,7 +545,7 @@ backend_instruction::is_tex()
opcode == FS_OPCODE_TXB ||
opcode == SHADER_OPCODE_TXD ||
opcode == SHADER_OPCODE_TXF ||
-   opcode == SHADER_OPCODE_TXF_MS ||
+   opcode == SHADER_OPCODE_TXF_CMS ||
opcode == SHADER_OPCODE_TXF_MCS ||
opcode == SHADER_OPCODE_TXL ||
opcode == SHADER_OPCODE_TXS ||
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp 
b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index fb57707..5eda0d7 100644
--- 

[Mesa-dev] [PATCH 12/42] i965/blorp: unit test compiling bilinear filtered

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp| 49 ++
 1 file changed, 49 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp 
b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index e87ad60..fb9a60a 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -827,6 +827,54 @@ test_gen7_simple_src_samples_zero(struct brw_context *brw)
return check(brw, key, expected, sizeof(expected) - 1);
 }
 
+static bool
+test_gen7_bilinear(struct brw_context *brw)
+{
+   static const char expected[] =
+  0x: add(16) g441UWg1.42,4,0UW   0x10101010V 
{ align1 WE_normal 1H };\n
+  0x0010: add(16) g461UWg1.52,4,0UW   0x11001100V 
{ align1 WE_normal 1H };\n
+  0x0020: mov(16) g481UDg448,8,1UW
{ align1 WE_normal 1H };\n
+  0x0030: mov(16) g501UDg468,8,1UW
{ align1 WE_normal 1H };\n
+  0x0040: mov(16) g441F g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x0050: mov(16) g461F g508,8,1UD
{ align1 WE_normal 1H };\n
+  0x0060: mul(16) g481F g448,8,1F 
g2.60,1,0F{ align1 WE_normal 1H };\n
+  0x0070: mul(16) g501F g468,8,1F g30,1,0F  
{ align1 WE_normal 1H };\n
+  0x0080: add(16) g481F g488,8,1F 
g2.70,1,0F{ align1 WE_normal 1H };\n
+  0x0090: add(16) g501F g508,8,1F 
g3.10,1,0F{ align1 WE_normal 1H };\n
+  0x00a0: mov(16) g1141Fg488,8,1F 
{ align1 WE_normal 1H };\n
+  0x00b0: mov(16) g1161Fg508,8,1F 
{ align1 WE_normal 1H };\n
+  0x00c0: send(16)g41UW g1148,8,1F\n
+  sampler (0, 0, 0, 2) mlen 4 rlen 8  { 
align1 WE_normal 1H };\n
+  0x00d0: mov(16) g1141Fg48,8,1F  
{ align1 WE_normal 1H };\n
+  0x00e0: mov(16) g1161Fg68,8,1F  
{ align1 WE_normal 1H };\n
+  0x00f0: mov(16) g1181Fg88,8,1F  
{ align1 WE_normal 1H };\n
+  0x0100: mov(16) g1201Fg108,8,1F 
{ align1 WE_normal 1H };\n
+  0x0110: sendc(16)   nullg1148,8,1F\n
+  render ( RT write, 1, 0, 12) mlen 8 rlen 0  { 
align1 WE_normal 1H EOT };\n;
+   struct brw_blorp_blit_prog_key key;
+
+   key.tex_samples = 0;
+   key.tex_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.src_samples = 0;
+   key.src_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.rt_samples = 0;
+   key.rt_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.dst_samples = 0;
+   key.dst_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.texture_data_type = BRW_REGISTER_TYPE_F;
+   key.src_tiled_w = false;
+   key.dst_tiled_w = false;
+   key.blend = false;
+   key.use_kill = false;
+   key.persample_msaa_dispatch = false;
+   key.blit_scaled = true;
+   key.x_scale = 2.00;
+   key.y_scale = 0.00;
+   key.bilinear_filter = true;
+
+   return check(brw, key, expected, sizeof(expected) - 1);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -843,6 +891,7 @@ main(int argc, char **argv)
pass = test_gen7_alpha_blend(brw)  pass;
pass = test_gen7_unaligned_8_msaa(brw)  pass;
pass = test_gen7_simple_src_samples_zero(brw)  pass;
+   pass = test_gen7_bilinear(brw)  pass;
 
/* Test suite expects zero for success */
return !pass;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/42] i965/fs: allow fs-generator use without gl_fragment_program

2013-12-20 Thread Topi Pohjolainen
Prepares the generator to accept hand-crafted blorp programs.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 2381fb0..2bbf687 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -116,7 +116,7 @@ fs_generator::generate_fb_write(fs_inst *inst)
brw_set_mask_control(p, BRW_MASK_DISABLE);
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
 
-   if (fp-UsesKill || c-key.alpha_test_func) {
+   if ((fp  fp-UsesKill) || c-key.alpha_test_func) {
   struct brw_reg pixel_mask;
 
   if (brw-gen = 6)
@@ -1304,9 +1304,12 @@ fs_generator::generate_code(exec_list *instructions)
   if (shader) {
  printf(Native code for fragment shader %d (%d-wide dispatch):\n,
 prog-Name, dispatch_width);
-  } else {
+  } else if (fp) {
  printf(Native code for fragment program %d (%d-wide dispatch):\n,
 fp-Base.Id, dispatch_width);
+  } else {
+ printf(Native code for blorp program (%d-wide dispatch):\n,
+dispatch_width);
   }
}
 
@@ -1344,7 +1347,7 @@ fs_generator::generate_code(exec_list *instructions)
else {
   const prog_instruction *fpi;
   fpi = (const prog_instruction *)inst-ir;
-  printf(%d: , (int)(fpi - fp-Base.Instructions));
+  printf(%d: , (int)(fpi - (fp ? fp-Base.Instructions : 
0)));
   _mesa_fprint_instruction_opt(stdout,
fpi,
0, PROG_PRINT_DEBUG, NULL);
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/42] i965/blorp: unit test compiling integer typed texture fetches

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp| 86 ++
 1 file changed, 86 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp 
b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index 1545148..8b0cac2 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -979,6 +979,91 @@ test_gen6_simple_src_samples_zero(struct brw_context *brw)
return check(brw, key, expected, sizeof(expected) - 1);
 }
 
+/**
+ * Gotten by running piglit test:
+ *   ext_framebuffer_multisample-int-draw-buffers-alpha-to-one 4 -auto
+ */
+static bool
+test_gen7_multisample_int(struct brw_context *brw)
+{
+   static const char expected[] =
+  0x: add(16) g441UWg1.42,4,0UW   0x10101010V 
{ align1 WE_normal 1H };\n
+  0x0010: add(16) g461UWg1.52,4,0UW   0x11001100V 
{ align1 WE_normal 1H };\n
+  0x0020: mov(16) g481UDg448,8,1UW
{ align1 WE_normal 1H };\n
+  0x0030: mov(16) g501UDg468,8,1UW
{ align1 WE_normal 1H };\n
+  0x0040: mov(16) g441F g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x0050: mov(16) g461F g508,8,1UD
{ align1 WE_normal 1H };\n
+  0x0060: mul(16) g481F g448,8,1F 
g2.60,1,0F{ align1 WE_normal 1H };\n
+  0x0070: mul(16) g501F g468,8,1F g30,1,0F  
{ align1 WE_normal 1H };\n
+  0x0080: add(16) g481F g488,8,1F 
g2.70,1,0F{ align1 WE_normal 1H };\n
+  0x0090: add(16) g501F g508,8,1F 
g3.10,1,0F{ align1 WE_normal 1H };\n
+  0x00a0: mov(16) g441UDg488,8,1F 
{ align1 WE_normal 1H };\n
+  0x00b0: mov(16) g461UDg508,8,1F 
{ align1 WE_normal 1H };\n
+  0x00c0: mov(16) g1141UD   0xUD
{ align1 WE_normal 1H };\n
+  0x00d0: mov(16) g1161UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x00e0: mov(16) g1181UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x00f0: send(16)g41UW g1148,8,1F\n
+  sampler (0, 0, 31, 2) mlen 6 rlen 8 { 
align1 WE_normal 1H };\n
+  0x0100: mov(16) g521UD0x0001UD
{ align1 WE_normal 1H };\n
+  0x0110: mov(16) g1141UD   g528,8,1UD
{ align1 WE_normal 1H };\n
+  0x0120: mov(16) g1161UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x0130: mov(16) g1181UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x0140: send(16)g121UWg1148,8,1F\n
+  sampler (0, 0, 31, 2) mlen 6 rlen 8 { 
align1 WE_normal 1H };\n
+  0x0150: avg(16) g41D  g48,8,1D  g128,8,1D 
{ align1 WE_normal 1H };\n
+  0x0160: avg(16) g61D  g68,8,1D  g148,8,1D 
{ align1 WE_normal 1H };\n
+  0x0170: avg(16) g81D  g88,8,1D  g168,8,1D 
{ align1 WE_normal 1H };\n
+  0x0180: avg(16) g101D g108,8,1D g188,8,1D 
{ align1 WE_normal 1H };\n
+  0x0190: mov(16) g521UD0x0002UD
{ align1 WE_normal 1H };\n
+  0x01a0: mov(16) g1141UD   g528,8,1UD
{ align1 WE_normal 1H };\n
+  0x01b0: mov(16) g1161UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x01c0: mov(16) g1181UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x01d0: send(16)g121UWg1148,8,1F\n
+  sampler (0, 0, 31, 2) mlen 6 rlen 8 { 
align1 WE_normal 1H };\n
+  0x01e0: mov(16) g521UD0x0003UD
{ align1 WE_normal 1H };\n
+  0x01f0: mov(16) g1141UD   g528,8,1UD
{ align1 WE_normal 1H };\n
+  0x0200: mov(16) g1161UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x0210: mov(16) g1181UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x0220: send(16)g201UWg1148,8,1F\n
+  sampler (0, 0, 31, 2) mlen 6 rlen 8 { 
align1 WE_normal 1H };\n
+  0x0230: avg(16) g121D g128,8,1D g208,8,1D 
{ align1 WE_normal 1H };\n
+  0x0240: avg(16) g141D g148,8,1D g228,8,1D 
{ align1 WE_normal 1H };\n
+  0x0250: 

[Mesa-dev] [PATCH 03/42] i965: dump the disassembly to the given file

2013-12-20 Thread Topi Pohjolainen
instead of ignoring the argument and always dumping to
standard output.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_eu.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_eu.c 
b/src/mesa/drivers/dri/i965/brw_eu.c
index 8c7235a..9d0c856 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.c
+++ b/src/mesa/drivers/dri/i965/brw_eu.c
@@ -236,14 +236,14 @@ brw_dump_compile(struct brw_compile *p, FILE *out, int 
start, int end)
for (int offset = start; offset  end;) {
   struct brw_instruction *insn = store + offset;
   struct brw_instruction uncompacted;
-  printf(0x%08x: , offset);
+  fprintf(out, 0x%08x: , offset);
 
   if (insn-header.cmpt_control) {
 struct brw_compact_instruction *compacted = (void *)insn;
 if (dump_hex) {
-   printf(0x%08x 0x%08x   ,
-  ((uint32_t *)insn)[1],
-  ((uint32_t *)insn)[0]);
+   fprintf(out, 0x%08x 0x%08x   ,
+   ((uint32_t *)insn)[1],
+   ((uint32_t *)insn)[0]);
 }
 
 brw_uncompact_instruction(brw, uncompacted, compacted);
@@ -251,15 +251,15 @@ brw_dump_compile(struct brw_compile *p, FILE *out, int 
start, int end)
 offset += 8;
   } else {
 if (dump_hex) {
-   printf(0x%08x 0x%08x 0x%08x 0x%08x ,
-  ((uint32_t *)insn)[3],
-  ((uint32_t *)insn)[2],
-  ((uint32_t *)insn)[1],
-  ((uint32_t *)insn)[0]);
+   fprintf(out, 0x%08x 0x%08x 0x%08x 0x%08x ,
+   ((uint32_t *)insn)[3],
+   ((uint32_t *)insn)[2],
+   ((uint32_t *)insn)[1],
+   ((uint32_t *)insn)[0]);
 }
 offset += 16;
   }
 
-  brw_disasm(stdout, insn, p-brw-gen);
+  brw_disasm(out, insn, p-brw-gen);
}
 }
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/42] i965/blorp: unit test compiling msaa-4 ums to cms

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp| 59 ++
 1 file changed, 59 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp 
b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index 5d49ace..fa6b53d 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -436,6 +436,64 @@ test_gen7_msaa_8_cms_to_cms(struct brw_context *brw)
return check(brw, key, expected, sizeof(expected) - 1);
 }
 
+/**
+ *  * One of the flavours gotten when running piglit test:
+ *   * ext_framebuffer_multisample-blit-scaled 4
+ **/
+static bool
+test_gen7_msaa_4_ums_to_cms(struct brw_context *brw)
+{
+   static const char expected[] =
+  0x: add(16) g441UWg1.42,4,0UW   0x10101010V 
{ align1 WE_normal 1H };\n
+  0x0010: add(16) g461UWg1.52,4,0UW   0x11001100V 
{ align1 WE_normal 1H };\n
+  0x0020: mov(16) g481UDg448,8,1UW
{ align1 WE_normal 1H };\n
+  0x0030: mov(16) g501UDg468,8,1UW
{ align1 WE_normal 1H };\n
+  0x0040: mov(16) g541UW0x3210V 
{ align1 WE_normal 1H };\n
+  0x0050: mov(8)  g521UDg541,4,0UW
{ align1 WE_normal 1Q };\n
+  0x0060: mov(8)  g531UDg54.21,4,0UW  
{ align1 WE_normal 1Q };\n
+  0x0070: mov(16) g441F g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x0080: mov(16) g461F g508,8,1UD
{ align1 WE_normal 1H };\n
+  0x0090: mul(16) g481F g448,8,1F 
g2.60,1,0F{ align1 WE_normal 1H };\n
+  0x00a0: mul(16) g501F g468,8,1F g30,1,0F  
{ align1 WE_normal 1H };\n
+  0x00b0: add(16) g481F g488,8,1F 
g2.70,1,0F{ align1 WE_normal 1H };\n
+  0x00c0: add(16) g501F g508,8,1F 
g3.10,1,0F{ align1 WE_normal 1H };\n
+  0x00d0: mov(16) g441UDg488,8,1F 
{ align1 WE_normal 1H };\n
+  0x00e0: mov(16) g461UDg508,8,1F 
{ align1 WE_normal 1H };\n
+  0x00f0: mov(16) g1141UD   g528,8,1UD
{ align1 WE_normal 1H };\n
+  0x0100: mov(16) g1161UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x0110: mov(16) g1181UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x0120: send(16)g41UW g1148,8,1F\n
+  sampler (0, 0, 31, 2) mlen 6 rlen 8 { 
align1 WE_normal 1H };\n
+  0x0130: mov(16) g1141Fg48,8,1F  
{ align1 WE_normal 1H };\n
+  0x0140: mov(16) g1161Fg68,8,1F  
{ align1 WE_normal 1H };\n
+  0x0150: mov(16) g1181Fg88,8,1F  
{ align1 WE_normal 1H };\n
+  0x0160: mov(16) g1201Fg108,8,1F 
{ align1 WE_normal 1H };\n
+  0x0170: sendc(16)   nullg1148,8,1F\n
+  render ( RT write, 1, 0, 12) mlen 8 rlen 0  { 
align1 WE_normal 1H EOT };\n;
+   struct brw_blorp_blit_prog_key key;
+
+   key.tex_samples = 4;
+   key.tex_layout = INTEL_MSAA_LAYOUT_UMS;
+   key.src_samples = 4;
+   key.src_layout = INTEL_MSAA_LAYOUT_UMS;
+   key.rt_samples = 4;
+   key.rt_layout = INTEL_MSAA_LAYOUT_CMS;
+   key.dst_samples = 4;
+   key.dst_layout = INTEL_MSAA_LAYOUT_CMS;
+   key.texture_data_type = BRW_REGISTER_TYPE_F;
+   key.src_tiled_w = false;
+   key.dst_tiled_w = false;
+   key.blend = false;
+   key.use_kill = false;
+   key.persample_msaa_dispatch = true;
+   key.blit_scaled = false;
+   key.x_scale = 2.00;
+   key.y_scale = 2.00;
+   key.bilinear_filter = false;
+
+   return check(brw, key, expected, sizeof(expected) - 1);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -448,6 +506,7 @@ main(int argc, char **argv)
pass = test_gen7_blend_scaled_msaa_8(brw)  pass;
pass = test_gen7_msaa_8_ums_to_cms(brw)  pass;
pass = test_gen7_msaa_8_cms_to_cms(brw)  pass;
+   pass = test_gen7_msaa_4_ums_to_cms(brw)  pass;
 
/* Test suite expects zero for success */
return !pass;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 41/42] i965/eu: introduce blorp specific flavour of lrp

2013-12-20 Thread Topi Pohjolainen
This is rather ugly, but as I couldn't think of anything better
for now and wanted to get the rest of the series under review,
I left it as it is.
Even though the immediately surrounding code uses tabs, this piece is
written space-indented.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_fs.h | 2 ++
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 6 --
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 2137aee..cb802e3 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -514,6 +514,8 @@ public:
  bool dump_enabled = false,
  FILE *dump_file = stdout);
 
+   bool no_2nd_half_ctrl;
+
 private:
void generate_code(exec_list *instructions, bool dump_enabled,
   FILE *dump_file);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 16c08b0..36121ca 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -42,7 +42,8 @@ fs_generator::fs_generator(struct brw_context *brw,
struct gl_fragment_program *fp,
bool dual_source_output)
 
-   : brw(brw), c(c), prog(prog), fp(fp), dual_source_output(dual_source_output)
+   : no_2nd_half_ctrl(false), brw(brw), c(c), prog(prog), fp(fp),
+ dual_source_output(dual_source_output)
 {
ctx = brw-ctx;
 
@@ -1452,7 +1453,8 @@ fs_generator::generate_code(exec_list *instructions, bool 
dump_enabled,
  if (dispatch_width == 16  !brw-is_haswell) {
brw_set_compression_control(p, BRW_COMPRESSION_NONE);
brw_LRP(p, dst, src[0], src[1], src[2]);
-   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
+if (!no_2nd_half_ctrl)
+   brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);
brw_LRP(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), 
sechalf(src[2]));
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 } else {
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 11/42] i965/blorp: unit test compiling simple zero-src sampled

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp| 56 ++
 1 file changed, 56 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp 
b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index 1d57640..e87ad60 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -772,6 +772,61 @@ test_gen7_unaligned_8_msaa(struct brw_context *brw)
return check(brw, key, expected, sizeof(expected) - 1);
 }
 
+/**
+ * One of the most common flavours gotten running piglit tests:
+ * ext_framebuffer_multisample-*
+ */
+static bool
+test_gen7_simple_src_samples_zero(struct brw_context *brw)
+{
+   static const char expected[] =
+  0x: add(16) g441UWg1.42,4,0UW   0x10101010V 
{ align1 WE_normal 1H };\n
+  0x0010: add(16) g461UWg1.52,4,0UW   0x11001100V 
{ align1 WE_normal 1H };\n
+  0x0020: mov(16) g481UDg448,8,1UW
{ align1 WE_normal 1H };\n
+  0x0030: mov(16) g501UDg468,8,1UW
{ align1 WE_normal 1H };\n
+  0x0040: mov(16) g441F g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x0050: mov(16) g461F g508,8,1UD
{ align1 WE_normal 1H };\n
+  0x0060: mul(16) g481F g448,8,1F 
g2.60,1,0F{ align1 WE_normal 1H };\n
+  0x0070: mul(16) g501F g468,8,1F g30,1,0F  
{ align1 WE_normal 1H };\n
+  0x0080: add(16) g481F g488,8,1F 
g2.70,1,0F{ align1 WE_normal 1H };\n
+  0x0090: add(16) g501F g508,8,1F 
g3.10,1,0F{ align1 WE_normal 1H };\n
+  0x00a0: mov(16) g441UDg488,8,1F 
{ align1 WE_normal 1H };\n
+  0x00b0: mov(16) g461UDg508,8,1F 
{ align1 WE_normal 1H };\n
+  0x00c0: mov(16) g1141UD   g448,8,1UD
{ align1 WE_normal 1H };\n
+  0x00d0: mov(16) g1161UD   0xUD
{ align1 WE_normal 1H };\n
+  0x00e0: mov(16) g1181UD   g468,8,1UD
{ align1 WE_normal 1H };\n
+  0x00f0: send(16)g41UW g1148,8,1F\n
+  sampler (0, 0, 7, 2) mlen 6 rlen 8  { 
align1 WE_normal 1H };\n
+  0x0100: mov(16) g1141Fg48,8,1F  
{ align1 WE_normal 1H };\n
+  0x0110: mov(16) g1161Fg68,8,1F  
{ align1 WE_normal 1H };\n
+  0x0120: mov(16) g1181Fg88,8,1F  
{ align1 WE_normal 1H };\n
+  0x0130: mov(16) g1201Fg108,8,1F 
{ align1 WE_normal 1H };\n
+  0x0140: sendc(16)   nullg1148,8,1F\n
+  render ( RT write, 1, 0, 12) mlen 8 rlen 0  { 
align1 WE_normal 1H EOT };\n;
+   struct brw_blorp_blit_prog_key key;
+
+   key.tex_samples = 0;
+   key.tex_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.src_samples = 0;
+   key.src_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.rt_samples = 0;
+   key.rt_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.dst_samples = 0;
+   key.dst_layout = INTEL_MSAA_LAYOUT_NONE;
+   key.texture_data_type = BRW_REGISTER_TYPE_F;
+   key.src_tiled_w = false;
+   key.dst_tiled_w = false;
+   key.blend = false;
+   key.use_kill = false;
+   key.persample_msaa_dispatch = false;
+   key.blit_scaled = false;
+   key.x_scale = 2.00;
+   key.y_scale = 0.00;
+   key.bilinear_filter = false;
+
+   return check(brw, key, expected, sizeof(expected) - 1);
+}
+
 int
 main(int argc, char **argv)
 {
@@ -787,6 +842,7 @@ main(int argc, char **argv)
pass = test_gen7_msaa_4_ums_to_cms(brw)  pass;
pass = test_gen7_alpha_blend(brw)  pass;
pass = test_gen7_unaligned_8_msaa(brw)  pass;
+   pass = test_gen7_simple_src_samples_zero(brw)  pass;
 
/* Test suite expects zero for success */
return !pass;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 19/42] i965/blorp: move emission of pixel kill into eu-emitter

2013-12-20 Thread Topi Pohjolainen
The combination of four separate comparison operations and
the masked AND requires special treatment when moving
to FS LIR.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 26 +++--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 26 +
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   |  7 +++
 3 files changed, 36 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 836d5a6..7f18d0b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -640,7 +640,6 @@ private:
void translate_tiling(bool old_tiled_w, bool new_tiled_w);
void encode_msaa(unsigned num_samples, intel_msaa_layout layout);
void decode_msaa(unsigned num_samples, intel_msaa_layout layout);
-   void kill_if_outside_dst_rect();
void translate_dst_to_src();
void clamp_tex_coords(struct brw_reg regX, struct brw_reg regY,
  struct brw_reg clampX0, struct brw_reg clampY0,
@@ -828,7 +827,9 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
 */
 
if (key-use_kill)
-  kill_if_outside_dst_rect();
+  emit_kill_if_outside_rect(x_coords[xy_coord_index],
+y_coords[xy_coord_index],
+dst_x0, dst_x1, dst_y0, dst_y1);
 
/* Next, apply a translation to obtain coordinates in the source image. */
translate_dst_to_src();
@@ -1370,27 +1371,6 @@ brw_blorp_blit_program::decode_msaa(unsigned num_samples,
 }
 
 /**
- * Emit code that kills pixels whose X and Y coordinates are outside the
- * boundary of the rectangle defined by the push constants (dst_x0, dst_y0,
- * dst_x1, dst_y1).
- */
-void
-brw_blorp_blit_program::kill_if_outside_dst_rect()
-{
-   struct brw_reg f0 = brw_flag_reg(0, 0);
-   struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
-   struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
-
-   brw_CMP(func, null32, BRW_CONDITIONAL_GE, X, dst_x0);
-   brw_CMP(func, null32, BRW_CONDITIONAL_GE, Y, dst_y0);
-   brw_CMP(func, null32, BRW_CONDITIONAL_L, X, dst_x1);
-   brw_CMP(func, null32, BRW_CONDITIONAL_L, Y, dst_y1);
-
-   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-   brw_AND(func, g1, f0, g1)-header.mask_control = BRW_MASK_DISABLE;
-}
-
-/**
  * Emit code to translate from destination (X, Y) coordinates to source (X, Y)
  * coordinates.
  */
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index 8e24f80..8012d4b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -61,3 +61,29 @@ brw_blorp_eu_emitter::get_program(unsigned *program_size, 
FILE *dump_file)
 
return brw_get_program(func, program_size);
 }
+
+/**
+ * Emit code that kills pixels whose X and Y coordinates are outside the
+ * boundary of the rectangle defined by the push constants (dst_x0, dst_y0,
+ * dst_x1, dst_y1).
+ */
+void
+brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct brw_reg x,
+const struct brw_reg y,
+const struct brw_reg dst_x0,
+const struct brw_reg dst_x1,
+const struct brw_reg dst_y0,
+const struct brw_reg dst_y1)
+{
+   struct brw_reg f0 = brw_flag_reg(0, 0);
+   struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
+   struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
+
+   brw_CMP(func, null32, BRW_CONDITIONAL_GE, x, dst_x0);
+   brw_CMP(func, null32, BRW_CONDITIONAL_GE, y, dst_y0);
+   brw_CMP(func, null32, BRW_CONDITIONAL_L, x, dst_x1);
+   brw_CMP(func, null32, BRW_CONDITIONAL_L, y, dst_y1);
+
+   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
+   brw_AND(func, g1, f0, g1)-header.mask_control = BRW_MASK_DISABLE;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 1bcb0d9..3f74e0e 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -35,6 +35,13 @@ protected:
 
const unsigned *get_program(unsigned *program_size, FILE *dump_file);
 
+   void emit_kill_if_outside_rect(const struct brw_reg x,
+  const struct brw_reg y,
+  const struct brw_reg dst_x0,
+  const struct brw_reg dst_x1,
+  const struct brw_reg dst_y0,
+  const struct brw_reg dst_y1);
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1


[Mesa-dev] [PATCH 27/42] i965/blorp: wrap LRP

2013-12-20 Thread Topi Pohjolainen
Moving the split emission of the two halves into a single emission
call prepares for fs_generator support, which already does a similar
thing. No regressions seen on IVB (unit tests and piglit quick).

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 26 ++
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 13 +
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 1b7310b..b95104e 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1679,29 +1679,23 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
}
 
 #define SAMPLE(x, y) offset(texture_data[x], y)
-   brw_set_access_mode(func, BRW_ALIGN_16);
-   brw_set_compression_control(func, BRW_COMPRESSION_NONE);
for (int index = 3; index  0; ) {
   /* Since we're doing SIMD16, 4 color channels fits in to 8 registers.
* Counter value of 8 in 'for' loop below is used to interpolate all
* the color components.
*/
-  for (int k = 0; k  8; ++k)
- brw_LRP(func,
- vec8(SAMPLE(index - 1, k)),
- offset(x_frac, k  1),
- SAMPLE(index, k),
- SAMPLE(index - 1, k));
+  for (int k = 0; k  8; k += 2)
+ emit_lrp(vec8(SAMPLE(index - 1, k)),
+  offset(x_frac, k  1),
+  SAMPLE(index, k),
+  SAMPLE(index - 1, k));
   index -= 2;
}
-   for (int k = 0; k  8; ++k)
-  brw_LRP(func,
-  vec8(SAMPLE(0, k)),
-  offset(y_frac, k  1),
-  vec8(SAMPLE(2, k)),
-  vec8(SAMPLE(0, k)));
-   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
-   brw_set_access_mode(func, BRW_ALIGN_1);
+   for (int k = 0; k  8; k += 2)
+  emit_lrp(vec8(SAMPLE(0, k)),
+   offset(y_frac, k  1),
+   vec8(SAMPLE(2, k)),
+   vec8(SAMPLE(0, k)));
 #undef SAMPLE
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 1a023fc..4a7a772 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -74,6 +74,19 @@ protected:
   emit_cond_mov(x, brw_imm_d(y), BRW_CONDITIONAL_EQ, dst, brw_imm_d(src));
}
 
+   inline void emit_lrp(const struct brw_reg dst,
+const struct brw_reg src1,
+const struct brw_reg src2,
+const struct brw_reg src3)
+   {
+  brw_set_access_mode(func, BRW_ALIGN_16);
+  brw_set_compression_control(func, BRW_COMPRESSION_NONE);
+  brw_LRP(func, dst, src1, src2, src3);
+  brw_LRP(func, sechalf(dst), sechalf(src1), sechalf(src2), 
sechalf(src3));
+  brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
+  brw_set_access_mode(func, BRW_ALIGN_1);
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 36/42] i965/blorp: wrap RNDD (/brw_RNDD(func, /emit_rndd(/)

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 4 ++--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 6 ++
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 83d2e56..37fc7ac 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1412,8 +1412,8 @@ brw_blorp_blit_program::translate_dst_to_src()
   emit_frc(y_frac, Y_f);
 
   /* Round the float coordinates down to nearest integer */
-  brw_RNDD(func, Xp_f, X_f);
-  brw_RNDD(func, Yp_f, Y_f);
+  emit_rndd(Xp_f, X_f);
+  emit_rndd(Yp_f, Y_f);
   emit_mul(X_f, Xp_f, brw_imm_f(1 / key-x_scale));
   emit_mul(Y_f, Yp_f, brw_imm_f(1 / key-y_scale));
   SWAP_XY_AND_XPYP();
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index c9606b1..1ecf076 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -156,6 +156,12 @@ protected:
   brw_FRC(func, dst, src);
}
 
+   inline void emit_rndd(const struct brw_reg dst,
+ const struct brw_reg src)
+   {
+  brw_RNDD(func, dst, src);
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 22/42] i965/blorp: move emission of texture lookup into eu-emitter

2013-12-20 Thread Topi Pohjolainen
Resolving the hardware message type is also moved into the
emitter, in preparation for switching to use fs_generator.
The generator wants to translate the high level op-code into
the message type, and hence the emitter needs to know the
original op-code.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 34 +++
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 43 +
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   |  5 +++
 3 files changed, 60 insertions(+), 22 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 7f18d0b..5cf949a 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -650,7 +650,7 @@ private:
void sample(struct brw_reg dst);
void texel_fetch(struct brw_reg dst);
void mcs_fetch();
-   void texture_lookup(struct brw_reg dst, GLuint msg_type,
+   void texture_lookup(struct brw_reg dst, enum opcode op,
const sampler_message_arg *args, int num_args);
void render_target_write();
 
@@ -1751,8 +1751,7 @@ brw_blorp_blit_program::sample(struct brw_reg dst)
   SAMPLER_MESSAGE_ARG_V_FLOAT
};
 
-   texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE, args,
-  ARRAY_SIZE(args));
+   texture_lookup(dst, SHADER_OPCODE_TEX, args, ARRAY_SIZE(args));
 }
 
 /**
@@ -1788,8 +1787,7 @@ brw_blorp_blit_program::texel_fetch(struct brw_reg dst)
 
switch (brw-gen) {
case 6:
-  texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, gen6_args,
- s_is_zero ? 2 : 5);
+  texture_lookup(dst, SHADER_OPCODE_TXF, gen6_args, s_is_zero ? 2 : 5);
   break;
case 7:
   switch (key-tex_layout) {
@@ -1805,16 +1803,16 @@ brw_blorp_blit_program::texel_fetch(struct brw_reg dst)
   * INTEL_MSAA_LAYOUT_CMS.
   */
   case INTEL_MSAA_LAYOUT_CMS:
- texture_lookup(dst, GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS,
+ texture_lookup(dst, SHADER_OPCODE_TXF_CMS,
 gen7_ld2dms_args, ARRAY_SIZE(gen7_ld2dms_args));
  break;
   case INTEL_MSAA_LAYOUT_UMS:
- texture_lookup(dst, GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS,
+ texture_lookup(dst, SHADER_OPCODE_TXF_UMS,
 gen7_ld2dss_args, ARRAY_SIZE(gen7_ld2dss_args));
  break;
   case INTEL_MSAA_LAYOUT_NONE:
  assert(s_is_zero);
- texture_lookup(dst, GEN5_SAMPLER_MESSAGE_SAMPLE_LD, gen7_ld_args,
+ texture_lookup(dst, SHADER_OPCODE_TXF, gen7_ld_args,
 ARRAY_SIZE(gen7_ld_args));
  break;
   }
@@ -1832,13 +1830,13 @@ brw_blorp_blit_program::mcs_fetch()
   SAMPLER_MESSAGE_ARG_U_INT,
   SAMPLER_MESSAGE_ARG_V_INT
};
-   texture_lookup(vec16(mcs_data), GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS,
+   texture_lookup(vec16(mcs_data), SHADER_OPCODE_TXF_MCS,
   gen7_ld_mcs_args, ARRAY_SIZE(gen7_ld_mcs_args));
 }
 
 void
 brw_blorp_blit_program::texture_lookup(struct brw_reg dst,
-   GLuint msg_type,
+   enum opcode op,
const sampler_message_arg *args,
int num_args)
 {
@@ -1902,18 +1900,10 @@ brw_blorp_blit_program::texture_lookup(struct brw_reg 
dst,
   mrf.nr += 2;
}
 
-   brw_SAMPLE(func,
-  retype(dst, BRW_REGISTER_TYPE_UW) /* dest */,
-  base_mrf /* msg_reg_nr */,
-  brw_message_reg(base_mrf) /* src0 */,
-  BRW_BLORP_TEXTURE_BINDING_TABLE_INDEX,
-  0 /* sampler */,
-  msg_type,
-  8 /* response_length.  TODO: should be smaller for non-RGBA 
formats? */,
-  mrf.nr - base_mrf /* msg_length */,
-  0 /* header_present */,
-  BRW_SAMPLER_SIMD_MODE_SIMD16,
-  BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
+   emit_texture_lookup(retype(dst, BRW_REGISTER_TYPE_UW) /* dest */,
+   op,
+   base_mrf,
+   mrf.nr - base_mrf /* msg_length */);
 }
 
 #undef X
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index 8012d4b..91455d6 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -23,6 +23,7 @@
 
 #include glsl/ralloc.h
 #include brw_blorp_blit_eu.h
+#include brw_blorp.h
 
 brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw)
: mem_ctx(ralloc_context(NULL))
@@ -87,3 +88,45 @@ brw_blorp_eu_emitter::emit_kill_if_outside_rect(const struct 
brw_reg x,
brw_set_predicate_control(func, BRW_PREDICATE_NONE);
brw_AND(func, g1, f0, g1)-header.mask_control = BRW_MASK_DISABLE;
 }
+
+void
+brw_blorp_eu_emitter::emit_texture_lookup(const 

[Mesa-dev] [PATCH 21/42] i965/fs: introduce non-compressed equivalent of tex_cms

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_defines.h| 1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +
 src/mesa/drivers/dri/i965/brw_shader.cpp   | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index ecfcf72..5ee4165 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -770,6 +770,7 @@ enum opcode {
SHADER_OPCODE_TXS,
FS_OPCODE_TXB,
SHADER_OPCODE_TXF_CMS,
+   SHADER_OPCODE_TXF_UMS,
SHADER_OPCODE_TXF_MCS,
SHADER_OPCODE_LOD,
SHADER_OPCODE_TG4,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 82d7255..982fcd0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -431,6 +431,10 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg 
dst, struct brw_reg src
  else
 msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
  break;
+  case SHADER_OPCODE_TXF_UMS:
+ assert(brw-gen = 7);
+ msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
+ break;
   case SHADER_OPCODE_TXF_MCS:
  assert(brw-gen = 7);
  msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
@@ -1658,6 +1662,7 @@ fs_generator::generate_code(exec_list *instructions)
   case SHADER_OPCODE_TXD:
   case SHADER_OPCODE_TXF:
   case SHADER_OPCODE_TXF_CMS:
+  case SHADER_OPCODE_TXF_UMS:
   case SHADER_OPCODE_TXF_MCS:
   case SHADER_OPCODE_TXL:
   case SHADER_OPCODE_TXS:
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 4eac3aa..c0683a8 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -449,6 +449,8 @@ brw_instruction_name(enum opcode op)
   return "txb";
case SHADER_OPCODE_TXF_CMS:
   return "txf_cms";
+   case SHADER_OPCODE_TXF_UMS:
+  return "txf_ums";
case SHADER_OPCODE_TXF_MCS:
   return "txf_mcs";
case SHADER_OPCODE_TG4:
@@ -546,6 +548,7 @@ backend_instruction::is_tex()
opcode == SHADER_OPCODE_TXD ||
opcode == SHADER_OPCODE_TXF ||
opcode == SHADER_OPCODE_TXF_CMS ||
+   opcode == SHADER_OPCODE_TXF_UMS ||
opcode == SHADER_OPCODE_TXF_MCS ||
opcode == SHADER_OPCODE_TXL ||
opcode == SHADER_OPCODE_TXS ||
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/42] i965/blorp: unit test compiling unaligned msaa-8

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp| 135 +
 1 file changed, 135 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp 
b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
index 9692ce4..1d57640 100644
--- a/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
+++ b/src/mesa/drivers/dri/i965/test_blorp_blit_eu_gen.cpp
@@ -638,6 +638,140 @@ test_gen7_alpha_blend(struct brw_context *brw)
return check(brw, key, expected, sizeof(expected) - 1);
 }
 
+/**
+ * Gotten when running piglit test:
+ * ext_framebuffer_multisample-unaligned-blit 8 stencil msaa
+ */
+static bool
+test_gen7_unaligned_8_msaa(struct brw_context *brw)
+{
+   static const char expected[] =
+  0x: add(16) g441UWg1.42,4,0UW   0x10101010V 
{ align1 WE_normal 1H };\n
+  0x0010: add(16) g461UWg1.52,4,0UW   0x11001100V 
{ align1 WE_normal 1H };\n
+  0x0020: mov(16) g481UDg448,8,1UW
{ align1 WE_normal 1H };\n
+  0x0030: mov(16) g501UDg468,8,1UW
{ align1 WE_normal 1H };\n
+  0x0040: and(16) g541UDg488,8,1UD0xfff4UW
{ align1 WE_normal 1H };\n
+  0x0050: shr(16) g541UDg548,8,1UD0x0001UW
{ align1 WE_normal 1H };\n
+  0x0060: and(16) g561UDg508,8,1UD0x0001UW
{ align1 WE_normal 1H };\n
+  0x0070: shl(16) g561UDg568,8,1UD0x0002UW
{ align1 WE_normal 1H };\n
+  0x0080: or(16)  g541UDg548,8,1UD
g568,8,1UD{ align1 WE_normal 1H };\n
+  0x0090: and(16) g561UDg488,8,1UD0x0001UW
{ align1 WE_normal 1H };\n
+  0x00a0: or(16)  g441UDg548,8,1UD
g568,8,1UD{ align1 WE_normal 1H };\n
+  0x00b0: and(16) g541UDg508,8,1UD0xfffeUW
{ align1 WE_normal 1H };\n
+  0x00c0: shl(16) g541UDg548,8,1UD0x0001UW
{ align1 WE_normal 1H };\n
+  0x00d0: and(16) g561UDg488,8,1UD0x0008UW
{ align1 WE_normal 1H };\n
+  0x00e0: shr(16) g561UDg568,8,1UD0x0002UW
{ align1 WE_normal 1H };\n
+  0x00f0: or(16)  g541UDg548,8,1UD
g568,8,1UD{ align1 WE_normal 1H };\n
+  0x0100: and(16) g561UDg488,8,1UD0x0002UW
{ align1 WE_normal 1H };\n
+  0x0110: shr(16) g561UDg568,8,1UD0x0001UW
{ align1 WE_normal 1H };\n
+  0x0120: or(16)  g461UDg548,8,1UD
g568,8,1UD{ align1 WE_normal 1H };\n
+  0x0130: and(16) g541UDg448,8,1UD0xfff8UW
{ align1 WE_normal 1H };\n
+  0x0140: shr(16) g541UDg548,8,1UD0x0002UW
{ align1 WE_normal 1H };\n
+  0x0150: and(16) g561UDg448,8,1UD0x0001UW
{ align1 WE_normal 1H };\n
+  0x0160: or(16)  g481UDg548,8,1UD
g568,8,1UD{ align1 WE_normal 1H };\n
+  0x0170: and(16) g541UDg468,8,1UD0xfffcUW
{ align1 WE_normal 1H };\n
+  0x0180: shr(16) g541UDg548,8,1UD0x0001UW
{ align1 WE_normal 1H };\n
+  0x0190: and(16) g561UDg468,8,1UD0x0001UW
{ align1 WE_normal 1H };\n
+  0x01a0: or(16)  g501UDg548,8,1UD
g568,8,1UD{ align1 WE_normal 1H };\n
+  0x01b0: and(16) g541UDg448,8,1UD0x0004UW
{ align1 WE_normal 1H };\n
+  0x01c0: and(16) g561UDg468,8,1UD0x0002UW
{ align1 WE_normal 1H };\n
+  0x01d0: or(16)  g541UDg548,8,1UD
g568,8,1UD{ align1 WE_normal 1H };\n
+  0x01e0: and(16) g561UDg448,8,1UD0x0002UW
{ align1 WE_normal 1H };\n
+  0x01f0: shr(16) g561UDg568,8,1UD0x0001UW
{ align1 WE_normal 1H };\n
+  0x0200: or(16)  g521UDg548,8,1UD
g568,8,1UD{ align1 WE_normal 1H };\n
+  0x0210: cmp.ge.f0(16)   nullg488,8,1UDg20,1,0UD 
{ align1 WE_normal 1H switch };\n
+  0x0220: (+f0) cmp.ge.f0(16) nullg508,8,1UD
g2.20,1,0UD   { align1 WE_normal 1H switch };\n
+  0x0230: (+f0) cmp.l.f0(16) null g488,8,1UD
g2.10,1,0UD   { align1 WE_normal 1H switch };\n
+  0x0240: (+f0) cmp.l.f0(16) null g508,8,1UD
g2.30,1,0UD   { align1 WE_normal 1H switch };\n
+  0x0250: and(1)  g1.141UW  f00,1,0UW 
g1.140,1,0UW  { align1 WE_all };\n
+  0x0260: mov(16) g441F g488,8,1UD
{ align1 WE_normal 1H };\n
+  0x0270: 

[Mesa-dev] [PATCH 32/42] i965/blorp: wrap SHL (/brw_SHL(&func, /emit_shl(/)

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 24 
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  7 +++
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index d721ee9..cbdb313 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1161,12 +1161,12 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   emit_and(t1, X, brw_imm_uw(0xfff4)); /* X & ~0b1011 */
   emit_shr(t1, t1, brw_imm_uw(1)); /* (X & ~0b1011) >> 1 */
   emit_and(t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
-  brw_SHL(&func, t2, t2, brw_imm_uw(2)); /* (Y & 0b1) << 2 */
+  emit_shl(t2, t2, brw_imm_uw(2)); /* (Y & 0b1) << 2 */
   brw_OR(&func, t1, t1, t2); /* (X & ~0b1011) >> 1 | (Y & 0b1) << 2 */
   emit_and(t2, X, brw_imm_uw(1)); /* X & 0b1 */
   brw_OR(&func, Xp, t1, t2);
   emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
-  brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
+  emit_shl(t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
   emit_and(t2, X, brw_imm_uw(8)); /* X & 0b1000 */
   emit_shr(t2, t2, brw_imm_uw(2)); /* (X & 0b1000) >> 2 */
   brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (X & 0b1000) >> 2 */
@@ -1182,12 +1182,12 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
* Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2
*/
   emit_and(t1, X, brw_imm_uw(0xfffa)); /* X & ~0b101 */
-  brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b101) << 1 */
+  emit_shl(t1, t1, brw_imm_uw(1)); /* (X & ~0b101) << 1 */
   emit_and(t2, Y, brw_imm_uw(2)); /* Y & 0b10 */
-  brw_SHL(&func, t2, t2, brw_imm_uw(2)); /* (Y & 0b10) << 2 */
+  emit_shl(t2, t2, brw_imm_uw(2)); /* (Y & 0b10) << 2 */
   brw_OR(&func, t1, t1, t2); /* (X & ~0b101) << 1 | (Y & 0b10) << 2 */
   emit_and(t2, Y, brw_imm_uw(1)); /* Y & 0b1 */
-  brw_SHL(&func, t2, t2, brw_imm_uw(1)); /* (Y & 0b1) << 1 */
+  emit_shl(t2, t2, brw_imm_uw(1)); /* (Y & 0b1) << 1 */
   brw_OR(&func, t1, t1, t2); /* (X & ~0b101) << 1 | (Y & 0b10) << 2
 | (Y & 0b1) << 1 */
   emit_and(t2, X, brw_imm_uw(1)); /* X & 0b1 */
@@ -1241,12 +1241,12 @@ brw_blorp_blit_program::encode_msaa(unsigned 
num_samples,
 emit_and(t2, S, brw_imm_uw(1)); /* S & 0b1 */
 brw_OR(&func, t1, t1, t2); /* (X & ~0b1) | (S & 0b1) */
  }
- brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (X & ~0b1) << 1
+ emit_shl(t1, t1, brw_imm_uw(1)); /* (X & ~0b1) << 1
| (S & 0b1) << 1 */
  emit_and(t2, X, brw_imm_uw(1)); /* X & 0b1 */
  brw_OR(&func, Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
- brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
+ emit_shl(t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(2)); /* S & 0b10 */
 brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (S & 0b10) */
@@ -1261,19 +1261,19 @@ brw_blorp_blit_program::encode_msaa(unsigned 
num_samples,
   * Y' = (Y & ~0b1) << 1 | (S & 0b10) | (Y & 0b1)
   */
  emit_and(t1, X, brw_imm_uw(0xfffe)); /* X & ~0b1 */
- brw_SHL(&func, t1, t1, brw_imm_uw(2)); /* (X & ~0b1) << 2 */
+ emit_shl(t1, t1, brw_imm_uw(2)); /* (X & ~0b1) << 2 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(4)); /* S & 0b100 */
 brw_OR(&func, t1, t1, t2); /* (X & ~0b1) << 2 | (S & 0b100) */
 emit_and(t2, S, brw_imm_uw(1)); /* S & 0b1 */
-brw_SHL(&func, t2, t2, brw_imm_uw(1)); /* (S & 0b1) << 1 */
+emit_shl(t2, t2, brw_imm_uw(1)); /* (S & 0b1) << 1 */
 brw_OR(&func, t1, t1, t2); /* (X & ~0b1) << 2 | (S & 0b100)
   | (S & 0b1) << 1 */
  }
  emit_and(t2, X, brw_imm_uw(1)); /* X & 0b1 */
  brw_OR(&func, Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y & ~0b1 */
- brw_SHL(&func, t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
+ emit_shl(t1, t1, brw_imm_uw(1)); /* (Y & ~0b1) << 1 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(2)); /* S & 0b10 */
 brw_OR(&func, t1, t1, t2); /* (Y & ~0b1) << 1 | (S & 0b10) */
@@ -1454,8 +1454,8 @@ brw_blorp_blit_program::single_to_blend()
 * that maxe up a pixel).  So we need to multiply our X and Y coordinates
 * each by 2 and then add 1.
 */
-   brw_SHL(&func, t1, X, brw_imm_w(1));
-   brw_SHL(&func, t2, Y, brw_imm_w(1));
+   emit_shl(t1, X, brw_imm_w(1));
+   emit_shl(t2, Y, brw_imm_w(1));
emit_add(Xp, t1, brw_imm_w(1));
emit_add(Yp, t2, brw_imm_w(1));
SWAP_XY_AND_XPYP();
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 

[Mesa-dev] [PATCH 29/42] i965/blorp: wrap AND (/brw_AND(&func, /emit_and(/)

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 78 +--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  7 +++
 2 files changed, 46 insertions(+), 39 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index cedf580..8bf30a2 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1081,7 +1081,7 @@ brw_blorp_blit_program::compute_frag_coords()
  struct brw_reg t1_ud1 = vec1(retype(t1, BRW_REGISTER_TYPE_UD));
  struct brw_reg t2_uw1 = retype(t2, BRW_REGISTER_TYPE_UW);
  struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));
- brw_AND(&func, t1_ud1, r0_ud1, brw_imm_ud(0xc0));
+ emit_and(t1_ud1, r0_ud1, brw_imm_ud(0xc0));
  brw_SHR(&func, t1_ud1, t1_ud1, brw_imm_ud(5));
  emit_mov(vec16(t2_uw1), brw_imm_v(0x3210));
  brw_ADD(&func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
@@ -1160,19 +1160,19 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
*   X' = (X  ~0b1011)  1 | (Y  0b1)  2 | X  0b1 (4)
*   Y' = (Y  ~0b1)  1 | (X  0b1000)  2 | (X  0b10)  1
*/
-  brw_AND(func, t1, X, brw_imm_uw(0xfff4)); /* X  ~0b1011 */
+  emit_and(t1, X, brw_imm_uw(0xfff4)); /* X  ~0b1011 */
   brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b1011)  1 */
-  brw_AND(func, t2, Y, brw_imm_uw(1)); /* Y  0b1 */
+  emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
   brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y  0b1)  2 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b1011)  1 | (Y  0b1)  2 */
-  brw_AND(func, t2, X, brw_imm_uw(1)); /* X  0b1 */
+  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
   brw_OR(func, Xp, t1, t2);
-  brw_AND(func, t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
+  emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
   brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
-  brw_AND(func, t2, X, brw_imm_uw(8)); /* X  0b1000 */
+  emit_and(t2, X, brw_imm_uw(8)); /* X  0b1000 */
   brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X  0b1000)  2 */
   brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (X  0b1000)  2 */
-  brw_AND(func, t2, X, brw_imm_uw(2)); /* X  0b10 */
+  emit_and(t2, X, brw_imm_uw(2)); /* X  0b10 */
   brw_SHR(func, t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
   brw_OR(func, Yp, t1, t2);
   SWAP_XY_AND_XPYP();
@@ -1183,20 +1183,20 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
* X' = (X  ~0b101)  1 | (Y  0b10)  2 | (Y  0b1)  1 | X  0b1
* Y' = (Y  ~0b11)  1 | (X  0b100)  2
*/
-  brw_AND(func, t1, X, brw_imm_uw(0xfffa)); /* X  ~0b101 */
+  emit_and(t1, X, brw_imm_uw(0xfffa)); /* X  ~0b101 */
   brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b101)  1 */
-  brw_AND(func, t2, Y, brw_imm_uw(2)); /* Y  0b10 */
+  emit_and(t2, Y, brw_imm_uw(2)); /* Y  0b10 */
   brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y  0b10)  2 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2 */
-  brw_AND(func, t2, Y, brw_imm_uw(1)); /* Y  0b1 */
+  emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
   brw_SHL(func, t2, t2, brw_imm_uw(1)); /* (Y  0b1)  1 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2
 | (Y  0b1)  1 */
-  brw_AND(func, t2, X, brw_imm_uw(1)); /* X  0b1 */
+  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
   brw_OR(func, Xp, t1, t2);
-  brw_AND(func, t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
+  emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
   brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
-  brw_AND(func, t2, X, brw_imm_uw(4)); /* X  0b100 */
+  emit_and(t2, X, brw_imm_uw(4)); /* X  0b100 */
   brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X  0b100)  2 */
   brw_OR(func, Yp, t1, t2);
   SWAP_XY_AND_XPYP();
@@ -1238,22 +1238,22 @@ brw_blorp_blit_program::encode_msaa(unsigned 
num_samples,
   *   where X' = (X  ~0b1)  1 | (S  0b1)  1 | (X  0b1)
   * Y' = (Y  ~0b1)  1 | (S  0b10) | (Y  0b1)
   */
- brw_AND(func, t1, X, brw_imm_uw(0xfffe)); /* X  ~0b1 */
+ emit_and(t1, X, brw_imm_uw(0xfffe)); /* X  ~0b1 */
  if (!s_is_zero) {
-brw_AND(func, t2, S, brw_imm_uw(1)); /* S  0b1 */
+emit_and(t2, S, brw_imm_uw(1)); /* S  0b1 */
 brw_OR(func, t1, t1, t2); /* (X  ~0b1) | (S  0b1) */
  }
  brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b1)  1
| (S  0b1)  1 */
- brw_AND(func, t2, X, brw_imm_uw(1)); /* X  0b1 */
+ emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
  brw_OR(func, Xp, t1, t2);
- brw_AND(func, t1, Y, brw_imm_uw(0xfffe)); /* Y  

[Mesa-dev] [PATCH 34/42] i965/blorp: wrap MUL (/brw_MUL(&func, /emit_mul(/)

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 18 +-
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  7 +++
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 60d199b..d6db6ef 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1382,16 +1382,16 @@ brw_blorp_blit_program::translate_dst_to_src()
emit_mov(Xp_f, X);
emit_mov(Yp_f, Y);
/* Scale and offset */
-   brw_MUL(&func, X_f, Xp_f, x_transform.multiplier);
-   brw_MUL(&func, Y_f, Yp_f, y_transform.multiplier);
+   emit_mul(X_f, Xp_f, x_transform.multiplier);
+   emit_mul(Y_f, Yp_f, y_transform.multiplier);
emit_add(X_f, X_f, x_transform.offset);
emit_add(Y_f, Y_f, y_transform.offset);
if (key->blit_scaled && key->blend) {
   /* Translate coordinates to lay out the samples in a rectangular  grid
* roughly corresponding to sample locations.
*/
-  brw_MUL(&func, X_f, X_f, brw_imm_f(key->x_scale));
-  brw_MUL(&func, Y_f, Y_f, brw_imm_f(key->y_scale));
+  emit_mul(X_f, X_f, brw_imm_f(key->x_scale));
+  emit_mul(Y_f, Y_f, brw_imm_f(key->y_scale));
  /* Adjust coordinates so that integers represent pixel centers rather
   * than pixel edges.
   */
@@ -1414,8 +1414,8 @@ brw_blorp_blit_program::translate_dst_to_src()
   /* Round the float coordinates down to nearest integer */
   brw_RNDD(&func, Xp_f, X_f);
   brw_RNDD(&func, Yp_f, Y_f);
-  brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale));
-  brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
+  emit_mul(X_f, Xp_f, brw_imm_f(1 / key->x_scale));
+  emit_mul(Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
   SWAP_XY_AND_XPYP();
} else if (!key->bilinear_filter) {
   /* Round the float coordinates down to nearest integer by moving to
@@ -1570,7 +1570,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
   /* Scale the result down by a factor of num_samples */
   /* TODO: should use a smaller loop bound for non-RGBA formats */
   for (int j = 0; j < 4; ++j) {
- brw_MUL(&func, offset(texture_data[0], 2*j),
+ emit_mul(offset(texture_data[0], 2*j),
  offset(vec8(texture_data[0]), 2*j),
  brw_imm_f(1.0/num_samples));
   }
@@ -1645,8 +1645,8 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   */
   brw_FRC(&func, vec16(t1_f), x_sample_coords);
   brw_FRC(&func, vec16(t2_f), y_sample_coords);
-  brw_MUL(&func, vec16(t1_f), t1_f, brw_imm_f(key->x_scale));
-  brw_MUL(&func, vec16(t2_f), t2_f, brw_imm_f(key->x_scale * 
key->y_scale));
+  emit_mul(vec16(t1_f), t1_f, brw_imm_f(key->x_scale));
+  emit_mul(vec16(t2_f), t2_f, brw_imm_f(key->x_scale * key->y_scale));
   emit_add(vec16(t1_f), t1_f, t2_f);
   emit_mov(vec16(S), t1_f);
 
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 3a7b363..17208eb 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -122,6 +122,13 @@ protected:
   brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
}
 
+   inline void emit_mul(const struct brw_reg dst,
+const struct brw_reg src1,
+const struct brw_reg src2)
+   {
+  brw_MUL(&func, dst, src1, src2);
+   }
+
inline void emit_shr(const struct brw_reg dst,
 const struct brw_reg src1,
 const struct brw_reg src2)
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 31/42] i965/blorp: wrap SHR (/brw_SHR(&func, /emit_shr(/)

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 24 
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  7 +++
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index e24cfe1..d721ee9 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1082,7 +1082,7 @@ brw_blorp_blit_program::compute_frag_coords()
  struct brw_reg t2_uw1 = retype(t2, BRW_REGISTER_TYPE_UW);
  struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));
  emit_and(t1_ud1, r0_ud1, brw_imm_ud(0xc0));
- brw_SHR(&func, t1_ud1, t1_ud1, brw_imm_ud(5));
+ emit_shr(t1_ud1, t1_ud1, brw_imm_ud(5));
  emit_mov(vec16(t2_uw1), brw_imm_v(0x3210));
  emit_add(vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
   stride(t2_uw1, 1, 4, 0));
@@ -1159,7 +1159,7 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
*   Y' = (Y  ~0b1)  1 | (X  0b1000)  2 | (X  0b10)  1
*/
   emit_and(t1, X, brw_imm_uw(0xfff4)); /* X  ~0b1011 */
-  brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b1011)  1 */
+  emit_shr(t1, t1, brw_imm_uw(1)); /* (X  ~0b1011)  1 */
   emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
   brw_SHL(func, t2, t2, brw_imm_uw(2)); /* (Y  0b1)  2 */
   brw_OR(func, t1, t1, t2); /* (X  ~0b1011)  1 | (Y  0b1)  2 */
@@ -1168,10 +1168,10 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
   brw_SHL(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
   emit_and(t2, X, brw_imm_uw(8)); /* X  0b1000 */
-  brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X  0b1000)  2 */
+  emit_shr(t2, t2, brw_imm_uw(2)); /* (X  0b1000)  2 */
   brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (X  0b1000)  2 */
   emit_and(t2, X, brw_imm_uw(2)); /* X  0b10 */
-  brw_SHR(func, t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
+  emit_shr(t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
   brw_OR(func, Yp, t1, t2);
   SWAP_XY_AND_XPYP();
} else {
@@ -1193,9 +1193,9 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
   brw_OR(func, Xp, t1, t2);
   emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
-  brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
+  emit_shr(t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
   emit_and(t2, X, brw_imm_uw(4)); /* X  0b100 */
-  brw_SHR(func, t2, t2, brw_imm_uw(2)); /* (X  0b100)  2 */
+  emit_shr(t2, t2, brw_imm_uw(2)); /* (X  0b100)  2 */
   brw_OR(func, Yp, t1, t2);
   SWAP_XY_AND_XPYP();
}
@@ -1326,16 +1326,16 @@ brw_blorp_blit_program::decode_msaa(unsigned 
num_samples,
   * S = (Y  0b10) | (X  0b10)  1
   */
  emit_and(t1, X, brw_imm_uw(0xfffc)); /* X  ~0b11 */
- brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (X  ~0b11)  1 */
+ emit_shr(t1, t1, brw_imm_uw(1)); /* (X  ~0b11)  1 */
  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
  brw_OR(func, Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
- brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
+ emit_shr(t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
  emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
  brw_OR(func, Yp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(2)); /* Y  0b10 */
  emit_and(t2, X, brw_imm_uw(2)); /* X  0b10 */
- brw_SHR(func, t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
+ emit_shr(t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
  brw_OR(func, S, t1, t2);
  break;
   case 8:
@@ -1345,18 +1345,18 @@ brw_blorp_blit_program::decode_msaa(unsigned 
num_samples,
   * S = (X  0b100) | (Y  0b10) | (X  0b10)  1
   */
  emit_and(t1, X, brw_imm_uw(0xfff8)); /* X  ~0b111 */
- brw_SHR(func, t1, t1, brw_imm_uw(2)); /* (X  ~0b111)  2 */
+ emit_shr(t1, t1, brw_imm_uw(2)); /* (X  ~0b111)  2 */
  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
  brw_OR(func, Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
- brw_SHR(func, t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
+ emit_shr(t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
  emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
  brw_OR(func, Yp, t1, t2);
  emit_and(t1, X, brw_imm_uw(4)); /* X  0b100 */
  emit_and(t2, Y, brw_imm_uw(2)); /* Y  0b10 */
  brw_OR(func, t1, t1, t2); /* (X  0b100) | (Y  0b10) */
  emit_and(t2, X, brw_imm_uw(2)); /* X  0b10 */
- brw_SHR(func, t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
+ emit_shr(t2, t2, 

[Mesa-dev] [PATCH 42/42] i965/blorp: switch eu-emitter to use FS IR and fs_generator

2013-12-20 Thread Topi Pohjolainen
Unfortunately the unit tests need to be patched as well. This is
because the direct eu-emitter only patches jump counters for
if-else (see patch_IF_ELSE()) while the fs_generator patches the
endif as well (see brw_set_uip_jip()).

No regressions on IVB (piglit quick + unit tests).

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp| 118 -
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h  |  63 +--
 .../drivers/dri/i965/test_blorp_blit_eu_gen.cpp|   6 +-
 3 files changed, 83 insertions(+), 104 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index dcfd82b..46033ba 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -26,24 +26,10 @@
 #include brw_blorp.h
 
 brw_blorp_eu_emitter::brw_blorp_eu_emitter(struct brw_context *brw)
-   : mem_ctx(ralloc_context(NULL))
+   : mem_ctx(ralloc_context(NULL)), c(rzalloc(mem_ctx, struct brw_wm_compile)),
+ generator(brw, c, NULL, NULL, false)
 {
-   brw_init_compile(brw, &func, mem_ctx);
-
-   /*
-* By default everything is emitted as 16-wide with only a few expections
-* handled explicitly either here in the compiler or by one of the specific
-* code emission calls.
-* It should be also noted that here in this file any alterations of the
-* compression control settings are only used to affect the execution size
-* of the instructions. The instruction template used to initialise all the
-* instructions is effectively not altered -- the value stays at zero
-* representing either GEN6_COMPRESSION_1Q or GEN6_COMPRESSION_1H depending
-* on the context.
-* If any other settings are used in the instruction headers, they are set
-* elsewhere by the individual code emission calls.
-*/
-   brw_set_compression_control(&func, BRW_COMPRESSION_COMPRESSED);
+   generator.no_2nd_half_ctrl = true;
 }
 
 brw_blorp_eu_emitter::~brw_blorp_eu_emitter()
@@ -54,13 +40,18 @@ brw_blorp_eu_emitter::~brw_blorp_eu_emitter()
 const unsigned *
 brw_blorp_eu_emitter::get_program(unsigned *program_size, FILE *dump_file)
 {
+   const unsigned *res;
+
if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
   printf("Native code for BLORP blit:\n");
-  brw_dump_compile(&func, dump_file, 0, func.next_insn_offset);
+  res = generator.generate_assembly(NULL, &insts, program_size, true,
+dump_file);
   printf("\n");
+   } else {
+  res = generator.generate_assembly(NULL, &insts, program_size);
}
 
-   return brw_get_program(&func, program_size);
+   return res;
 }
 
 /**
@@ -78,15 +69,15 @@ brw_blorp_eu_emitter::emit_kill_if_outside_rect(const 
struct brw_reg x,
 {
struct brw_reg f0 = brw_flag_reg(0, 0);
struct brw_reg g1 = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW);
-   struct brw_reg null32 = vec16(retype(brw_null_reg(), BRW_REGISTER_TYPE_UD));
 
-   brw_CMP(&func, null32, BRW_CONDITIONAL_GE, x, dst_x0);
-   brw_CMP(&func, null32, BRW_CONDITIONAL_GE, y, dst_y0);
-   brw_CMP(&func, null32, BRW_CONDITIONAL_L, x, dst_x1);
-   brw_CMP(&func, null32, BRW_CONDITIONAL_L, y, dst_y1);
+   emit_cmp(BRW_CONDITIONAL_GE, x, dst_x0);
+   emit_cmp(BRW_CONDITIONAL_GE, y, dst_y0)->predicate = BRW_PREDICATE_NORMAL;
+   emit_cmp(BRW_CONDITIONAL_L, x, dst_x1)->predicate = BRW_PREDICATE_NORMAL;
+   emit_cmp(BRW_CONDITIONAL_L, y, dst_y1)->predicate = BRW_PREDICATE_NORMAL;
 
-   brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
-   brw_AND(&func, g1, f0, g1)->header.mask_control = BRW_MASK_DISABLE;
+   fs_inst *inst = new (mem_ctx) fs_inst(BRW_OPCODE_AND, g1, f0, g1);
+   inst->force_writemask_all = true;
+   insts.push_tail(inst);
 }
 
 void
@@ -95,40 +86,14 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct 
brw_reg dst,
   unsigned base_mrf,
   unsigned msg_length)
 {
-   unsigned msg_type;
-
-   switch (op) {
-   case SHADER_OPCODE_TEX:
-  msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
-  break;
-   case SHADER_OPCODE_TXF:
-  msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
-  break;
-   case SHADER_OPCODE_TXF_CMS:
-  msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
-  break;
-   case SHADER_OPCODE_TXF_UMS:
-  msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DSS;
-  break;
-   case SHADER_OPCODE_TXF_MCS:
-  msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
-  break;
-   default:
-  assert(!"Unsupported texture lookup operation");
-   }
+   fs_inst *inst = new (mem_ctx) fs_inst(op, dst, brw_message_reg(base_mrf));
+
+   inst->base_mrf = base_mrf;
+   inst->mlen = msg_length;
+   inst->sampler = 0;
+   inst->header_present = false;
 
-   brw_SAMPLE(&func,
-  retype(dst, BRW_REGISTER_TYPE_UW) /* dest */,
-  base_mrf /* msg_reg_nr */,
-  

[Mesa-dev] [PATCH 04/42] i965/blorp: allow unit tests to compile and dump assembly

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 51a3bef..112a307 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -631,7 +631,8 @@ public:
   const brw_blorp_blit_prog_key *key);
~brw_blorp_blit_program();
 
-   const GLuint *compile(struct brw_context *brw, GLuint *program_size);
+   const GLuint *compile(struct brw_context *brw, GLuint *program_size,
+ FILE *dump_file = stdout);
 
brw_blorp_prog_data prog_data;
 
@@ -753,7 +754,8 @@ brw_blorp_blit_program::~brw_blorp_blit_program()
 
 const GLuint *
 brw_blorp_blit_program::compile(struct brw_context *brw,
-GLuint *program_size)
+GLuint *program_size,
+FILE *dump_file)
 {
/* Sanity checks */
if (key->dst_tiled_w && key->rt_samples > 0) {
@@ -910,7 +912,7 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
 
if (unlikely(INTEL_DEBUG & DEBUG_BLORP)) {
   printf("Native code for BLORP blit:\n");
-  brw_dump_compile(&func, stdout, 0, func.next_insn_offset);
+  brw_dump_compile(&func, dump_file, 0, func.next_insn_offset);
   printf("\n");
}
return brw_get_program(&func, program_size);
@@ -2368,3 +2370,14 @@ brw_blorp_blit_params::get_wm_prog(struct brw_context 
*brw,
}
return prog_offset;
 }
+
+void
+brw_blorp_blit_test_compile(struct brw_context *brw,
+const brw_blorp_blit_prog_key *key,
+FILE *out)
+{
+   GLuint program_size;
+   brw_blorp_blit_program prog(brw, key);
+   INTEL_DEBUG |= DEBUG_BLORP;
+   prog.compile(brw, &program_size, out);
+}
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 25/42] i965/blorp: wrap emission of conditional assignment

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 19 ---
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 11 +++
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index f651846..261ef53 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1439,21 +1439,10 @@ brw_blorp_blit_program::clamp_tex_coords(struct brw_reg 
regX,
  struct brw_reg clampX1,
  struct brw_reg clampY1)
 {
-   brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_L, regX, clampX0);
-   brw_MOV(&func, regX, clampX0);
-   brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
-
-   brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_G, regX, clampX1);
-   brw_MOV(&func, regX, clampX1);
-   brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
-
-   brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_L, regY, clampY0);
-   brw_MOV(&func, regY, clampY0);
-   brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
-
-   brw_CMP(&func, vec16(brw_null_reg()), BRW_CONDITIONAL_G, regY, clampY1);
-   brw_MOV(&func, regY, clampY1);
-   brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
+   emit_cond_mov(regX, clampX0, BRW_CONDITIONAL_L, regX, clampX0);
+   emit_cond_mov(regX, clampX1, BRW_CONDITIONAL_G, regX, clampX1);
+   emit_cond_mov(regY, clampY0, BRW_CONDITIONAL_L, regY, clampY0);
+   emit_cond_mov(regY, clampY1, BRW_CONDITIONAL_G, regY, clampY1);
 }
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 00624c9..259d8db 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -57,6 +57,17 @@ protected:
  const struct brw_reg src_1,
  const struct brw_reg src_2);
 
+   inline void emit_cond_mov(const struct brw_reg x,
+ const struct brw_reg y,
+ int op,
+ const struct brw_reg dst,
+ const struct brw_reg src)
+   {
+  brw_CMP(&func, vec16(brw_null_reg()), op, x, y);
+  brw_MOV(&func, dst, src);
+  brw_set_predicate_control(&func, BRW_PREDICATE_NONE);
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 24/42] i965/blorp: move emission of sample combining into eu-emitter

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 13 -
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 12 
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   |  5 +
 3 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 65b5403..f651846 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1529,12 +1529,6 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
 * For integer formats, we replace the add operations with average
 * operations and skip the final division.
 */
-   typedef struct brw_instruction *(*brw_op2_ptr)(struct brw_compile *,
-  struct brw_reg,
-  struct brw_reg,
-  struct brw_reg);
-   brw_op2_ptr combine_op =
-  key-texture_data_type == BRW_REGISTER_TYPE_F ? brw_ADD : brw_AVG;
unsigned stack_depth = 0;
for (unsigned i = 0; i  num_samples; ++i) {
   assert(stack_depth == _mesa_bitcount(i)); /* Loop invariant */
@@ -1576,9 +1570,10 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
 
  /* TODO: should use a smaller loop bound for non_RGBA formats */
  for (int k = 0; k  4; ++k) {
-combine_op(func, offset(texture_data[stack_depth - 1], 2*k),
-   offset(vec8(texture_data[stack_depth - 1]), 2*k),
-   offset(vec8(texture_data[stack_depth]), 2*k));
+emit_combine(key-texture_data_type,
+ offset(texture_data[stack_depth - 1], 2*k),
+ offset(vec8(texture_data[stack_depth - 1]), 2*k),
+ offset(vec8(texture_data[stack_depth]), 2*k));
  }
   }
}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index b189aa2..dcfd82b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -148,3 +148,15 @@ brw_blorp_eu_emitter::emit_render_target_write(const 
struct brw_reg src0,
 true /* eot */,
 use_header);
 }
+
+void
+brw_blorp_eu_emitter::emit_combine(unsigned texture_data_type,
+   const struct brw_reg dst,
+   const struct brw_reg src_1,
+   const struct brw_reg src_2)
+{
+   if (texture_data_type == BRW_REGISTER_TYPE_F)
+  brw_ADD(func, dst, src_1, src_2);
+   else
+  brw_AVG(func, dst, src_1, src_2);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 5f0c8cf..00624c9 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -52,6 +52,11 @@ protected:
  unsigned msg_length,
  bool use_header);
 
+   void emit_combine(unsigned texture_data_type,
+ const struct brw_reg dst,
+ const struct brw_reg src_1,
+ const struct brw_reg src_2);
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 26/42] i965/blorp: wrap emission of if-equal-assignment

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 30 ++-
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  6 ++
 2 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 261ef53..1b7310b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1661,34 +1661,16 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
  brw_IF(func, BRW_EXECUTE_16);
  {
 brw_MOV(func, vec16(t2), brw_imm_d(5));
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(1));
-brw_MOV(func, vec16(t2), brw_imm_d(2));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(2));
-brw_MOV(func, vec16(t2), brw_imm_d(4));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(3));
-brw_MOV(func, vec16(t2), brw_imm_d(6));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
+emit_if_eq_mov(S, 1, vec16(t2), 2);
+emit_if_eq_mov(S, 2, vec16(t2), 4);
+emit_if_eq_mov(S, 3, vec16(t2), 6);
  }
  brw_ELSE(func);
  {
 brw_MOV(func, vec16(t2), brw_imm_d(0));
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(5));
-brw_MOV(func, vec16(t2), brw_imm_d(3));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(6));
-brw_MOV(func, vec16(t2), brw_imm_d(7));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_EQ,
-S, brw_imm_d(7));
-brw_MOV(func, vec16(t2), brw_imm_d(1));
-brw_set_predicate_control(func, BRW_PREDICATE_NONE);
+emit_if_eq_mov(S, 5, vec16(t2), 3);
+emit_if_eq_mov(S, 6, vec16(t2), 7);
+emit_if_eq_mov(S, 7, vec16(t2), 1);
  }
  brw_ENDIF(func);
  brw_MOV(func, vec16(S), t2);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 259d8db..1a023fc 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -68,6 +68,12 @@ protected:
   brw_set_predicate_control(func, BRW_PREDICATE_NONE);
}
 
+   inline void emit_if_eq_mov(const struct brw_reg x, unsigned y,
+  const struct brw_reg dst, unsigned src)
+   {
+  emit_cond_mov(x, brw_imm_d(y), BRW_CONDITIONAL_EQ, dst, brw_imm_d(src));
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 38/42] i965/fs: allow unit tests to dump the final patched assembly

2013-12-20 Thread Topi Pohjolainen
Unit tests that compare generated blorp programs against known-good
output need the dump written to a designated file instead of the
default standard output. The comparison also expects the jump counters
of if-else instructions to be correctly set, and hence the dump
needs to be taken _after_ 'patch_IF_ELSE()' has run (the default
dump of the fs_generator is taken before that).

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_fs.h |  7 +--
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 15 +--
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index 9bef07c..d40d0a8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -510,10 +510,13 @@ public:
 
const unsigned *generate_assembly(exec_list *simd8_instructions,
  exec_list *simd16_instructions,
- unsigned *assembly_size);
+ unsigned *assembly_size,
+ bool dump_enabled = false,
+ FILE *dump_file = stdout);
 
 private:
-   void generate_code(exec_list *instructions);
+   void generate_code(exec_list *instructions, bool dump_enabled,
+  FILE *dump_file);
void generate_fb_write(fs_inst *inst);
void generate_pixel_xy(struct brw_reg dst, bool is_x);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 982fcd0..df91235 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1298,7 +1298,8 @@ fs_generator::generate_untyped_surface_read(fs_inst 
*inst, struct brw_reg dst,
 }
 
 void
-fs_generator::generate_code(exec_list *instructions)
+fs_generator::generate_code(exec_list *instructions, bool dump_enabled,
+FILE *dump_file)
 {
int last_native_insn_offset = p-next_insn_offset;
const char *last_annotation_string = NULL;
@@ -1807,21 +1808,23 @@ fs_generator::generate_code(exec_list *instructions)
 * which is often something we want to debug.  So this is here in
 * case you're doing that.
 */
-   if (0) {
-  brw_dump_compile(p, stdout, 0, p-next_insn_offset);
+   if (dump_enabled) {
+  brw_dump_compile(p, dump_file, 0, p-next_insn_offset);
}
 }
 
 const unsigned *
 fs_generator::generate_assembly(exec_list *simd8_instructions,
 exec_list *simd16_instructions,
-unsigned *assembly_size)
+unsigned *assembly_size,
+bool dump_enabled,
+FILE *dump_file)
 {
assert(simd8_instructions || simd16_instructions);
 
if (simd8_instructions) {
   dispatch_width = 8;
-  generate_code(simd8_instructions);
+  generate_code(simd8_instructions, dump_enabled, dump_file);
}
 
if (simd16_instructions) {
@@ -1842,7 +1845,7 @@ fs_generator::generate_assembly(exec_list 
*simd8_instructions,
   brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);
 
   dispatch_width = 16;
-  generate_code(simd16_instructions);
+  generate_code(simd16_instructions, dump_enabled, dump_file);
}
 
return brw_get_program(p, assembly_size);
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 18/42] i965/blorp: introduce separate eu-emitter for blit compiler

2013-12-20 Thread Topi Pohjolainen
Prepares for presenting blorp blit programs using FS IR, which
allows EU-assembly generation using the i965 glsl-compiler
backend (fs_generator).

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/Makefile.sources  |  1 +
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 41 ++--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 63 +
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   | 42 +
 4 files changed, 111 insertions(+), 36 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
 create mode 100644 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 43f152e..2845844 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -30,6 +30,7 @@ i965_FILES = \
brw_binding_tables.c \
brw_blorp.cpp \
brw_blorp_blit.cpp \
+   brw_blorp_blit_eu.cpp \
brw_blorp_clear.cpp \
brw_cc.c \
brw_cfg.cpp \
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 6de4dc2..836d5a6 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -25,13 +25,11 @@
 #include main/fbobject.h
 #include main/renderbuffer.h
 
-#include glsl/ralloc.h
-
 #include intel_fbo.h
 
 #include brw_blorp.h
 #include brw_context.h
-#include brw_eu.h
+#include brw_blorp_blit_eu.h
 #include brw_state.h
 
 #define FILE_DEBUG_FLAG DEBUG_BLORP
@@ -624,12 +622,11 @@ enum sampler_message_arg
  * (In these formulas, pitch is the number of bytes occupied by a single row
  * of samples).
  */
-class brw_blorp_blit_program
+class brw_blorp_blit_program : public brw_blorp_eu_emitter
 {
 public:
brw_blorp_blit_program(struct brw_context *brw,
   const brw_blorp_blit_prog_key *key);
-   ~brw_blorp_blit_program();
 
const GLuint *compile(struct brw_context *brw, GLuint *program_size,
  FILE *dump_file = stdout);
@@ -663,10 +660,8 @@ private:
 */
static const unsigned LOG2_MAX_BLEND_SAMPLES = 3;
 
-   void *mem_ctx;
struct brw_context *brw;
const brw_blorp_blit_prog_key *key;
-   struct brw_compile func;
 
/* Thread dispatch header */
struct brw_reg R0;
@@ -740,16 +735,10 @@ private:
 brw_blorp_blit_program::brw_blorp_blit_program(
   struct brw_context *brw,
   const brw_blorp_blit_prog_key *key)
-   : mem_ctx(ralloc_context(NULL)),
+   : brw_blorp_eu_emitter(brw),
  brw(brw),
  key(key)
 {
-   brw_init_compile(brw, func, mem_ctx);
-}
-
-brw_blorp_blit_program::~brw_blorp_blit_program()
-{
-   ralloc_free(mem_ctx);
 }
 
 const GLuint *
@@ -801,21 +790,6 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
memset(prog_data, 0, sizeof(prog_data));
prog_data.persample_msaa_dispatch = key-persample_msaa_dispatch;
 
-   /*
-* By default everything is emitted as 16-wide with only a few expections
-* handled explicitly either here in the compiler or by one of the specific
-* code emission calls.
-* It should be also noted that here in this file any alterations of the
-* compression control settings are only used to affect the execution size
-* of the instructions. The instruction template used to initialise all the
-* instructions is effectively not altered -- the value stays at zero
-* representing either GEN6_COMPRESSION_1Q or GEN6_COMPRESSION_1H depending
-* on the context.
-* If any other settings are used in the instruction headers, they are set
-* elsewhere by the individual code emission calls.
-*/
-   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
-
alloc_regs();
compute_frag_coords();
 
@@ -923,12 +897,7 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
 */
render_target_write();
 
-   if (unlikely(INTEL_DEBUG  DEBUG_BLORP)) {
-  printf(Native code for BLORP blit:\n);
-  brw_dump_compile(func, dump_file, 0, func.next_insn_offset);
-  printf(\n);
-   }
-   return brw_get_program(func, program_size);
+   return get_program(program_size, dump_file);
 }
 
 void
@@ -2367,7 +2336,7 @@ brw_blorp_blit_params::get_wm_prog(struct brw_context 
*brw,
  prog_offset, prog_data)) {
   brw_blorp_blit_program prog(brw, this-wm_prog_key);
   GLuint program_size;
-  const GLuint *program = prog.compile(brw, program_size);
+  const GLuint *program = prog.compile(brw, program_size, stdout);
   brw_upload_cache(brw-cache, BRW_BLORP_BLIT_PROG,
this-wm_prog_key, sizeof(this-wm_prog_key),
program, program_size,
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
new file mode 100644
index 000..8e24f80

[Mesa-dev] [PATCH 40/42] i965/fs: add support for BRW_OPCODE_AVG in fs_generator

2013-12-20 Thread Topi Pohjolainen
Needed for compiling blorp blit programs.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 4c159e6..16c08b0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1422,6 +1422,9 @@ fs_generator::generate_code(exec_list *instructions, bool 
dump_enabled,
   case BRW_OPCODE_MUL:
 brw_MUL(p, dst, src[0], src[1]);
 break;
+  case BRW_OPCODE_AVG:
+brw_AVG(p, dst, src[0], src[1]);
+break;
   case BRW_OPCODE_MACH:
 brw_set_acc_write_control(p, 1);
 brw_MACH(p, dst, src[0], src[1]);
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 30/42] i965/blorp: wrap ADD (/brw_ADD(func, /emit_add(/)

2013-12-20 Thread Topi Pohjolainen
In addition, the special case requiring explicit execution size
control is wrapped manually.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 34 +--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 16 +
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 8bf30a2..e24cfe1 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1026,7 +1026,7 @@ brw_blorp_blit_program::compute_frag_coords()
 * Then, we need to add the repeating sequence (0, 1, 0, 1, ...) to the
 * result, since pixels n+1 and n+3 are in the right half of the subspan.
 */
-   brw_ADD(func, vec16(retype(X, BRW_REGISTER_TYPE_UW)),
+   emit_add(vec16(retype(X, BRW_REGISTER_TYPE_UW)),
stride(suboffset(R1, 4), 2, 4, 0), brw_imm_v(0x10101010));
 
/* Similarly, Y coordinates for subspans come from R1.2[31:16] through
@@ -1037,7 +1037,7 @@ brw_blorp_blit_program::compute_frag_coords()
 * And we need to add the repeating sequence (0, 0, 1, 1, ...), since
 * pixels n+2 and n+3 are in the bottom half of the subspan.
 */
-   brw_ADD(func, vec16(retype(Y, BRW_REGISTER_TYPE_UW)),
+   emit_add(vec16(retype(Y, BRW_REGISTER_TYPE_UW)),
stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
 
/* Move the coordinates to UD registers. */
@@ -1084,13 +1084,11 @@ brw_blorp_blit_program::compute_frag_coords()
  emit_and(t1_ud1, r0_ud1, brw_imm_ud(0xc0));
  brw_SHR(func, t1_ud1, t1_ud1, brw_imm_ud(5));
  emit_mov(vec16(t2_uw1), brw_imm_v(0x3210));
- brw_ADD(func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
- stride(t2_uw1, 1, 4, 0));
- brw_set_compression_control(func, BRW_COMPRESSION_NONE);
- brw_ADD(func, offset(S, 1),
- retype(t1_ud1, BRW_REGISTER_TYPE_UW),
- suboffset(stride(t2_uw1, 1, 4, 0), 2));
- brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
+ emit_add(vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
+  stride(t2_uw1, 1, 4, 0));
+ emit_add_8(offset(S, 1),
+retype(t1_ud1, BRW_REGISTER_TYPE_UW),
+suboffset(stride(t2_uw1, 1, 4, 0), 2));
  break;
   }
   default:
@@ -1386,8 +1384,8 @@ brw_blorp_blit_program::translate_dst_to_src()
/* Scale and offset */
brw_MUL(func, X_f, Xp_f, x_transform.multiplier);
brw_MUL(func, Y_f, Yp_f, y_transform.multiplier);
-   brw_ADD(func, X_f, X_f, x_transform.offset);
-   brw_ADD(func, Y_f, Y_f, y_transform.offset);
+   emit_add(X_f, X_f, x_transform.offset);
+   emit_add(Y_f, Y_f, y_transform.offset);
if (key-blit_scaled  key-blend) {
   /* Translate coordinates to lay out the samples in a rectangular  grid
* roughly corresponding to sample locations.
@@ -1397,8 +1395,8 @@ brw_blorp_blit_program::translate_dst_to_src()
  /* Adjust coordinates so that integers represent pixel centers rather
   * than pixel edges.
   */
-  brw_ADD(func, X_f, X_f, brw_imm_f(-0.5));
-  brw_ADD(func, Y_f, Y_f, brw_imm_f(-0.5));
+  emit_add(X_f, X_f, brw_imm_f(-0.5));
+  emit_add(Y_f, Y_f, brw_imm_f(-0.5));
 
   /* Clamp the X, Y texture coordinates to properly handle the sampling of
*  texels on texture edges.
@@ -1458,8 +1456,8 @@ brw_blorp_blit_program::single_to_blend()
 */
brw_SHL(func, t1, X, brw_imm_w(1));
brw_SHL(func, t2, Y, brw_imm_w(1));
-   brw_ADD(func, Xp, t1, brw_imm_w(1));
-   brw_ADD(func, Yp, t2, brw_imm_w(1));
+   emit_add(Xp, t1, brw_imm_w(1));
+   emit_add(Yp, t2, brw_imm_w(1));
SWAP_XY_AND_XPYP();
 }
 
@@ -1606,9 +1604,9 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   s_is_zero = false;
 
   /* Compute pixel coordinates */
-  brw_ADD(func, vec16(x_sample_coords), Xp_f,
+  emit_add(vec16(x_sample_coords), Xp_f,
   brw_imm_f((float)(i  0x1) * (1.0 / key-x_scale)));
-  brw_ADD(func, vec16(y_sample_coords), Yp_f,
+  emit_add(vec16(y_sample_coords), Yp_f,
   brw_imm_f((float)((i  1)  0x1) * (1.0 / key-y_scale)));
   emit_mov(vec16(X), x_sample_coords);
   emit_mov(vec16(Y), y_sample_coords);
@@ -1649,7 +1647,7 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   brw_FRC(func, vec16(t2_f), y_sample_coords);
   brw_MUL(func, vec16(t1_f), t1_f, brw_imm_f(key-x_scale));
   brw_MUL(func, vec16(t2_f), t2_f, brw_imm_f(key-x_scale * 
key-y_scale));
-  brw_ADD(func, vec16(t1_f), t1_f, t2_f);
+  emit_add(vec16(t1_f), t1_f, t2_f);
   emit_mov(vec16(S), t1_f);
 
   if (num_samples == 8) {
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 

[Mesa-dev] [PATCH 23/42] i965/blorp: move emission of rt-write into eu-emitter

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp| 15 +--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp | 18 ++
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h   |  5 +
 3 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 5cf949a..65b5403 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1940,16 +1940,11 @@ brw_blorp_blit_program::render_target_write()
}
 
/* Now write to the render target and terminate the thread */
-   brw_fb_WRITE(func,
-16 /* dispatch_width */,
-base_mrf /* msg_reg_nr */,
-mrf_rt_write /* src0 */,
-BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
-BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
-mrf_offset /* msg_length.  TODO: Should be smaller for 
non-RGBA formats. */,
-0 /* response_length */,
-true /* eot */,
-use_header);
+   emit_render_target_write(
+  mrf_rt_write,
+  base_mrf, 
+  mrf_offset /* msg_length.  TODO: Should be smaller for non-RGBA formats. 
*/,
+  use_header);
 }
 
 
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
index 91455d6..b189aa2 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp
@@ -130,3 +130,21 @@ brw_blorp_eu_emitter::emit_texture_lookup(const struct 
brw_reg dst,
   BRW_SAMPLER_SIMD_MODE_SIMD16,
   BRW_SAMPLER_RETURN_FORMAT_FLOAT32);
 }
+
+void
+brw_blorp_eu_emitter::emit_render_target_write(const struct brw_reg src0,
+   unsigned msg_reg_nr,
+   unsigned msg_length,
+   bool use_header)
+{
+   brw_fb_WRITE(func,
+16 /* dispatch_width */,
+msg_reg_nr,
+src0,
+BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
+BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX,
+msg_length,
+0 /* response_length */,
+true /* eot */,
+use_header);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 9e7c43f..5f0c8cf 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -47,6 +47,11 @@ protected:
 unsigned base_mrf,
 unsigned msg_length);
 
+   void emit_render_target_write(const struct brw_reg src0,
+ unsigned msg_reg_nr,
+ unsigned msg_length,
+ bool use_header);
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 33/42] i965/blorp: wrap OR (/brw_OR(func, /emit_or(/)

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 48 +--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h |  7 
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index cbdb313..60d199b 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1162,17 +1162,17 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   emit_shr(t1, t1, brw_imm_uw(1)); /* (X  ~0b1011)  1 */
   emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
   emit_shl(t2, t2, brw_imm_uw(2)); /* (Y  0b1)  2 */
-  brw_OR(func, t1, t1, t2); /* (X  ~0b1011)  1 | (Y  0b1)  2 */
+  emit_or(t1, t1, t2); /* (X  ~0b1011)  1 | (Y  0b1)  2 */
   emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
-  brw_OR(func, Xp, t1, t2);
+  emit_or(Xp, t1, t2);
   emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
   emit_shl(t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
   emit_and(t2, X, brw_imm_uw(8)); /* X  0b1000 */
   emit_shr(t2, t2, brw_imm_uw(2)); /* (X  0b1000)  2 */
-  brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (X  0b1000)  2 */
+  emit_or(t1, t1, t2); /* (Y  ~0b1)  1 | (X  0b1000)  2 */
   emit_and(t2, X, brw_imm_uw(2)); /* X  0b10 */
   emit_shr(t2, t2, brw_imm_uw(1)); /* (X  0b10)  1 */
-  brw_OR(func, Yp, t1, t2);
+  emit_or(Yp, t1, t2);
   SWAP_XY_AND_XPYP();
} else {
   /* Applying the same logic as above, but in reverse, we obtain the
@@ -1185,18 +1185,18 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   emit_shl(t1, t1, brw_imm_uw(1)); /* (X  ~0b101)  1 */
   emit_and(t2, Y, brw_imm_uw(2)); /* Y  0b10 */
   emit_shl(t2, t2, brw_imm_uw(2)); /* (Y  0b10)  2 */
-  brw_OR(func, t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2 */
+  emit_or(t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2 */
   emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
   emit_shl(t2, t2, brw_imm_uw(1)); /* (Y  0b1)  1 */
-  brw_OR(func, t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2
+  emit_or(t1, t1, t2); /* (X  ~0b101)  1 | (Y  0b10)  2
 | (Y  0b1)  1 */
   emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
-  brw_OR(func, Xp, t1, t2);
+  emit_or(Xp, t1, t2);
   emit_and(t1, Y, brw_imm_uw(0xfffc)); /* Y  ~0b11 */
   emit_shr(t1, t1, brw_imm_uw(1)); /* (Y  ~0b11)  1 */
   emit_and(t2, X, brw_imm_uw(4)); /* X  0b100 */
   emit_shr(t2, t2, brw_imm_uw(2)); /* (X  0b100)  2 */
-  brw_OR(func, Yp, t1, t2);
+  emit_or(Yp, t1, t2);
   SWAP_XY_AND_XPYP();
}
 }
@@ -1239,20 +1239,20 @@ brw_blorp_blit_program::encode_msaa(unsigned 
num_samples,
  emit_and(t1, X, brw_imm_uw(0xfffe)); /* X  ~0b1 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(1)); /* S  0b1 */
-brw_OR(func, t1, t1, t2); /* (X  ~0b1) | (S  0b1) */
+emit_or(t1, t1, t2); /* (X  ~0b1) | (S  0b1) */
  }
  emit_shl(t1, t1, brw_imm_uw(1)); /* (X  ~0b1)  1
| (S  0b1)  1 */
  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
- brw_OR(func, Xp, t1, t2);
+ emit_or(Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
  emit_shl(t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(2)); /* S  0b10 */
-brw_OR(func, t1, t1, t2); /* (Y  ~0b1)  1 | (S  0b10) */
+emit_or(t1, t1, t2); /* (Y  ~0b1)  1 | (S  0b10) */
  }
  emit_and(t2, Y, brw_imm_uw(1)); /* Y  0b1 */
- brw_OR(func, Yp, t1, t2);
+ emit_or(Yp, t1, t2);
  break;
   case 8:
  /* encode_msaa(8, IMS, X, Y, S) = (X', Y', 0)
@@ -1264,22 +1264,22 @@ brw_blorp_blit_program::encode_msaa(unsigned 
num_samples,
  emit_shl(t1, t1, brw_imm_uw(2)); /* (X  ~0b1)  2 */
  if (!s_is_zero) {
 emit_and(t2, S, brw_imm_uw(4)); /* S  0b100 */
-brw_OR(func, t1, t1, t2); /* (X  ~0b1)  2 | (S  0b100) */
+emit_or(t1, t1, t2); /* (X  ~0b1)  2 | (S  0b100) */
 emit_and(t2, S, brw_imm_uw(1)); /* S  0b1 */
 emit_shl(t2, t2, brw_imm_uw(1)); /* (S  0b1)  1 */
-brw_OR(func, t1, t1, t2); /* (X  ~0b1)  2 | (S  0b100)
+emit_or(t1, t1, t2); /* (X  ~0b1)  2 | (S  0b100)
   | (S  0b1)  1 */
  }
  emit_and(t2, X, brw_imm_uw(1)); /* X  0b1 */
- brw_OR(func, Xp, t1, t2);
+ emit_or(Xp, t1, t2);
  emit_and(t1, Y, brw_imm_uw(0xfffe)); /* Y  ~0b1 */
  emit_shl(t1, t1, brw_imm_uw(1)); /* (Y  ~0b1)  1 */
  if (!s_is_zero) {
 emit_and(t2, S, 

[Mesa-dev] [PATCH 35/42] i965/blorp: wrap FRC (/brw_FRC(func, /emit_frc(/)

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 8 
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 6 ++
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index d6db6ef..83d2e56 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1408,8 +1408,8 @@ brw_blorp_blit_program::translate_dst_to_src()
   /* Store the fractional parts to be used as bilinear interpolation
*  coefficients.
   */
-  brw_FRC(func, x_frac, X_f);
-  brw_FRC(func, y_frac, Y_f);
+  emit_frc(x_frac, X_f);
+  emit_frc(y_frac, Y_f);
 
   /* Round the float coordinates down to nearest integer */
   brw_RNDD(func, Xp_f, X_f);
@@ -1643,8 +1643,8 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   *| 6 | 7 || 7 | 1 |
   *--
   */
-  brw_FRC(func, vec16(t1_f), x_sample_coords);
-  brw_FRC(func, vec16(t2_f), y_sample_coords);
+  emit_frc(vec16(t1_f), x_sample_coords);
+  emit_frc(vec16(t2_f), y_sample_coords);
   emit_mul(vec16(t1_f), t1_f, brw_imm_f(key-x_scale));
   emit_mul(vec16(t2_f), t2_f, brw_imm_f(key-x_scale * key-y_scale));
   emit_add(vec16(t1_f), t1_f, t2_f);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 17208eb..c9606b1 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -150,6 +150,12 @@ protected:
   brw_OR(func, dst, src1, src2);
}
 
+   inline void emit_frc(const struct brw_reg dst,
+const struct brw_reg src)
+   {
+  brw_FRC(func, dst, src);
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 17/42] i965/blorp: reduce the scope of the explicit compression control

2013-12-20 Thread Topi Pohjolainen
Highlighting these special cases makes it easier to switch
to the fs-generator, as the wider-scoped compression control
settings used in the current implementation can simply be
dropped.

No regressions on IVB (piglit quick + unit tests).

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 31 ++--
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 4d62411..6de4dc2 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -801,7 +801,20 @@ brw_blorp_blit_program::compile(struct brw_context *brw,
memset(prog_data, 0, sizeof(prog_data));
prog_data.persample_msaa_dispatch = key-persample_msaa_dispatch;
 
-   brw_set_compression_control(func, BRW_COMPRESSION_NONE);
+   /*
+* By default everything is emitted as 16-wide with only a few expections
+* handled explicitly either here in the compiler or by one of the specific
+* code emission calls.
+* It should be also noted that here in this file any alterations of the
+* compression control settings are only used to affect the execution size
+* of the instructions. The instruction template used to initialise all the
+* instructions is effectively not altered -- the value stays at zero
+* representing either GEN6_COMPRESSION_1Q or GEN6_COMPRESSION_1H depending
+* on the context.
+* If any other settings are used in the instruction headers, they are set
+* elsewhere by the individual code emission calls.
+*/
+   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
 
alloc_regs();
compute_frag_coords();
@@ -1077,8 +1090,10 @@ brw_blorp_blit_program::compute_frag_coords()
  struct brw_reg t1_uw1 = retype(t1, BRW_REGISTER_TYPE_UW);
  brw_MOV(func, vec16(t1_uw1), brw_imm_v(0x3210));
  /* Move to UD sample_index register. */
+ brw_set_compression_control(func, BRW_COMPRESSION_NONE);
  brw_MOV(func, S, stride(t1_uw1, 1, 4, 0));
  brw_MOV(func, offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2));
+ brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
  break;
   }
   case 8: {
@@ -1103,9 +1118,11 @@ brw_blorp_blit_program::compute_frag_coords()
  brw_MOV(func, vec16(t2_uw1), brw_imm_v(0x3210));
  brw_ADD(func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
  stride(t2_uw1, 1, 4, 0));
+ brw_set_compression_control(func, BRW_COMPRESSION_NONE);
  brw_ADD(func, offset(S, 1),
  retype(t1_ud1, BRW_REGISTER_TYPE_UW),
  suboffset(stride(t2_uw1, 1, 4, 0), 2));
+ brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
  break;
   }
   default:
@@ -1147,7 +1164,6 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
 */
assert(s_is_zero);
 
-   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
if (new_tiled_w) {
   /* Given X and Y coordinates that describe an address using Y tiling,
* translate to the X and Y coordinates that describe the same address
@@ -1217,7 +1233,6 @@ brw_blorp_blit_program::translate_tiling(bool 
old_tiled_w, bool new_tiled_w)
   brw_OR(func, Yp, t1, t2);
   SWAP_XY_AND_XPYP();
}
-   brw_set_compression_control(func, BRW_COMPRESSION_NONE);
 }
 
 /**
@@ -1234,7 +1249,6 @@ void
 brw_blorp_blit_program::encode_msaa(unsigned num_samples,
 intel_msaa_layout layout)
 {
-   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
switch (layout) {
case INTEL_MSAA_LAYOUT_NONE:
   /* No translation necessary, and S should already be zero. */
@@ -1306,7 +1320,6 @@ brw_blorp_blit_program::encode_msaa(unsigned num_samples,
   s_is_zero = true;
   break;
}
-   brw_set_compression_control(func, BRW_COMPRESSION_NONE);
 }
 
 /**
@@ -1323,7 +1336,6 @@ void
 brw_blorp_blit_program::decode_msaa(unsigned num_samples,
 intel_msaa_layout layout)
 {
-   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
switch (layout) {
case INTEL_MSAA_LAYOUT_NONE:
   /* No translation necessary, and S should already be zero. */
@@ -1386,7 +1398,6 @@ brw_blorp_blit_program::decode_msaa(unsigned num_samples,
   SWAP_XY_AND_XPYP();
   break;
}
-   brw_set_compression_control(func, BRW_COMPRESSION_NONE);
 }
 
 /**
@@ -1422,7 +1433,6 @@ brw_blorp_blit_program::translate_dst_to_src()
struct brw_reg Xp_f = retype(Xp, BRW_REGISTER_TYPE_F);
struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F);
 
-   brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
/* Move the UD coordinates to float registers. */
brw_MOV(func, Xp_f, X);
brw_MOV(func, Yp_f, Y);
@@ 

[Mesa-dev] [PATCH 37/42] i965/blorp: wrap brw_IF/ELSE/ENDIF() into eu-emitter

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 14 +-
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 18 ++
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 37fc7ac..0a094ed 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1543,9 +1543,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
   * Since we have already sampled from sample 0, all we need to do is
   * skip the remaining fetches and averaging if MCS is zero.
   */
- brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_NZ,
- mcs_data, brw_imm_ud(0));
- brw_IF(func, BRW_EXECUTE_16);
+ emit_if(BRW_CONDITIONAL_NZ, mcs_data, brw_imm_ud(0));
   }
 
   /* Do count_trailing_one_bits(i) times */
@@ -1577,7 +1575,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
}
 
if (key-tex_layout == INTEL_MSAA_LAYOUT_CMS)
-  brw_ENDIF(func);
+  emit_endif();
 }
 
 void
@@ -1652,23 +1650,21 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
 
   if (num_samples == 8) {
  /* Map the sample index to a sample number */
- brw_CMP(func, vec16(brw_null_reg()), BRW_CONDITIONAL_L,
- S, brw_imm_d(4));
- brw_IF(func, BRW_EXECUTE_16);
+ emit_if(BRW_CONDITIONAL_L, S, brw_imm_d(4));
  {
 emit_mov(vec16(t2), brw_imm_d(5));
 emit_if_eq_mov(S, 1, vec16(t2), 2);
 emit_if_eq_mov(S, 2, vec16(t2), 4);
 emit_if_eq_mov(S, 3, vec16(t2), 6);
  }
- brw_ELSE(func);
+ emit_else();
  {
 emit_mov(vec16(t2), brw_imm_d(0));
 emit_if_eq_mov(S, 5, vec16(t2), 3);
 emit_if_eq_mov(S, 6, vec16(t2), 7);
 emit_if_eq_mov(S, 7, vec16(t2), 1);
  }
- brw_ENDIF(func);
+ emit_endif();
  emit_mov(vec16(S), t2);
   }
   texel_fetch(texture_data[i]);
diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h 
b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
index 1ecf076..3f2301c 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h
@@ -162,6 +162,24 @@ protected:
   brw_RNDD(func, dst, src);
}
 
+   inline void emit_if(int op,
+   const struct brw_reg x,
+   const struct brw_reg y)
+   {
+  brw_CMP(func, vec16(brw_null_reg()), op, x, y);
+  brw_IF(func, BRW_EXECUTE_16);
+   }
+
+   inline void emit_else(void)
+   {
+  brw_ELSE(func);
+   }
+
+   inline void emit_endif(void)
+   {
+  brw_ENDIF(func);
+   }
+
void *mem_ctx;
struct brw_compile func;
 };
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 16/42] i965/blorp: remove dependency to compression control state

2013-12-20 Thread Topi Pohjolainen
Effectively only the mask control bit gets altered for the single
addition in question and hence there is no real need to use a
fresh state control level for it -- that is more useful when
multiple instructions share the same mask and compression settings.

This is a preparation step for removing the explicit compression
control modifiers in the blit compiler. After this patch there
are no nested state control levels making the constant nature of
the compression settings more apparent.

No regressions on IVB (piglit quick + unit tests).

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 112a307..4d62411 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1407,10 +1407,7 @@ brw_blorp_blit_program::kill_if_outside_dst_rect()
brw_CMP(func, null32, BRW_CONDITIONAL_L, Y, dst_y1);
 
brw_set_predicate_control(func, BRW_PREDICATE_NONE);
-   brw_push_insn_state(func);
-   brw_set_mask_control(func, BRW_MASK_DISABLE);
-   brw_AND(func, g1, f0, g1);
-   brw_pop_insn_state(func);
+   brw_AND(func, g1, f0, g1)-header.mask_control = BRW_MASK_DISABLE;
 }
 
 /**
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 39/42] i965/fs: introduce blorp specific rt-write for fs_generator

2013-12-20 Thread Topi Pohjolainen
The compiler for blorp programs likes to emit instructions for
the message construction itself meaning that the generator needs
to skip any such when blorp programs are translated for the hw.
In addition, the binding table control is special for blorp
programs and the generator does not need to update the binding
tables associated with the compiler bookkeeping (this in fact
gets thrown away as the blorp compiler sets the program data
in its own way).

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_defines.h|  1 +
 src/mesa/drivers/dri/i965/brw_fs.h |  1 +
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 19 +++
 src/mesa/drivers/dri/i965/brw_shader.cpp   |  2 ++
 4 files changed, 23 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 5ee4165..6a3050e 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -752,6 +752,7 @@ enum opcode {
 * instructions.
 */
FS_OPCODE_FB_WRITE = 128,
+   FS_OPCODE_BLORP_FB_WRITE,
SHADER_OPCODE_RCP,
SHADER_OPCODE_RSQ,
SHADER_OPCODE_SQRT,
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index d40d0a8..2137aee 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -518,6 +518,7 @@ private:
void generate_code(exec_list *instructions, bool dump_enabled,
   FILE *dump_file);
void generate_fb_write(fs_inst *inst);
+   void generate_blorp_fb_write(fs_inst *inst);
void generate_pixel_xy(struct brw_reg dst, bool is_x);
void generate_linterp(fs_inst *inst, struct brw_reg dst,
 struct brw_reg *src);
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index df91235..4c159e6 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -190,6 +190,21 @@ fs_generator::generate_fb_write(fs_inst *inst)
mark_surface_used(surf_index);
 }
 
+void
+fs_generator::generate_blorp_fb_write(fs_inst *inst)
+{
+   brw_fb_WRITE(p,
+16 /* dispatch_width */,
+inst-base_mrf,
+brw_reg_from_fs_reg(inst-src[0]),
+BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE,
+1 /* BRW_BLORP_RENDERBUFFER_BINDING_TABLE_INDEX */,
+inst-mlen,
+0,
+true,
+inst-header_present);
+}
+
 /* Computes the integer pixel x,y values from the origin.
  *
  * This is the basis of gl_FragCoord computation, but is also used
@@ -1715,6 +1730,10 @@ fs_generator::generate_code(exec_list *instructions, 
bool dump_enabled,
 generate_fb_write(inst);
 break;
 
+  case FS_OPCODE_BLORP_FB_WRITE:
+generate_blorp_fb_write(inst);
+break;
+
   case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
  generate_mov_dispatch_to_flags(inst);
  break;
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index c0683a8..bf792c4 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -413,6 +413,8 @@ brw_instruction_name(enum opcode op)
switch (op) {
case FS_OPCODE_FB_WRITE:
   return fb_write;
+   case FS_OPCODE_BLORP_FB_WRITE:
+  return blorp_fb_write;
 
case SHADER_OPCODE_RCP:
   return rcp;
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 01/42] i965/fs: generate fs programs also without any 8-width instructions

2013-12-20 Thread Topi Pohjolainen
Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index 0cc574c..2381fb0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -1809,8 +1809,12 @@ fs_generator::generate_assembly(exec_list 
*simd8_instructions,
 exec_list *simd16_instructions,
 unsigned *assembly_size)
 {
-   dispatch_width = 8;
-   generate_code(simd8_instructions);
+   assert(simd8_instructions || simd16_instructions);
+
+   if (simd8_instructions) {
+  dispatch_width = 8;
+  generate_code(simd8_instructions);
+   }
 
if (simd16_instructions) {
   /* We have to do a compaction pass now, or the one at the end of
-- 
1.8.3.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 28/42] i965/blorp: wrap MOV (/brw_MOV(func, /emit_mov(/)

2013-12-20 Thread Topi Pohjolainen
In addition, the two special cases requiring explicit execution
size control are wrapped manually.

Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 68 +--
 src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h | 12 +
 2 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index b95104e..cedf580 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -1041,8 +1041,8 @@ brw_blorp_blit_program::compute_frag_coords()
stride(suboffset(R1, 5), 2, 4, 0), brw_imm_v(0x11001100));
 
/* Move the coordinates to UD registers. */
-   brw_MOV(func, vec16(Xp), retype(X, BRW_REGISTER_TYPE_UW));
-   brw_MOV(func, vec16(Yp), retype(Y, BRW_REGISTER_TYPE_UW));
+   emit_mov(vec16(Xp), retype(X, BRW_REGISTER_TYPE_UW));
+   emit_mov(vec16(Yp), retype(Y, BRW_REGISTER_TYPE_UW));
SWAP_XY_AND_XPYP();
 
if (key-persample_msaa_dispatch) {
@@ -1058,12 +1058,10 @@ brw_blorp_blit_program::compute_frag_coords()
   * then copy from it using vstride=1, width=4, hstride=0.
   */
  struct brw_reg t1_uw1 = retype(t1, BRW_REGISTER_TYPE_UW);
- brw_MOV(func, vec16(t1_uw1), brw_imm_v(0x3210));
+ emit_mov(vec16(t1_uw1), brw_imm_v(0x3210));
  /* Move to UD sample_index register. */
- brw_set_compression_control(func, BRW_COMPRESSION_NONE);
- brw_MOV(func, S, stride(t1_uw1, 1, 4, 0));
- brw_MOV(func, offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2));
- brw_set_compression_control(func, BRW_COMPRESSION_COMPRESSED);
+ emit_mov_8(S, stride(t1_uw1, 1, 4, 0));
+ emit_mov_8(offset(S, 1), suboffset(stride(t1_uw1, 1, 4, 0), 2));
  break;
   }
   case 8: {
@@ -1085,7 +1083,7 @@ brw_blorp_blit_program::compute_frag_coords()
  struct brw_reg r0_ud1 = vec1(retype(R0, BRW_REGISTER_TYPE_UD));
  brw_AND(func, t1_ud1, r0_ud1, brw_imm_ud(0xc0));
  brw_SHR(func, t1_ud1, t1_ud1, brw_imm_ud(5));
- brw_MOV(func, vec16(t2_uw1), brw_imm_v(0x3210));
+ emit_mov(vec16(t2_uw1), brw_imm_v(0x3210));
  brw_ADD(func, vec16(S), retype(t1_ud1, BRW_REGISTER_TYPE_UW),
  stride(t2_uw1, 1, 4, 0));
  brw_set_compression_control(func, BRW_COMPRESSION_NONE);
@@ -1383,8 +1381,8 @@ brw_blorp_blit_program::translate_dst_to_src()
struct brw_reg Yp_f = retype(Yp, BRW_REGISTER_TYPE_F);
 
/* Move the UD coordinates to float registers. */
-   brw_MOV(func, Xp_f, X);
-   brw_MOV(func, Yp_f, Y);
+   emit_mov(Xp_f, X);
+   emit_mov(Yp_f, Y);
/* Scale and offset */
brw_MUL(func, X_f, Xp_f, x_transform.multiplier);
brw_MUL(func, Y_f, Yp_f, y_transform.multiplier);
@@ -1425,8 +1423,8 @@ brw_blorp_blit_program::translate_dst_to_src()
   /* Round the float coordinates down to nearest integer by moving to
* UD registers.
*/
-  brw_MOV(func, Xp, X_f);
-  brw_MOV(func, Yp, Y_f);
+  emit_mov(Xp, X_f);
+  emit_mov(Yp, Y_f);
   SWAP_XY_AND_XPYP();
}
 }
@@ -1528,7 +1526,7 @@ brw_blorp_blit_program::manual_blend_average(unsigned 
num_samples)
  s_is_zero = true;
   } else {
  s_is_zero = false;
- brw_MOV(func, vec16(S), brw_imm_ud(i));
+ emit_mov(vec16(S), brw_imm_ud(i));
   }
   texel_fetch(texture_data[stack_depth++]);
 
@@ -1612,8 +1610,8 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   brw_imm_f((float)(i  0x1) * (1.0 / key-x_scale)));
   brw_ADD(func, vec16(y_sample_coords), Yp_f,
   brw_imm_f((float)((i  1)  0x1) * (1.0 / key-y_scale)));
-  brw_MOV(func, vec16(X), x_sample_coords);
-  brw_MOV(func, vec16(Y), y_sample_coords);
+  emit_mov(vec16(X), x_sample_coords);
+  emit_mov(vec16(Y), y_sample_coords);
 
   /* The MCS value we fetch has to match up with the pixel that we're
* sampling from. Since we sample from different pixels in each
@@ -1652,7 +1650,7 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
   brw_MUL(func, vec16(t1_f), t1_f, brw_imm_f(key-x_scale));
   brw_MUL(func, vec16(t2_f), t2_f, brw_imm_f(key-x_scale * 
key-y_scale));
   brw_ADD(func, vec16(t1_f), t1_f, t2_f);
-  brw_MOV(func, vec16(S), t1_f);
+  emit_mov(vec16(S), t1_f);
 
   if (num_samples == 8) {
  /* Map the sample index to a sample number */
@@ -1660,20 +1658,20 @@ brw_blorp_blit_program::manual_blend_bilinear(unsigned 
num_samples)
  S, brw_imm_d(4));
  brw_IF(func, BRW_EXECUTE_16);
  {
-brw_MOV(func, vec16(t2), brw_imm_d(5));
+emit_mov(vec16(t2), brw_imm_d(5));
 emit_if_eq_mov(S, 1, vec16(t2), 2);
 emit_if_eq_mov(S, 2, vec16(t2), 4);
 emit_if_eq_mov(S, 

Re: [Mesa-dev] PATCHES: R600: Implement work-around for CF stack HW bug

2013-12-20 Thread Vincent Lejeune
Some cosmetic comments below, otherwise the patches are:
reviewed-by: Vincent Lejeune vljn at ovi.com

-OutStreamer.EmitRawText(
-  Twine(; Kernel info:\n) +
-  ; NumSgprs:  + Twine(KernelInfo.NumSGPR) + \n +
-  ; NumVgprs:  + Twine(KernelInfo.NumVGPR) + \n);
+if (STM.getGeneration()  AMDGPUSubtarget::NORTHERN_ISLANDS) {
+
I think it would look cleaner without the empty line here.
+  OutStreamer.EmitRawText(
+Twine(; Kernel info:\n) +
+; NumSgprs:  + Twine(KernelInfo.NumSGPR) + \n +
+; NumVgprs:  + Twine(KernelInfo.NumVGPR) + \n);
+} else {

+void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
+  CFStack::StackItem Item = CFStack::ENTRY;
+  switch(Opcode) {
+  case AMDGPU::CF_PUSH_EG:
+  case AMDGPU::CF_ALU_PUSH_BEFORE:
+if (!isWQM) {
+  if (!ST.hasCaymanISA()  
!branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
+Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on 
Evergreen/NI
+ // See comment in
+ // CFStack::getSubEntrySize()
+  else if (CurrentEntries  0 
+   ST.getGeneration()  AMDGPUSubtarget::EVERGREEN 
+   !ST.hasCaymanISA() 
+   !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
+Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
+  else
+Item = CFStack::SUB_ENTRY;
+} else {
+  Item = CFStack::ENTRY;
It's a single-line statement; I think it should be written without braces.
+}
+break;
 case AMDGPU::CF_ALU_PUSH_BEFORE:
-  CurrentStack++;
-  MaxStack = std::max(MaxStack, CurrentStack);
-  HasPush = true;
-  if (ST.hasCaymanISA()  CurrentLoopDepth  1) {
+  if (ST.hasCaymanISA()  CFStack.getLoopDepth()  1) {
 BuildMI(MBB, MI, MBB.findDebugLoc(MI), 
 TII-get(AMDGPU::CF_PUSH_EG))
 .addImm(CfCount + 1)
 .addImm(1);
 MI-setDesc(TII-get(AMDGPU::CF_ALU));
 CfCount++;
+CFStack.pushBranch(AMDGPU::CF_PUSH_EG);
+  } else {
+CFStack.pushBranch(AMDGPU::CF_ALU_PUSH_BEFORE);
Here too
   }

+bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
+  if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE  ST.hasCaymanISA() 
+  getLoopDepth()  1) {
+return true;
And here too
+  }

Thanks for this patch set; stack bugs are really not easy to spot and fix.
Vincent

 Le Mercredi 11 décembre 2013 19h07, Tom Stellard t...@stellard.net a écrit :
  Hi,
 
 The attached patches implement a work-around for the CF stack HW bug
 that is present on some Evergreen and NI GPUs.
 
 Please Review.
 
 -Tom
 
 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev
 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen6: Fix HiZ hang in WebGL Google maps

2013-12-20 Thread Paul Berry
On 20 December 2013 04:47, Chad Versace chad.vers...@linux.intel.com wrote:

 We need to emit depth stall flushes before depth and hiz resolves.
 Placing them at the top of blorp's state emission fixes the hang.

 Fixes HiZ hang in the new WebGL Google maps on Sandybridge Chrome OS.
 Tested by zooming in and out continuously for 2 hours.

 This patch is based on

 https://chromium.googlesource.com/chromiumos/overlays/chromiumos-overlay/+/8bc07bb70163c3706fb4ba5f980e57dc942f56dd

 CC: mesa-sta...@lists.freedesktop.org
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70740
 Signed-off-by: Stéphane Marchesin marc...@chromium.org
 Signed-off-by: Chad Versace chad.vers...@linux.intel.com
 ---
  src/mesa/drivers/dri/i965/gen6_blorp.cpp | 12 
  1 file changed, 12 insertions(+)


Are you aware of any text in the bspec saying that these flushes are
necessary?  If so it would be nice to quote it in a comment.  I searched
for a while and wasn't able to find anything.



 diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 index 6a5841f..3a0e7ec 100644
 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 @@ -1012,6 +1012,16 @@ gen6_blorp_emit_primitive(struct brw_context *brw,
 ADVANCE_BATCH();
  }

 +static void
 +gen6_emit_hiz_workaround(struct brw_context *brw, enum gen6_hiz_op hiz_op)
 +{
 +   if (hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE ||
 +   hiz_op == GEN6_HIZ_OP_HIZ_RESOLVE) {


Should we also include GEN6_HIZ_OP_DEPTH_CLEAR?  I found this text in the
bspec that suggests maybe we should (Graphics BSpec: 3D-Media-GPGPU Engine >
3D Pipeline Stages > Pixel > Depth and Stencil > Hierarchical Depth
Buffer > Depth Buffer Clear):

The following is required when performing a depth buffer clear with using
the WM_STATE or 3DSTATE_WM:

   - If other rendering operations have preceded this clear, a PIPE_CONTROL
   with depth cache flush enabled, Depth Stall bit enabled must be issued
   before the rectangle primitive used for the depth buffer clear operation.


And later on the same page:


Depth buffer clear pass using any of the methods (WM_STATE, 3DSTATE_WM or
3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL command with
DEPTH_STALL bit and Depth FLUSH bits “*set*” before starting to render.
DepthStall and DepthFlush are not needed between consecutive depth clear
passes nor is it required if the depth-clear pass was done with
“full_surf_clear” bit set in the 3DSTATE_WM_HZ_OP.


(Note, however that these depth clear flushes apply to all versions of the
hardware, so perhaps we should handle them in a different patch and a
different place in the code).


Regardless of whether you make any changes due to my comments above, the
patch is:


Reviewed-by: Paul Berry stereotype...@gmail.com


 +  brw-batch.need_workaround_flush = true;
 +  intel_emit_post_sync_nonzero_flush(brw);
 +  intel_emit_depth_stall_flushes(brw);
 +   }
 +}

  /**
   * \brief Execute a blit or render pass operation.
 @@ -1034,6 +1044,8 @@ gen6_blorp_exec(struct brw_context *brw,
 uint32_t wm_bind_bo_offset = 0;

 uint32_t prog_offset = params-get_wm_prog(brw, prog_data);
 +
 +   gen6_emit_hiz_workaround(brw, params-hiz_op);
 gen6_emit_3dstate_multisample(brw, params-dst.num_samples);
 gen6_emit_3dstate_sample_mask(brw,
   params-dst.num_samples  1 ?
 --
 1.8.4

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 68296] Using old viewport value after a window resize (content is clipped)

2013-12-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=68296

U. Artie Eoff ullysses.a.e...@intel.com changed:

   What|Removed |Added

   Priority|medium  |high

--- Comment #9 from U. Artie Eoff ullysses.a.e...@intel.com ---
The hope is that EFL 1.8 will be integrated into Tizen in January... however
this bug impacts EFL 1.8 quality on its wayland egl engine.  Therefore, raising
importance to high.  Preferably, the fix should be backported to the 9.2
branch.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen6: Fix HiZ hang in WebGL Google maps

2013-12-20 Thread Chad Versace

On 12/20/2013 07:56 AM, Paul Berry wrote:

On 20 December 2013 04:47, Chad Versace chad.vers...@linux.intel.com wrote:


We need to emit depth stall flushes before depth and hiz resolves.
Placing them at the top of blorp's state emission fixes the hang.

Fixes HiZ hang in the new WebGL Google maps on Sandybridge Chrome OS.
Tested by zooming in and out continuously for 2 hours.

This patch is based on

https://chromium.googlesource.com/chromiumos/overlays/chromiumos-overlay/+/8bc07bb70163c3706fb4ba5f980e57dc942f56dd

CC: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70740
Signed-off-by: Stéphane Marchesin marc...@chromium.org
Signed-off-by: Chad Versace chad.vers...@linux.intel.com
---
  src/mesa/drivers/dri/i965/gen6_blorp.cpp | 12 
  1 file changed, 12 insertions(+)



Are you aware of any text in the bspec saying that these flushes are
necessary?  If so it would be nice to quote it in a comment.  I searched
for a while and wasn't able to find anything.


I found nothing in the BSpec stating that flushes were needed here. I began
sprinkling flushes around the code in hope of solving a HiZ hang, and this
location (found by marcheu) gave the needed fix.

I know that's not the answer you wanted.

It can take over an hour to reproduce the hang. So, after finding a fix,
I didn't want to continue to iterate to find the exact location of the
needed flush.


diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 6a5841f..3a0e7ec 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -1012,6 +1012,16 @@ gen6_blorp_emit_primitive(struct brw_context *brw,
 ADVANCE_BATCH();
  }

+static void
+gen6_emit_hiz_workaround(struct brw_context *brw, enum gen6_hiz_op hiz_op)
+{
+   if (hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE ||
+   hiz_op == GEN6_HIZ_OP_HIZ_RESOLVE) {



Should we also include GEN6_HIZ_OP_DEPTH_CLEAR?


Perhaps. But I don't want to add that change without re-validating the patch.
Perhaps the extra flush on GEN6_HIZ_OP_DEPTH_CLEAR will introduce some unforeseen
problem. It's unlikely, but I don't want to alter a patch for mesa-stable
without validation.

I prefer to flush on all the hiz ops, if only to prevent future Sandybridge 
hangs
from haunting us. On Monday, I'll add the change and re-validate. I assume v2 of
the patch automatically gets your r-b.


I found this text in the
bspec that suggests maybe we should (Graphics BSpec: 3D-Media-GPGPU Engine >
3D Pipeline Stages > Pixel > Depth and Stencil > Hierarchical Depth
Buffer > Depth Buffer Clear):

The following is required when performing a depth buffer clear with using
the WM_STATE or 3DSTATE_WM:

- If other rendering operations have preceded this clear, a PIPE_CONTROL
with depth cache flush enabled, Depth Stall bit enabled must be issued
before the rectangle primitive used for the depth buffer clear operation.


And later on the same page:


Depth buffer clear pass using any of the methods (WM_STATE, 3DSTATE_WM or
3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL command with
DEPTH_STALL bit and Depth FLUSH bits “*set*” before starting to render.
DepthStall and DepthFlush are not needed between consecutive depth clear
passes nor is it required if the depth-clear pass was done with
“full_surf_clear” bit set in the 3DSTATE_WM_HZ_OP.


(Note, however that these depth clear flushes apply to all versions of the
hardware, so perhaps we should handle them in a different patch and a
different place in the code).


I think we should handle these workarounds with a follow-up patch. We have
a patch that's tested and proven to solve a hang, so let's proceed with it
mostly-as-is for now so users can get the fix asap.



Regardless of whether you make any changes due to my comments above, the
patch is:


Reviewed-by: Paul Berry stereotype...@gmail.com



+  brw-batch.need_workaround_flush = true;
+  intel_emit_post_sync_nonzero_flush(brw);
+  intel_emit_depth_stall_flushes(brw);
+   }
+}

  /**
   * \brief Execute a blit or render pass operation.
@@ -1034,6 +1044,8 @@ gen6_blorp_exec(struct brw_context *brw,
 uint32_t wm_bind_bo_offset = 0;

 uint32_t prog_offset = params-get_wm_prog(brw, prog_data);
+
+   gen6_emit_hiz_workaround(brw, params-hiz_op);
 gen6_emit_3dstate_multisample(brw, params-dst.num_samples);
 gen6_emit_3dstate_sample_mask(brw,
   params-dst.num_samples  1 ?
--
1.8.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev





___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen6: Fix HiZ hang in WebGL Google maps

2013-12-20 Thread Stéphane Marchesin
On Fri, Dec 20, 2013 at 7:56 AM, Paul Berry stereotype...@gmail.com wrote:

 On 20 December 2013 04:47, Chad Versace chad.vers...@linux.intel.com wrote:

 We need to emit depth stall flushes before depth and hiz resolves.
 Placing them at the top of blorp's state emission fixes the hang.

 Fixes HiZ hang in the new WebGL Google maps on Sandybridge Chrome OS.
 Tested by zooming in and out continuously for 2 hours.

 This patch is based on

 https://chromium.googlesource.com/chromiumos/overlays/chromiumos-overlay/+/8bc07bb70163c3706fb4ba5f980e57dc942f56dd

 CC: mesa-sta...@lists.freedesktop.org
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70740
 Signed-off-by: Stéphane Marchesin marc...@chromium.org
 Signed-off-by: Chad Versace chad.vers...@linux.intel.com
 ---
  src/mesa/drivers/dri/i965/gen6_blorp.cpp | 12 
  1 file changed, 12 insertions(+)


 Are you aware of any text in the bspec saying that these flushes are
 necessary?  If so it would be nice to quote it in a comment.  I searched
 for a while and wasn't able to find anything.



 diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 index 6a5841f..3a0e7ec 100644
 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 @@ -1012,6 +1012,16 @@ gen6_blorp_emit_primitive(struct brw_context *brw,
 ADVANCE_BATCH();
  }

 +static void
 +gen6_emit_hiz_workaround(struct brw_context *brw, enum gen6_hiz_op
 hiz_op)
 +{
 +   if (hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE ||
 +   hiz_op == GEN6_HIZ_OP_HIZ_RESOLVE) {


 Should we also include GEN6_HIZ_OP_DEPTH_CLEAR?


I tried the different ops, and it didn't seem like it was needed. But
that's only empirical testing using WebGL Maps; maybe another application
would trigger this issue...

Stéphane



   I found this text in the bspec that suggests maybe we should (Graphics
 BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages > Pixel > Depth and
 Stencil > Hierarchical Depth Buffer > Depth Buffer Clear):

 The following is required when performing a depth buffer clear with using
 the WM_STATE or 3DSTATE_WM:

- If other rendering operations have preceded this clear, a
PIPE_CONTROL with depth cache flush enabled, Depth Stall bit enabled must
be issued before the rectangle primitive used for the depth buffer clear
operation.


 And later on the same page:


 Depth buffer clear pass using any of the methods (WM_STATE, 3DSTATE_WM or
 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL command with
 DEPTH_STALL bit and Depth FLUSH bits “*set*” before starting to render.
 DepthStall and DepthFlush are not needed between consecutive depth clear
 passes nor is it required if the depth-clear pass was done with
 “full_surf_clear” bit set in the 3DSTATE_WM_HZ_OP.


 (Note, however that these depth clear flushes apply to all versions of the
 hardware, so perhaps we should handle them in a different patch and a
 different place in the code).


 Regardless of whether you make any changes due to my comments above, the
 patch is:


 Reviewed-by: Paul Berry stereotype...@gmail.com


 +  brw-batch.need_workaround_flush = true;
 +  intel_emit_post_sync_nonzero_flush(brw);
 +  intel_emit_depth_stall_flushes(brw);
 +   }
 +}

  /**
   * \brief Execute a blit or render pass operation.
 @@ -1034,6 +1044,8 @@ gen6_blorp_exec(struct brw_context *brw,
 uint32_t wm_bind_bo_offset = 0;

 uint32_t prog_offset = params-get_wm_prog(brw, prog_data);
 +
 +   gen6_emit_hiz_workaround(brw, params-hiz_op);
 gen6_emit_3dstate_multisample(brw, params-dst.num_samples);
 gen6_emit_3dstate_sample_mask(brw,
   params-dst.num_samples  1 ?
 --
 1.8.4

 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev



 ___
 mesa-dev mailing list
 mesa-dev@lists.freedesktop.org
 http://lists.freedesktop.org/mailman/listinfo/mesa-dev


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/18] Remove glBlendColor and glBlendEquations decls from glext.h

2013-12-20 Thread Ian Romanick
On 12/13/2013 05:25 PM, Keith Packard wrote:
 These are duplicates from gl.h; I'm not sure which file they belong in, but
 you don't get to have them in both places.

If you're doing something that involves defining GL_GLEXT_PROTOTYPES,
you're doing it wrong.

Also, unless you're sending kernel or libdrm patches, there's no reason
to CC dri-devel. :)

 Signed-off-by: Keith Packard kei...@keithp.com
 ---
  include/GL/glext.h | 2 --
  1 file changed, 2 deletions(-)
 
 diff --git a/include/GL/glext.h b/include/GL/glext.h
 index 7d6033e..b432d2e 100644
 --- a/include/GL/glext.h
 +++ b/include/GL/glext.h
 @@ -457,8 +457,6 @@ GLAPI void APIENTRY glWindowPos3i (GLint x, GLint y, 
 GLint z);
  GLAPI void APIENTRY glWindowPos3iv (const GLint *v);
  GLAPI void APIENTRY glWindowPos3s (GLshort x, GLshort y, GLshort z);
  GLAPI void APIENTRY glWindowPos3sv (const GLshort *v);
 -GLAPI void APIENTRY glBlendColor (GLfloat red, GLfloat green, GLfloat blue, 
 GLfloat alpha);
 -GLAPI void APIENTRY glBlendEquation (GLenum mode);
  #endif
  #endif /* GL_VERSION_1_4 */
  
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 72926] New: Memory corruption (crash) in draw/draw_pt_fetch_shade_pipeline_llvm.c:435

2013-12-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=72926

  Priority: medium
Bug ID: 72926
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: Memory corruption (crash) in
draw/draw_pt_fetch_shade_pipeline_llvm.c:435
  Severity: normal
Classification: Unclassified
OS: Linux (All)
  Reporter: lekenst...@gmail.com
  Hardware: x86-64 (AMD64)
Status: NEW
   Version: unspecified
 Component: Drivers/X11
   Product: Mesa

Created attachment 91053
  --> https://bugs.freedesktop.org/attachment.cgi?id=91053&action=edit
gdb bt full

After upgrading Mesa 9.2.4 to 10.0.1, my Java program using JOGL crashes with a
memory corruption error.

The attached GDB log was generated with Mesa
a3ae5dc7dd5c2f8893f86a920247e690e550ebd4 (draw: make sure that the stages
setup outputs), built with --enable-debug.

I enforce software rendering because that gives me an order of magnitude
better fps than i965 (glReadPixels is slow):

LIBGL_ALWAYS_SOFTWARE=1 java -cp ... RobotRace

With some versions of my program (new member variable, no other side-effects),
it immediately crashes. For other versions, it crashes after modifying the
center point in gl.glLookAt(). Let me know if you need more details (source,
etc.).

Bisection leads to:
a3ae5dc7dd5c2f8893f86a920247e690e550ebd4 is the first bad commit
commit a3ae5dc7dd5c2f8893f86a920247e690e550ebd4
Author: Zack Rusin za...@vmware.com
Date:   Fri Aug 9 10:11:31 2013 -0400

draw: make sure that the stages setup outputs

Calling the prepare outputs cleans up the slot assignments
for outputs, unfortunately aapoint and aaline didn't have
code to reset their slots after the initial setup, this
was messing up our slot assignments. The unfilled stage
was just missing the initial assignment of the face slot.
This fixes all of the reported piglit failures.

Signed-off-by: Zack Rusin za...@vmware.com
Reviewed-by: Roland Scheidegger srol...@vmware.com

:04 04 fb87dfd2039663da7ff0fa6f12a5b0668fecee7f
fc98438608d4df5bd64ff651bf9098aaabc5a262 M  src

LLVM: 3.3
Mesa: 10.0.1 (gdb from a3ae5dc7dd5c2f8893f86a920247e690e550ebd4)
JOGL: 2.1-b1135-20131101
Linux: v3.13-rc2-208-g8ecffd7
Xorg: 1.14.5
OpenJDK: 7.u45_2.4.3

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Report that no function found if signature lookup is empty

2013-12-20 Thread Ian Romanick
On 12/09/2013 01:18 AM, Kevin Rogovin wrote:
 Enhancement: if no function signature is found for a function
 name, report that the function is not found instead of printing
 an empty list of candidates.

I made the minor changes that I mention below and pushed it with my R-b.

 ---
  src/glsl/ast_function.cpp | 27 +--
  1 file changed, 17 insertions(+), 10 deletions(-)
 
 diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
 index 9dc5427..ec4339c 100644
 --- a/src/glsl/ast_function.cpp
 +++ b/src/glsl/ast_function.cpp
 @@ -450,17 +450,24 @@ no_matching_function_error(const char *name,
  exec_list *actual_parameters,
  _mesa_glsl_parse_state *state)
  {
 -   char *str = prototype_string(NULL, name, actual_parameters);
 -   _mesa_glsl_error(loc, state,
 -no matching function for call to `%s'; candidates are:,
 -str);
 -   ralloc_free(str);
 +   gl_shader *sh = _mesa_glsl_get_builtin_function_shader();
  
 -   print_function_prototypes(state, loc, state-symbols-get_function(name));
 -
 -   if (state-uses_builtin_functions) {
 -  gl_shader *sh = _mesa_glsl_get_builtin_function_shader();
 -  print_function_prototypes(state, loc, sh-symbols-get_function(name));
 +   if(NULL == state-symbols-get_function(name)
^ space here

I understand that there are reasons people like the 'NULL == foo' form,
but it is not used anywhere else in Mesa.  I'd prefer to have the code
remain consistent by using the 'foo == NULL' form.  If someone mistypes
'foo = NULL', GCC will issue a warning.

 +   (!state-uses_builtin_functions 
 +  || NULL == sh-symbols-get_function(name)) ) {
 delete this space  ^

 +  _mesa_glsl_error(loc, state, no function with name '%s', name);
 +   } else {
 +  char *str = prototype_string(NULL, name, actual_parameters);
 +  _mesa_glsl_error(loc, state,
 +   no matching function for call to `%s'; candidates 
 are:,
 +   str);
 +  ralloc_free(str);
 +  
 +  print_function_prototypes(state, loc, 
 state-symbols-get_function(name));
 +  
 +  if (state-uses_builtin_functions) { 
 + print_function_prototypes(state, loc, 
 sh-symbols-get_function(name));
 +  }
 }
  }
  
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] Use line number information from entire function expression

2013-12-20 Thread Ian Romanick
On 12/09/2013 12:38 AM, Kevin Rogovin wrote:
 This patch changes the error reporting behavior for incorrect
 function invocation (triggered by match_function_by_name() 
 unable to find a matching function call) from using the line
 number information associated to the function name term
 to using the line number information of the entire function
 expression. Fixes bug #72264.
 
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72264

Reviewed-by: Ian Romanick ian.d.roman...@intel.com

I've pushed this patch and tagged it for the 10.0 branch.

 ---
  src/glsl/ast_function.cpp | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp
 index 6423759..9dc5427 100644
 --- a/src/glsl/ast_function.cpp
 +++ b/src/glsl/ast_function.cpp
 @@ -1656,7 +1656,7 @@ ast_function_expression::hir(exec_list *instructions,
 } else {
const ast_expression *id = subexpressions[0];
const char *func_name = id-primary_expression.identifier;
 -  YYLTYPE loc = id-get_location();
 +  YYLTYPE loc = get_location();
exec_list actual_parameters;
  
process_parameters(instructions, actual_parameters, 
 this-expressions,
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/1] Adjust MESA_FORMAT color component ordering to match API docs

2013-12-20 Thread Mark Mueller
On Fri, Dec 20, 2013 at 7:38 AM, Brian Paul bri...@vmware.com wrote:

 On 12/19/2013 06:47 PM, Michel Dänzer wrote:

 On Don, 2013-12-19 at 13:56 -0800, Mark Mueller wrote:

 Adjust MESA_FORMAT color component ordering to match API docs, driver
 specific formats (e.g. PIPE_FORMATs),


 Actually, there are a couple of examples of other format definitions
 which match the Mesa formats before your change but no longer after it,
 e.g. in the DRI and i915, nouveau and radeon driver code.

 Changing the Mesa format definitions will be confusing for people
 switching between branches with and without your change.


 Unfortunately, I don't think there's a solution to that issue.  There'll
 always be stable branches that we're cherry-picking to.  If that's a major
 concern, we'll never change any of the MESA_FORMATs.



  Also, because these Mesa formats are defined as packed values, you're
 essentially changing the notation from big endian (aka human readable)
 to little endian. It's unfortunate that the packed PIPE_FORMATs are
 named in little endian order, that's a concession we had to make when
 adding them.

 Overall, I'm afraid this change doesn't look very good at all to me. At
 the very least though, you'd also have to change the order of component
 sizes for formats such as MESA_FORMAT_BGRA2101010_UNORM or
 MESA_FORMAT_BGRA1555_UNORM, otherwise they're just plain wrong.


 Yeah, I missed that, and I agree that if we're going to rename things to
 follow the Gallium style, we should fix those too.


That wasn't an oversight, that part is in my next phase. If it's preferred,
I can move this patch into the subsequent series that I've been working on.
I'd prefer not to because the first 2 patches are huge but straightforward
changes. The subsequent series is more challenging because further name
changing exposes format names that are redundant as well as cases where
formats are being used incorrectly, probably because the name was
misunderstood. The series will be smaller but will require more thorough
review and testing by reviewers.

Mark
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-12-20 Thread Ian Romanick
This patch breaks the test_vec4_register_coalesce unit test.  Did you
run 'make check'?

On 11/27/2013 05:28 AM, Petri Latvala wrote:
 v2: Don't add function parameters, pass the required size in
 prog_data-nr_params.
 
 Signed-off-by: Petri Latvala petri.latv...@intel.com
 ---
  src/mesa/drivers/dri/i965/brw_vec4.h   | 5 +++--
  src/mesa/drivers/dri/i965/brw_vec4_gs.c| 5 +
  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 +++
  src/mesa/drivers/dri/i965/brw_vs.c | 8 
  4 files changed, 23 insertions(+), 2 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
 b/src/mesa/drivers/dri/i965/brw_vec4.h
 index 5cec9f9..5f5f5cd 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.h
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
 @@ -325,8 +325,9 @@ public:
  */
 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
 -   int uniform_size[MAX_UNIFORMS];
 -   int uniform_vector_size[MAX_UNIFORMS];
 +   int *uniform_size;
 +   int *uniform_vector_size;
 +   int uniform_param_count; /* Size of uniform_[vector_]size arrays */
 int uniforms;
  
 src_reg shader_start_time;
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
 b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
 index 018b0b6..7cf9bac 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
 @@ -64,6 +64,11 @@ do_gs_prog(struct brw_context *brw,
  
 c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
 c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
 param_count);
 +   /* Setting nr_params here NOT to the size of the param and pull_param
 +* arrays, but to the number of uniform components vec4_visitor
 +* needs. vec4_visitor::setup_uniforms() will set it back to a proper 
 value.
 +*/
 +   c.prog_data.base.nr_params = param_count / 4 + gs-num_samplers;
  
 if (gp-program.OutputType == GL_POINTS) {
/* When the output type is points, the geometry shader may output data
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 index a13eafb..b9226dc 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 @@ -3253,6 +3253,10 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
   fail_msg(NULL),
   first_non_payload_grf(0),
   need_all_constants_in_pull_buffer(false),
 + /* Initialize uniform_param_count to at least 1 because gen6 VS 
 requires at
 +  * least one. See setup_uniforms() in brw_vec4.cpp.
 +  */
 + uniform_param_count(prog_data-nr_params ? prog_data-nr_params : 1),
   debug_flag(debug_flag),
   no_spills(no_spills)
  {
 @@ -3290,6 +3294,9 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
 this-max_grf = brw-gen = 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
  
 this-uniforms = 0;
 +
 +   this-uniform_size = rzalloc_array(mem_ctx, int, 
 this-uniform_param_count);
 +   this-uniform_vector_size = rzalloc_array(mem_ctx, int, 
 this-uniform_param_count);
  }
  
  vec4_visitor::~vec4_visitor()
 diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
 b/src/mesa/drivers/dri/i965/brw_vs.c
 index b5c8b63..8d0933d 100644
 --- a/src/mesa/drivers/dri/i965/brw_vs.c
 +++ b/src/mesa/drivers/dri/i965/brw_vs.c
 @@ -242,6 +242,14 @@ do_vs_prog(struct brw_context *brw,
  
 prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
 prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
 param_count);
 +   /* Setting nr_params here NOT to the size of the param and pull_param
 +* arrays, but to the number of uniform components vec4_visitor
 +* needs. vec4_visitor::setup_uniforms() will set it back to a proper 
 value.
 +*/
 +   prog_data.base.nr_params = param_count / 4;
 +   if (vs) {
 +  prog_data.base.nr_params += vs-num_samplers;
 +   }
  
 GLbitfield64 outputs_written = vp-program.Base.OutputsWritten;
 prog_data.inputs_read = vp-program.Base.InputsRead;
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 01/42] i965/fs: generate fs programs also without any 8-width instructions

2013-12-20 Thread Matt Turner
On Fri, Dec 20, 2013 at 6:38 AM, Topi Pohjolainen
topi.pohjolai...@intel.com wrote:
 Signed-off-by: Topi Pohjolainen topi.pohjolai...@intel.com
 ---
  src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 8 ++--
  1 file changed, 6 insertions(+), 2 deletions(-)

 diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp 
 b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 index 0cc574c..2381fb0 100644
 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
 @@ -1809,8 +1809,12 @@ fs_generator::generate_assembly(exec_list 
 *simd8_instructions,
  exec_list *simd16_instructions,
  unsigned *assembly_size)
  {
 -   dispatch_width = 8;
 -   generate_code(simd8_instructions);
 +   assert(simd8_instructions || simd16_instructions);
 +
 +   if (simd8_instructions) {
 +  dispatch_width = 8;
 +  generate_code(simd8_instructions);
 +   }

 if (simd16_instructions) {
/* We have to do a compaction pass now, or the one at the end of
 --
 1.8.3.1

Thinking out loud (not something you need to do): it would be nice to
have an INTEL_DEBUG=no8 option after this patch lands.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: Allocate vec4_visitor's uniform_size and uniform_vector_size arrays dynamically.

2013-12-20 Thread Kenneth Graunke
On 11/27/2013 05:28 AM, Petri Latvala wrote:
 v2: Don't add function parameters, pass the required size in
 prog_data-nr_params.
 
 Signed-off-by: Petri Latvala petri.latv...@intel.com
 ---
  src/mesa/drivers/dri/i965/brw_vec4.h   | 5 +++--
  src/mesa/drivers/dri/i965/brw_vec4_gs.c| 5 +
  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 7 +++
  src/mesa/drivers/dri/i965/brw_vs.c | 8 
  4 files changed, 23 insertions(+), 2 deletions(-)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h 
 b/src/mesa/drivers/dri/i965/brw_vec4.h
 index 5cec9f9..5f5f5cd 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4.h
 +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
 @@ -325,8 +325,9 @@ public:
  */
 dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
 const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
 -   int uniform_size[MAX_UNIFORMS];
 -   int uniform_vector_size[MAX_UNIFORMS];
 +   int *uniform_size;
 +   int *uniform_vector_size;
 +   int uniform_param_count; /* Size of uniform_[vector_]size arrays */

I'm not crazy about this variable name.  Between the params arrays,
uniform_* arrays, nr_params count, and uniforms count...we already have
a lot of distinct things that sound alike.

How about:

int uniform_array_size; /** Size of uniform_[vector_]size arrays. */

That seems clearer to me.  Especially seeing that the value here is
really the size of the array, which is an overestimate/upper bound on
the number of uniforms, not the actual number of elements in the
uniforms or params arrays.

 int uniforms;
  
 src_reg shader_start_time;
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs.c 
 b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
 index 018b0b6..7cf9bac 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs.c
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs.c
 @@ -64,6 +64,11 @@ do_gs_prog(struct brw_context *brw,
  
 c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
 c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
 param_count);
 +   /* Setting nr_params here NOT to the size of the param and pull_param
 +* arrays, but to the number of uniform components vec4_visitor
 +* needs. vec4_visitor::setup_uniforms() will set it back to a proper 
 value.
 +*/
 +   c.prog_data.base.nr_params = param_count / 4 + gs-num_samplers;

Hmm.  You're counting the number of vec4s, but...don't samplers take up
a single entry, since they're just integers?  This seems odd to me.

You might also consider doing ALIGN(param_count, 4) / 4 so that you
round up rather than truncating on the division.

I also would really like to keep nr_params in consistent units, i.e.
always uniform float-size components or always vec4s.

 if (gp-program.OutputType == GL_POINTS) {
/* When the output type is points, the geometry shader may output data
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 index a13eafb..b9226dc 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 @@ -3253,6 +3253,10 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
   fail_msg(NULL),
   first_non_payload_grf(0),
   need_all_constants_in_pull_buffer(false),
 + /* Initialize uniform_param_count to at least 1 because gen6 VS 
 requires at
 +  * least one. See setup_uniforms() in brw_vec4.cpp.
 +  */

I think you mean Gen4-5 requires at least one push constant, not gen6
VS.  At least, that's what setup_uniforms() is doing.

 + uniform_param_count(prog_data-nr_params ? prog_data-nr_params : 1),

I think this would be clearer as:

   MAX2(prog_data-nr_params, 1)

   debug_flag(debug_flag),
   no_spills(no_spills)
  {
 @@ -3290,6 +3294,9 @@ vec4_visitor::vec4_visitor(struct brw_context *brw,
 this-max_grf = brw-gen = 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
  
 this-uniforms = 0;
 +
 +   this-uniform_size = rzalloc_array(mem_ctx, int, 
 this-uniform_param_count);
 +   this-uniform_vector_size = rzalloc_array(mem_ctx, int, 
 this-uniform_param_count);
  }
  
  vec4_visitor::~vec4_visitor()
 diff --git a/src/mesa/drivers/dri/i965/brw_vs.c 
 b/src/mesa/drivers/dri/i965/brw_vs.c
 index b5c8b63..8d0933d 100644
 --- a/src/mesa/drivers/dri/i965/brw_vs.c
 +++ b/src/mesa/drivers/dri/i965/brw_vs.c
 @@ -242,6 +242,14 @@ do_vs_prog(struct brw_context *brw,
  
 prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
 prog_data.base.pull_param = rzalloc_array(NULL, const float *, 
 param_count);
 +   /* Setting nr_params here NOT to the size of the param and pull_param
 +* arrays, but to the number of uniform components vec4_visitor
 +* needs. vec4_visitor::setup_uniforms() will set it back to a proper 
 value.
 +*/
 +   prog_data.base.nr_params = param_count / 4;
 +   if (vs) {
 +  prog_data.base.nr_params += vs-num_samplers;
 +   }
  
 GLbitfield64 outputs_written 

Re: [Mesa-dev] [PATCH 2/2] i965: Assert array index on access to vec4_visitor's arrays.

2013-12-20 Thread Kenneth Graunke
On 11/22/2013 12:09 AM, Petri Latvala wrote:
 Signed-off-by: Petri Latvala petri.latv...@intel.com
 ---
  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 6 ++
  1 file changed, 6 insertions(+)
 
 diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp 
 b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 index df38dab..511b080 100644
 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
 @@ -662,6 +662,7 @@ vec4_visitor::setup_uniform_values(ir_variable *ir)
 storage-type-matrix_columns);
  
for (unsigned s = 0; s  vector_count; s++) {
 + assert(uniforms  uniform_param_count);
   uniform_vector_size[uniforms] = storage-type-vector_elements;
  
   int i;
 @@ -685,6 +686,7 @@ vec4_visitor::setup_uniform_clipplane_values()
 gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
  
 for (int i = 0; i  key-nr_userclip_plane_consts; ++i) {
 +  assert(this-uniforms  uniform_param_count);
this-uniform_vector_size[this-uniforms] = 4;
this-userplane[i] = dst_reg(UNIFORM, this-uniforms);
this-userplane[i].type = BRW_REGISTER_TYPE_F;
 @@ -715,6 +717,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable 
 *ir)
   (gl_state_index *)slots[i].tokens);
float *values = this-prog-Parameters-ParameterValues[index][0].f;
  
 +  assert(this-uniforms  uniform_param_count);
this-uniform_vector_size[this-uniforms] = 0;
/* Add each of the unique swizzled channels of the element.
 * This will end up matching the size of the glsl_type of this field.
 @@ -725,6 +728,7 @@ vec4_visitor::setup_builtin_uniform_values(ir_variable 
 *ir)
last_swiz = swiz;
  
prog_data-param[this-uniforms * 4 + j] = values[swiz];
 +  assert(this-uniforms  uniform_param_count);
if (swiz = last_swiz)
   this-uniform_vector_size[this-uniforms]++;
}
 @@ -983,6 +987,7 @@ vec4_visitor::visit(ir_variable *ir)
/* Track how big the whole uniform variable is, in case we need to put 
 a
 * copy of its data into pull constants for array access.
 */
 +  assert(this-uniforms  uniform_param_count);
this-uniform_size[this-uniforms] = type_size(ir-type);
  
if (!strncmp(ir-name, gl_, 3)) {
 @@ -3197,6 +3202,7 @@ 
 vec4_visitor::move_uniform_array_access_to_pull_constants()
  
   pull_constant_loc[uniform] = prog_data-nr_pull_params / 4;
  
 + assert(uniform  uniform_param_count);
   for (int j = 0; j  uniform_size[uniform] * 4; j++) {
  prog_data-pull_param[prog_data-nr_pull_params++]
= values[j];
 

This patch looks good to me (assuming
s/uniform_param_count/uniform_array_size/g).  So, patch 2 is:

Reviewed-by: Kenneth Graunke kenn...@whitecape.org
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] mesa: Add infrastructure for GL_ARB_clear_buffer_object

2013-12-20 Thread Ian Romanick
I'm pretty sure this patch needs to add stub versions of
_mesa_ClearBufferData and _mesa_ClearBufferSubData.  Otherwise, I
believe the build breaks here.

I also think the change to dd_function_table should be in a different patch.

On 12/10/2013 05:13 AM, Pi Tabred wrote:
  - add xml file for extension
  - add reference in gl_API.xml
  - add pointer to device driver function table
  - add new functions to list of available functions
 ---
  src/mapi/glapi/gen/ARB_clear_buffer_object.xml | 50 
 ++
  src/mapi/glapi/gen/gl_API.xml  |  6 +++-
  src/mesa/main/dd.h |  5 +++
  src/mesa/main/tests/dispatch_sanity.cpp|  4 +--
  4 files changed, 62 insertions(+), 3 deletions(-)
  create mode 100644 src/mapi/glapi/gen/ARB_clear_buffer_object.xml
 
 diff --git a/src/mapi/glapi/gen/ARB_clear_buffer_object.xml 
 b/src/mapi/glapi/gen/ARB_clear_buffer_object.xml
 new file mode 100644
 index 000..e7baf6f
 --- /dev/null
 +++ b/src/mapi/glapi/gen/ARB_clear_buffer_object.xml
 @@ -0,0 +1,50 @@
 +?xml version=1.0?
 +!DOCTYPE OpenGLAPI SYSTEM gl_API.dtd
 +
 +!-- Note: no GLX protocol info yet. --
 +
 +
 +OpenGLAPI
 +
 +category name=GL_ARB_clear_buffer_object number=121
 +
 +function name =ClearBufferData offset=assign
 +param name=target type=GLenum/
 +param name=internalformat type=GLenum/
 +param name=format type=GLenum/
 +param name=type type=GLenum/
 +param name=data type=const GLvoid */
 +/function
 +
 +function name =ClearBufferSubData offset=assign
 +param name=target type=GLenum/
 +param name=internalformat type=GLenum/
 +param name=offset type=GLintptr/
 +param name=size type=GLsizeiptr/
 +param name=format type=GLenum/
 +param name=type type=GLenum/
 +param name=data type=const GLvoid */
 +/function
 +
 +!--function name=ClearNamedBufferDataEXT offset=assign es2=3.0
 +param name=buffer type=GLuint/
 +param name=internalformat type=GLenum/
 +param name=format type=GLenum/
 +param name=type type=GLenum/
 +param name=data type=const GLvoid*/
 +/function
 +
 +
 +function name=ClearNamedBufferSubDataEXT offset=assign es2=3.0
 +param name=buffer type=GLuint/
 +param name=internalformat type=GLenum/
 +param name=offset type=GLintptr/
 +param name=size type=GLsizeiptr/
 +param name=format type=GLenum/
 +param name=type type=GLenum/
 +param name=data type=const GLvoid*/
 +/function --
 +
 +/category
 +
 +/OpenGLAPI
 diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
 index 5c877aa..af8ab2e 100644
 --- a/src/mapi/glapi/gen/gl_API.xml
 +++ b/src/mapi/glapi/gen/gl_API.xml
 @@ -8460,7 +8460,11 @@
  
  /category
  
 -!-- ARB extensions #120...#124 --
 +!-- ARB extension #120 --
 +
 +xi:include href=ARB_clear_buffer_object.xml 
 xmlns:xi=http://www.w3.org/2001/XInclude/
 +
 +!-- ARB extensions #122...#124 --
  
  xi:include href=ARB_vertex_attrib_binding.xml 
 xmlns:xi=http://www.w3.org/2001/XInclude/
  
 diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
 index b5b874f..0e2bda0 100644
 --- a/src/mesa/main/dd.h
 +++ b/src/mesa/main/dd.h
 @@ -569,6 +569,11 @@ struct dd_function_table {
GLintptrARB offset, GLsizeiptrARB size,
GLvoid *data, struct gl_buffer_object *obj );
  
 +   void (*ClearBufferSubData)( struct gl_context *ctx, GLenum 
 internalformat, 
 +   GLintptr offset, GLsizeiptr size, 
 +   GLenum format, GLenum type, 
 +   const GLvoid *data, struct gl_buffer_object 
 *obj );
 +
 void (*CopyBufferSubData)( struct gl_context *ctx,
struct gl_buffer_object *src,
struct gl_buffer_object *dst,
 diff --git a/src/mesa/main/tests/dispatch_sanity.cpp 
 b/src/mesa/main/tests/dispatch_sanity.cpp
 index e57fb52..a2ed04b 100644
 --- a/src/mesa/main/tests/dispatch_sanity.cpp
 +++ b/src/mesa/main/tests/dispatch_sanity.cpp
 @@ -846,8 +846,8 @@ const struct function gl_core_functions_possible[] = {
  // { glGetObjectLabel, 43, -1 },  // XXX: Add to xml
  // { glObjectPtrLabel, 43, -1 },  // XXX: Add to xml
  // { glGetObjectPtrLabel, 43, -1 },   // XXX: Add to xml
 -// { glClearBufferData, 43, -1 }, // XXX: Add to xml
 -// { glClearBufferSubData, 43, -1 },  // XXX: Add to xml
 +   { glClearBufferData, 43, -1 },
 +   { glClearBufferSubData, 43, -1 },
  // { glClearNamedBufferDataEXT, 43, -1 }, // XXX: Add to xml
  // { glClearNamedBufferSubDataEXT, 43, -1 },  // XXX: Add to xml
  // { glDispatchCompute, 43, -1 }, // XXX: Add to xml
 

___

Re: [Mesa-dev] [PATCH 1/1] mesa: Add some convention to MESA_FORMATs with attention to PIPE_FORMATs

2013-12-20 Thread Kenneth Graunke
On 12/18/2013 08:44 PM, Mark Mueller wrote:
 Though a large patch, this is simply a series of global search
 and replace operations. Please refer to formats.h for the core
 deltas. In summary this change adds UNORM, and UINT to formats
 where no type is given, as necessary; replaces the SIGNED designation
 inside the named format with an appended SNORM qualifier; and replaces
 *_INT with *_SINT, thus making MESA_FORMATs more consistent
 with PIPE_FORMATs. This is the first and most intrusive change
 in a crusade to clean up and conventionalize MESA_FORMATs to
 meet needs of contemporary drivers.
 
 No regressions were observed with piglit testing on i965.
 There were no build regressions on gallium ilo, nouveau, r300,
 or r600 after the change. 
 
 Signed-off-by: Mark Mueller markkmuel...@gmail.com

 diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
 index 64b4b9a..94fc7a0 100644
 --- a/src/mesa/main/formats.h
 +++ b/src/mesa/main/formats.h
 @@ -67,51 +67,51 @@ typedef enum
 /*@{*/
   /* msb -- TEXEL BITS --- lsb */
   /*         */
 -   MESA_FORMAT_RGBA, /*         */
 -   MESA_FORMAT_RGBA_REV, /*         */
 -   MESA_FORMAT_ARGB, /*         */
 -   MESA_FORMAT_ARGB_REV, /*         */
 -   MESA_FORMAT_RGBX, /*         */
 -   MESA_FORMAT_RGBX_REV, /*         */
 -   MESA_FORMAT_XRGB, /*         */
 -   MESA_FORMAT_XRGB_REV, /*         */
 -   MESA_FORMAT_RGB888,   /*        
  */
 -   MESA_FORMAT_BGR888,   /*        
  */
 -   MESA_FORMAT_RGB565,   /*  RGGG GGGB 
  */
 -   MESA_FORMAT_RGB565_REV,   /* GGGB   RGGG */
 -   MESA_FORMAT_ARGB, /*     */
 -   MESA_FORMAT_ARGB_REV, /*     */
 -   MESA_FORMAT_RGBA5551,/*  RGGG GGBB BBBA */
 -   MESA_FORMAT_ARGB1555, /* ARRR RRGG GGGB  */
 -   MESA_FORMAT_ARGB1555_REV, /* GGGB  ARRR RRGG */
 -   MESA_FORMAT_AL44, /*     */
 -   MESA_FORMAT_AL88, /*     */
 -   MESA_FORMAT_AL88_REV, /*     */
 -   MESA_FORMAT_AL1616,  /*         */
 -   MESA_FORMAT_AL1616_REV,  /*         */
 -   MESA_FORMAT_RGB332,   /*   RRRG 
 GGBB */
 -   MESA_FORMAT_A8,   /*     */
 -   MESA_FORMAT_A16, /*     */
 -   MESA_FORMAT_L8,   /*     */
 -   MESA_FORMAT_L16, /*     */
 -   MESA_FORMAT_I8,   /*     */
 -   MESA_FORMAT_I16, /*     */
 -   MESA_FORMAT_YCBCR,/*   UorV 
 UorV */
 -   MESA_FORMAT_YCBCR_REV,/* UorV UorV   */
 -   MESA_FORMAT_R8,   /*     */
 -   MESA_FORMAT_GR88, /*     */
 -   MESA_FORMAT_RG88, /*     */
 -   MESA_FORMAT_R16,  /*     */
 -   MESA_FORMAT_GR1616,   /*        
  */
 -   MESA_FORMAT_RG1616,  /*         */
 -   MESA_FORMAT_ARGB2101010, /* AARR     GGBB   */
 -   MESA_FORMAT_Z24_S8,  /*         */
 -   MESA_FORMAT_S8_Z24,  /*         */
 -   MESA_FORMAT_Z16, /*     */
 -   MESA_FORMAT_X8_Z24,  /*         */
 -   MESA_FORMAT_Z24_X8,  /*         */
 -   MESA_FORMAT_Z32, /*         */
 -   MESA_FORMAT_S8,  /*     */
 +   MESA_FORMAT_RGBA_UNORM,  /*        
  */
 +   MESA_FORMAT_RGBA_REV_UNORM,  /*        
  */
 +   MESA_FORMAT_ARGB_UNORM,  /*        
  */
 +   

Re: [Mesa-dev] [PATCH 1/5] mesa: Add infrastructure for GL_ARB_clear_buffer_object

2013-12-20 Thread Ian Romanick
On 12/11/2013 03:53 AM, Pi Tabred wrote:
 On 10.12.2013 18:00, Marek Olšák wrote:
 On Tue, Dec 10, 2013 at 2:13 PM, Pi Tabred servuswiege...@yahoo.de wrote:
  - add xml file for extension
  - add reference in gl_API.xml
  - add pointer to device driver function table
  - add new functions to list of available functions
 ---
  src/mapi/glapi/gen/ARB_clear_buffer_object.xml | 50 
 ++
  src/mapi/glapi/gen/gl_API.xml  |  6 +++-
  src/mesa/main/dd.h |  5 +++
  src/mesa/main/tests/dispatch_sanity.cpp|  4 +--
  4 files changed, 62 insertions(+), 3 deletions(-)
  create mode 100644 src/mapi/glapi/gen/ARB_clear_buffer_object.xml

 diff --git a/src/mapi/glapi/gen/ARB_clear_buffer_object.xml 
 b/src/mapi/glapi/gen/ARB_clear_buffer_object.xml
 new file mode 100644
 index 000..e7baf6f
 --- /dev/null
 +++ b/src/mapi/glapi/gen/ARB_clear_buffer_object.xml
 @@ -0,0 +1,50 @@
 +?xml version=1.0?
 +!DOCTYPE OpenGLAPI SYSTEM gl_API.dtd
 +
 +!-- Note: no GLX protocol info yet. --
 +
 +
 +OpenGLAPI
 +
 +category name=GL_ARB_clear_buffer_object number=121
 +
 +function name =ClearBufferData offset=assign
 +param name=target type=GLenum/
 +param name=internalformat type=GLenum/
 +param name=format type=GLenum/
 +param name=type type=GLenum/
 +param name=data type=const GLvoid */
 +/function
 +
 +function name =ClearBufferSubData offset=assign
 +param name=target type=GLenum/
 +param name=internalformat type=GLenum/
 +param name=offset type=GLintptr/
 +param name=size type=GLsizeiptr/
 +param name=format type=GLenum/
 +param name=type type=GLenum/
 +param name=data type=const GLvoid */
 +/function
 +
 +!--function name=ClearNamedBufferDataEXT offset=assign es2=3.0
 +param name=buffer type=GLuint/
 +param name=internalformat type=GLenum/
 +param name=format type=GLenum/
 +param name=type type=GLenum/
 +param name=data type=const GLvoid*/
 +/function
 +
 +
 +function name=ClearNamedBufferSubDataEXT offset=assign es2=3.0
 +param name=buffer type=GLuint/
 +param name=internalformat type=GLenum/
 +param name=offset type=GLintptr/
 +param name=size type=GLsizeiptr/
 +param name=format type=GLenum/
 +param name=type type=GLenum/
 +param name=data type=const GLvoid*/
 +/function --
 +
 +/category
 +
 +/OpenGLAPI
 diff --git a/src/mapi/glapi/gen/gl_API.xml b/src/mapi/glapi/gen/gl_API.xml
 index 5c877aa..af8ab2e 100644
 --- a/src/mapi/glapi/gen/gl_API.xml
 +++ b/src/mapi/glapi/gen/gl_API.xml
 @@ -8460,7 +8460,11 @@

  /category

 -!-- ARB extensions #120...#124 --
 +!-- ARB extension #120 --
 +
 +xi:include href=ARB_clear_buffer_object.xml 
 xmlns:xi=http://www.w3.org/2001/XInclude/
 +
 +!-- ARB extensions #122...#124 --

  xi:include href=ARB_vertex_attrib_binding.xml 
 xmlns:xi=http://www.w3.org/2001/XInclude/

 diff --git a/src/mesa/main/dd.h b/src/mesa/main/dd.h
 index b5b874f..0e2bda0 100644
 --- a/src/mesa/main/dd.h
 +++ b/src/mesa/main/dd.h
 @@ -569,6 +569,11 @@ struct dd_function_table {
  GLintptrARB offset, GLsizeiptrARB size,
  GLvoid *data, struct gl_buffer_object *obj );

 +   void (*ClearBufferSubData)( struct gl_context *ctx, GLenum 
 internalformat,
 +   GLintptr offset, GLsizeiptr size,
 +   GLenum format, GLenum type,
 +   const GLvoid *data, struct gl_buffer_object 
 *obj );

 This interface could be simpler. The unpacking from the data to the
 internal format should take place in mesa/main, so that drivers only
 have to implement something like this:

 void (*ClearBufferSubData)(struct gl_context *ctx, struct
 gl_buffer_object *obj, GLintptr offset, GLsizeiptr size, GLvoid
 *element, int element_size);

 offset and size should stay. element is the clear value that has
 element_size bytes. element_size is the size of the internal
 format in bytes.

 Marek
 
 That's something I considered, but I thought the drivers might have
 their own way of converting the data to the internalformat.
 If that's not the case, I'll change it.

I think there are two ways a driver could implement this function, and I
suspect that drivers for UMA systems may dynamically switch between the
two.  I'm pretty sure the i965 driver will, anyway.

A driver could implement this using a blitter-based or CPU-based memcpy
function.  In that case, having a pointer to raw data in the destination
format is what you would want.

A driver could also implement this using a blitter-based or render-based
clear function.  That is, they'd wrap the buffer object in a fake
surface and do whatever is done for glClearBuffer*.  I had originally
thought you'd want the internalformat in that case, but after discussing
a number of 

Re: [Mesa-dev] [PATCH 1/1] Adjust MESA_FORMAT color component ordering to match API docs

2013-12-20 Thread Mark Mueller
On Thu, Dec 19, 2013 at 5:47 PM, Michel Dänzer mic...@daenzer.net wrote:

 On Don, 2013-12-19 at 13:56 -0800, Mark Mueller wrote:
  Adjust MESA_FORMAT color component ordering to match API docs, driver
  specific formats (e.g. PIPE_FORMATs),

 Actually, there are a couple of examples of other format definitions
 which match the Mesa formats before your change but no longer after it,
 e.g. in the DRI and i915, nouveau and radeon driver code.


It's just not fair to hold the mesa formats to the varied conventions in
the DRI driver code because they are all over the place across all the
different drivers, past, present, and future (the confusion with
MESA_FORMATs may be partly to blame). Moving the mesa formats closer to
those of the API, instead of a nasty mix in-between,  brings at least some
clarity to the situation and the documentation burden can be left to the
API specs - minus a few outliers.

I'm pretty confident that this naming confusion is resulting in some
unnecessary texture re-packing work due to _mesa_choose_tex_format
returning less than optimal formats, though by name they look to be the
best choice. My next series attempts to address that.


 Changing the Mesa format definitions will be confusing for people
 switching between branches with and without your change.

 Also, because these Mesa formats are defined as packed values, you're
 essentially changing the notation from big endian (aka human readable)
 to little endian. It's unfortunate that the packed PIPE_FORMATs are
 named in little endian order, that's a concession we had to make when
 adding them.


Are they all really big endian? Currently it looks like a mix of who knows
what. The main role the MESA_FORMATs are serving is to fortify the GLenums
coming from the API, which are close to useless on their own. I don't think
there is any question that there is a lack of convention and using the
conventions already provided by the API offers the best fit for lots of
reasons.



 Overall, I'm afraid this change doesn't look very good at all to me. At
 the very least though, you'd also have to change the order of component
 sizes for formats such as MESA_FORMAT_BGRA2101010_UNORM or
 MESA_FORMAT_BGRA1555_UNORM, otherwise they're just plain wrong.


  and actual use on common platforms.

 What does that mean?


I'm referring to PIPE_FORMAT, BRW_SURFACEFORMAT, RADEON_TXFORMAT, etc here.



  Remove comments giving MESA_FORMAT color packings, some of which are
  misleading.

 Which ones are misleading, and how?


Bottom line, if the MESA_FORMATs follow the API's convention, then the
reader can go to the API for clarification and we don't have to maintain
that in the code. There are some special cases which can be dealt with in
the code.



  diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
  index 94fc7a0..f224ed5 100644
  --- a/src/mesa/main/formats.h
  +++ b/src/mesa/main/formats.h
  @@ -62,67 +62,68 @@ typedef enum
  MESA_FORMAT_NONE = 0,
 
  /**
  -* \name Basic hardware formats
  +* \name Basic API user space data formats

 All of Mesa is in user space. :)


  +* Please refer to API documentation for more information on format
  +* packing
   */

 What API documentation?


opengl.org/wiki/Pixel_Transfer
opengl.org/wiki/Image_Format
http://msdn.microsoft.com/en-us/library/windows/desktop/bb172558(v=vs.85).aspx




Thanks for your input Michel. Do you still think this is a bad idea?

Mark
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/7] i965: Include brw_reg.h in brw_disasm.c.

2013-12-20 Thread Kenneth Graunke
On 12/19/2013 12:48 PM, Eric Anholt wrote:
 Kenneth Graunke kenn...@whitecape.org writes:
 
 We're going to move some things from brw_defines.h there in a moment.
 Both files contain symbols named negate, so we need to arbitrarily
 rename them.  (The m_ prefix is a convention I adopted in the Gen8+
 code).
 
 I think maybe series rewrites may have made this patch unnecessary?  The
 uses of BRW_REGISTER_TYPE_* by the end I think have all been rewritten
 to use the HW_REG_TYPE that are still in brw_defines.h.
 
 Other than that, this series is:
 
 Reviewed-by: Eric Anholt e...@anholt.net

Good catch!  I originally didn't have the HW_REG_TYPE values, but then
all the magic constants in brw_disasm and gen8_disasm made me sad.

I've dropped patch 1, as it doesn't appear necessary.  Thanks for the
review.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/1] mesa: Add some convention to MESA_FORMATs with attention to PIPE_FORMATs

2013-12-20 Thread Marek Olšák
Not all _INT occurrences are changed to _SINT. It's kinda obvious
if you just read formats.h of the patch. Also, radeon and r200 will
fail to compile with this. Therefore, NAK. The format-translation
macros in the radeon drivers could be inlined to make find-and-replace
operations possible there.

Marek

On Thu, Dec 19, 2013 at 5:44 AM, Mark Mueller markkmuel...@gmail.com wrote:
 Though a large patch, this is simply a series of global search
 and replace operations. Please refer to formats.h for the core
 deltas. In summary this change adds UNORM, and UINT to formats
 where no type is given, as necessary; replaces the SIGNED designation
 inside the named format with an appended SNORM qualifier; and replaces
 *_INT with *_SINT, thus making MESA_FORMATs more consistent
 with PIPE_FORMATs. This is the first and most intrusive change
 in a crusade to clean up and conventionalize MESA_FORMATs to
 meet needs of contemporary drivers.

 No regressions were observed with piglit testing on i965.
 There were no build regressions on gallium ilo, nouveau, r300,
 or r600 after the change.

 Signed-off-by: Mark Mueller markkmuel...@gmail.com
 ---
  src/gallium/state_trackers/dri/common/dri_screen.c |   6 +-
  src/mesa/drivers/dri/common/dri_util.c |  32 +-
  src/mesa/drivers/dri/common/utils.c|  16 +-
  src/mesa/drivers/dri/i915/i830_texstate.c  |  18 +-
  src/mesa/drivers/dri/i915/i830_vtbl.c  |  20 +-
  src/mesa/drivers/dri/i915/i915_context.c   |  24 +-
  src/mesa/drivers/dri/i915/i915_texstate.c  |  26 +-
  src/mesa/drivers/dri/i915/i915_vtbl.c  |  20 +-
  src/mesa/drivers/dri/i915/intel_blit.c |  24 +-
  src/mesa/drivers/dri/i915/intel_fbo.c  |   4 +-
  src/mesa/drivers/dri/i915/intel_pixel_bitmap.c |   6 +-
  src/mesa/drivers/dri/i915/intel_screen.c   |  22 +-
  src/mesa/drivers/dri/i915/intel_tex_image.c|   6 +-
  src/mesa/drivers/dri/i965/brw_blorp.cpp|  10 +-
  src/mesa/drivers/dri/i965/brw_blorp.h  |   4 +-
  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp   |  16 +-
  src/mesa/drivers/dri/i965/brw_clear.c  |   4 +-
  src/mesa/drivers/dri/i965/brw_context.c|   2 +-
  src/mesa/drivers/dri/i965/brw_misc_state.c |  14 +-
  src/mesa/drivers/dri/i965/brw_surface_formats.c| 188 +++---
  src/mesa/drivers/dri/i965/brw_tex_layout.c |   6 +-
  src/mesa/drivers/dri/i965/intel_blit.c |  12 +-
  src/mesa/drivers/dri/i965/intel_fbo.c  |   8 +-
  src/mesa/drivers/dri/i965/intel_mipmap_tree.c  |  36 +-
  src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |   4 +-
  src/mesa/drivers/dri/i965/intel_pixel_bitmap.c |   6 +-
  src/mesa/drivers/dri/i965/intel_screen.c   |  26 +-
  src/mesa/drivers/dri/i965/intel_tex_image.c|   6 +-
  src/mesa/drivers/dri/i965/intel_tex_subimage.c |   6 +-
  src/mesa/drivers/dri/nouveau/nouveau_fbo.c |  10 +-
  src/mesa/drivers/dri/nouveau/nouveau_screen.c  |   6 +-
  src/mesa/drivers/dri/nouveau/nouveau_texture.c |  26 +-
  src/mesa/drivers/dri/nouveau/nouveau_util.h|  20 +-
  src/mesa/drivers/dri/nouveau/nv04_context.c|   6 +-
  src/mesa/drivers/dri/nouveau/nv04_state_fb.c   |   6 +-
  src/mesa/drivers/dri/nouveau/nv04_state_frag.c |   6 +-
  src/mesa/drivers/dri/nouveau/nv04_state_raster.c   |   2 +-
  src/mesa/drivers/dri/nouveau/nv04_state_tex.c  |  16 +-
  src/mesa/drivers/dri/nouveau/nv04_surface.c| 176 +++---
  src/mesa/drivers/dri/nouveau/nv10_state_fb.c   |  10 +-
  src/mesa/drivers/dri/nouveau/nv10_state_frag.c |   8 +-
  src/mesa/drivers/dri/nouveau/nv10_state_tex.c  |  30 +-
  src/mesa/drivers/dri/nouveau/nv10_state_tnl.c  |   2 +-
  src/mesa/drivers/dri/nouveau/nv20_state_fb.c   |  12 +-
  src/mesa/drivers/dri/nouveau/nv20_state_tex.c  |  32 +-
  src/mesa/drivers/dri/nouveau/nv20_state_tnl.c  |   2 +-
  src/mesa/drivers/dri/r200/r200_blit.c  |  82 +--
  src/mesa/drivers/dri/r200/r200_state_init.c|   6 +-
  src/mesa/drivers/dri/r200/r200_texstate.c  |  16 +-
  src/mesa/drivers/dri/radeon/radeon_blit.c  |  52 +-
  src/mesa/drivers/dri/radeon/radeon_fbo.c   |  16 +-
  src/mesa/drivers/dri/radeon/radeon_pixel_read.c|  22 +-
  src/mesa/drivers/dri/radeon/radeon_screen.c|  30 +-
  src/mesa/drivers/dri/radeon/radeon_state_init.c|   6 +-
  src/mesa/drivers/dri/radeon/radeon_tex_copy.c  |  12 +-
  src/mesa/drivers/dri/radeon/radeon_texstate.c  |  12 +-
  src/mesa/drivers/dri/radeon/radeon_texture.c   |  40 +-
  src/mesa/drivers/dri/swrast/swrast.c   |  20 +-
  src/mesa/drivers/haiku/swrast/SoftwareRast.cpp |   8 +-
  src/mesa/drivers/osmesa/osmesa.c   |  24 +-
  src/mesa/drivers/x11/xm_buffer.c   |  10 +-
  src/mesa/main/accum.c  |   8 

Re: [Mesa-dev] [PATCH] i965/gen6: Fix HiZ hang in WebGL Google maps

2013-12-20 Thread Kenneth Graunke
On 12/20/2013 07:56 AM, Paul Berry wrote:
 On 20 December 2013 04:47, Chad Versace chad.vers...@linux.intel.com
 mailto:chad.vers...@linux.intel.com wrote:
 
 We need to emit depth stall flushes before depth and hiz resolves.
 Placing them at the top of blorp's state emission fixes the hang.
 
 Fixes HiZ hang in the new WebGL Google maps on Sandybridge Chrome OS.
 Tested by zooming in and out continuously for 2 hours.
 
 This patch is based on
 
 https://chromium.googlesource.com/chromiumos/overlays/chromiumos-overlay/+/8bc07bb70163c3706fb4ba5f980e57dc942f56dd
 
 CC: mesa-sta...@lists.freedesktop.org
 mailto:mesa-sta...@lists.freedesktop.org
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70740
 Signed-off-by https://bugs.freedesktop.org/show_bug.cgi?id=70740
 Signed-off-by: Stéphane Marchesin marc...@chromium.org
 mailto:marc...@chromium.org
 Signed-off-by: Chad Versace chad.vers...@linux.intel.com
 mailto:chad.vers...@linux.intel.com
 ---
  src/mesa/drivers/dri/i965/gen6_blorp.cpp | 12 
  1 file changed, 12 insertions(+)
 
 
 Are you aware of any text in the bspec saying that these flushes are
 necessary?  If so it would be nice to quote it in a comment.  I searched
 for a while and wasn't able to find anything.
  
 
 
 diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 index 6a5841f..3a0e7ec 100644
 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 @@ -1012,6 +1012,16 @@ gen6_blorp_emit_primitive(struct brw_context
 *brw,
 ADVANCE_BATCH();
  }
 
 +static void
 +gen6_emit_hiz_workaround(struct brw_context *brw, enum gen6_hiz_op
 hiz_op)
 +{
 +   if (hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE ||
 +   hiz_op == GEN6_HIZ_OP_HIZ_RESOLVE) {
 
 
 Should we also include GEN6_HIZ_OP_DEPTH_CLEAR?  I found this text in
 the bspec that suggests maybe we should (Graphics BSpec: 3D-Media-GPGPU
 Engine  3D Pipeline Stages  Pixel  Depth and Stencil  Hierarchical
 Depth Buffer  Depth Buffer Clear):
 
 The following is required when performing a depth buffer clear with
 using the WM_STATE or 3DSTATE_WM:
 
   * If other rendering operations have preceded this clear, a
 PIPE_CONTROL with depth cache flush enabled, Depth Stall bit enabled
 must be issued before the rectangle primitive used for the depth
 buffer clear operation.
 
 
 And later on the same page:
 
 
 Depth buffer clear pass using any of the methods (WM_STATE, 3DSTATE_WM
 or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL command with
 DEPTH_STALL bit and Depth FLUSH bits “*set*” before starting to render.
 DepthStall and DepthFlush are not needed between consecutive depth clear
 passes nor is it required if the depth-clear pass was done with
 “full_surf_clear” bit set in the 3DSTATE_WM_HZ_OP.

We already at least attempt to do this: before emitting
3DSTATE_DEPTH_BUFFER, we call intel_emit_depth_stall_flushes(), both in
the main rendering code and from BLORP.  Since all depth clears go
through one of those two methods, the flushes do happen before rendering.

Maybe the timing is slightly off.  Chad's patch does add an additional
set of these flushes.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/5] mesa: Add infrastructure for GL_ARB_clear_buffer_object

2013-12-20 Thread Marek Olšák
On Fri, Dec 20, 2013 at 9:21 PM, Ian Romanick i...@freedesktop.org wrote:

 I think there are two ways a driver could implement this function, and I
 suspect that drivers for UMA systems may dynamically switch between the
 two.  I'm pretty sure the i965 driver will, anyway.

 A driver could implement this using a blitter-based or CPU-based memcpy
 function.  In that case, having a pointer to raw data in the destination
 format is what you would want.

 A driver could also implement this using a blitter-based or render-based
 clear function.  That is, they'd wrap the buffer object in a fake
 surface and do whatever is done for glClearBuffer*.  I had originally
 thought you'd want the internalformat in that case, but after discussing
 a number of implementation strategies with Ken, I don't think it's
 actually needed.

 I also support Marek's suggestion.

 For the record, the possible implementation strategies Ken and I discussed:

  - Repeating memcpy on the CPU.

  - Using the blit engine with clever source strides.

  - Using the render engine with some sort of uint outputs of raw bits.

  - Using transform feedback with some sort of uint outputs of raw bits.

We use DMA (memset on the GPU) for clearValueSize = 4 and transform
feedback in the other cases. The latter sets the clear value as a
vertex with a stride of zero.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] swrast: silence driContextSetFlags() parameter type warning

2013-12-20 Thread Ian Romanick
Oops.  All of the other drivers call the Mesa context ctx and the
dri_context something else... and I managed to overlook the warning.

Reviewed-by: Ian Romanick ian.d.roman...@intel.com

On 12/13/2013 03:55 PM, Brian Paul wrote:
 ---
  src/mesa/drivers/dri/swrast/swrast.c |2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/src/mesa/drivers/dri/swrast/swrast.c 
 b/src/mesa/drivers/dri/swrast/swrast.c
 index cfa9316..79a2740 100644
 --- a/src/mesa/drivers/dri/swrast/swrast.c
 +++ b/src/mesa/drivers/dri/swrast/swrast.c
 @@ -705,7 +705,7 @@ dri_create_context(gl_api api,
   goto context_fail;
  }
  
 -driContextSetFlags(ctx, flags);
 +driContextSetFlags(mesaCtx, flags);
  
  /* do bounds checking to prevent segfaults and server crashes! */
  mesaCtx-Const.CheckArrayBounds = GL_TRUE;
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: minor simplification in _mesa_es3_error_check_format_and_type()

2013-12-20 Thread Ian Romanick
On 12/17/2013 03:35 PM, Brian Paul wrote:
 The type_valid local was set to true and never changed.

Right... I think I just copy-and-pasted
_mesa_es_error_check_format_and_type, which does change the value of
type_valid.

Reviewed-by: Ian Romanick ian.d.roman...@intel.com

 ---
  src/mesa/main/glformats.c |4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)
 
 diff --git a/src/mesa/main/glformats.c b/src/mesa/main/glformats.c
 index 740faa8..1ab8b23 100644
 --- a/src/mesa/main/glformats.c
 +++ b/src/mesa/main/glformats.c
 @@ -1694,8 +1694,6 @@ GLenum
  _mesa_es3_error_check_format_and_type(GLenum format, GLenum type,
GLenum internalFormat)
  {
 -   GLboolean type_valid = GL_TRUE;
 -
 switch (format) {
 case GL_RGBA:
switch (type) {
 @@ -2116,5 +2114,5 @@ _mesa_es3_error_check_format_and_type(GLenum format, 
 GLenum type,
break;
 }
  
 -   return type_valid ? GL_NO_ERROR : GL_INVALID_OPERATION;
 +   return GL_NO_ERROR;
  }
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] i965/gen6: Fix HiZ hang in WebGL Google maps

2013-12-20 Thread Kenneth Graunke
On 12/20/2013 04:47 AM, Chad Versace wrote:
 We need to emit depth stall flushes before depth and hiz resolves.
 Placing them at the top of blorp's state emission fixes the hang.
 
 Fixes HiZ hang in the new WebGL Google maps on Sandybridge Chrome OS.
 Tested by zooming in and out continuously for 2 hours.
 
 This patch is based on
 https://chromium.googlesource.com/chromiumos/overlays/chromiumos-overlay/+/8bc07bb70163c3706fb4ba5f980e57dc942f56dd
 
 CC: mesa-sta...@lists.freedesktop.org
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70740
 Signed-off-by: Stéphane Marchesin marc...@chromium.org
 Signed-off-by: Chad Versace chad.vers...@linux.intel.com
 ---
  src/mesa/drivers/dri/i965/gen6_blorp.cpp | 12 
  1 file changed, 12 insertions(+)
 
 diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
 b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 index 6a5841f..3a0e7ec 100644
 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 @@ -1012,6 +1012,16 @@ gen6_blorp_emit_primitive(struct brw_context *brw,
 ADVANCE_BATCH();
  }
  
 +static void
 +gen6_emit_hiz_workaround(struct brw_context *brw, enum gen6_hiz_op hiz_op)
 +{
 +   if (hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE ||
 +   hiz_op == GEN6_HIZ_OP_HIZ_RESOLVE) {
 +  brw-batch.need_workaround_flush = true;
 +  intel_emit_post_sync_nonzero_flush(brw);
 +  intel_emit_depth_stall_flushes(brw);
 +   }
 +}
  
  /**
   * \brief Execute a blit or render pass operation.
 @@ -1034,6 +1044,8 @@ gen6_blorp_exec(struct brw_context *brw,
 uint32_t wm_bind_bo_offset = 0;
  
 uint32_t prog_offset = params-get_wm_prog(brw, prog_data);
 +
 +   gen6_emit_hiz_workaround(brw, params-hiz_op);
 gen6_emit_3dstate_multisample(brw, params-dst.num_samples);
 gen6_emit_3dstate_sample_mask(brw,
   params-dst.num_samples  1 ?
 

I'm fine with landing this as is, since it's trivial and getting
Sandybridge stable is really critical.  I'm glad to see this won't
happen on Gen7+.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

That said, it would be nice to refine it a little.

The intel_emit_post_sync_nonzero_flush(brw) call should not be
necessary.  The very first BLORP call, gen6_emit_3dstate_multisample,
already calls it.  However, it may not have taken effect in the past,
since intel_emit_post_sync_nonzero_flush checks need_workaround_flush.

I also am dubious whether we need the depth stall flushes, since we
already do them before 3DSTATE_DEPTH_BUFFER, which is documented as
necessary.

So here's what I think is actually going on:

We've seen strong evidence that it's 100% required to do the post-sync
nonzero workaround before non-pipelined commands.  (My patches which
added missing post-sync workarounds greatly reduced the number of GPU
hangs.)

I believe that the root of the problem is that need_workaround_flush is
not getting set to true sufficiently often, so the post-sync non-zero
workaround isn't happening when necessary.  Currently, we flag it at the
start of each batch, and /after/ each BLORP operation.

That means it's missing in two cases:
1. Switching from normal rendering to BLORP drawing.
   (...fixed by this patch.)
2. Between successive draws (3DPRIMITIVEs) within a batch.
   (I believe this is also necessary, and is not fixed by this patch.)

I would love to see an alternate patch which sets need_workaround_flush
in two places:
1. Batchbuffer init (for the very first draw)
2. After each 3DPRIMITIVE command.

I believe that would fix this hang and potentially other future hangs.

--Ken
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] i965/gen6: Fix HiZ hang in WebGL Google maps

2013-12-20 Thread Chad Versace

On 12/20/2013 01:36 PM, Kenneth Graunke wrote:

On 12/20/2013 04:47 AM, Chad Versace wrote:

We need to emit depth stall flushes before depth and hiz resolves.
Placing them at the top of blorp's state emission fixes the hang.

Fixes HiZ hang in the new WebGL Google maps on Sandybridge Chrome OS.
Tested by zooming in and out continuously for 2 hours.

This patch is based on
https://chromium.googlesource.com/chromiumos/overlays/chromiumos-overlay/+/8bc07bb70163c3706fb4ba5f980e57dc942f56dd

CC: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70740
Signed-off-by: Stéphane Marchesin marc...@chromium.org
Signed-off-by: Chad Versace chad.vers...@linux.intel.com
---
  src/mesa/drivers/dri/i965/gen6_blorp.cpp | 12 
  1 file changed, 12 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 6a5841f..3a0e7ec 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -1012,6 +1012,16 @@ gen6_blorp_emit_primitive(struct brw_context *brw,
 ADVANCE_BATCH();
  }

+static void
+gen6_emit_hiz_workaround(struct brw_context *brw, enum gen6_hiz_op hiz_op)
+{
+   if (hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE ||
+   hiz_op == GEN6_HIZ_OP_HIZ_RESOLVE) {
+  brw-batch.need_workaround_flush = true;
+  intel_emit_post_sync_nonzero_flush(brw);
+  intel_emit_depth_stall_flushes(brw);
+   }
+}

  /**
   * \brief Execute a blit or render pass operation.
@@ -1034,6 +1044,8 @@ gen6_blorp_exec(struct brw_context *brw,
 uint32_t wm_bind_bo_offset = 0;

 uint32_t prog_offset = params-get_wm_prog(brw, prog_data);
+
+   gen6_emit_hiz_workaround(brw, params-hiz_op);
 gen6_emit_3dstate_multisample(brw, params-dst.num_samples);
 gen6_emit_3dstate_sample_mask(brw,
   params-dst.num_samples  1 ?



I'm fine with landing this as is, since it's trivial and getting
Sandybridge stable is really critical.  I'm glad to see this won't
happen on Gen7+.

Reviewed-by: Kenneth Graunke kenn...@whitecape.org


Ok. Yours is the second r-b for the patch as-is. And this fixes hangs
affecting users on all distros.  So I'll commit it as-is, with refinements
to follow as I validate them.

Validation takes a long time, multiple hours, so I think it unwise to postpone
committing this bugfix until it's perfect. The needed build-test-refine cycles to
attain a perfect fix may take a long time. (Thankfully the reproduction steps
are automated now).



That said, it would be nice to refine it a little.



The intel_emit_post_sync_nonzero_flush(brw) call should not be
necessary.  The very first BLORP call, gen6_emit_3dstate_multisample,
already calls it.  However, it may not have taken effect in the past,
since intel_emit_post_sync_nonzero_flush checks need_workaround_flush.

I also am dubious whether we need the depth stall flushes, since we
already do them before 3DSTATE_DEPTH_BUFFER, which is documented as
necessary.

So here's what I think is actually going on:

We've seen strong evidence that it's 100% required to do the post-sync
nonzero workaround before non-pipelined commands.  (My patches which
added missing post-sync workarounds greatly reduced the number of GPU
hangs.)

I believe that the root of the problem is that need_workaround_flush is
not getting set to true sufficiently often, so the post-sync non-zero
workaround isn't happening when necessary.  Currently, we flag it at the
start of each batch, and /after/ each BLORP operation.

That means it's missing in two cases:
1. Switching from normal rendering to BLORP drawing.
(...fixed by this patch.)
2. Between successive draws (3DPRIMITIVEs) within a batch.
(I believe this is also necessary, and is not fixed by this patch.)



After thinking about this more this morning, I also suspected that the
bug's root problem was that need_workaround_flush needed to be set
at the top of blorp (your #1).

Why do you suspect #2? Experimentation? Hardware docs?


I would love to see an alternate patch which sets need_workaround_flush
in two places:
1. Batchbuffer init (for the very first draw)
2. After each 3DPRIMITIVE command.

I believe that would fix this hang and potentially other future hangs.


Agreed. I'll follow-up with patches for that.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Fix build to properly check for supported compiler flags

2013-12-20 Thread Chad Versace

On 12/19/2013 01:50 AM, Lauri Kasanen wrote:

On Wed, 18 Dec 2013 17:57:16 -0800
Matt Turner matts...@gmail.com wrote:


On Tue, Dec 17, 2013 at 7:15 AM, Lauri Kasanen c...@gmx.com



It seems to me that the Intel code that uses this SSE4.1 function
is still buggy, as it has no runtime check - would it not crash
if built on a SSE4-capable system but used with a lower-class cpu?


You haven't thought this through. :)

The SSE 4.1 code is in i965 #ifdef __SSE4_1__. __SSE4_1__ is defined
if the code is compiled with the appropriate CFLAGS, e.g., -msse4.1 or
-march=native and your CPU supports SSE 4.1.

That is, your Mesa would have to be compiled with flags incompatible
with your CPU for this to break, and in that case gcc would already
have used instructions your CPU doesn't have.


Yes, you're right that there is no crash risk on a normal build.
However, it is practically useless for distro builds like that, as they
can't ship a normal build and get the speedup at the same time.


I want to clarify how this is currently useful. Product-specific builds,
such as Chrome OS builds, do benefit from this SSE code, because they
are compiled with the appropriate CFLAGS.

Your point still stands though for regular distros.

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] glcpp: Add a more descriptive comment for the SKIP state manipulation

2013-12-20 Thread Ian Romanick
Other than the code formatting comment below, this series is

Reviewed-by: Ian Romanick ian.d.roman...@intel.com

On 12/19/2013 04:25 PM, Carl Worth wrote:
 Two things make this code confusing:
 
   1. The uncharacteristic manipulation of lexer start state outside of
  flex rules.
 
   2. The confusing semantics of the skip_stack (including the
  lexing_if override and the SKIP_NO_SKIP state).
 
 This new comment is intended to bring a bit more clarity for any readers.
 
 There is no intended behavioral change to the code here. The actual code
 changes include better indentation to avoid an excessively-long line, and
 using the more descriptive INITIAL rather than 0.
 ---
  src/glsl/glcpp/glcpp-lex.l | 45 +++--
  1 file changed, 39 insertions(+), 6 deletions(-)
 
 diff --git a/src/glsl/glcpp/glcpp-lex.l b/src/glsl/glcpp/glcpp-lex.l
 index a029f62..5543edc 100644
 --- a/src/glsl/glcpp/glcpp-lex.l
 +++ b/src/glsl/glcpp/glcpp-lex.l
 @@ -92,14 +92,47 @@ OCTAL_INTEGER 0[0-7]*[uU]?
  HEXADECIMAL_INTEGER  0[xX][0-9a-fA-F]+[uU]?
  
  %%
 - /* Implicitly switch between SKIP and INITIAL (non-skipping);
 -  * don't switch if some other state was explicitly set.
 +
 + /* The handling of the SKIP vs INITIAL start states requires
 +  * some special handling. Typically, a lexer would change
 +  * start states with statements like BEGIN SKIP within the
 +  * lexer rules. We can't get away with that here, since we
 +  * need the parser to actually evaluate expressions for
 +  * directives like #if.
 +  *
 +  * So, here, in code that will be executed on every call to
 +  * the lexer,and before any rules, we examine the skip_stack
 +  * as set by the parser to know whether to change from INITIAL
 +  * to SKIP or from SKIP back to INITIAL.
 +  *
 +  * Three cases cause us to switch out of the SKIP state and
 +  * back to the INITIAL state:
 +  *
 +  *  1. The top of the skip_stack is of type SKIP_NO_SKIP
 +  * This means we're still evaluating some #if
 +  * hierarchy, but we're on a branch of it where
 +  * content should not be skipped (such as #if 1 or
 +  * #else or so).
 +  *
 +  *  2. The skip_stack is NULL meaning that we've reached
 +  * the last #endif.
 +  *
 +  *  3. The lexing_if bit is set. This indicates that we
 +  * are lexing the expression following an #if of
 +  * #elif. Even inside an #if 0 we need to lex this
 +  * expression so the parser can correctly update the
 +  * skip_stack state.
*/
   glcpp_parser_t *parser = yyextra;
 - if (YY_START == 0 || YY_START == SKIP) {
 - if (parser-lexing_if || parser-skip_stack == NULL || 
 parser-skip_stack-type == SKIP_NO_SKIP) {
 - BEGIN 0;
 - } else {
 + if (YY_START == INITIAL || YY_START == SKIP) {
 + if (parser-lexing_if ||
 + parser-skip_stack == NULL ||
 + parser-skip_stack-type == SKIP_NO_SKIP)
 + {
 + BEGIN INITIAL;
 + }
 + else
 + {

This should use the same formatting as the rest of Mesa (the { or } on
the same line as the if or else).

   BEGIN SKIP;
   }
   }
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glcpp: error on multiple #else/#elif directives

2013-12-20 Thread Ian Romanick
On 12/17/2013 07:37 AM, Erik Faye-Lund wrote:
 The preprocessor currently accepts multiple else/elif-groups
 per if-section. The GLSL-preprocessor is defined by the C++
 specification, which defines the following parse-rule:
 
 if-section:
   if-group elif-groups(opt) else-group(opt) endif-line
 
 This clearly only allows a single else-group, that has to come
 after any elif-groups.
 
 So let's modify the code to follow the specification. Add test
 to prevent regressions.

Reviewed-by: Ian Romanick ian.d.roman...@intel.com
Cc: 10.0 mesa-sta...@lists.freedesktop.org

 ---
 
 Here's a resend of an older patch (original
 1379968503-30246-1-git-send-email-kusmab...@gmail.com), this time
 with a better commit message, and a similar treatment for elif-after-else.
 
  src/glsl/glcpp/glcpp-parse.y   | 23 
 +-
  src/glsl/glcpp/glcpp.h |  1 +
  src/glsl/glcpp/tests/118-multiple-else.c   |  6 ++
  src/glsl/glcpp/tests/118-multiple-else.c.expected  |  8 
  src/glsl/glcpp/tests/119-elif-after-else.c |  6 ++
  .../glcpp/tests/119-elif-after-else.c.expected |  8 
  6 files changed, 51 insertions(+), 1 deletion(-)
  create mode 100644 src/glsl/glcpp/tests/118-multiple-else.c
  create mode 100644 src/glsl/glcpp/tests/118-multiple-else.c.expected
  create mode 100644 src/glsl/glcpp/tests/119-elif-after-else.c
  create mode 100644 src/glsl/glcpp/tests/119-elif-after-else.c.expected
 
 diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y
 index 7edc274..451b728 100644
 --- a/src/glsl/glcpp/glcpp-parse.y
 +++ b/src/glsl/glcpp/glcpp-parse.y
 @@ -310,6 +310,11 @@ control_line:
   _glcpp_parser_expand_and_lex_from (parser,
  ELIF_EXPANDED, $2);
   }
 + else if (parser-skip_stack 
 + parser-skip_stack-has_else)
 + {
 + glcpp_error( @1, parser, #elif after #else);
 + }
   else
   {
   _glcpp_parser_skip_stack_change_if (parser,  @1,
 @@ -324,6 +329,11 @@ control_line:
   {
   glcpp_error( @1, parser, #elif with no expression);
   }
 + else if (parser-skip_stack 
 + parser-skip_stack-has_else)
 + {
 + glcpp_error( @1, parser, #elif after #else);
 + }
   else
   {
   _glcpp_parser_skip_stack_change_if (parser,  @1,
 @@ -332,7 +342,17 @@ control_line:
   }
   }
  |HASH_ELSE {
 - _glcpp_parser_skip_stack_change_if (parser,  @1, else, 1);
 + if (parser-skip_stack 
 + parser-skip_stack-has_else)
 + {
 + glcpp_error( @1, parser, multiple #else);
 + }
 + else
 + {
 + _glcpp_parser_skip_stack_change_if (parser,  @1, 
 else, 1);
 + if (parser-skip_stack)
 + parser-skip_stack-has_else = true;
 + }
   } NEWLINE
  |HASH_ENDIF {
   _glcpp_parser_skip_stack_pop (parser,  @1);
 @@ -2024,6 +2044,7 @@ _glcpp_parser_skip_stack_push_if (glcpp_parser_t 
 *parser, YYLTYPE *loc,
   node-type = SKIP_TO_ENDIF;
   }
  
 + node-has_else = false;
   node-next = parser-skip_stack;
   parser-skip_stack = node;
  }
 diff --git a/src/glsl/glcpp/glcpp.h b/src/glsl/glcpp/glcpp.h
 index 8aaa551..ccae96c 100644
 --- a/src/glsl/glcpp/glcpp.h
 +++ b/src/glsl/glcpp/glcpp.h
 @@ -153,6 +153,7 @@ typedef enum skip_type {
  
  typedef struct skip_node {
   skip_type_t type;
 + bool has_else;
   YYLTYPE loc; /* location of the initial #if/#elif/... */
   struct skip_node *next;
  } skip_node_t;
 diff --git a/src/glsl/glcpp/tests/118-multiple-else.c 
 b/src/glsl/glcpp/tests/118-multiple-else.c
 new file mode 100644
 index 000..62ad49c
 --- /dev/null
 +++ b/src/glsl/glcpp/tests/118-multiple-else.c
 @@ -0,0 +1,6 @@
 +#if 0
 +#else
 +int foo;
 +#else
 +int bar;
 +#endif
 diff --git a/src/glsl/glcpp/tests/118-multiple-else.c.expected 
 b/src/glsl/glcpp/tests/118-multiple-else.c.expected
 new file mode 100644
 index 000..eaec481
 --- /dev/null
 +++ b/src/glsl/glcpp/tests/118-multiple-else.c.expected
 @@ -0,0 +1,8 @@
 +0:4(1): preprocessor error: multiple #else
 +
 +
 +int foo;
 +
 +int bar;
 +
 +
 diff --git a/src/glsl/glcpp/tests/119-elif-after-else.c 
 b/src/glsl/glcpp/tests/119-elif-after-else.c
 new file mode 100644
 index 000..9b9e923
 --- /dev/null
 +++ b/src/glsl/glcpp/tests/119-elif-after-else.c
 @@ -0,0 +1,6 @@
 +#if 0
 +#else
 +int foo;
 +#elif 0
 +int bar;
 +#endif
 diff --git a/src/glsl/glcpp/tests/119-elif-after-else.c.expected 
 b/src/glsl/glcpp/tests/119-elif-after-else.c.expected
 new file mode 

Re: [Mesa-dev] [Mesa-stable] [PATCH] i965/gen6: Fix HiZ hang in WebGL Google maps

2013-12-20 Thread Kenneth Graunke
On 12/20/2013 03:09 PM, Chad Versace wrote:
 On 12/20/2013 01:36 PM, Kenneth Graunke wrote:
 On 12/20/2013 04:47 AM, Chad Versace wrote:
 We need to emit depth stall flushes before depth and hiz resolves.
 Placing them at the top of blorp's state emission fixes the hang.

 Fixes HiZ hang in the new WebGL Google maps on Sandybridge Chrome OS.
 Tested by zooming in and out continuously for 2 hours.

 This patch is based on
 https://chromium.googlesource.com/chromiumos/overlays/chromiumos-overlay/+/8bc07bb70163c3706fb4ba5f980e57dc942f56dd


 CC: mesa-sta...@lists.freedesktop.org
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=70740
 Signed-off-by: Stéphane Marchesin marc...@chromium.org
 Signed-off-by: Chad Versace chad.vers...@linux.intel.com
 ---
   src/mesa/drivers/dri/i965/gen6_blorp.cpp | 12 
   1 file changed, 12 insertions(+)

 diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 index 6a5841f..3a0e7ec 100644
 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
 @@ -1012,6 +1012,16 @@ gen6_blorp_emit_primitive(struct brw_context
 *brw,
  ADVANCE_BATCH();
   }

 +static void
 +gen6_emit_hiz_workaround(struct brw_context *brw, enum gen6_hiz_op
 hiz_op)
 +{
 +   if (hiz_op == GEN6_HIZ_OP_DEPTH_RESOLVE ||
 +   hiz_op == GEN6_HIZ_OP_HIZ_RESOLVE) {
 +  brw-batch.need_workaround_flush = true;
 +  intel_emit_post_sync_nonzero_flush(brw);
 +  intel_emit_depth_stall_flushes(brw);
 +   }
 +}

   /**
* \brief Execute a blit or render pass operation.
 @@ -1034,6 +1044,8 @@ gen6_blorp_exec(struct brw_context *brw,
  uint32_t wm_bind_bo_offset = 0;

  uint32_t prog_offset = params-get_wm_prog(brw, prog_data);
 +
 +   gen6_emit_hiz_workaround(brw, params-hiz_op);
  gen6_emit_3dstate_multisample(brw, params-dst.num_samples);
  gen6_emit_3dstate_sample_mask(brw,
params-dst.num_samples  1 ?


 I'm fine with landing this as is, since it's trivial and getting
 Sandybridge stable is really critical.  I'm glad to see this won't
 happen on Gen7+.

 Reviewed-by: Kenneth Graunke kenn...@whitecape.org
 
 Ok. Yours is the second r-b for the patch as-is. And this fixes hangs
 affecting users on all distros.  So I'll commit it as-is, with refinements
 to follow as I validate them.
 
 Validation takes a long time, multiple hours, so I think it unwise to
 postpone
 committing this bugfix until it's perfect. The needed build-test-refine
 cycles to
 attain a perfect fix may take a long time. (Thankfully the reproduction
 steps
 are automated now).
 

 That said, it would be nice to refine it a little.

 The intel_emit_post_sync_nonzero_flush(brw) call should not be
 necessary.  The very first BLORP call, gen6_emit_3dstate_multisample,
 already calls it.  However, it may not have taken effect in the past,
 since intel_emit_post_sync_nonzero_flush checks need_workaround_flush.

 I also am dubious whether we need the depth stall flushes, since we
 already do them before 3DSTATE_DEPTH_BUFFER, which is documented as
 necessary.

 So here's what I think is actually going on:

 We've seen strong evidence that it's 100% required to do the post-sync
 nonzero workaround before non-pipelined commands.  (My patches which
 added missing post-sync workarounds greatly reduced the number of GPU
 hangs.)

 I believe that the root of the problem is that need_workaround_flush is
 not getting set to true sufficiently often, so the post-sync non-zero
 workaround isn't happening when necessary.  Currently, we flag it at the
 start of each batch, and /after/ each BLORP operation.

 That means it's missing in two cases:
 1. Switching from normal rendering to BLORP drawing.
 (...fixed by this patch.)
 2. Between successive draws (3DPRIMITIVEs) within a batch.
 (I believe this is also necessary, and is not fixed by this patch.)

 
 After thinking about this more this morning, I also suspected that the
 bug's root problem was that need_workaround_flush needed to be set
 at the top of blorp (your #1).
 
 Why do you suspect #2? Experimentation? Hardware docs?

Intuition?  In many places, the documentation says that you must do
this kind of flush...unless software can guarantee that the pipeline is
already flushed.  Executing a 3DPRIMITIVE command kicks off rendering,
at which point I'm fairly sure the pipeline is NOT already flushed.

Basically, need_workaround_flush is an attempt to avoid flushes because
we've already done them.  Some GPU commands clearly invalidate the
already flushed state, but it's not clear which.  3DPRIMITIVE makes
the most sense to me, as it actually makes drawing happen, using the
state that was last programmed.  Subsequent commands begin modifying
that state, so flushing seems necessary.

 
 I would love to see an alternate patch which sets need_workaround_flush
 in two places:
 1. Batchbuffer init (for the very first draw)
 2. After each 

[Mesa-dev] main/hash_table.h vs program/hash_table.h

2013-12-20 Thread Connor Abbott
Hi,

While looking at the GLSL IR code, I noticed that different parts of the
code use 2 different hash table implementations. ir_loop_analysis,
ir_variable_refcount, ir_clone, etc. use struct hash_table under
program/hash_table.h, whereas ir_variable_refcount and link_uniform_blocks
use struct hash_table under main/hash_table.h. So more code uses
program/hash_table.h, but it seems based on git blame that
main/hash_table.h is meant to be a replacement and therefore new code
should use that - correct? If so, why was the rest of the code never
converted over? Also, the fact that both versions use the same name (struct
hash_table) seems to me like a potential source of confusion/chaos with
accidentally using the wrong functions. It seems like if we include
main/hash_table.h and program/hash_table.h and then use functions from both
header files on the same structure (for example, using the wrong functions
on the results of an analysis pass), then the compiler won't complain,
leading to all sorts of debugging fun; actually that's something I didn't
even think about until writing this. So why was the name kept the same for
the (apparently) new version?

Thanks,
Connor
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2] glcpp: error on multiple #else/#elif directives

2013-12-20 Thread Kenneth Graunke
On 12/17/2013 07:37 AM, Erik Faye-Lund wrote:
 The preprocessor currently accepts multiple else/elif-groups
 per if-section. The GLSL-preprocessor is defined by the C++
 specification, which defines the following parse-rule:
 
 if-section:
   if-group elif-groups(opt) else-group(opt) endif-line
 
 This clearly only allows a single else-group, that has to come
 after any elif-groups.
 
 So let's modify the code to follow the specification. Add test
 to prevent regressions.
 ---
 
 Here's a resend of an older patch (original
 1379968503-30246-1-git-send-email-kusmab...@gmail.com), this time
 with a better commit message, and a similar treatment for elif-after-else.

This looks good to me.  Thanks!

Reviewed-by: Kenneth Graunke kenn...@whitecape.org

Presumably Ian or Carl will commit it today...
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] glx: Simplify __glxGetMscRate, it only needs the screen, not a drawable

2013-12-20 Thread Ian Romanick
On 12/15/2013 02:37 AM, Lauri Kasanen wrote:
 Useful in its own right, but also needed for adaptive vsync.
 
 Last mail was accidentally wrapped.
 
 Signed-off-by: Lauri Kasanen c...@gmx.com

I made minor tweaks to the commit message and pushed this patch with my
Reviewed-by.

Thanks!

 ---
  src/glx/dri_common.c | 2 +-
  src/glx/glxclient.h  | 2 +-
  src/glx/glxcmds.c| 6 ++
  3 files changed, 4 insertions(+), 6 deletions(-)
 
 diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
 index 22ba248..b5058c9 100644
 --- a/src/glx/dri_common.c
 +++ b/src/glx/dri_common.c
 @@ -223,7 +223,7 @@ __driGetMSCRate(__DRIdrawable *draw,
  {
 __GLXDRIdrawable *glxDraw = loaderPrivate;
  
 -   return __glxGetMscRate(glxDraw, numerator, denominator);
 +   return __glxGetMscRate(glxDraw-psc, numerator, denominator);
  }
  
  _X_HIDDEN const __DRIsystemTimeExtension systemTimeExtension = {
 diff --git a/src/glx/glxclient.h b/src/glx/glxclient.h
 index e33dba6..a7118af 100644
 --- a/src/glx/glxclient.h
 +++ b/src/glx/glxclient.h
 @@ -781,7 +781,7 @@ extern GLboolean __glXGetMscRateOML(Display * dpy, 
 GLXDrawable drawable,
  
  #if defined(GLX_DIRECT_RENDERING)  !defined(GLX_USE_APPLEGL)
  extern GLboolean
 -__glxGetMscRate(__GLXDRIdrawable *glxDraw,
 +__glxGetMscRate(struct glx_screen *psc,
   int32_t * numerator, int32_t * denominator);
  
  /* So that dri2.c:DRI2WireToEvent() can access
 diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c
 index 06c4c16..b5377c2 100644
 --- a/src/glx/glxcmds.c
 +++ b/src/glx/glxcmds.c
 @@ -2095,16 +2095,14 @@ __glXGetSyncValuesOML(Display * dpy, GLXDrawable 
 drawable,
  
  #if defined(GLX_DIRECT_RENDERING)  !defined(GLX_USE_APPLEGL)
  _X_HIDDEN GLboolean
 -__glxGetMscRate(__GLXDRIdrawable *glxDraw,
 +__glxGetMscRate(struct glx_screen *psc,
   int32_t * numerator, int32_t * denominator)
  {
  #ifdef XF86VIDMODE
 -   struct glx_screen *psc;
 XF86VidModeModeLine mode_line;
 int dot_clock;
 int i;
  
 -   psc = glxDraw-psc;
 if (XF86VidModeQueryVersion(psc-dpy, i, i) 
 XF86VidModeGetModeLine(psc-dpy, psc-scr, dot_clock, mode_line)) {
unsigned n = dot_clock * 1000;
 @@ -2180,7 +2178,7 @@ __glXGetMscRateOML(Display * dpy, GLXDrawable drawable,
 if (draw == NULL)
return False;
  
 -   return __glxGetMscRate(draw, numerator, denominator);
 +   return __glxGetMscRate(draw-psc, numerator, denominator);
  #else
 (void) dpy;
 (void) drawable;
 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Fix build to properly check for supported compiler flags, v2

2013-12-20 Thread Matt Turner
On Thu, Dec 19, 2013 at 11:43 AM, Lauri Kasanen c...@gmx.com wrote:
 Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72708

 Signed-off-by: Lauri Kasanen c...@gmx.com
 ---

Thanks, R-b and pushed.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: Remove unused depth_mode parameter from translate_tex_format().

2013-12-20 Thread Kenneth Graunke
According to git blame, this hasn't been used in over two years:

commit d2235b0f4681f75d562131d655a6d7b7033d2d8b
Author: Eric Anholt e...@anholt.net
Date:   Thu Nov 17 17:01:58 2011 -0800

i965: Always handle GL_DEPTH_TEXTURE_MODE through the shader.

Signed-off-by: Kenneth Graunke kenn...@whitecape.org
---
 src/mesa/drivers/dri/i965/brw_state.h | 1 -
 src/mesa/drivers/dri/i965/brw_surface_formats.c   | 1 -
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  | 1 -
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 1 -
 4 files changed, 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index d3fd937..a148125 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -193,7 +193,6 @@ GLuint translate_tex_target(GLenum target);
 
 GLuint translate_tex_format(struct brw_context *brw,
 gl_format mesa_format,
-   GLenum depth_mode,
GLenum srgb_decode);
 
 int brw_get_texture_swizzle(const struct gl_context *ctx,
diff --git a/src/mesa/drivers/dri/i965/brw_surface_formats.c 
b/src/mesa/drivers/dri/i965/brw_surface_formats.c
index 88fda8c..9b75c2b 100644
--- a/src/mesa/drivers/dri/i965/brw_surface_formats.c
+++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c
@@ -686,7 +686,6 @@ brw_render_target_supported(struct brw_context *brw,
 GLuint
 translate_tex_format(struct brw_context *brw,
  gl_format mesa_format,
-GLenum depth_mode,
 GLenum srgb_decode)
 {
struct gl_context *ctx = brw-ctx;
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 01a4709..5236eda 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -289,7 +289,6 @@ brw_update_texture_surface(struct gl_context *ctx,
  BRW_SURFACE_CUBEFACE_ENABLES |
  (translate_tex_format(brw,
 mt-format,
-   tObj-DepthMode,
sampler-sRGBDecode) 
   BRW_SURFACE_FORMAT_SHIFT));
 
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index dbd7309..761bc3b 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -291,7 +291,6 @@ gen7_update_texture_surface(struct gl_context *ctx,
 
uint32_t tex_format = translate_tex_format(brw,
   mt-format,
-  tObj-DepthMode,
   sampler-sRGBDecode);
 
if (for_gather  tex_format == BRW_SURFACEFORMAT_R32G32_FLOAT)
-- 
1.8.5.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [V2 PATCH 1/2] mesa: inline r200 radeon texture format macros to facilitate search and replace

2013-12-20 Thread Mark Mueller
Signed-off-by: Mark Mueller markkmuel...@gmail.com
---
 src/mesa/drivers/dri/r200/r200_texstate.c | 108 +++---
 src/mesa/drivers/dri/radeon/radeon_texstate.c |  64 ++-
 2 files changed, 70 insertions(+), 102 deletions(-)

diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c 
b/src/mesa/drivers/dri/r200/r200_texstate.c
index b20bd51..b89bb39 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -60,20 +60,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
SOFTWARE.
 #define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23
 #define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45
 
-#define _COLOR(f) \
-[ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, 0 }
-#define _COLOR_REV(f) \
-[ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f, 0 }
-#define _ALPHA(f) \
-[ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 }
-#define _ALPHA_REV(f) \
-[ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 }
-#define _YUV(f) \
-[ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, R200_YUV_TO_RGB }
-#define _INVALID(f) \
-[ MESA_FORMAT_ ## f ] = { 0x, 0 }
 #define VALID_FORMAT(f) ( ((f) = MESA_FORMAT_RGBA_DXT5) \
- (tx_table_be[f].format != 0x) )
+  (tx_table_be[f].format != 0x) )
 
 struct tx_table {
GLuint format, filter;
@@ -82,63 +70,59 @@ struct tx_table {
 static const struct tx_table tx_table_be[] =
 {
[ MESA_FORMAT_RGBA ] = { R200_TXFORMAT_ABGR | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   _ALPHA_REV(RGBA),
-   _ALPHA(ARGB),
-   _ALPHA_REV(ARGB),
-   _INVALID(RGB888),
-   _COLOR(RGB565),
-   _COLOR_REV(RGB565),
-   _ALPHA(ARGB),
-   _ALPHA_REV(ARGB),
-   _ALPHA(ARGB1555),
-   _ALPHA_REV(ARGB1555),
-   _ALPHA(AL88),
-   _ALPHA_REV(AL88),
-   _ALPHA(A8),
-   _COLOR(L8),
-   _ALPHA(I8),
-   _YUV(YCBCR),
-   _YUV(YCBCR_REV),
-   _INVALID(RGB_FXT1),
-   _INVALID(RGBA_FXT1),
-   _COLOR(RGB_DXT1),
-   _ALPHA(RGBA_DXT1),
-   _ALPHA(RGBA_DXT3),
-   _ALPHA(RGBA_DXT5),
+   [ MESA_FORMAT_RGBA_REV ] = { R200_TXFORMAT_RGBA | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB_REV ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGB888 ] = { 0x, 0 },
+   [ MESA_FORMAT_RGB565 ] = { R200_TXFORMAT_RGB565, 0 },
+   [ MESA_FORMAT_RGB565_REV ] = { R200_TXFORMAT_RGB565, 0 },
+   [ MESA_FORMAT_ARGB ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB_REV ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB1555 ] = { R200_TXFORMAT_ARGB1555 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB1555_REV ] = { R200_TXFORMAT_ARGB1555 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_AL88 ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 
},
+   [ MESA_FORMAT_AL88_REV ] = { R200_TXFORMAT_AL88 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A8 ] = { R200_TXFORMAT_A8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_L8 ] = { R200_TXFORMAT_L8, 0 },
+   [ MESA_FORMAT_I8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YCBCR, R200_YUV_TO_RGB },
+   [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_YCBCR_REV, R200_YUV_TO_RGB },
+   [ MESA_FORMAT_RGB_FXT1 ] = { 0x, 0 },
+   [ MESA_FORMAT_RGBA_FXT1 ] = { 0x, 0 },
+   [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_RGB_DXT1, 0 },
+   [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_RGBA_DXT1 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_RGBA_DXT3 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_RGBA_DXT5 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
 };
 
 static const struct tx_table tx_table_le[] =
 {
-   _ALPHA(RGBA),
+   [ MESA_FORMAT_RGBA ] = { R200_TXFORMAT_RGBA | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
[ MESA_FORMAT_RGBA_REV ] = { R200_TXFORMAT_ABGR | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   _ALPHA(ARGB),
-   _ALPHA_REV(ARGB),
+   [ MESA_FORMAT_ARGB ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB_REV ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
[ MESA_FORMAT_RGB888 ] = { R200_TXFORMAT_ARGB, 0 },
-   _COLOR(RGB565),
-   _COLOR_REV(RGB565),
-   _ALPHA(ARGB),
-   _ALPHA_REV(ARGB),
-   _ALPHA(ARGB1555),
-   _ALPHA_REV(ARGB1555),
-   _ALPHA(AL88),
-   _ALPHA_REV(AL88),
-   _ALPHA(A8),
-   _COLOR(L8),
-   _ALPHA(I8),
-   _YUV(YCBCR),
-   _YUV(YCBCR_REV),
-   _INVALID(RGB_FXT1),
-   _INVALID(RGBA_FXT1),
-   _COLOR(RGB_DXT1),
-   _ALPHA(RGBA_DXT1),
-   _ALPHA(RGBA_DXT3),
-   _ALPHA(RGBA_DXT5),
+   [ MESA_FORMAT_RGB565 ] = { R200_TXFORMAT_RGB565, 0 },
+   [ 

[Mesa-dev] [V2 PATCH 1/2] mesa: inline r200 radeon texture format macros to facilitate search and replace

2013-12-20 Thread Mark Mueller
v2: This patch was inserted into the series to correct build problems with
r200 and radeon drivers. The patch replaces macros that operate on the
MESA_FORMATs with their inline equivalent to facilitate search and replace.

Signed-off-by: Mark Mueller markkmuel...@gmail.com
---
 src/mesa/drivers/dri/r200/r200_texstate.c | 108 +++---
 src/mesa/drivers/dri/radeon/radeon_texstate.c |  64 ++-
 2 files changed, 70 insertions(+), 102 deletions(-)

diff --git a/src/mesa/drivers/dri/r200/r200_texstate.c 
b/src/mesa/drivers/dri/r200/r200_texstate.c
index b20bd51..b89bb39 100644
--- a/src/mesa/drivers/dri/r200/r200_texstate.c
+++ b/src/mesa/drivers/dri/r200/r200_texstate.c
@@ -60,20 +60,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 
SOFTWARE.
 #define R200_TXFORMAT_RGBA_DXT3 R200_TXFORMAT_DXT23
 #define R200_TXFORMAT_RGBA_DXT5 R200_TXFORMAT_DXT45
 
-#define _COLOR(f) \
-[ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, 0 }
-#define _COLOR_REV(f) \
-[ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f, 0 }
-#define _ALPHA(f) \
-[ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 }
-#define _ALPHA_REV(f) \
-[ MESA_FORMAT_ ## f ## _REV ] = { R200_TXFORMAT_ ## f | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 }
-#define _YUV(f) \
-[ MESA_FORMAT_ ## f ] = { R200_TXFORMAT_ ## f, R200_YUV_TO_RGB }
-#define _INVALID(f) \
-[ MESA_FORMAT_ ## f ] = { 0x, 0 }
 #define VALID_FORMAT(f) ( ((f) = MESA_FORMAT_RGBA_DXT5) \
- (tx_table_be[f].format != 0x) )
+  (tx_table_be[f].format != 0x) )
 
 struct tx_table {
GLuint format, filter;
@@ -82,63 +70,59 @@ struct tx_table {
 static const struct tx_table tx_table_be[] =
 {
[ MESA_FORMAT_RGBA ] = { R200_TXFORMAT_ABGR | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   _ALPHA_REV(RGBA),
-   _ALPHA(ARGB),
-   _ALPHA_REV(ARGB),
-   _INVALID(RGB888),
-   _COLOR(RGB565),
-   _COLOR_REV(RGB565),
-   _ALPHA(ARGB),
-   _ALPHA_REV(ARGB),
-   _ALPHA(ARGB1555),
-   _ALPHA_REV(ARGB1555),
-   _ALPHA(AL88),
-   _ALPHA_REV(AL88),
-   _ALPHA(A8),
-   _COLOR(L8),
-   _ALPHA(I8),
-   _YUV(YCBCR),
-   _YUV(YCBCR_REV),
-   _INVALID(RGB_FXT1),
-   _INVALID(RGBA_FXT1),
-   _COLOR(RGB_DXT1),
-   _ALPHA(RGBA_DXT1),
-   _ALPHA(RGBA_DXT3),
-   _ALPHA(RGBA_DXT5),
+   [ MESA_FORMAT_RGBA_REV ] = { R200_TXFORMAT_RGBA | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB_REV ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGB888 ] = { 0x, 0 },
+   [ MESA_FORMAT_RGB565 ] = { R200_TXFORMAT_RGB565, 0 },
+   [ MESA_FORMAT_RGB565_REV ] = { R200_TXFORMAT_RGB565, 0 },
+   [ MESA_FORMAT_ARGB ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB_REV ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB1555 ] = { R200_TXFORMAT_ARGB1555 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB1555_REV ] = { R200_TXFORMAT_ARGB1555 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_AL88 ] = { R200_TXFORMAT_AL88 | R200_TXFORMAT_ALPHA_IN_MAP, 0 
},
+   [ MESA_FORMAT_AL88_REV ] = { R200_TXFORMAT_AL88 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_A8 ] = { R200_TXFORMAT_A8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_L8 ] = { R200_TXFORMAT_L8, 0 },
+   [ MESA_FORMAT_I8 ] = { R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_YCBCR ] = { R200_TXFORMAT_YCBCR, R200_YUV_TO_RGB },
+   [ MESA_FORMAT_YCBCR_REV ] = { R200_TXFORMAT_YCBCR_REV, R200_YUV_TO_RGB },
+   [ MESA_FORMAT_RGB_FXT1 ] = { 0x, 0 },
+   [ MESA_FORMAT_RGBA_FXT1 ] = { 0x, 0 },
+   [ MESA_FORMAT_RGB_DXT1 ] = { R200_TXFORMAT_RGB_DXT1, 0 },
+   [ MESA_FORMAT_RGBA_DXT1 ] = { R200_TXFORMAT_RGBA_DXT1 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGBA_DXT3 ] = { R200_TXFORMAT_RGBA_DXT3 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_RGBA_DXT5 ] = { R200_TXFORMAT_RGBA_DXT5 | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
 };
 
 static const struct tx_table tx_table_le[] =
 {
-   _ALPHA(RGBA),
+   [ MESA_FORMAT_RGBA ] = { R200_TXFORMAT_RGBA | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
[ MESA_FORMAT_RGBA_REV ] = { R200_TXFORMAT_ABGR | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
-   _ALPHA(ARGB),
-   _ALPHA_REV(ARGB),
+   [ MESA_FORMAT_ARGB ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
+   [ MESA_FORMAT_ARGB_REV ] = { R200_TXFORMAT_ARGB | 
R200_TXFORMAT_ALPHA_IN_MAP, 0 },
[ MESA_FORMAT_RGB888 ] = { R200_TXFORMAT_ARGB, 0 },
-   _COLOR(RGB565),
-   _COLOR_REV(RGB565),
-   _ALPHA(ARGB),
-   _ALPHA_REV(ARGB),
-   _ALPHA(ARGB1555),
-   _ALPHA_REV(ARGB1555),
-   _ALPHA(AL88),
-   _ALPHA_REV(AL88),
-   _ALPHA(A8),
-   _COLOR(L8),
-   _ALPHA(I8),
-   _YUV(YCBCR),
-   

[Mesa-dev] [Bug 72708] Master fails to build with older gcc due to -msse4.1

2013-12-20 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=72708

Matt Turner matts...@gmail.com changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #3 from Matt Turner matts...@gmail.com ---
Committed your patch. Thanks

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev