Mesa (master): i965: Make a helper for finding an existing shader variant.

2017-01-17 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: f9edc550b2bb76f77e33b6cb122a91f266bc5958
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f9edc550b2bb76f77e33b6cb122a91f266bc5958

Author: Kenneth Graunke 
Date:   Fri Nov 11 13:31:06 2016 -0800

i965: Make a helper for finding an existing shader variant.

We had five copies of the same "walk the cache and look for an
existing shader variant for this program" code.  Now we have one
helper function that returns the key.

Signed-off-by: Kenneth Graunke 
Reviewed-by: Eduardo Lima Mitev 

---

 src/mesa/drivers/dri/i965/brw_gs.c| 22 
 src/mesa/drivers/dri/i965/brw_program_cache.c | 38 +++
 src/mesa/drivers/dri/i965/brw_state.h |  5 
 src/mesa/drivers/dri/i965/brw_tcs.c   | 22 
 src/mesa/drivers/dri/i965/brw_tes.c   | 22 
 src/mesa/drivers/dri/i965/brw_vs.c| 22 
 src/mesa/drivers/dri/i965/brw_wm.c| 22 
 7 files changed, 68 insertions(+), 85 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
b/src/mesa/drivers/dri/i965/brw_gs.c
index b7fb9f9..2996203 100644
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -40,26 +40,14 @@ static void
 brw_gs_debug_recompile(struct brw_context *brw, struct gl_program *prog,
const struct brw_gs_prog_key *key)
 {
-   struct brw_cache_item *c = NULL;
-   const struct brw_gs_prog_key *old_key = NULL;
-   bool found = false;
-
perf_debug("Recompiling geometry shader for program %d\n", prog->Id);
 
-   for (unsigned int i = 0; i < brw->cache.size; i++) {
-  for (c = brw->cache.items[i]; c; c = c->next) {
- if (c->cache_id == BRW_CACHE_GS_PROG) {
-old_key = c->key;
-
-if (old_key->program_string_id == key->program_string_id)
-   break;
- }
-  }
-  if (c)
- break;
-   }
+   bool found = false;
+   const struct brw_gs_prog_key *old_key =
+  brw_find_previous_compile(&brw->cache, BRW_CACHE_GS_PROG,
+key->program_string_id);
 
-   if (!c) {
+   if (!old_key) {
   perf_debug("  Didn't find previous compile in the shader cache for "
  "debug\n");
   return;
diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
b/src/mesa/drivers/dri/i965/brw_program_cache.c
index 3947904a..3d95372 100644
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -55,6 +55,27 @@
 
 #define FILE_DEBUG_FLAG DEBUG_STATE
 
+static unsigned
+get_program_string_id(enum brw_cache_id cache_id, const void *key)
+{
+   switch (cache_id) {
+   case BRW_CACHE_VS_PROG:
+  return ((struct brw_vs_prog_key *) key)->program_string_id;
+   case BRW_CACHE_TCS_PROG:
+  return ((struct brw_tcs_prog_key *) key)->program_string_id;
+   case BRW_CACHE_TES_PROG:
+  return ((struct brw_tes_prog_key *) key)->program_string_id;
+   case BRW_CACHE_GS_PROG:
+  return ((struct brw_gs_prog_key *) key)->program_string_id;
+   case BRW_CACHE_CS_PROG:
+  return ((struct brw_cs_prog_key *) key)->program_string_id;
+   case BRW_CACHE_FS_PROG:
+  return ((struct brw_wm_prog_key *) key)->program_string_id;
+   default:
+  unreachable("no program string id for this kind of program");
+   }
+}
+
 static GLuint
 hash_key(struct brw_cache_item *item)
 {
@@ -268,6 +289,23 @@ brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
return offset;
 }
 
+const void *
+brw_find_previous_compile(struct brw_cache *cache,
+  enum brw_cache_id cache_id,
+  unsigned program_string_id)
+{
+   for (unsigned i = 0; i < cache->size; i++) {
+  for (struct brw_cache_item *c = cache->items[i]; c; c = c->next) {
+ if (c->cache_id == cache_id &&
+ get_program_string_id(cache_id, c->key) == program_string_id) {
+return c->key;
+ }
+  }
+   }
+
+   return NULL;
+}
+
 void
 brw_upload_cache(struct brw_cache *cache,
  enum brw_cache_id cache_id,
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 176557b..bd82212 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -235,6 +235,11 @@ bool brw_search_cache(struct brw_cache *cache,
   const void *key,
   GLuint key_size,
   uint32_t *inout_offset, void *inout_aux);
+
+const void *brw_find_previous_compile(struct brw_cache *cache,
+  enum brw_cache_id cache_id,
+  unsigned program_string_id);
+
 void brw_program_cache_check_size(struct brw_context *brw);
 
 void brw_init_caches( struct brw_context *brw );
diff --git 

Mesa (master): i965: Move program cache printing to brw_program_cache.c.

2017-01-17 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: ce892392948e18241a872878873dbdd46e546fb2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ce892392948e18241a872878873dbdd46e546fb2

Author: Kenneth Graunke 
Date:   Fri Nov 11 14:47:53 2016 -0800

i965: Move program cache printing to brw_program_cache.c.

It makes sense to put a function which prints out the entire contents
of the program cache in the file that implements the program cache.

Signed-off-by: Kenneth Graunke 
Reviewed-by: Eduardo Lima Mitev 

---

 src/mesa/drivers/dri/i965/brw_program_cache.c | 46 +
 src/mesa/drivers/dri/i965/brw_state.h |  2 +
 src/mesa/drivers/dri/i965/brw_state_dump.c| 58 +--
 3 files changed, 49 insertions(+), 57 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
b/src/mesa/drivers/dri/i965/brw_program_cache.c
index 3d95372..8939fb1 100644
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -480,3 +480,49 @@ brw_destroy_caches(struct brw_context *brw)
 {
brw_destroy_cache(brw, &brw->cache);
 }
+
+static const char *
+cache_name(enum brw_cache_id cache_id)
+{
+   switch (cache_id) {
+   case BRW_CACHE_VS_PROG:
+  return "VS kernel";
+   case BRW_CACHE_TCS_PROG:
+  return "TCS kernel";
+   case BRW_CACHE_TES_PROG:
+  return "TES kernel";
+   case BRW_CACHE_FF_GS_PROG:
+  return "Fixed-function GS kernel";
+   case BRW_CACHE_GS_PROG:
+  return "GS kernel";
+   case BRW_CACHE_CLIP_PROG:
+  return "CLIP kernel";
+   case BRW_CACHE_SF_PROG:
+  return "SF kernel";
+   case BRW_CACHE_FS_PROG:
+  return "FS kernel";
+   case BRW_CACHE_CS_PROG:
+  return "CS kernel";
+   default:
+  return "unknown";
+   }
+}
+
+void
+brw_print_program_cache(struct brw_context *brw)
+{
+   const struct brw_cache *cache = &brw->cache;
+   struct brw_cache_item *item;
+
+   drm_intel_bo_map(cache->bo, false);
+
+   for (unsigned i = 0; i < cache->size; i++) {
+  for (item = cache->items[i]; item; item = item->next) {
+ fprintf(stderr, "%s:\n", cache_name(i));
+ brw_disassemble(&brw->screen->devinfo, cache->bo->virtual,
+ item->offset, item->size, stderr);
+  }
+   }
+
+   drm_intel_bo_unmap(cache->bo);
+}
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index bd82212..f2349d8 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -245,6 +245,8 @@ void brw_program_cache_check_size(struct brw_context *brw);
 void brw_init_caches( struct brw_context *brw );
 void brw_destroy_caches( struct brw_context *brw );
 
+void brw_print_program_cache(struct brw_context *brw);
+
 /***
  * brw_state_batch.c
  */
diff --git a/src/mesa/drivers/dri/i965/brw_state_dump.c 
b/src/mesa/drivers/dri/i965/brw_state_dump.c
index 1ed8aaa..13e76ec 100644
--- a/src/mesa/drivers/dri/i965/brw_state_dump.c
+++ b/src/mesa/drivers/dri/i965/brw_state_dump.c
@@ -718,62 +718,6 @@ static void dump_binding_table(struct brw_context *brw, 
uint32_t offset,
 }
 
 static void
-dump_prog_cache(struct brw_context *brw)
-{
-   struct brw_cache *cache = &brw->cache;
-   unsigned int b;
-
-   drm_intel_bo_map(brw->cache.bo, false);
-
-   for (b = 0; b < cache->size; b++) {
-  struct brw_cache_item *item;
-
-  for (item = cache->items[b]; item; item = item->next) {
-const char *name;
-
-switch (item->cache_id) {
-case BRW_CACHE_VS_PROG:
-   name = "VS kernel";
-   break;
- case BRW_CACHE_TCS_PROG:
-name = "TCS kernel";
-break;
- case BRW_CACHE_TES_PROG:
-name = "TES kernel";
-break;
-case BRW_CACHE_FF_GS_PROG:
-   name = "Fixed-function GS kernel";
-   break;
- case BRW_CACHE_GS_PROG:
-name = "GS kernel";
-break;
-case BRW_CACHE_CLIP_PROG:
-   name = "CLIP kernel";
-   break;
-case BRW_CACHE_SF_PROG:
-   name = "SF kernel";
-   break;
-case BRW_CACHE_FS_PROG:
-   name = "FS kernel";
-   break;
- case BRW_CACHE_CS_PROG:
-name = "CS kernel";
-break;
-default:
-   name = "unknown";
-   break;
-}
-
- fprintf(stderr, "%s:\n", name);
- brw_disassemble(&brw->screen->devinfo, brw->cache.bo->virtual,
- item->offset, item->size, stderr);
-  }
-   }
-
-   drm_intel_bo_unmap(brw->cache.bo);
-}
-
-static void
 dump_state_batch(struct brw_context *brw)
 {
int i;
@@ -880,5 +824,5 @@ void brw_debug_batch(struct brw_context *brw)
drm_intel_bo_unmap(brw->batch.bo);
 
if (0)
-  dump_prog_cache(brw);
+  brw_print_program_cache(brw);
 }


Mesa (master): i965: Don't map/unmap in brw_print_program_cache on LLC platforms.

2017-01-17 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: aa291c3ba9b1062d219276cef12b1b7c5380b423
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=aa291c3ba9b1062d219276cef12b1b7c5380b423

Author: Kenneth Graunke 
Date:   Fri Nov 11 18:05:14 2016 -0800

i965: Don't map/unmap in brw_print_program_cache on LLC platforms.

We have a persistent mapping.  Don't map it a second time or try to
unmap it.  Just use the pointer.

This most likely would wreak havoc except that this code is unused
(it's only called from an if (0) debug block).

Signed-off-by: Kenneth Graunke 
Reviewed-by: Eduardo Lima Mitev 

---

 src/mesa/drivers/dri/i965/brw_program_cache.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c 
b/src/mesa/drivers/dri/i965/brw_program_cache.c
index 8939fb1..44d9994 100644
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -514,7 +514,8 @@ brw_print_program_cache(struct brw_context *brw)
const struct brw_cache *cache = &brw->cache;
struct brw_cache_item *item;
 
-   drm_intel_bo_map(cache->bo, false);
+   if (!brw->has_llc)
+  drm_intel_bo_map(cache->bo, false);
 
for (unsigned i = 0; i < cache->size; i++) {
   for (item = cache->items[i]; item; item = item->next) {
@@ -524,5 +525,6 @@ brw_print_program_cache(struct brw_context *brw)
   }
}
 
-   drm_intel_bo_unmap(cache->bo);
+   if (!brw->has_llc)
+  drm_intel_bo_unmap(cache->bo);
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): glsl: Make copy propagation not panic when it sees an intrinsic.

2017-01-17 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: e7d4008ebfe561ee0aa3df6cdcfd39a8842ed659
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e7d4008ebfe561ee0aa3df6cdcfd39a8842ed659

Author: Kenneth Graunke 
Date:   Fri Dec  9 19:06:06 2016 -0800

glsl: Make copy propagation not panic when it sees an intrinsic.

A number of games have large arrays of constants, which we promote to
uniforms.  This introduces copies from the uniform array to the original
temporary array.  Normally, copy propagation eliminates those copies,
making everything refer to the uniform array directly.

A number of shaders in "Deus Ex: Mankind Divided" recently exposed a
limitation of copy propagation - if we had any intrinsics (i.e. image
access in a compute shader), we weren't able to get rid of these copies.

That meant that any variable indexing remained on the temporary array
rather than being moved to the uniform array.  i965's scalar backend
currently doesn't support indirect addressing of temporary arrays,
which meant lowering it to if-ladders.  This was horrible.

According to Marek, on radeonsi/GCN, "F1 2015" uses 64% less
spilled-temp-array memory.

On i965/Skylake:

total instructions in shared programs: 13362954 -> 13329878 (-0.25%)
instructions in affected programs: 43745 -> 10669 (-75.61%)
helped: 12
HURT: 0

total cycles in shared programs: 248081010 -> 245949178 (-0.86%)
cycles in affected programs: 4597930 -> 2466098 (-46.37%)
helped: 12
HURT: 0

total spills in shared programs: 9493 -> 9507 (0.15%)
spills in affected programs: 25 -> 39 (56.00%)
helped: 0
HURT: 1

total fills in shared programs: 12127 -> 12197 (0.58%)
fills in affected programs: 110 -> 180 (63.64%)
helped: 0
HURT: 1

Helps Deus Ex: Mankind Divided.   The one shader with hurt spills/fills
is from Tomb Raider at Ultra settings, but that same shader has a
-39.55% reduction in instructions and -14.09% reduction in cycle counts,
so it seems like a win there as well.

Signed-off-by: Kenneth Graunke 
Reviewed-by: Timothy Arceri 
Reviewed-by: Matt Turner 

---

 src/compiler/glsl/opt_copy_propagation.cpp | 31 ++
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/compiler/glsl/opt_copy_propagation.cpp 
b/src/compiler/glsl/opt_copy_propagation.cpp
index 247c498..2240421 100644
--- a/src/compiler/glsl/opt_copy_propagation.cpp
+++ b/src/compiler/glsl/opt_copy_propagation.cpp
@@ -186,11 +186,34 @@ ir_copy_propagation_visitor::visit_enter(ir_call *ir)
   }
}
 
-   /* Since we're unlinked, we don't (necessarily) know the side effects of
-* this call.  So kill all copies.
+   /* Since this pass can run when unlinked, we don't (necessarily) know
+* the side effects of calls.  (When linked, most calls are inlined
+* anyway, so it doesn't matter much.)
+*
+* One place where this does matter is IR intrinsics.  They're never
+* inlined.  We also know what they do - while some have side effects
+* (such as image writes), none edit random global variables.  So we
+* can assume they're side-effect free (other than the return value
+* and out parameters).
 */
-   _mesa_hash_table_clear(acp, NULL);
-   this->killed_all = true;
+   if (!ir->callee->is_intrinsic()) {
+  _mesa_hash_table_clear(acp, NULL);
+  this->killed_all = true;
+   } else {
+  if (ir->return_deref)
+ kill(ir->return_deref->var);
+
+  foreach_two_lists(formal_node, &ir->callee->parameters,
+actual_node, &ir->actual_parameters) {
+ ir_variable *sig_param = (ir_variable *) formal_node;
+ if (sig_param->data.mode == ir_var_function_out ||
+ sig_param->data.mode == ir_var_function_inout) {
+ir_rvalue *ir = (ir_rvalue *) actual_node;
+ir_variable *var = ir->variable_referenced();
+kill(var);
+ }
+  }
+   }
 
return visit_continue_with_parent;
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965: Make DCE set null destinations on messages with side effects.

2017-01-17 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: 9919542f1cfff70524bc6117d19bf88e59159caa
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9919542f1cfff70524bc6117d19bf88e59159caa

Author: Kenneth Graunke 
Date:   Sat Jan 14 23:32:12 2017 -0800

i965: Make DCE set null destinations on messages with side effects.

(Co-authored by Matt Turner.)

Image atomics, for example, return a value - but the shader may not
want to use it.  We assigned a useless VGRF destination.  This seemed
harmless, but it can actually be quite harmful.  The register allocator
has to assign that VGRF to a real register.  It may assign the same
actual GRF to the destination of an instruction that follows soon after.

This results in a write-after-write (WAW) dependency, and stall.

A number of "Deus Ex: Mankind Divided" shaders use image atomics, but
don't use the return value.  Several of these were hitting WAW stalls
for nearly 14,000 (poorly estimated) cycles a pop.  Making dead code
elimination null out the destination avoids this issue.

This patch cuts one shader's estimated cycles by -98.39%!  Removing the
message response should also help with data cluster bandwidth.

On Skylake:

(instruction counts remain identical)

total cycles in shared programs: 255413890 -> 248081010 (-2.87%)
cycles in affected programs: 12019948 -> 4687068 (-61.01%)
helped: 24
HURT: 10

v2: Make can_omit_write independent of can_eliminate (Curro).

Signed-off-by: Kenneth Graunke 
Reviewed-by: Francisco Jerez 
Reviewed-by: Matt Turner 

---

 .../dri/i965/brw_fs_dead_code_eliminate.cpp| 54 --
 1 file changed, 41 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
index 0dd6091..7adb427 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -34,6 +34,42 @@
  * yet in the tail end of this block.
  */
 
+/**
+ * Is it safe to eliminate the instruction?
+ */
+static bool
+can_eliminate(const fs_inst *inst, BITSET_WORD *flag_live)
+{
+return !inst->is_control_flow() &&
+   !inst->has_side_effects() &&
+   !(flag_live[0] & inst->flags_written()) &&
+   !inst->writes_accumulator;
+}
+
+/**
+ * Is it safe to omit the write, making the destination ARF null?
+ */
+static bool
+can_omit_write(const fs_inst *inst)
+{
+   switch (inst->opcode) {
+   case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+   case SHADER_OPCODE_TYPED_ATOMIC:
+   case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
+  return true;
+   default:
+  /* We can eliminate the destination write for ordinary instructions,
+   * but not most SENDs.
+   */
+  if (inst->opcode < 128 && inst->mlen == 0)
+ return true;
+
+  /* It might not be safe for other virtual opcodes. */
+  return false;
+   }
+}
+
 bool
 fs_visitor::dead_code_eliminate()
 {
@@ -52,29 +88,21 @@ fs_visitor::dead_code_eliminate()
  sizeof(BITSET_WORD));
 
   foreach_inst_in_block_reverse_safe(fs_inst, inst, block) {
- if (inst->dst.file == VGRF && !inst->has_side_effects()) {
+ if (inst->dst.file == VGRF) {
 const unsigned var = live_intervals->var_from_reg(inst->dst);
 bool result_live = false;
 
 for (unsigned i = 0; i < regs_written(inst); i++)
result_live |= BITSET_TEST(live, var + i);
 
-if (!result_live) {
+if (!result_live &&
+(can_omit_write(inst) || can_eliminate(inst, flag_live))) {
+   inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
progress = true;
-
-   if (inst->writes_accumulator || inst->flags_written()) {
-  inst->dst = fs_reg(retype(brw_null_reg(), inst->dst.type));
-   } else {
-  inst->opcode = BRW_OPCODE_NOP;
-   }
 }
  }
 
- if (inst->dst.is_null() &&
- !inst->is_control_flow() &&
- !inst->has_side_effects() &&
- !(flag_live[0] & inst->flags_written()) &&
- !inst->writes_accumulator) {
+ if (inst->dst.is_null() && can_eliminate(inst, flag_live)) {
 inst->opcode = BRW_OPCODE_NOP;
 progress = true;
  }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965: Make DCE explicitly not eliminate any control flow instructions.

2017-01-17 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: be5f53e769deb936509efd6f0576b15b7a5432b9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=be5f53e769deb936509efd6f0576b15b7a5432b9

Author: Kenneth Graunke 
Date:   Tue Jan 17 19:15:50 2017 -0800

i965: Make DCE explicitly not eliminate any control flow instructions.

According to Matt, the dead code pass explicitly avoided IF and WHILE
because on Sandybridge, these could have conditional modifiers and
null destination registers.  Normally, those instructions use BAD_FILE
for the destination register.  Nowadays, we don't do that anymore, so
we could technically drop these checks.

However, it's clearer to explicitly leave control flow instructions
alone, so change it to the more generic !inst->is_control_flow().

This should have no actual change.

[This patch implements review feedback from Curro and Matt.]

Signed-off-by: Kenneth Graunke 
Reviewed-by: Francisco Jerez 
Reviewed-by: Matt Turner 

---

 src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
index 8a0469a..04901a9 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -77,9 +77,8 @@ fs_visitor::dead_code_eliminate()
 }
  }
 
- if ((inst->opcode != BRW_OPCODE_IF &&
-  inst->opcode != BRW_OPCODE_WHILE) &&
- inst->dst.is_null() &&
+ if (inst->dst.is_null() &&
+ !inst->is_control_flow() &&
  !inst->has_side_effects() &&
  !inst->flags_written() &&
  !inst->writes_accumulator) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): i965: Combine some dead code elimination NOP'ing code.

2017-01-17 Thread Kenneth Graunke
Module: Mesa
Branch: master
Commit: 90bf39cd2b39874557a7c492d92b85945d45f3c6
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=90bf39cd2b39874557a7c492d92b85945d45f3c6

Author: Kenneth Graunke 
Date:   Wed Dec 14 17:56:35 2016 -0800

i965: Combine some dead code elimination NOP'ing code.

In theory we might have incorrectly NOP'd instructions that write the
flag, but where that flag value isn't used, and yet the instruction
either writes the accumulator or has side effects.

I don't believe any such instructions exist, so this is mostly a
code cleanup.

Curro pointed out that FS_OPCODE_FB_WRITE has a null destination and
actually writes the flag on Gen4-5 to dynamically decide whether to
write some payload data.  The hunk removed in this patch might have
NOP'd it, except that we don't actually mark flags_written() in the
IR, so it doesn't think the flag is touched at all.  That's sketchy,
but it means it wouldn't hit this today (though there are likely other
problems!).

v2: Properly replace the inst->regs_written() check in the second
hunk with the flag being live (mistake caught by Curro).

Signed-off-by: Kenneth Graunke 
Reviewed-by: Francisco Jerez 
Reviewed-by: Matt Turner 

---

 src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp | 9 +
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
index 04901a9..0dd6091 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_dead_code_eliminate.cpp
@@ -70,17 +70,10 @@ fs_visitor::dead_code_eliminate()
 }
  }
 
- if (inst->dst.is_null() && inst->flags_written()) {
-if (!(flag_live[0] & inst->flags_written())) {
-   inst->opcode = BRW_OPCODE_NOP;
-   progress = true;
-}
- }
-
  if (inst->dst.is_null() &&
  !inst->is_control_flow() &&
  !inst->has_side_effects() &&
- !inst->flags_written() &&
+ !(flag_live[0] & inst->flags_written()) &&
  !inst->writes_accumulator) {
 inst->opcode = BRW_OPCODE_NOP;
 progress = true;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv: disable vertex reuse when writing viewport index

2017-01-17 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: aac562f112ea9194b416c97336dcbbd3c1da812b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=aac562f112ea9194b416c97336dcbbd3c1da812b

Author: Dave Airlie 
Date:   Wed Jan 18 06:26:31 2017 +1000

radv: disable vertex reuse when writing viewport index

This fixes some issues we'd hit later if using viewport
indexes.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_cmd_buffer.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 27fa405..c6f238b 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -511,6 +511,8 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
   cull_dist_mask << 8 |
   clip_dist_mask);
 
+   radeon_set_context_reg(cmd_buffer->cs, R_028AB4_VGT_REUSE_OFF,
+  
S_028AB4_REUSE_OFF(vs->info.vs.writes_viewport_index));
 }
 
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv/ac: switch an if to switch

2017-01-17 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 5dadd7ca27da6cd5bbac95c8e09130ec4a384e2b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5dadd7ca27da6cd5bbac95c8e09130ec4a384e2b

Author: Dave Airlie 
Date:   Tue Jan 17 08:38:14 2017 +1000

radv/ac: switch an if to switch

makes it easier to add other shader stages.

Reviewed-by: Bas Nieuwenhuizen 
Reviewed-by: Edward O'Callaghan 
Signed-off-by: Dave Airlie 

---

 src/amd/common/ac_nir_to_llvm.c | 13 +
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 3173aa5..6d98fde 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4626,11 +4626,16 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
/* +3 for scratch wave offset and VCC */
config->num_sgprs = MAX2(config->num_sgprs,
 shader_info->num_input_sgprs + 3);
-   if (nir->stage == MESA_SHADER_COMPUTE) {
+
+   switch (nir->stage) {
+   case MESA_SHADER_COMPUTE:
for (int i = 0; i < 3; ++i)
shader_info->cs.block_size[i] = 
nir->info->cs.local_size[i];
-   }
-
-   if (nir->stage == MESA_SHADER_FRAGMENT)
+   break;
+   case MESA_SHADER_FRAGMENT:
shader_info->fs.early_fragment_test = 
nir->info->fs.early_fragment_tests;
+   break;
+   default:
+   break;
+   }
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv: add support for writing layer/viewport index (v2)

2017-01-17 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 6b635bbe16c93ad13afa3390d20c2f0f033e065d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6b635bbe16c93ad13afa3390d20c2f0f033e065d

Author: Dave Airlie 
Date:   Tue Jan 17 07:04:52 2017 +1000

radv: add support for writing layer/viewport index (v2)

This just adds the infrastructure to allow writing layer
and viewport index. It's just a first patch out of the geom
shader tree, and doesn't do much on its own.

v2: add missing if statement change (Bas)

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/common/ac_nir_to_llvm.c  | 21 ++---
 src/amd/common/ac_nir_to_llvm.h  |  2 ++
 src/amd/vulkan/radv_cmd_buffer.c |  6 +-
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 798ddca..3173aa5 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4107,7 +4107,7 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
unsigned pos_idx, num_pos_exports = 0;
LLVMValueRef args[9];
LLVMValueRef pos_args[4][9] = { { 0 } };
-   LLVMValueRef psize_value = 0;
+   LLVMValueRef psize_value = NULL, layer_value = NULL, 
viewport_index_value = NULL;
int i;
const uint64_t clip_mask = ctx->output_mask & ((1ull << 
VARYING_SLOT_CLIP_DIST0) |
   (1ull << 
VARYING_SLOT_CLIP_DIST1) |
@@ -4167,6 +4167,14 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
ctx->shader_info->vs.writes_pointsize = true;
psize_value = values[0];
continue;
+   } else if (i == VARYING_SLOT_LAYER) {
+   ctx->shader_info->vs.writes_layer = true;
+   layer_value = values[0];
+   continue;
+   } else if (i == VARYING_SLOT_VIEWPORT) {
+   ctx->shader_info->vs.writes_viewport_index = true;
+   viewport_index_value = values[0];
+   continue;
} else if (i >= VARYING_SLOT_VAR0) {
ctx->shader_info->vs.export_mask |= 1u << (i - 
VARYING_SLOT_VAR0);
target = V_008DFC_SQ_EXP_PARAM + param_count;
@@ -4200,8 +4208,11 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
pos_args[0][8] = ctx->f32one;  /* W */
}
 
-   if (ctx->shader_info->vs.writes_pointsize == true) {
-   pos_args[1][0] = LLVMConstInt(ctx->i32, 
(ctx->shader_info->vs.writes_pointsize == true), false); /* writemask */
+   uint32_t mask = ((ctx->shader_info->vs.writes_pointsize == true ? 1 : 
0) |
+(ctx->shader_info->vs.writes_layer == true ? 4 : 0) |
+(ctx->shader_info->vs.writes_viewport_index == true ? 
8 : 0));
+   if (mask) {
+   pos_args[1][0] = LLVMConstInt(ctx->i32, mask, false); /* 
writemask */
pos_args[1][1] = ctx->i32zero;  /* EXEC mask */
pos_args[1][2] = ctx->i32zero;  /* last export? */
pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 
1, false);
@@ -4213,6 +4224,10 @@ handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
 
if (ctx->shader_info->vs.writes_pointsize == true)
pos_args[1][5] = psize_value;
+   if (ctx->shader_info->vs.writes_layer == true)
+   pos_args[1][7] = layer_value;
+   if (ctx->shader_info->vs.writes_viewport_index == true)
+   pos_args[1][8] = viewport_index_value;
}
for (i = 0; i < 4; i++) {
if (pos_args[i][0])
diff --git a/src/amd/common/ac_nir_to_llvm.h b/src/amd/common/ac_nir_to_llvm.h
index f488c09..a57558e 100644
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -95,6 +95,8 @@ struct ac_shader_variant_info {
unsigned vgpr_comp_cnt;
uint32_t export_mask;
bool writes_pointsize;
+   bool writes_layer;
+   bool writes_viewport_index;
uint8_t clip_dist_mask;
uint8_t cull_dist_mask;
} vs;
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 01e77f8..27fa405 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -500,7 +500,11 @@ radv_emit_vertex_shader(struct radv_cmd_buffer *cmd_buffer,
 
radeon_set_context_reg(cmd_buffer->cs, R_02881C_PA_CL_VS_OUT_CNTL,
   
S_02881C_USE_VTX_POINT_SIZE(vs->info.vs.writes_pointsize) |
-  
S_02881C_VS_OUT_MISC_VEC_ENA(vs->info.vs.writes_pointsize) |
+ 

Mesa (master): radv: add support for layered clears (v2)

2017-01-17 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 7e0382fb35960416459134f27fa1b0f57aba8acc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7e0382fb35960416459134f27fa1b0f57aba8acc

Author: Dave Airlie 
Date:   Tue Jan 17 10:05:07 2017 +1000

radv: add support for layered clears (v2)

Just always use the layer clear pipelines,
the overhead of emitting the layer shouldn't be
too large.

v2: Bas suggested we always use it.

Reviewed-by: Bas Nieuwenhuizen 
Signed-off-by: Dave Airlie 

---

 src/amd/vulkan/radv_meta_clear.c | 28 ++--
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/src/amd/vulkan/radv_meta_clear.c b/src/amd/vulkan/radv_meta_clear.c
index ff779ea..957b388 100644
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -98,6 +98,16 @@ build_color_shaders(struct nir_shader **out_vs,
nir_copy_var(&vs_b, vs_out_color, vs_in_color);
nir_copy_var(&fs_b, fs_out_color, fs_in_color);
 
+   const struct glsl_type *layer_type = glsl_int_type();
+   nir_variable *vs_out_layer =
+   nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
+   "v_layer");
+   vs_out_layer->data.location = VARYING_SLOT_LAYER;
+   vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
+   nir_ssa_def *inst_id = nir_load_system_value(&vs_b, 
nir_intrinsic_load_instance_id, 0);
+
+   nir_store_var(&vs_b, vs_out_layer, inst_id, 0x1);
+
*out_vs = vs_b.shader;
*out_fs = fs_b.shader;
 }
@@ -447,7 +457,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
   pipeline_h);
}
 
-   radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+   radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, 0);
 
radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
 }
@@ -477,6 +487,15 @@ build_depthstencil_shader(struct nir_shader **out_vs, 
struct nir_shader **out_fs
 
nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);
 
+   const struct glsl_type *layer_type = glsl_int_type();
+   nir_variable *vs_out_layer =
+   nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
+   "v_layer");
+   vs_out_layer->data.location = VARYING_SLOT_LAYER;
+   vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
+   nir_ssa_def *inst_id = nir_load_system_value(&vs_b, 
nir_intrinsic_load_instance_id, 0);
+   nir_store_var(&vs_b, vs_out_layer, inst_id, 0x1);
+
*out_vs = vs_b.shader;
*out_fs = fs_b.shader;
 }
@@ -717,7 +736,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
if (depth_view_can_fast_clear(iview, 
subpass->depth_stencil_attachment.layout, clear_rect))
radv_set_depth_clear_regs(cmd_buffer, iview->image, 
clear_value, aspects);
 
-   radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+   radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, 0);
 }
 
 
@@ -948,13 +967,10 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer 
*cmd_buffer)
 
radv_meta_save_graphics_reset_vport_scissor(_state, cmd_buffer);
 
-   if (cmd_state->framebuffer->layers > 1)
-   radv_finishme("clearing multi-layer framebuffer");
-
VkClearRect clear_rect = {
.rect = cmd_state->render_area,
.baseArrayLayer = 0,
-   .layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+   .layerCount = cmd_state->framebuffer->layers,
};
 
for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radv/ac: split part of llvm compile into a separate function

2017-01-17 Thread Dave Airlie
Module: Mesa
Branch: master
Commit: 788610081198260d6974f86ed62a4b9aaf59b8c4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=788610081198260d6974f86ed62a4b9aaf59b8c4

Author: Dave Airlie 
Date:   Tue Jan 17 08:41:03 2017 +1000

radv/ac: split part of llvm compile into a separate function

This is needed to have common code for gs copy shader emission.

Reviewed-by: Bas Nieuwenhuizen 
Reviewed-by: Edward O'Callaghan 
Signed-off-by: Dave Airlie 

---

 src/amd/common/ac_nir_to_llvm.c | 33 ++---
 1 file changed, 22 insertions(+), 11 deletions(-)

diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 6d98fde..26b87e8 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -4557,17 +4557,14 @@ out:
return retval;
 }
 
-void ac_compile_nir_shader(LLVMTargetMachineRef tm,
-   struct ac_shader_binary *binary,
-   struct ac_shader_config *config,
-   struct ac_shader_variant_info *shader_info,
-   struct nir_shader *nir,
-   const struct ac_nir_compiler_options *options,
-  bool dump_shader)
+static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
+  LLVMModuleRef llvm_module,
+  struct ac_shader_binary *binary,
+  struct ac_shader_config *config,
+  struct ac_shader_variant_info *shader_info,
+  gl_shader_stage stage,
+  bool dump_shader)
 {
-
-   LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, 
shader_info,
-options);
if (dump_shader)
LLVMDumpModule(llvm_module);
 
@@ -4586,7 +4583,7 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
LLVMDisposeModule(llvm_module);
LLVMContextDispose(ctx);
 
-   if (nir->stage == MESA_SHADER_FRAGMENT) {
+   if (stage == MESA_SHADER_FRAGMENT) {
shader_info->num_input_vgprs = 0;
if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
shader_info->num_input_vgprs += 2;
@@ -4626,7 +4623,21 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
/* +3 for scratch wave offset and VCC */
config->num_sgprs = MAX2(config->num_sgprs,
 shader_info->num_input_sgprs + 3);
+}
+
+void ac_compile_nir_shader(LLVMTargetMachineRef tm,
+   struct ac_shader_binary *binary,
+   struct ac_shader_config *config,
+   struct ac_shader_variant_info *shader_info,
+   struct nir_shader *nir,
+   const struct ac_nir_compiler_options *options,
+  bool dump_shader)
+{
+
+   LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, 
shader_info,
+options);
 
+   ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, 
nir->stage, dump_shader);
switch (nir->stage) {
case MESA_SHADER_COMPUTE:
for (int i = 0; i < 3; ++i)

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): ac/debug: Decrease num_dw for type 2 NOP's.

2017-01-17 Thread Bas Nieuwenhuizen
Module: Mesa
Branch: master
Commit: 3b4bf8aa636768f4ad5fb636b8406e58d0d78f62
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3b4bf8aa636768f4ad5fb636b8406e58d0d78f62

Author: Bas Nieuwenhuizen 
Date:   Sun Jan 15 23:01:03 2017 +0100

ac/debug: Decrease num_dw for type 2 NOP's.

Otherwise we read past the end of the buffer.

Signed-off-by: Bas Nieuwenhuizen 
Reviewed-by: Nicolai Hähnle 

---

 src/amd/common/ac_debug.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/amd/common/ac_debug.c b/src/amd/common/ac_debug.c
index f91e448..989dfda 100644
--- a/src/amd/common/ac_debug.c
+++ b/src/amd/common/ac_debug.c
@@ -357,6 +357,7 @@ void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, int 
trace_id,
if (ib[0] == 0x8000) {
fprintf(f, COLOR_GREEN "NOP (type 2)" 
COLOR_RESET "\n");
ib++;
+   num_dw--;
break;
}
/* fall through */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (master): radeonsi: for the tess barrier, only use emit_waitcnt on SI and LLVM 3.9+

2017-01-17 Thread Marek Olšák
Module: Mesa
Branch: master
Commit: 57f18623fb94891c04f3a395cfd977ea3747ee61
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=57f18623fb94891c04f3a395cfd977ea3747ee61

Author: Marek Olšák 
Date:   Tue Jan 17 13:45:42 2017 +0100

radeonsi: for the tess barrier, only use emit_waitcnt on SI and LLVM 3.9+

Cc: 17.0 13.0 
Reviewed-by: Edward O'Callaghan 
Reviewed-by: Nicolai Hähnle 

---

 src/gallium/drivers/radeonsi/si_shader.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index f404273..10f40a9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5440,10 +5440,13 @@ static void si_llvm_emit_barrier(const struct 
lp_build_tgsi_action *action,
struct si_shader_context *ctx = si_shader_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
 
-   /* The real barrier instruction isn’t needed, because an entire patch
+   /* SI only (thanks to a hw bug workaround):
+* The real barrier instruction isn’t needed, because an entire patch
 * always fits into a single wave.
 */
-   if (ctx->type == PIPE_SHADER_TESS_CTRL) {
+   if (HAVE_LLVM >= 0x0309 &&
+   ctx->screen->b.chip_class == SI &&
+   ctx->type == PIPE_SHADER_TESS_CTRL) {
emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
return;
}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-commit


Mesa (12.0): 37 new commits

2017-01-17 Thread Emil Velikov
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cc2894d376e75de5255ed9670bcae14524cb0801
Author: Emil Velikov 
Date:   Thu Jan 12 17:18:51 2017 +0000

automake: use shared llvm libs for make distcheck

Cc: "12.0 13.0" 
Signed-off-by: Emil Velikov 
(cherry picked from commit 23dcce0c03db055c168696c9120637506b68b13d)

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=febf22ff559604fca421a0555aeead5cbd8d4377
Author: Chad Versace 
Date:   Fri Dec 16 12:05:45 2016 -0800

i965/mt: Disable HiZ when sharing depth buffer externally (v2)

intel_miptree_make_shareable() discarded and disabled CCS. Fix it so
that it discards and disables HiZ too.

Fixes 
dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer
on Skylake.

v2: Actually do what the commit message says. Discard the HiZ buffer.

Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=98329
Reviewed-by: Topi Pohjolainen 
Reviewed-by: Kenneth Graunke 
Reviewed-by: Anuj Phogat 
Cc: Nanley Chery 
(cherry picked from commit 42011be1e27f59d750b781c10766e19ec0ee6ff5)
[Emil Velikov: patch is a backport by Chad of above commit]

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3c7b53bba3738480e3ddaf84a0386f72834e5428
Author: Chad Versace 
Date:   Fri Dec 9 16:18:11 2016 -0800

i965/mt: Disable aux surfaces after making miptree shareable

The entire goal of intel_miptree_make_shareable() is to permanently
disable the miptree's aux surfaces. So set
intel_mipmap_tree:disable_aux_buffers after the function is done
discarding the aux surfaces.

References: https://bugs.freedesktop.org/show_bug.cgi?id=98329
Reviewed-by: Topi Pohjolainen 
Reviewed-by: Kenneth Graunke 
Cc: Nanley Chery 
Cc: mesa-stable@lists.freedesktop.org
(cherry picked from commit 1c8be049bea786c2c054a770025976beba5b8636)

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c880deef41d3c4cf2dc56342c3f8cc6edb6728ca
Author: Emil Velikov 
Date:   Fri Dec 16 15:08:30 2016 +0000

get-typod-pick-list.sh: add new script

Typos do happen as people nominate patches for stable. This script aims
to catch most of those.

Due to the subtle nature of things, one has to pay special attention to
the output, similar to get-extra-pick-list.sh.

At the moment only the following is handled:
 grep -i "CC:.*mesa-dev"

Cc: 12.0 13.0 
Signed-off-by: Emil Velikov 
(cherry picked from commit f0bdd13fdbc0bec1119b296d99820899183e26ab)

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=09973d9a991919d010251a6e0998fa105374b654
Author: Ilia Mirkin 
Date:   Tue Jan 10 22:07:53 2017 -0500

nouveau: take extra push space into account for pushbuf_space calls

Ever since I messed around with fences a long time ago, I have ensured
that after a PUSH_SPACE call there is enough space to write a fence out
into the pushbuf.

However the PUSH_SPACE macro is not all-knowing, and so sometimes we
have to invoke nouveau_pushbuf_space manually with the relocs/pushes
args set. If we don't take the extra allocation from PUSH_SPACE into
account, then we will end up accidentally flushing when the code was not
expecting a flush. This can lead to various runtime and rendering
failures.

The amount of extra allocation isn't that important - it has to be at
least 8 based on the current nouveau_winsys.h setting, but even more
won't hurt. I just rounded up to powers of 2.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=99354
Cc: "12.0 13.0" 
Signed-off-by: Ilia Mirkin 
Acked-by: Ben Skeggs 
(cherry picked from commit eb60a89bc3ac2b43faf52d06e05670bbbca7292d)

URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=36a54c27fd01e186c777acb4c12c61a7a32c2838
Author: Kenneth Graunke 
Date:   Sun Jan 8 23:03:25 2017 -0800

spirv: Move cursor before calling vtn_ssa_value() in phi 2nd pass.

vtn_ssa_value() can produce variable loads, and the cursor might
be after a return statement, causing nir_builder assert failures
about not inserting instructions after a jump.

This fixes:
dEQP-VK.spirv_assembly.instruction.graphics.barrier.in_if

Mesa (master): st/vdpau: remove the delayed rendering hack(v1.1)

2017-01-17 Thread Christian König
Module: Mesa
Branch: master
Commit: 3a8f316e7b7f7dc5d913d117ec47e26587ce8177
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3a8f316e7b7f7dc5d913d117ec47e26587ce8177

Author: Nayan Deshmukh 
Date:   Wed Jan 11 22:45:15 2017 +0530

st/vdpau: remove the delayed rendering hack(v1.1)

The hack was introduced to avoid an extra copy,
but now with dri3 we don't need it anymore.

v1.1: rebasing

Signed-off-by: Nayan Deshmukh 
Acked-by: Christian König 

---

 src/gallium/state_trackers/vdpau/bitmap.c|  2 -
 src/gallium/state_trackers/vdpau/device.c| 50 -
 src/gallium/state_trackers/vdpau/mixer.c | 93 +++-
 src/gallium/state_trackers/vdpau/output.c|  9 ---
 src/gallium/state_trackers/vdpau/presentation.c  | 30 +++-
 src/gallium/state_trackers/vdpau/vdpau_private.h |  9 ---
 6 files changed, 52 insertions(+), 141 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/bitmap.c 
b/src/gallium/state_trackers/vdpau/bitmap.c
index fd67a98..d9ec60d 100644
--- a/src/gallium/state_trackers/vdpau/bitmap.c
+++ b/src/gallium/state_trackers/vdpau/bitmap.c
@@ -198,8 +198,6 @@ vlVdpBitmapSurfacePutBitsNative(VdpBitmapSurface surface,
 
pipe_mutex_lock(vlsurface->device->mutex);
 
-   vlVdpResolveDelayedRendering(vlsurface->device, NULL, NULL);
-
dst_box = RectToPipeBox(destination_rect, vlsurface->sampler_view->texture);
pipe->texture_subdata(pipe, vlsurface->sampler_view->texture, 0,
  PIPE_TRANSFER_WRITE, _box, *source_data,
diff --git a/src/gallium/state_trackers/vdpau/device.c 
b/src/gallium/state_trackers/vdpau/device.c
index 8bae064..4f4ffdf 100644
--- a/src/gallium/state_trackers/vdpau/device.c
+++ b/src/gallium/state_trackers/vdpau/device.c
@@ -327,53 +327,3 @@ vlVdpDefaultSamplerViewTemplate(struct pipe_sampler_view 
*templ, struct pipe_res
if (desc->swizzle[3] == PIPE_SWIZZLE_0)
   templ->swizzle_a = PIPE_SWIZZLE_1;
 }
-
-void
-vlVdpResolveDelayedRendering(vlVdpDevice *dev, struct pipe_surface *surface, 
struct u_rect *dirty_area)
-{
-   struct vl_compositor_state *cstate;
-   vlVdpOutputSurface *vlsurface;
-
-   assert(dev);
-
-   cstate = dev->delayed_rendering.cstate;
-   if (!cstate)
-  return;
-
-   vlsurface = vlGetDataHTAB(dev->delayed_rendering.surface);
-   if (!vlsurface)
-  return;
-
-   if (!surface) {
-  surface = vlsurface->surface;
-  dirty_area = >dirty_area;
-   }
-
-   vl_compositor_render(cstate, >compositor, surface, dirty_area, true);
-
-   dev->delayed_rendering.surface = VDP_INVALID_HANDLE;
-   dev->delayed_rendering.cstate = NULL;
-
-   /* test if we need to create a new sampler for the just filled texture */
-   if (surface->texture != vlsurface->sampler_view->texture) {
-  struct pipe_resource *res = surface->texture;
-  struct pipe_sampler_view sv_templ;
-
-  vlVdpDefaultSamplerViewTemplate(_templ, res);
-  pipe_sampler_view_reference(>sampler_view, NULL);
-  vlsurface->sampler_view = 
dev->context->create_sampler_view(dev->context, res, _templ);
-   }
-
-   return;
-}
-
-void
-vlVdpSave4DelayedRendering(vlVdpDevice *dev, VdpOutputSurface surface, struct 
vl_compositor_state *cstate)
-{
-   assert(dev);
-
-   vlVdpResolveDelayedRendering(dev, NULL, NULL);
-
-   dev->delayed_rendering.surface = surface;
-   dev->delayed_rendering.cstate = cstate;
-}
diff --git a/src/gallium/state_trackers/vdpau/mixer.c 
b/src/gallium/state_trackers/vdpau/mixer.c
index 1014174..37a6fcd 100644
--- a/src/gallium/state_trackers/vdpau/mixer.c
+++ b/src/gallium/state_trackers/vdpau/mixer.c
@@ -193,8 +193,6 @@ vlVdpVideoMixerDestroy(VdpVideoMixer mixer)
 
pipe_mutex_lock(vmixer->device->mutex);
 
-   vlVdpResolveDelayedRendering(vmixer->device, NULL, NULL);
-
vlRemoveDataHTAB(mixer);
 
vl_compositor_cleanup_state(>cstate);
@@ -293,7 +291,6 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
}
 
pipe_mutex_lock(vmixer->device->mutex);
-   vlVdpResolveDelayedRendering(vmixer->device, NULL, NULL);
 
vl_compositor_clear_layers(>cstate);
 
@@ -403,64 +400,60 @@ VdpStatus vlVdpVideoMixerRender(VdpVideoMixer mixer,
   ++layers;
}
 
-   if (!vmixer->noise_reduction.filter && !vmixer->sharpness.filter && 
!vmixer->bicubic.filter)
-  vlVdpSave4DelayedRendering(vmixer->device, destination_surface, 
>cstate);
-   else {
-  vl_compositor_render(>cstate, compositor, surface, _area, 
true);
-
-  if (vmixer->noise_reduction.filter) {
- if (!vmixer->sharpness.filter && !vmixer->bicubic.filter) {
-vl_median_filter_render(vmixer->noise_reduction.filter,
-sampler_view, dst->surface);
- } else {
-res = pipe->screen->resource_create(pipe->screen, _tmpl);
-struct pipe_sampler_view *sampler_view_temp = 
pipe->create_sampler_view(pipe, res, _templ);
-struct 

Mesa (master): vl/dri3: use external texture as back buffers(v4)

2017-01-17 Thread Christian König
Module: Mesa
Branch: master
Commit: 0ef17d76bbbc9506d50138f1b4d79db8ef08ad6d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ef17d76bbbc9506d50138f1b4d79db8ef08ad6d

Author: Nayan Deshmukh 
Date:   Wed Jan 11 22:45:13 2017 +0530

vl/dri3: use external texture as back buffers(v4)

dri3 allows us to send the handle of a texture directly to X,
so this patch allows a state tracker to send its texture
directly to X to be used as a back buffer, avoiding an extra
copy.

v2: use clip width/height to display a portion of the surface
v3: remove redundant variables, fix wrapping, rename variables
handle vaapi path
v3.1: we need clip_width/height for every frame so we don't need
  to maintain it for each buffer instead use a global variable
v4: In the single-GPU case we can cache the buffers, as applications
use a constant number of buffers, and so we can avoid calls to the
present extension for every frame

Reviewed and Suggested-by: Leo Liu 
Acked-by: Christian König 
Tested-by: Andy Furniss 
Signed-off-by: Nayan Deshmukh 

---

 configure.ac  |   2 +-
 src/gallium/auxiliary/vl/vl_winsys.h  |   5 ++
 src/gallium/auxiliary/vl/vl_winsys_dri3.c | 126 ++
 3 files changed, 115 insertions(+), 18 deletions(-)

diff --git a/configure.ac b/configure.ac
index 459f3e8..3e2d79a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2081,7 +2081,7 @@ if test "x$enable_xvmc" = xyes -o \
 "x$enable_va" = xyes; then
 if test x"$enable_dri3" = xyes; then
 PKG_CHECK_MODULES([VL], [xcb-dri3 xcb-present xcb-sync xshmfence >= 
$XSHMFENCE_REQUIRED
- x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
+ xcb-xfixes x11-xcb xcb xcb-dri2 >= 
$XCBDRI2_REQUIRED])
 else
 PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
 fi
diff --git a/src/gallium/auxiliary/vl/vl_winsys.h 
b/src/gallium/auxiliary/vl/vl_winsys.h
index 26db9f2..e1f9b27 100644
--- a/src/gallium/auxiliary/vl/vl_winsys.h
+++ b/src/gallium/auxiliary/vl/vl_winsys.h
@@ -59,6 +59,11 @@ struct vl_screen
void *
(*get_private)(struct vl_screen *vscreen);
 
+   void
+   (*set_back_texture_from_output)(struct vl_screen *vscreen,
+   struct pipe_resource *buffer,
+   uint32_t width, uint32_t height);
+
struct pipe_screen *pscreen;
struct pipe_loader_device *dev;
 };
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c 
b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
index 2929928..a810dea 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "loader.h"
 
@@ -71,9 +72,12 @@ struct vl_dri3_screen
xcb_special_event_t *special_event;
 
struct pipe_context *pipe;
+   struct pipe_resource *output_texture;
+   uint32_t clip_width, clip_height;
 
struct vl_dri3_buffer *back_buffers[BACK_BUFFER_NUM];
int cur_back;
+   int next_back;
 
struct u_rect dirty_areas[BACK_BUFFER_NUM];
 
@@ -105,7 +109,8 @@ dri3_free_back_buffer(struct vl_dri3_screen *scrn,
xcb_free_pixmap(scrn->conn, buffer->pixmap);
xcb_sync_destroy_fence(scrn->conn, buffer->sync_fence);
xshmfence_unmap_shm(buffer->shm_fence);
-   pipe_resource_reference(>texture, NULL);
+   if (!scrn->output_texture)
+  pipe_resource_reference(>texture, NULL);
if (buffer->linear_texture)
pipe_resource_reference(>linear_texture, NULL);
FREE(buffer);
@@ -236,29 +241,31 @@ dri3_alloc_back_buffer(struct vl_dri3_screen *scrn)
templ.format = PIPE_FORMAT_B8G8R8X8_UNORM;
templ.target = PIPE_TEXTURE_2D;
templ.last_level = 0;
-   templ.width0 = scrn->width;
-   templ.height0 = scrn->height;
+   templ.width0 = (scrn->output_texture) ?
+  scrn->output_texture->width0 : scrn->width;
+   templ.height0 = (scrn->output_texture) ?
+   scrn->output_texture->height0 : scrn->height;
templ.depth0 = 1;
templ.array_size = 1;
 
if (scrn->is_different_gpu) {
-  buffer->texture = scrn->base.pscreen->resource_create(scrn->base.pscreen,
-);
+  buffer->texture = (scrn->output_texture) ? scrn->output_texture :
+
scrn->base.pscreen->resource_create(scrn->base.pscreen, );
   if (!buffer->texture)
  goto unmap_shm;
 
   templ.bind |= PIPE_BIND_SCANOUT | PIPE_BIND_SHARED |
 PIPE_BIND_LINEAR;
-  buffer->linear_texture = 
scrn->base.pscreen->resource_create(scrn->base.pscreen,
-  );
+  buffer->linear_texture =
+  scrn->base.pscreen->resource_create(scrn->base.pscreen, );
   

Mesa (master): st/vdpau: use dri3 to directly send the buffer to X(v2)

2017-01-17 Thread Christian König
Module: Mesa
Branch: master
Commit: 15bfdea99c7b487d2c38d6dd7b88fb44810ef75a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=15bfdea99c7b487d2c38d6dd7b88fb44810ef75a

Author: Nayan Deshmukh 
Date:   Wed Jan 11 22:45:14 2017 +0530

st/vdpau: use dri3 to directly send the buffer to X(v2)

This avoids the extra copy that occurs with dri2.

v1.1: fallback to dri2 if dri3 fails to initialize
v2: add PIPE_BIND_SCANOUT to output buffers as they will
be sent to the X server directly (Michel)

Suggested-by: Christian König 
Tested-by: Andy Furniss 
Signed-off-by: Nayan Deshmukh 

---

 src/gallium/state_trackers/vdpau/output.c   |  2 +-
 src/gallium/state_trackers/vdpau/presentation.c | 58 ++---
 2 files changed, 33 insertions(+), 27 deletions(-)

diff --git a/src/gallium/state_trackers/vdpau/output.c 
b/src/gallium/state_trackers/vdpau/output.c
index d67ead8..8ddf2c1 100644
--- a/src/gallium/state_trackers/vdpau/output.c
+++ b/src/gallium/state_trackers/vdpau/output.c
@@ -82,7 +82,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
res_tmpl.depth0 = 1;
res_tmpl.array_size = 1;
res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET |
-   PIPE_BIND_SHARED;
+   PIPE_BIND_SHARED | PIPE_BIND_SCANOUT;
res_tmpl.usage = PIPE_USAGE_DEFAULT;
 
pipe_mutex_lock(dev->mutex);
diff --git a/src/gallium/state_trackers/vdpau/presentation.c 
b/src/gallium/state_trackers/vdpau/presentation.c
index f35d73a..b2c8aea 100644
--- a/src/gallium/state_trackers/vdpau/presentation.c
+++ b/src/gallium/state_trackers/vdpau/presentation.c
@@ -231,43 +231,47 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue 
presentation_queue,
vscreen = pq->device->vscreen;
 
pipe_mutex_lock(pq->device->mutex);
+   if (vscreen->set_back_texture_from_output)
+  vscreen->set_back_texture_from_output(vscreen, surf->surface->texture, 
clip_width, clip_height);
tex = vscreen->texture_from_drawable(vscreen, (void *)pq->drawable);
if (!tex) {
   pipe_mutex_unlock(pq->device->mutex);
   return VDP_STATUS_INVALID_HANDLE;
}
 
-   dirty_area = vscreen->get_dirty_area(vscreen);
+   if (!vscreen->set_back_texture_from_output) {
+  dirty_area = vscreen->get_dirty_area(vscreen);
 
-   memset(_templ, 0, sizeof(surf_templ));
-   surf_templ.format = tex->format;
-   surf_draw = pipe->create_surface(pipe, tex, _templ);
+  memset(_templ, 0, sizeof(surf_templ));
+  surf_templ.format = tex->format;
+  surf_draw = pipe->create_surface(pipe, tex, _templ);
 
-   dst_clip.x0 = 0;
-   dst_clip.y0 = 0;
-   dst_clip.x1 = clip_width ? clip_width : surf_draw->width;
-   dst_clip.y1 = clip_height ? clip_height : surf_draw->height;
+  dst_clip.x0 = 0;
+  dst_clip.y0 = 0;
+  dst_clip.x1 = clip_width ? clip_width : surf_draw->width;
+  dst_clip.y1 = clip_height ? clip_height : surf_draw->height;
 
-   if (pq->device->delayed_rendering.surface == surface &&
-   dst_clip.x1 == surf_draw->width && dst_clip.y1 == surf_draw->height) {
+  if (pq->device->delayed_rendering.surface == surface &&
+  dst_clip.x1 == surf_draw->width && dst_clip.y1 == surf_draw->height) 
{
 
-  // TODO: we correctly support the clipping here, but not the pq 
background color in the clipped area
-  cstate = pq->device->delayed_rendering.cstate;
-  vl_compositor_set_dst_clip(cstate, _clip);
-  vlVdpResolveDelayedRendering(pq->device, surf_draw, dirty_area);
+ // TODO: we correctly support the clipping here, but not the pq 
background color in the clipped area
+ cstate = pq->device->delayed_rendering.cstate;
+ vl_compositor_set_dst_clip(cstate, _clip);
+ vlVdpResolveDelayedRendering(pq->device, surf_draw, dirty_area);
 
-   } else {
-  vlVdpResolveDelayedRendering(pq->device, NULL, NULL);
+  } else {
+ vlVdpResolveDelayedRendering(pq->device, NULL, NULL);
 
-  src_rect.x0 = 0;
-  src_rect.y0 = 0;
-  src_rect.x1 = surf_draw->width;
-  src_rect.y1 = surf_draw->height;
+ src_rect.x0 = 0;
+ src_rect.y0 = 0;
+ src_rect.x1 = surf_draw->width;
+ src_rect.y1 = surf_draw->height;
 
-  vl_compositor_clear_layers(cstate);
-  vl_compositor_set_rgba_layer(cstate, compositor, 0, surf->sampler_view, 
_rect, NULL, NULL);
-  vl_compositor_set_dst_clip(cstate, _clip);
-  vl_compositor_render(cstate, compositor, surf_draw, dirty_area, true);
+ vl_compositor_clear_layers(cstate);
+ vl_compositor_set_rgba_layer(cstate, compositor, 0, 
surf->sampler_view, _rect, NULL, NULL);
+ vl_compositor_set_dst_clip(cstate, _clip);
+ vl_compositor_render(cstate, compositor, surf_draw, dirty_area, true);
+  }
}
 
vscreen->set_next_timestamp(vscreen, earliest_presentation_time);
@@ -297,8 +301,10 @@