from:"Eric Anholt"

Mesa (master): vc4: Fix leak of the compiled shader programs in the cache.

2014-12-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 80ed075e6033eba68b034fbd748da4e0b82a27f4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=80ed075e6033eba68b034fbd748da4e0b82a27f4

Author: Eric Anholt e...@anholt.net
Date:   Sun Dec 14 20:29:10 2014 -0800

vc4: Fix leak of the compiled shader programs in the cache.

---

 src/gallium/drivers/vc4/vc4_context.c |2 ++
 src/gallium/drivers/vc4/vc4_context.h |1 +
 src/gallium/drivers/vc4/vc4_program.c |   21 +
 3 files changed, 24 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index b26c071..3535ebb 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -431,6 +431,8 @@ vc4_context_destroy(struct pipe_context *pctx)
 
 util_slab_destroy(&vc4->transfer_pool);
 
+vc4_program_fini(pctx);
+
 ralloc_free(vc4);
 }
 
diff --git a/src/gallium/drivers/vc4/vc4_context.h 
b/src/gallium/drivers/vc4/vc4_context.h
index ba92cb3..962abbf 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -293,6 +293,7 @@ struct pipe_context *vc4_context_create(struct pipe_screen 
*pscreen,
 void vc4_draw_init(struct pipe_context *pctx);
 void vc4_state_init(struct pipe_context *pctx);
 void vc4_program_init(struct pipe_context *pctx);
+void vc4_program_fini(struct pipe_context *pctx);
 void vc4_query_init(struct pipe_context *pctx);
 void vc4_simulator_init(struct vc4_screen *screen);
 int vc4_simulator_flush(struct vc4_context *vc4,
diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 007c181..3af738f 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2777,3 +2777,24 @@ vc4_program_init(struct pipe_context *pctx)
 vc4->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash,
 vs_cache_compare);
 }
+
+void
+vc4_program_fini(struct pipe_context *pctx)
+{
+struct vc4_context *vc4 = vc4_context(pctx);
+
+struct hash_entry *entry;
+hash_table_foreach(vc4->fs_cache, entry) {
+struct vc4_compiled_shader *shader = entry->data;
+vc4_bo_unreference(&shader->bo);
+ralloc_free(shader);
+_mesa_hash_table_remove(vc4->fs_cache, entry);
+}
+
+hash_table_foreach(vc4->vs_cache, entry) {
+struct vc4_compiled_shader *shader = entry->data;
+vc4_bo_unreference(&shader->bo);
+ralloc_free(shader);
+_mesa_hash_table_remove(vc4->vs_cache, entry);
+}
+}

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix leak of a copy of the scheduled QPU instructions.

2014-12-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 4da9e3d80556253a05179c398ffb1c3120fa3089
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=4da9e3d80556253a05179c398ffb1c3120fa3089

Author: Eric Anholt e...@anholt.net
Date:   Sun Dec 14 20:28:13 2014 -0800

vc4: Fix leak of a copy of the scheduled QPU instructions.

They're copied into a vc4_bo after compiling is done.

---

 src/gallium/drivers/vc4/vc4_qpu.c |5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu.c 
b/src/gallium/drivers/vc4/vc4_qpu.c
index faf8790..83bf105 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -22,6 +22,7 @@
  */
 
 #include <stdbool.h>
+#include "util/ralloc.h"
 #include "vc4_qir.h"
 #include "vc4_qpu.h"
 
@@ -460,8 +461,8 @@ qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
 {
 if (c->qpu_inst_count >= c->qpu_inst_size) {
 c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);
-c->qpu_insts = realloc(c->qpu_insts,
-   c->qpu_inst_size * sizeof(uint64_t));
+c->qpu_insts = reralloc(c, c->qpu_insts,
+uint64_t, c->qpu_inst_size);
 }
 c->qpu_insts[c->qpu_inst_count++] = inst;
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix leaks of the CL contents.

2014-12-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 667719fcb2296d73e1897d4071da6dd30b2cc6ac
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=667719fcb2296d73e1897d4071da6dd30b2cc6ac

Author: Eric Anholt e...@anholt.net
Date:   Sun Dec 14 20:41:25 2014 -0800

vc4: Fix leaks of the CL contents.

---

 src/gallium/drivers/vc4/vc4_cl.c  |5 -
 src/gallium/drivers/vc4/vc4_context.c |2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c
index 29b956e..36dd28c 100644
--- a/src/gallium/drivers/vc4/vc4_cl.c
+++ b/src/gallium/drivers/vc4/vc4_cl.c
@@ -22,11 +22,14 @@
  */
 
 #include "util/u_math.h"
+#include "util/ralloc.h"
 #include "vc4_context.h"
 
 void
 vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl)
 {
+cl->base = ralloc_size(vc4, 1);
+cl->end = cl->next = cl->base;
 }
 
 void
@@ -35,7 +38,7 @@ vc4_grow_cl(struct vc4_cl *cl)
 uint32_t size = MAX2((cl->end - cl->base) * 2, 4096);
 uint32_t offset = cl->next -cl->base;
 
-cl->base = realloc(cl->base, size);
+cl->base = reralloc(ralloc_parent(cl->base), cl->base, uint8_t, size);
 cl->end = cl->base + size;
 cl->next = cl->base + offset;
 }
diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index fd65f43..e49d654 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -472,7 +472,9 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv)
 vc4_init_cl(vc4, &vc4->bcl);
 vc4_init_cl(vc4, &vc4->rcl);
 vc4_init_cl(vc4, &vc4->shader_rec);
+vc4_init_cl(vc4, &vc4->uniforms);
 vc4_init_cl(vc4, &vc4->bo_handles);
+vc4_init_cl(vc4, &vc4->bo_pointers);
 
 vc4->dirty = ~0;
 vc4->fd = screen->fd;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix leak of vc4_bos stashed in the context.

2014-12-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 1f1ca8b2ea80f6b538b7f7c0de2ebe8eba862edc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1f1ca8b2ea80f6b538b7f7c0de2ebe8eba862edc

Author: Eric Anholt e...@anholt.net
Date:   Sun Dec 14 20:39:49 2014 -0800

vc4: Fix leak of vc4_bos stashed in the context.

---

 src/gallium/drivers/vc4/vc4_context.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index 3535ebb..fd65f43 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -431,6 +431,11 @@ vc4_context_destroy(struct pipe_context *pctx)
 
 util_slab_destroy(&vc4->transfer_pool);
 
+pipe_surface_reference(&vc4->framebuffer.cbufs[0], NULL);
+pipe_surface_reference(&vc4->framebuffer.zsbuf, NULL);
+vc4_bo_unreference(&vc4->tile_alloc);
+vc4_bo_unreference(&vc4->tile_state);
+
 vc4_program_fini(pctx);
 
 ralloc_free(vc4);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): hash_table: Fix compiler warnings from the renaming.

2014-12-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 6c3115af852834476a451688734c07f0dbe13ec9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6c3115af852834476a451688734c07f0dbe13ec9

Author: Eric Anholt e...@anholt.net
Date:   Sun Dec 14 20:21:32 2014 -0800

hash_table: Fix compiler warnings from the renaming.

Not sure how we both missed this.  None of the callers were using the
return value, though.

---

 src/util/hash_table.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index e85ebe3..0ad0383 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -330,7 +330,7 @@ struct hash_entry *
 _mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data)
 {
assert(ht->key_hash_function);
-   hash_table_insert(ht, ht->key_hash_function(key), key, data);
+   return hash_table_insert(ht, ht->key_hash_function(key), key, data);
 }
 
 struct hash_entry *
@@ -338,7 +338,7 @@ _mesa_hash_table_insert_with_hash(struct hash_table *ht, 
uint32_t hash,
   const void *key, void *data)
 {
assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key));
-   hash_table_insert(ht, hash, key, data);
+   return hash_table_insert(ht, hash, key, data);
 }
 
 /**

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix a leak of the simulator's exec BO's actual vc4_bo.

2014-12-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: f519c3bff14dfc4555c511a4d3709affa056d688
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f519c3bff14dfc4555c511a4d3709affa056d688

Author: Eric Anholt e...@anholt.net
Date:   Sat Dec 13 16:37:28 2014 -0800

vc4: Fix a leak of the simulator's exec BO's actual vc4_bo.

---

 src/gallium/drivers/vc4/vc4_simulator.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/vc4/vc4_simulator.c 
b/src/gallium/drivers/vc4/vc4_simulator.c
index 1040ae8..bb4a669 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -173,6 +173,7 @@ vc4_simulator_flush(struct vc4_context *vc4, struct 
drm_vc4_submit_cl *args)
 if (ret)
 return ret;
 
+vc4_bo_unreference(&exec.exec_bo->bo);
 free(exec.exec_bo);
 
 if (ctex && ctex->bo->simulator_winsys_map) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Switch to using the util/ hash table.

2014-12-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 5c9b8eace2b7e92899aae2cad131c0ca05b389e7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5c9b8eace2b7e92899aae2cad131c0ca05b389e7

Author: Eric Anholt e...@anholt.net
Date:   Sun Dec 14 19:48:51 2014 -0800

vc4: Switch to using the util/ hash table.

No performance difference on a microbenchmark with norast that should hit it
enough to have mattered, n=220.

---

 src/gallium/drivers/vc4/vc4_context.h |2 +-
 src/gallium/drivers/vc4/vc4_program.c |   85 +
 2 files changed, 33 insertions(+), 54 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.h 
b/src/gallium/drivers/vc4/vc4_context.h
index 207a7b4..ba92cb3 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -209,7 +209,7 @@ struct vc4_context {
 
 struct primconvert_context *primconvert;
 
-struct util_hash_table *fs_cache, *vs_cache;
+struct hash_table *fs_cache, *vs_cache;
 uint32_t next_uncompiled_program_id;
 uint64_t next_compiled_program_id;
 
diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index b48c2c4..007c181 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -25,7 +25,6 @@
 #include <inttypes.h>
 #include "pipe/p_state.h"
 #include "util/u_format.h"
-#include "util/u_hash_table.h"
 #include "util/u_hash.h"
 #include "util/u_memory.h"
 #include "util/u_pack_color.h"
@@ -2164,7 +2163,7 @@ static struct vc4_compiled_shader *
 vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
 struct vc4_key *key)
 {
-struct util_hash_table *ht;
+struct hash_table *ht;
 uint32_t key_size;
 if (stage == QSTAGE_FRAG) {
 ht = vc4->fs_cache;
@@ -2175,9 +2174,9 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum 
qstage stage,
 }
 
 struct vc4_compiled_shader *shader;
-shader = util_hash_table_get(ht, key);
-if (shader)
-return shader;
+struct hash_entry *entry = _mesa_hash_table_search(ht, key);
+if (entry)
+return entry->data;
 
 struct vc4_compile *c = vc4_shader_tgsi_to_qir(vc4, stage, key);
 shader = rzalloc(NULL, struct vc4_compiled_shader);
@@ -2257,7 +2256,7 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum 
qstage stage,
 struct vc4_key *dup_key;
 dup_key = malloc(key_size);
 memcpy(dup_key, key, key_size);
-util_hash_table_set(ht, dup_key, shader);
+_mesa_hash_table_insert(ht, dup_key, shader);
 
 return shader;
 }
@@ -2389,65 +2388,43 @@ vc4_update_compiled_shaders(struct vc4_context *vc4, 
uint8_t prim_mode)
 vc4_update_compiled_vs(vc4, prim_mode);
 }
 
-static unsigned
-fs_cache_hash(void *key)
+static uint32_t
+fs_cache_hash(const void *key)
 {
 return _mesa_hash_data(key, sizeof(struct vc4_fs_key));
 }
 
-static unsigned
-vs_cache_hash(void *key)
+static uint32_t
+vs_cache_hash(const void *key)
 {
 return _mesa_hash_data(key, sizeof(struct vc4_vs_key));
 }
 
-static int
-fs_cache_compare(void *key1, void *key2)
+static bool
+fs_cache_compare(const void *key1, const void *key2)
 {
-return memcmp(key1, key2, sizeof(struct vc4_fs_key));
+return memcmp(key1, key2, sizeof(struct vc4_fs_key)) == 0;
 }
 
-static int
-vs_cache_compare(void *key1, void *key2)
+static bool
+vs_cache_compare(const void *key1, const void *key2)
 {
-return memcmp(key1, key2, sizeof(struct vc4_vs_key));
+return memcmp(key1, key2, sizeof(struct vc4_vs_key)) == 0;
 }
 
-struct delete_state {
-struct vc4_context *vc4;
-struct vc4_uncompiled_shader *shader_state;
-};
-
-static enum pipe_error
-fs_delete_from_cache(void *in_key, void *in_value, void *data)
-{
-struct delete_state *del = data;
-struct vc4_fs_key *key = in_key;
-struct vc4_compiled_shader *shader = in_value;
-
-if (key->base.shader_state == data) {
-util_hash_table_remove(del->vc4->fs_cache, key);
-vc4_bo_unreference(&shader->bo);
-ralloc_free(shader);
-}
-
-return 0;
-}
-
-static enum pipe_error
-vs_delete_from_cache(void *in_key, void *in_value, void *data)
+static void
+delete_from_cache_if_matches(struct hash_table *ht,
+ struct hash_entry *entry,
+ struct vc4_uncompiled_shader *so)
 {
-struct delete_state *del = data;
-struct vc4_vs_key *key = in_key;
-struct vc4_compiled_shader *shader = in_value;
+struct vc4_key *key = entry->data;
 
-if (key->base.shader_state == data) {
-util_hash_table_remove(del->vc4->vs_cache, key);
+if (key->shader_state == so) {
+struct vc4_compiled_shader *shader = entry->data

Mesa (master): vc4: Fix leak of simulator memory on screen cleanup.

2014-12-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: c84306fdc2f2f1aefd7c526e92d66fafda5b306c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c84306fdc2f2f1aefd7c526e92d66fafda5b306c

Author: Eric Anholt e...@anholt.net
Date:   Sat Dec 13 16:40:54 2014 -0800

vc4: Fix leak of simulator memory on screen cleanup.

---

 src/gallium/drivers/vc4/vc4_screen.c|5 +++--
 src/gallium/drivers/vc4/vc4_simulator.c |4 +++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_screen.c 
b/src/gallium/drivers/vc4/vc4_screen.c
index 18451bd..7a1840a 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -30,6 +30,7 @@
 #include "util/u_debug.h"
 #include "util/u_memory.h"
 #include "util/u_format.h"
+#include "util/ralloc.h"
 
 #include "vc4_screen.h"
 #include "vc4_context.h"
@@ -75,7 +76,7 @@ vc4_screen_get_vendor(struct pipe_screen *pscreen)
 static void
 vc4_screen_destroy(struct pipe_screen *pscreen)
 {
-free(pscreen);
+ralloc_free(pscreen);
 }
 
 static int
@@ -402,7 +403,7 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
 struct pipe_screen *
 vc4_screen_create(int fd)
 {
-struct vc4_screen *screen = CALLOC_STRUCT(vc4_screen);
+struct vc4_screen *screen = ralloc(NULL, struct vc4_screen);
 struct pipe_screen *pscreen;
 
 pscreen = &screen->base;
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c 
b/src/gallium/drivers/vc4/vc4_simulator.c
index bb4a669..ee4ad96 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -24,6 +24,7 @@
 #ifdef USE_VC4_SIMULATOR
 
 #include "util/u_memory.h"
+#include "util/ralloc.h"
 
 #include "vc4_screen.h"
 #include "vc4_context.h"
@@ -191,7 +192,8 @@ void
 vc4_simulator_init(struct vc4_screen *screen)
 {
 screen->simulator_mem_size = 256 * 1024 * 1024;
-screen->simulator_mem_base = malloc(screen->simulator_mem_size);
+screen->simulator_mem_base = ralloc_size(screen,
+ screen->simulator_mem_size);
 
 /* We supply our own memory so that we can have more aperture
  * available (256MB instead of simpenrose's default 64MB).

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix leaks of the compiled shaders' keys.

2014-12-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: e108442bb10088607f2a67b648c42c0dece2e595
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e108442bb10088607f2a67b648c42c0dece2e595

Author: Eric Anholt e...@anholt.net
Date:   Sun Dec 14 20:50:15 2014 -0800

vc4: Fix leaks of the compiled shaders' keys.

---

 src/gallium/drivers/vc4/vc4_program.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 3af738f..781e7e2 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2254,7 +2254,7 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum 
qstage stage,
 qir_compile_destroy(c);
 
 struct vc4_key *dup_key;
-dup_key = malloc(key_size);
+dup_key = ralloc_size(shader, key_size);
 memcpy(dup_key, key, key_size);
 _mesa_hash_table_insert(ht, dup_key, shader);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix referencing of sync objects.

2014-12-12 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: e5eaf8ec600b73de7f52555ce1c9a74883dba941
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e5eaf8ec600b73de7f52555ce1c9a74883dba941

Author: Eric Anholt e...@anholt.net
Date:   Fri Dec 12 00:06:21 2014 -0800

vc4: Fix referencing of sync objects.

While the pipe_reference_* helpers set the pointer, a bare pipe_reference
doesn't.   Fixes 5 ARB_sync tests.

---

 src/gallium/drivers/vc4/vc4_fence.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/vc4/vc4_fence.c 
b/src/gallium/drivers/vc4/vc4_fence.c
index c081d51..01906cd 100644
--- a/src/gallium/drivers/vc4/vc4_fence.c
+++ b/src/gallium/drivers/vc4/vc4_fence.c
@@ -56,6 +56,7 @@ vc4_fence_reference(struct pipe_screen *pscreen,
 if (pipe_reference(&(*p)->reference, &f->reference)) {
 free(old);
 }
+*p = f;
 }
 
 static boolean

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix a memory leak in setting up QPU instructions for scheduling.

2014-12-11 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 87db578268012ffc7ca09b737441490144340c6e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=87db578268012ffc7ca09b737441490144340c6e

Author: Eric Anholt e...@anholt.net
Date:   Thu Dec 11 19:56:42 2014 -0800

vc4: Fix a memory leak in setting up QPU instructions for scheduling.

---

 src/gallium/drivers/vc4/vc4_qpu_emit.c |3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c 
b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index f2620c0..c4359e7 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -26,6 +26,7 @@
 #include "vc4_context.h"
 #include "vc4_qir.h"
 #include "vc4_qpu.h"
+#include "util/ralloc.h"
 
 static void
 vc4_dump_program(struct vc4_compile *c)
@@ -44,7 +45,7 @@ vc4_dump_program(struct vc4_compile *c)
 static void
 queue(struct vc4_compile *c, uint64_t inst)
 {
-struct queued_qpu_inst *q = calloc(1, sizeof(*q));
+struct queued_qpu_inst *q = rzalloc(c, struct queued_qpu_inst);
 q->inst = inst;
 insert_at_tail(&c->qpu_inst_list, &q->link);
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Don't throw out the index offset in the shadow index buffer path.

2014-12-11 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: d78eb5752838979057a8a1210d6951c084904c0f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d78eb5752838979057a8a1210d6951c084904c0f

Author: Eric Anholt e...@anholt.net
Date:   Thu Dec 11 20:34:06 2014 -0800

vc4: Don't throw out the index offset in the shadow index buffer path.

When we upload shadow indices at draw time, we need the source offset.
Fixes the piglit draw-elements test.

---

 src/gallium/drivers/vc4/vc4_state.c |3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_state.c 
b/src/gallium/drivers/vc4/vc4_state.c
index 73d..630ac4d 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -300,12 +300,11 @@ vc4_set_index_buffer(struct pipe_context *pctx,
 pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
 vc4->indexbuf.buffer = pshadow;
 vc4->indexbuf.index_size = 2;
-vc4->indexbuf.offset = 0;
 } else {
 pipe_resource_reference(&vc4->indexbuf.buffer, 
ib->buffer);
 vc4->indexbuf.index_size = ib->index_size;
-vc4->indexbuf.offset = ib->offset;
 }
+vc4->indexbuf.offset = ib->offset;
 } else {
 pipe_resource_reference(&vc4->indexbuf.buffer, NULL);
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix triangle-guardband-viewport piglit test.

2014-12-11 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 0ae5e002e090c5e1bd237fa3f23549f9ab105a44
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ae5e002e090c5e1bd237fa3f23549f9ab105a44

Author: Eric Anholt e...@anholt.net
Date:   Thu Dec 11 20:11:21 2014 -0800

vc4: Fix triangle-guardband-viewport piglit test.

The original Broadcom driver also did this with the viewport.

---

 src/gallium/drivers/vc4/vc4_emit.c |   19 ++-
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_emit.c 
b/src/gallium/drivers/vc4/vc4_emit.c
index 8df7073..6856441 100644
--- a/src/gallium/drivers/vc4/vc4_emit.c
+++ b/src/gallium/drivers/vc4/vc4_emit.c
@@ -28,12 +28,21 @@ vc4_emit_state(struct pipe_context *pctx)
 {
 struct vc4_context *vc4 = vc4_context(pctx);
 
-if (vc4->dirty & VC4_DIRTY_SCISSOR) {
+if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT)) {
+float *vpscale = vc4->viewport.scale;
+float *vptranslate = vc4->viewport.translate;
+float vp_minx = -fabs(vpscale[0]) + vptranslate[0];
+float vp_maxx = fabs(vpscale[0]) + vptranslate[0];
+float vp_miny = -fabs(vpscale[1]) + vptranslate[1];
+float vp_maxy = fabs(vpscale[1]) + vptranslate[1];
+uint32_t minx = MAX2(vc4->scissor.minx, vp_minx);
+uint32_t miny = MAX2(vc4->scissor.miny, vp_miny);
+
 cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW);
-cl_u16(&vc4->bcl, vc4->scissor.minx);
-cl_u16(&vc4->bcl, vc4->scissor.miny);
-cl_u16(&vc4->bcl, vc4->scissor.maxx - vc4->scissor.minx);
-cl_u16(&vc4->bcl, vc4->scissor.maxy - vc4->scissor.miny);
+cl_u16(&vc4->bcl, minx);
+cl_u16(&vc4->bcl, miny);
+cl_u16(&vc4->bcl, MIN2(vc4->scissor.maxx, vp_maxx) - minx);
+cl_u16(&vc4->bcl, MIN2(vc4->scissor.maxy, vp_maxy) - miny);
 }
 
 if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Drop redundant index size setting.

2014-12-11 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 5b3c0d999c38ef51ece422010cdb836fbbaffe0e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5b3c0d999c38ef51ece422010cdb836fbbaffe0e

Author: Eric Anholt e...@anholt.net
Date:   Thu Dec 11 20:34:57 2014 -0800

vc4: Drop redundant index size setting.

This is already done at set_index_buffer() time.

---

 src/gallium/drivers/vc4/vc4_draw.c |1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_draw.c 
b/src/gallium/drivers/vc4/vc4_draw.c
index 2d82462..79d7d73 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -218,7 +218,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info)
 if (rsc->shadow_parent) {
 vc4_update_shadow_index_buffer(pctx, &vc4->indexbuf);
 offset = 0;
-index_size = 2;
 }
 
 cl_start_reloc(&vc4->bcl, 1);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Consider FS backface color loads as color inputs as well.

2014-12-11 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 92b85fba89851b85e7a50b1f6f90c05ecaacdcd5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=92b85fba89851b85e7a50b1f6f90c05ecaacdcd5

Author: Eric Anholt e...@anholt.net
Date:   Thu Dec 11 21:28:12 2014 -0800

vc4: Consider FS backface color loads as color inputs as well.

This fixes flatshading of backface color in 4 of the piglit interpolation
tests.

---

 src/gallium/drivers/vc4/vc4_program.c |5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index cea2a49..b48c2c4 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2210,8 +2210,11 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum 
qstage stage,
 if (sem->semantic == (uint8_t)~0)
 continue;
 
-if (sem->semantic == TGSI_SEMANTIC_COLOR)
+if (sem->semantic == TGSI_SEMANTIC_COLOR ||
+sem->semantic == TGSI_SEMANTIC_BCOLOR) {
 shader->color_inputs |= (1 << 
shader->num_inputs);
+}
+
 shader->input_semantics[shader->num_inputs] = *sem;
 shader->num_inputs++;
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Prioritize allocating accumulators to short-lived values.

2014-12-09 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: ab1b1fa6fbd72b05c48f83c9df5036c2bfe893a3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ab1b1fa6fbd72b05c48f83c9df5036c2bfe893a3

Author: Eric Anholt e...@anholt.net
Date:   Mon Dec  8 17:43:29 2014 -0800

vc4: Prioritize allocating accumulators to short-lived values.

The register allocator walks from the end of the nodes array looking for
trivially-allocatable things to put on the stack, meaning (assuming
everything is trivially colorable and gets put on the stack in a single
pass) the low node numbers get allocated first.  The things allocated
first happen to get the lower-numbered registers, which is to say the fast
accumulators that can be paired more easily.

When we previously made the nodes match the temporary register numbers,
we'd end up putting the shader inputs (VS or FS) in the accumulators,
which are often long-lived values.  By prioritizing the shortest-lived
values for allocation, we can get a lot more instructions that involve
accumulators, and thus fewer conflicts for raddr and WS.

total instructions in shared programs: 52870 -> 46428 (-12.18%)
instructions in affected programs: 52260 -> 45818 (-12.33%)

---

 src/gallium/drivers/vc4/vc4_register_allocate.c |   73 ++-
 1 file changed, 59 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c 
b/src/gallium/drivers/vc4/vc4_register_allocate.c
index b62669f..3001900 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -139,6 +139,20 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
 ra_set_finalize(vc4->regs, NULL);
 }
 
+struct node_to_temp_map {
+uint32_t temp;
+uint32_t priority;
+};
+
+static int
+node_to_temp_priority(const void *in_a, const void *in_b)
+{
+const struct node_to_temp_map *a = in_a;
+const struct node_to_temp_map *b = in_b;
+
+return a->priority - b->priority;
+}
+
 /**
  * Returns a mapping from QFILE_TEMP indices to struct qpu_regs.
  *
@@ -148,6 +162,8 @@ struct qpu_reg *
 vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
 {
 struct simple_node *node;
+struct node_to_temp_map map[c->num_temps];
+uint32_t temp_to_node[c->num_temps];
 uint32_t def[c->num_temps];
 uint32_t use[c->num_temps];
 struct qpu_reg *temp_registers = calloc(c->num_temps,
@@ -166,11 +182,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct 
vc4_compile *c)
 struct ra_graph *g = ra_alloc_interference_graph(vc4->regs,
  c->num_temps);
 
-for (uint32_t i = 0; i < c->num_temps; i++)
+for (uint32_t i = 0; i < c->num_temps; i++) {
 ra_set_node_class(g, i, vc4->reg_class_any);
+}
 
-/* Compute the live ranges so we can figure out interference, and
- * figure out our register classes and preallocated registers.
+/* Compute the live ranges so we can figure out interference.
  */
 uint32_t ip = 0;
 foreach(node, &c->instructions) {
@@ -188,27 +204,54 @@ vc4_register_allocate(struct vc4_context *vc4, struct 
vc4_compile *c)
 
 switch (inst->op) {
 case QOP_FRAG_Z:
+case QOP_FRAG_W:
+/* The payload registers have values implicitly loaded
+ * at the start of the program.
+ */
 def[inst->dst.index] = 0;
-ra_set_node_reg(g, inst->dst.index,
+break;
+default:
+break;
+}
+
+ip++;
+}
+
+for (uint32_t i = 0; i < c->num_temps; i++) {
+map[i].temp = i;
+map[i].priority = use[i] - def[i];
+}
+qsort(map, c->num_temps, sizeof(map[0]), node_to_temp_priority);
+for (uint32_t i = 0; i < c->num_temps; i++) {
+temp_to_node[map[i].temp] = i;
+}
+
+/* Figure out our register classes and preallocated registers*/
+foreach(node, &c->instructions) {
+struct qinst *inst = (struct qinst *)node;
+
+switch (inst->op) {
+case QOP_FRAG_Z:
+ra_set_node_reg(g, temp_to_node[inst->dst.index],
 AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2 + 
1);
 break;
 
 case QOP_FRAG_W:
-def[inst->dst.index] = 0;
-ra_set_node_reg(g, inst->dst.index,
+ra_set_node_reg(g, temp_to_node[inst->dst.index],
 AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
 break;
 
 case QOP_TEX_RESULT:
 case QOP_TLB_COLOR_READ:
 assert

Mesa (master): vc4: Reserve rb31 instead of r3 for raddr conflict spills.

2014-12-09 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 8420a956924c720b3c4932a577623f836758c21c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8420a956924c720b3c4932a577623f836758c21c

Author: Eric Anholt e...@anholt.net
Date:   Mon Dec  8 16:52:53 2014 -0800

vc4: Reserve rb31 instead of r3 for raddr conflict spills.

This increases the cost of a raddr b conflict spill (save r3 to rb31, move
src1 to r3, move rb31 back to r3 when done, instead of just move src1 to
r3), but on average thanks to instruction pairing it's more worthwhile to
have another accumulator.

total instructions in shared programs: 46428 -> 46171 (-0.55%)
instructions in affected programs: 38030 -> 37773 (-0.68%)

---

 src/gallium/drivers/vc4/vc4_qpu_emit.c  |   50 +++
 src/gallium/drivers/vc4/vc4_register_allocate.c |6 +--
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c 
b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 856f844..f2620c0 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -93,21 +93,41 @@ swap_file(struct qpu_reg *src)
  * In that case, we need to move one to a temporary that can be used in the
  * instruction, instead.
  */
-static void
+static bool
 fixup_raddr_conflict(struct vc4_compile *c,
- struct qpu_reg *src0, struct qpu_reg *src1)
+ struct qpu_reg dst,
+ struct qpu_reg *src0, struct qpu_reg *src1,
+ bool r3_live)
 {
 if ((src0->mux != QPU_MUX_A && src0->mux != QPU_MUX_B) ||
 src0->mux != src1->mux ||
 src0->addr == src1->addr) {
-return;
+return false;
 }
 
 if (swap_file(src0) || swap_file(src1))
-return;
+return false;
+
+if (src0->mux == QPU_MUX_A) {
+/* If we're conflicting over the A regfile, then we can just
+ * use the reserved rb31.
+ */
+queue(c, qpu_a_MOV(qpu_rb(31), *src1));
+*src1 = qpu_rb(31);
+return false;
+} else {
+/* Otherwise, we need a non-B regfile.  So, we spill r3 out to
+ * rb31, then store our desired value in r3, and tell the
+ * caller to put rb31 back into r3 when we're done.
+ */
+if (r3_live)
+queue(c, qpu_a_MOV(qpu_rb(31), qpu_r3()));
+queue(c, qpu_a_MOV(qpu_r3(), *src1));
+
+*src1 = qpu_r3();
 
-queue(c, qpu_a_MOV(qpu_r3(), *src1));
-*src1 = qpu_r3();
+return r3_live && dst.mux != QPU_MUX_R3;
+}
 }
 
 void
@@ -118,6 +138,8 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 uint32_t inputs_remaining = c-num_inputs;
 uint32_t vpm_read_fifo_count = 0;
 uint32_t vpm_read_offset = 0;
+bool written_r3 = false;
+bool needs_restore;
 
 make_empty_list(c-qpu_inst_list);
 
@@ -416,8 +438,12 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 break;
 
 case QOP_TEX_DIRECT:
-fixup_raddr_conflict(c, src[0], src[1]);
+needs_restore = fixup_raddr_conflict(c, dst,
+ src[0], src[1],
+ written_r3);
 queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], 
src[1]));
+if (needs_restore)
+queue(c, qpu_a_MOV(qpu_r3(), qpu_rb(31)));
 break;
 
 case QOP_TEX_RESULT:
@@ -477,7 +503,9 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 if (qir_get_op_nsrc(qinst-op) == 1)
 src[1] = src[0];
 
-fixup_raddr_conflict(c, src[0], src[1]);
+needs_restore = fixup_raddr_conflict(c, dst,
+ src[0], src[1],
+ written_r3);
 
 if (translate[qinst-op].is_mul) {
 queue(c, qpu_m_alu2(translate[qinst-op].op,
@@ -488,8 +516,14 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 dst,
 src[0], src[1]));
 }
+if (needs_restore)
+queue(c, qpu_a_MOV(qpu_r3(), qpu_rb(31)));
+
 break;
 }
+
+if (dst.mux == QPU_MUX_R3)
+written_r3 = true

Mesa (master): vc4: Populate the delay field better, and schedule high delay first.

2014-12-09 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: c5b544403fbc955dd441fb5a2e11f0de2a75e9e4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c5b544403fbc955dd441fb5a2e11f0de2a75e9e4

Author: Eric Anholt e...@anholt.net
Date:   Tue Dec  9 14:05:52 2014 -0800

vc4: Populate the delay field better, and schedule high delay first.

This is a standard scheduling heuristic, and clearly helps.

total instructions in shared programs: 46418 -> 44467 (-4.20%)
instructions in affected programs: 42531 -> 40580 (-4.59%)

---

 src/gallium/drivers/vc4/vc4_qpu_schedule.c |   50 +++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 8df816f..c733e6e 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -49,7 +49,19 @@ struct schedule_node {
 uint32_t child_count;
 uint32_t child_array_size;
 uint32_t parent_count;
+
+/**
+ * Minimum number of cycles from scheduling this instruction until the
+ * end of the program, based on the slowest dependency chain through
+ * the children.
+ */
 uint32_t delay;
+
+/**
+ * cycles between this instruction being scheduled and when its result
+ * can be consumed.
+ */
+uint32_t latency;
 };
 
 struct schedule_node_child {
@@ -548,6 +560,13 @@ choose_instruction_to_schedule(struct choose_scoreboard 
*scoreboard,
 } else if (prio < chosen_prio) {
 continue;
 }
+
+if (n->delay > chosen->delay) {
+chosen = n;
+chosen_prio = prio;
+} else if (n->delay < chosen->delay) {
+continue;
+}
 }
 
 return chosen;
@@ -612,7 +631,7 @@ compute_delay(struct schedule_node *n)
 if (!n-children[i].node-delay)
 compute_delay(n-children[i].node);
 n-delay = MAX2(n-delay,
-n-children[i].node-delay + 1);
+n-children[i].node-delay + 
n-latency);
 }
 }
 }
@@ -734,6 +753,33 @@ schedule_instructions(struct vc4_compile *c, struct 
simple_node *schedule_list)
 }
 }
 
+static uint32_t waddr_latency(uint32_t waddr)
+{
+if (waddr < 32)
+return 2;
+
+/* Some huge number, really. */
+if (waddr >= QPU_W_TMU0_S && waddr <= QPU_W_TMU1_B)
+return 10;
+
+switch(waddr) {
+case QPU_W_SFU_RECIP:
+case QPU_W_SFU_RECIPSQRT:
+case QPU_W_SFU_EXP:
+case QPU_W_SFU_LOG:
+return 3;
+default:
+return 1;
+}
+}
+
+static uint32_t
+instruction_latency(uint64_t inst)
+{
+return MAX2(waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_ADD)),
+waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_MUL)));
+}
+
 void
 qpu_schedule_instructions(struct vc4_compile *c)
 {
@@ -761,6 +807,8 @@ qpu_schedule_instructions(struct vc4_compile *c)
 struct schedule_node *n = rzalloc(mem_ctx, struct 
schedule_node);
 
 n-inst = inst;
+n-latency = instruction_latency(inst-inst);
+
 remove_from_list(inst-link);
 insert_at_tail(schedule_list, n-link);
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Mark VPM read setup as impacting VPM reads, not writes.

2014-12-09 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: f431b4f110946a0f6e3822f870fbcd3d23c8317a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f431b4f110946a0f6e3822f870fbcd3d23c8317a

Author: Eric Anholt e...@anholt.net
Date:   Tue Dec  9 14:20:54 2014 -0800

vc4: Mark VPM read setup as impacting VPM reads, not writes.

Fixes assertion failures if we adjust scheduling priorities to emphasize
VPM reads more.

---

 src/gallium/drivers/vc4/vc4_qpu_schedule.c |8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 6bba66a..4bb9b3a 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -260,10 +260,16 @@ process_waddr_deps(struct schedule_state *state, struct 
schedule_node *n,
 break;
 
 case QPU_W_VPM:
-case QPU_W_VPMVCD_SETUP:
 add_write_dep(state, state-last_vpm, n);
 break;
 
+case QPU_W_VPMVCD_SETUP:
+if (is_a)
+add_write_dep(state, state-last_vpm_read, n);
+else
+add_write_dep(state, state-last_vpm, n);
+break;
+
 case QPU_W_SFU_RECIP:
 case QPU_W_SFU_RECIPSQRT:
 case QPU_W_SFU_EXP:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Refuse to merge instructions involving 32-bit immediate loads.

2014-12-09 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: cff8c96a0d418f41e00aa97a13dc55e3ed213eb7
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cff8c96a0d418f41e00aa97a13dc55e3ed213eb7

Author: Eric Anholt e...@anholt.net
Date:   Tue Dec  9 16:34:37 2014 -0800

vc4: Refuse to merge instructions involving 32-bit immediate loads.

An immediate load overwrites the mul and add operations, so you can't
merge with them.

---

 src/gallium/drivers/vc4/vc4_qpu.c |5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_qpu.c 
b/src/gallium/drivers/vc4/vc4_qpu.c
index 6daa072..faf8790 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -356,6 +356,11 @@ qpu_merge_inst(uint64_t a, uint64_t b)
 if (qpu_num_sf_accesses(a)  qpu_num_sf_accesses(b))
 return 0;
 
+if (QPU_GET_FIELD(a, QPU_SIG) == QPU_SIG_LOAD_IMM ||
+QPU_GET_FIELD(b, QPU_SIG) == QPU_SIG_LOAD_IMM) {
+return 0;
+}
+
 ok = ok  merge_fields(merge, a, b, QPU_SIG_MASK,
 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Skip raddr dependencies for 32-bit immediate loads.

2014-12-09 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 45a89237711acff7ee31c854361f8f580ccdcc9f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=45a89237711acff7ee31c854361f8f580ccdcc9f

Author: Eric Anholt e...@anholt.net
Date:   Tue Dec  9 14:23:39 2014 -0800

vc4: Skip raddr dependencies for 32-bit immediate loads.

These don't have raddr fields.

---

 src/gallium/drivers/vc4/vc4_qpu_schedule.c |7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 4bb9b3a..8df816f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -334,8 +334,11 @@ calculate_deps(struct schedule_state *state, struct 
schedule_node *n)
 uint32_t mul_b = QPU_GET_FIELD(inst, QPU_MUL_B);
 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
 
-process_raddr_deps(state, n, raddr_a, true);
-process_raddr_deps(state, n, raddr_b, false);
+if (sig != QPU_SIG_LOAD_IMM) {
+process_raddr_deps(state, n, raddr_a, true);
+process_raddr_deps(state, n, raddr_b, false);
+}
+
 if (add_op != QPU_A_NOP) {
 process_mux_deps(state, n, add_a);
 process_mux_deps(state, n, add_b);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Do QPU scheduling across uniform loads.

2014-12-09 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 8812dc503eb48bac5b9c9b5740f76025c046f90d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8812dc503eb48bac5b9c9b5740f76025c046f90d

Author: Eric Anholt e...@anholt.net
Date:   Tue Dec  9 18:54:29 2014 -0800

vc4: Do QPU scheduling across uniform loads.

This means another pass of reordering the uniform data store, but it lets
us pair up a lot more instructions.

total instructions in shared programs: 44639 -> 43176 (-3.28%)
instructions in affected programs: 36938 -> 35475 (-3.96%)

---

 src/gallium/drivers/vc4/vc4_qpu_schedule.c |   88 +++-
 1 file changed, 60 insertions(+), 28 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index c733e6e..0700b0d 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -62,6 +62,12 @@ struct schedule_node {
  * can be consumed.
  */
 uint32_t latency;
+
+/**
+ * Which uniform from uniform_data[] this instruction read, or -1 if
+ * not reading a uniform.
+ */
+int uniform;
 };
 
 struct schedule_node_child {
@@ -80,7 +86,6 @@ struct schedule_state {
 struct schedule_node *last_rb[32];
 struct schedule_node *last_sf;
 struct schedule_node *last_vpm_read;
-struct schedule_node *last_unif_read;
 struct schedule_node *last_tmu_write;
 struct schedule_node *last_tlb;
 struct schedule_node *last_vpm;
@@ -174,9 +179,6 @@ process_raddr_deps(struct schedule_state *state, struct 
schedule_node *n,
 break;
 
 case QPU_R_UNIF:
-add_write_dep(state, state-last_unif_read, n);
-break;
-
 case QPU_R_NOP:
 case QPU_R_ELEM_QPU:
 case QPU_R_XY_PIXEL_COORD:
@@ -215,6 +217,18 @@ is_tmu_write(uint32_t waddr)
 }
 }
 
+static bool
+reads_uniform(uint64_t inst)
+{
+if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LOAD_IMM)
+return false;
+
+return (QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_UNIF ||
+QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_UNIF ||
+is_tmu_write(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
+is_tmu_write(QPU_GET_FIELD(inst, QPU_WADDR_MUL)));
+}
+
 static void
 process_mux_deps(struct schedule_state *state, struct schedule_node *n,
  uint32_t mux)
@@ -224,17 +238,6 @@ process_mux_deps(struct schedule_state *state, struct 
schedule_node *n,
 }
 
 
-static bool
-is_direct_tmu_read(uint64_t inst)
-{
-/* If it's a direct read, we happen to structure the code such that
- * there's an explicit uniform read in the instruction (for kernel
- * texture reloc processing).
- */
-return (QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_UNIF ||
-QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_UNIF);
-}
-
 static void
 process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
uint32_t waddr, bool is_add)
@@ -250,14 +253,6 @@ process_waddr_deps(struct schedule_state *state, struct 
schedule_node *n,
 }
 } else if (is_tmu_write(waddr)) {
 add_write_dep(state, state-last_tmu_write, n);
-
-/* There is an implicit uniform read in texture ops in
- * hardware, unless this is a direct-addressed uniform read,
- * so we need to keep it in the same order as the other
- * uniforms.
- */
-if (!is_direct_tmu_read(n-inst-inst))
-add_write_dep(state, state-last_unif_read, n);
 } else if (qpu_waddr_is_tlb(waddr)) {
 add_write_dep(state, state-last_tlb, n);
 } else {
@@ -509,7 +504,7 @@ get_instruction_priority(uint64_t inst)
 static struct schedule_node *
 choose_instruction_to_schedule(struct choose_scoreboard *scoreboard,
struct simple_node *schedule_list,
-   uint64_t prev_inst)
+   struct schedule_node *prev_inst)
 {
 struct schedule_node *chosen = NULL;
 struct simple_node *node;
@@ -537,8 +532,11 @@ choose_instruction_to_schedule(struct choose_scoreboard 
*scoreboard,
 /* If we're trying to pair with another instruction, check
  * that they're compatible.
  */
-if (prev_inst != 0) {
-inst = qpu_merge_inst(prev_inst, inst);
+if (prev_inst) {
+if (prev_inst->uniform != -1 && n->uniform != -1)
+continue;
+
+inst = qpu_merge_inst(prev_inst->inst->inst, inst);
 if (!inst)
 continue;
 }
@@ -668,6 +666,17

Mesa (master): vc4: Drop dependency on r3 for color packing.

2014-12-08 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 24c5ab7bbbd2a4e9207c6cba66945f72ca5c7a3e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=24c5ab7bbbd2a4e9207c6cba66945f72ca5c7a3e

Author: Eric Anholt e...@anholt.net
Date:   Sun Sep  7 14:38:24 2014 -0700

vc4: Drop dependency on r3 for color packing.

We can avoid it by carefully ordering the packing.  This is important as a
step in giving r3 to the register allocator.

total instructions in shared programs: 56087 -> 55957 (-0.23%)
instructions in affected programs: 18368 -> 18238 (-0.71%)

---

 src/gallium/drivers/vc4/vc4_qpu_emit.c |   31 +++
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c 
b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 3cb709f..856f844 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -296,17 +296,40 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 
 break;
 
-case QOP_PACK_COLORS:
+case QOP_PACK_COLORS: {
+/* We have to be careful not to start writing over one
+ * of our source values when incrementally writing the
+ * destination.  So, if the dst is one of the srcs, we
+ * pack that one first (and we pack 4 channels at once
+ * for the first pack).
+ */
+struct qpu_reg first_pack = src[0];
+for (int i = 0; i  4; i++) {
+if (src[i].mux == dst.mux 
+src[i].addr == dst.addr) {
+first_pack = dst;
+break;
+}
+}
+queue(c, qpu_m_MOV(dst, first_pack));
+*last_inst(c) |= QPU_PM;
+*last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_,
+   QPU_PACK);
+
 for (int i = 0; i  4; i++) {
-queue(c, qpu_m_MOV(qpu_r3(), src[i]));
+if (src[i].mux == first_pack.mux 
+src[i].addr == first_pack.addr) {
+continue;
+}
+
+queue(c, qpu_m_MOV(dst, src[i]));
 *last_inst(c) |= QPU_PM;
 *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A 
+ i,
QPU_PACK);
 }
 
-queue(c, qpu_a_MOV(dst, qpu_r3()));
-
 break;
+}
 
 case QOP_FRAG_X:
 queue(c, qpu_a_ITOF(dst,

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Interleave register allocation from regfile A and B.

2014-12-08 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 70dd3df344ddeb4b6d0f2e990dd1afaf4e46e39f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=70dd3df344ddeb4b6d0f2e990dd1afaf4e46e39f

Author: Eric Anholt e...@anholt.net
Date:   Fri Dec  5 17:08:28 2014 -0800

vc4: Interleave register allocation from regfile A and B.

The register allocator prefers low-index registers from vc4_regs[] in the
configuration we're using, which is good because it means we prioritize
allocating the accumulators (which are faster).  On the other hand, it was
causing raddr conflicts because everything beyond r0-r2 ended up in
regfile A until you got massive register pressure.  By interleaving, we
end up getting more instruction pairing from getting non-conflicting
raddrs and QPU_WSes.

total instructions in shared programs: 55957 -> 52719 (-5.79%)
instructions in affected programs: 46855 -> 43617 (-6.91%)

---

 src/gallium/drivers/vc4/vc4_register_allocate.c |   77 +++
 1 file changed, 38 insertions(+), 39 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c 
b/src/gallium/drivers/vc4/vc4_register_allocate.c
index 72f5271..b62669f 100644
--- a/src/gallium/drivers/vc4/vc4_register_allocate.c
+++ b/src/gallium/drivers/vc4/vc4_register_allocate.c
@@ -36,80 +36,79 @@ static const struct qpu_reg vc4_regs[] = {
 { QPU_MUX_R3, 0},
 { QPU_MUX_R4, 0},
 QPU_R(A, 0),
-QPU_R(A, 1),
-QPU_R(A, 2),
-QPU_R(A, 3),
-QPU_R(A, 4),
-QPU_R(A, 5),
-QPU_R(A, 6),
-QPU_R(A, 7),
-QPU_R(A, 8),
-QPU_R(A, 9),
-QPU_R(A, 10),
-QPU_R(A, 11),
-QPU_R(A, 12),
-QPU_R(A, 13),
-QPU_R(A, 14),
-QPU_R(A, 15),
-QPU_R(A, 16),
-QPU_R(A, 17),
-QPU_R(A, 18),
-QPU_R(A, 19),
-QPU_R(A, 20),
-QPU_R(A, 21),
-QPU_R(A, 22),
-QPU_R(A, 23),
-QPU_R(A, 24),
-QPU_R(A, 25),
-QPU_R(A, 26),
-QPU_R(A, 27),
-QPU_R(A, 28),
-QPU_R(A, 29),
-QPU_R(A, 30),
-QPU_R(A, 31),
 QPU_R(B, 0),
+QPU_R(A, 1),
 QPU_R(B, 1),
+QPU_R(A, 2),
 QPU_R(B, 2),
+QPU_R(A, 3),
 QPU_R(B, 3),
+QPU_R(A, 4),
 QPU_R(B, 4),
+QPU_R(A, 5),
 QPU_R(B, 5),
+QPU_R(A, 6),
 QPU_R(B, 6),
+QPU_R(A, 7),
 QPU_R(B, 7),
+QPU_R(A, 8),
 QPU_R(B, 8),
+QPU_R(A, 9),
 QPU_R(B, 9),
+QPU_R(A, 10),
 QPU_R(B, 10),
+QPU_R(A, 11),
 QPU_R(B, 11),
+QPU_R(A, 12),
 QPU_R(B, 12),
+QPU_R(A, 13),
 QPU_R(B, 13),
+QPU_R(A, 14),
 QPU_R(B, 14),
+QPU_R(A, 15),
 QPU_R(B, 15),
+QPU_R(A, 16),
 QPU_R(B, 16),
+QPU_R(A, 17),
 QPU_R(B, 17),
+QPU_R(A, 18),
 QPU_R(B, 18),
+QPU_R(A, 19),
 QPU_R(B, 19),
+QPU_R(A, 20),
 QPU_R(B, 20),
+QPU_R(A, 21),
 QPU_R(B, 21),
+QPU_R(A, 22),
 QPU_R(B, 22),
+QPU_R(A, 23),
 QPU_R(B, 23),
+QPU_R(A, 24),
 QPU_R(B, 24),
+QPU_R(A, 25),
 QPU_R(B, 25),
+QPU_R(A, 26),
 QPU_R(B, 26),
+QPU_R(A, 27),
 QPU_R(B, 27),
+QPU_R(A, 28),
 QPU_R(B, 28),
+QPU_R(A, 29),
 QPU_R(B, 29),
+QPU_R(A, 30),
 QPU_R(B, 30),
+QPU_R(A, 31),
 QPU_R(B, 31),
 };
 #define ACC_INDEX 0
-#define A_INDEX   (ACC_INDEX + 5)
-#define B_INDEX   (A_INDEX + 32)
+#define AB_INDEX  (ACC_INDEX + 5)
 
 static void
 vc4_alloc_reg_set(struct vc4_context *vc4)
 {
-assert(vc4_regs[A_INDEX].addr == 0);
-assert(vc4_regs[B_INDEX].addr == 0);
-STATIC_ASSERT(ARRAY_SIZE(vc4_regs) == B_INDEX + 32);
+assert(vc4_regs[AB_INDEX].addr == 0);
+assert(vc4_regs[AB_INDEX + 1].addr == 0);
+STATIC_ASSERT(ARRAY_SIZE(vc4_regs) == AB_INDEX + 64);
 
 if (vc4-regs)
 return;
@@ -134,7 +133,7 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
 }
 
 vc4-reg_class_a = ra_alloc_reg_class(vc4-regs);
-for (uint32_t i = A_INDEX; i  A_INDEX + 32; i++)
+for (uint32_t i = AB_INDEX; i  AB_INDEX + 64; i += 2)
 ra_class_add_reg(vc4-regs, vc4-reg_class_a, i);
 
 ra_set_finalize(vc4-regs, NULL);
@@ -191,13 +190,13 @@ vc4_register_allocate(struct vc4_context *vc4, struct 
vc4_compile *c)
 case QOP_FRAG_Z:
 def[inst-dst.index] = 0;
 ra_set_node_reg(g, inst-dst.index,
-B_INDEX + QPU_R_FRAG_PAYLOAD_ZW);
+AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2 + 
1);
 break;
 
 case QOP_FRAG_W:
 def[inst-dst.index] = 0

Mesa (master): vc4: Add support for GL 1.0 logic ops.

2014-12-08 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: dfbf58c439870d46abcc8868b8ca145318aee125
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dfbf58c439870d46abcc8868b8ca145318aee125

Author: Eric Anholt e...@anholt.net
Date:   Mon Dec  8 12:40:58 2014 -0800

vc4: Add support for GL 1.0 logic ops.

---

 src/gallium/drivers/vc4/vc4_program.c |   62 +++--
 1 file changed, 60 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index d21522f..cea2a49 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -69,6 +69,7 @@ struct vc4_fs_key {
 bool point_coord_upper_left;
 bool light_twoside;
 uint8_t alpha_test_func;
+uint8_t logicop_func;
 uint32_t point_sprite_mask;
 
 struct pipe_rt_blend_state blend;
@@ -1629,6 +1630,46 @@ alpha_test_discard(struct vc4_compile *c)
 }
 }
 
+static struct qreg
+vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst)
+{
+switch (c-fs_key-logicop_func) {
+case PIPE_LOGICOP_CLEAR:
+return qir_uniform_f(c, 0.0);
+case PIPE_LOGICOP_NOR:
+return qir_NOT(c, qir_OR(c, src, dst));
+case PIPE_LOGICOP_AND_INVERTED:
+return qir_AND(c, qir_NOT(c, src), dst);
+case PIPE_LOGICOP_COPY_INVERTED:
+return qir_NOT(c, src);
+case PIPE_LOGICOP_AND_REVERSE:
+return qir_AND(c, src, qir_NOT(c, dst));
+case PIPE_LOGICOP_INVERT:
+return qir_NOT(c, dst);
+case PIPE_LOGICOP_XOR:
+return qir_XOR(c, src, dst);
+case PIPE_LOGICOP_NAND:
+return qir_NOT(c, qir_AND(c, src, dst));
+case PIPE_LOGICOP_AND:
+return qir_AND(c, src, dst);
+case PIPE_LOGICOP_EQUIV:
+return qir_NOT(c, qir_XOR(c, src, dst));
+case PIPE_LOGICOP_NOOP:
+return dst;
+case PIPE_LOGICOP_OR_INVERTED:
+return qir_OR(c, qir_NOT(c, src), dst);
+case PIPE_LOGICOP_OR_REVERSE:
+return qir_OR(c, src, qir_NOT(c, dst));
+case PIPE_LOGICOP_OR:
+return qir_OR(c, src, dst);
+case PIPE_LOGICOP_SET:
+return qir_uniform_ui(c, ~0);
+case PIPE_LOGICOP_COPY:
+default:
+return src;
+}
+}
+
 static void
 emit_frag_end(struct vc4_compile *c)
 {
@@ -1640,8 +1681,11 @@ emit_frag_end(struct vc4_compile *c)
 struct qreg tlb_read_color[4] = { c-undef, c-undef, c-undef, 
c-undef };
 struct qreg dst_color[4] = { c-undef, c-undef, c-undef, c-undef };
 struct qreg linear_dst_color[4] = { c-undef, c-undef, c-undef, 
c-undef };
+struct qreg packed_dst_color = c-undef;
+
 if (c-fs_key-blend.blend_enable ||
-c-fs_key-blend.colormask != 0xf) {
+c-fs_key-blend.colormask != 0xf ||
+c-fs_key-logicop_func != PIPE_LOGICOP_COPY) {
 struct qreg r4 = qir_TLB_COLOR_READ(c);
 for (int i = 0; i  4; i++)
 tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
@@ -1656,6 +1700,11 @@ emit_frag_end(struct vc4_compile *c)
 linear_dst_color[i] = dst_color[i];
 }
 }
+
+/* Save the packed value for logic ops.  Can't reuse r4
+ * becuase other things might smash it (like sRGB)
+ */
+packed_dst_color = qir_MOV(c, r4);
 }
 
 struct qreg blend_color[4];
@@ -1748,6 +1797,11 @@ emit_frag_end(struct vc4_compile *c)
 packed_color = qir_uniform_ui(c, 0);
 }
 
+
+if (c-fs_key-logicop_func != PIPE_LOGICOP_COPY) {
+packed_color = vc4_logicop(c, packed_color, packed_dst_color);
+}
+
 qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c-undef,
  packed_color, c-undef));
 }
@@ -2254,7 +2308,11 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t 
prim_mode)
 key-is_lines = (prim_mode = PIPE_PRIM_LINES 
  prim_mode = PIPE_PRIM_LINE_STRIP);
 key-blend = vc4-blend-rt[0];
-
+if (vc4-blend-logicop_enable) {
+key-logicop_func = vc4-blend-logicop_func;
+} else {
+key-logicop_func = PIPE_LOGICOP_COPY;
+}
 if (vc4-framebuffer.cbufs[0])
 key-color_format = vc4-framebuffer.cbufs[0]-format;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix decision for whether the MIN operation writes to the B regfile.

2014-12-08 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 46741c1b872e47a76c152b0c36064f465da6e001
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=46741c1b872e47a76c152b0c36064f465da6e001

Author: Eric Anholt e...@anholt.net
Date:   Mon Dec  8 11:27:50 2014 -0800

vc4: Fix decision for whether the MIN operation writes to the B regfile.

---

 src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c |6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c 
b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
index e797c59..f5e152b 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
@@ -222,7 +222,7 @@ check_register_write(uint64_t inst,
uint32_t waddr = (is_mul ?
  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
-   bool is_b = is_mul != ((inst  QPU_PM) != 0);
+   bool is_b = is_mul != ((inst  QPU_WS) != 0);
uint32_t live_reg_index;
 
switch (waddr) {
@@ -298,7 +298,7 @@ track_live_clamps(uint64_t inst,
uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
-   bool pm = inst  QPU_PM;
+   bool is_b = inst  QPU_WS;
uint32_t live_reg_index;
 
if (QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_MIN)
@@ -309,7 +309,7 @@ track_live_clamps(uint64_t inst,
return;
}
 
-   live_reg_index = waddr_to_live_reg_index(waddr_add, pm);
+   live_reg_index = waddr_to_live_reg_index(waddr_add, is_b);
if (live_reg_index != ~0) {
validation_state-live_clamp_offsets[live_reg_index] =
validated_shader-uniforms_size;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add support for TGSI_OPCODE_UCMP.

2014-12-08 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 5045d8ca421168a8ba2eb861298fc67be4f1eb03
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5045d8ca421168a8ba2eb861298fc67be4f1eb03

Author: Eric Anholt e...@anholt.net
Date:   Mon Dec  8 11:57:15 2014 -0800

vc4: Add support for TGSI_OPCODE_UCMP.

This is being emitted now from st_glsl_to_tgsi.cpp.

---

 src/gallium/drivers/vc4/vc4_program.c |   12 
 1 file changed, 12 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 5fab1de..d21522f 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -569,6 +569,17 @@ tgsi_to_qir_cmp(struct vc4_compile *c,
 }
 
 static struct qreg
+tgsi_to_qir_ucmp(struct vc4_compile *c,
+ struct tgsi_full_instruction *tgsi_inst,
+ enum qop op, struct qreg *src, int i)
+{
+qir_SF(c, src[0 * 4 + i]);
+return qir_SEL_X_Y_ZC(c,
+  src[1 * 4 + i],
+  src[2 * 4 + i]);
+}
+
+static struct qreg
 tgsi_to_qir_mad(struct vc4_compile *c,
 struct tgsi_full_instruction *tgsi_inst,
 enum qop op, struct qreg *src, int i)
@@ -1292,6 +1303,7 @@ emit_tgsi_instruction(struct vc4_compile *c,
 [TGSI_OPCODE_ISLT] = { 0, tgsi_to_qir_islt },
 
 [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
+[TGSI_OPCODE_UCMP] = { 0, tgsi_to_qir_ucmp },
 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
 [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp },
 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_rsq },

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add a debug flag for waiting for sync on submit.

2014-12-05 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 34cf86bdc48e1b350437fe831fedd4632f4e06c2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=34cf86bdc48e1b350437fe831fedd4632f4e06c2

Author: Eric Anholt e...@anholt.net
Date:   Tue Dec  2 13:18:56 2014 -0800

vc4: Add a debug flag for waiting for sync on submit.

This is nice when you're tracking down which command list is hanging the
GPU.

---

 src/gallium/drivers/vc4/vc4_context.c |8 
 src/gallium/drivers/vc4/vc4_screen.c  |2 ++
 src/gallium/drivers/vc4/vc4_screen.h  |1 +
 3 files changed, 11 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index bb30c0e..b26c071 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -324,6 +324,14 @@ vc4_flush(struct pipe_context *pctx)
 
 vc4-last_emit_seqno = submit.seqno;
 
+if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
+if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
+PIPE_TIMEOUT_INFINITE)) {
+fprintf(stderr, "Wait failed.\n");
+abort();
+}
+}
+
 vc4_reset_cl(vc4-bcl);
 vc4_reset_cl(vc4-rcl);
 vc4_reset_cl(vc4-shader_rec);
diff --git a/src/gallium/drivers/vc4/vc4_screen.c 
b/src/gallium/drivers/vc4/vc4_screen.c
index abd44ee..18451bd 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -52,6 +52,8 @@ static const struct debug_named_value debug_options[] = {
   "Skip actual hardware execution of commands" },
 { "always_flush", VC4_DEBUG_ALWAYS_FLUSH,
   "Flush after each draw call" },
+{ "always_sync", VC4_DEBUG_ALWAYS_SYNC,
+  "Wait for finish after each flush" },
 { NULL }
 };
 
diff --git a/src/gallium/drivers/vc4/vc4_screen.h 
b/src/gallium/drivers/vc4/vc4_screen.h
index ba07490..3819bda 100644
--- a/src/gallium/drivers/vc4/vc4_screen.h
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -37,6 +37,7 @@ struct vc4_bo;
 #define VC4_DEBUG_PERF  0x0020
 #define VC4_DEBUG_NORAST0x0040
 #define VC4_DEBUG_ALWAYS_FLUSH 0x0080
+#define VC4_DEBUG_ALWAYS_SYNC  0x0100
 
 #define VC4_MAX_MIP_LEVELS 12
 #define VC4_MAX_TEXTURE_SAMPLERS 16

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix inverted priority of instructions for QPU scheduling.

2014-12-05 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 042962df2d058c4dd4e45b7deaa3b4519141758e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=042962df2d058c4dd4e45b7deaa3b4519141758e

Author: Eric Anholt e...@anholt.net
Date:   Tue Dec  2 16:31:29 2014 -0800

vc4: Fix inverted priority of instructions for QPU scheduling.

We were scheduling TLB operations as early as possible, and texture setup
as late as possible.  When I introduced prioritization, I visually
inspected that an independent operation got moved above texture results
collection, which tricked me into thinking it was working (but it was just
because texture setup was being pushed late).

total instructions in shared programs: 57651 -> 57486 (-0.29%)
instructions in affected programs: 18532 -> 18367 (-0.89%)

---

 src/gallium/drivers/vc4/vc4_qpu_schedule.c |   20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 8aa8374..2b0a632 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -439,24 +439,24 @@ get_instruction_priority(uint64_t inst)
 uint32_t baseline_score;
 uint32_t next_score = 0;
 
-/* Schedule texture read setup early to hide their latency better. */
-if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul))
+/* Schedule TLB operations as late as possible, to get more
+ * parallelism between shaders.
+ */
+if (qpu_inst_is_tlb(inst))
 return next_score;
 next_score++;
 
-/* Default score for things that aren't otherwise special. */
-baseline_score = next_score;
-next_score++;
-
 /* Schedule texture read results collection late to hide latency. */
 if (sig == QPU_SIG_LOAD_TMU0 || sig == QPU_SIG_LOAD_TMU1)
 return next_score;
 next_score++;
 
-/* Schedule TLB operations as late as possible, to get more
- * parallelism between shaders.
- */
-if (qpu_inst_is_tlb(inst))
+/* Default score for things that aren't otherwise special. */
+baseline_score = next_score;
+next_score++;
+
+/* Schedule texture read setup early to hide their latency better. */
+if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul))
 return next_score;
 next_score++;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Refuse to merge two ops that both access shared functions.

2014-12-05 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: bd4057a5d74fd1801c55ee98346af9c1095d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=bd4057a5d74fd1801c55ee98346af9c1095d

Author: Eric Anholt e...@anholt.net
Date:   Tue Dec  2 16:23:40 2014 -0800

vc4: Refuse to merge two ops that both access shared functions.

Avoids assertion failures in vc4_qpu_validate.c if we happen to find the
right set of operations available.

---

 src/gallium/drivers/vc4/vc4_qpu.c  |   53 
 src/gallium/drivers/vc4/vc4_qpu.h  |1 +
 src/gallium/drivers/vc4/vc4_qpu_validate.c |   37 +--
 3 files changed, 55 insertions(+), 36 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu.c 
b/src/gallium/drivers/vc4/vc4_qpu.c
index 54c79e9..fc5d4b0 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -209,6 +209,56 @@ merge_fields(uint64_t *merge,
 return true;
 }
 
+int
+qpu_num_sf_accesses(uint64_t inst)
+{
+int accesses = 0;
+static const uint32_t specials[] = {
+QPU_W_TLB_COLOR_MS,
+QPU_W_TLB_COLOR_ALL,
+QPU_W_TLB_Z,
+QPU_W_TMU0_S,
+QPU_W_TMU0_T,
+QPU_W_TMU0_R,
+QPU_W_TMU0_B,
+QPU_W_TMU1_S,
+QPU_W_TMU1_T,
+QPU_W_TMU1_R,
+QPU_W_TMU1_B,
+QPU_W_SFU_RECIP,
+QPU_W_SFU_RECIPSQRT,
+QPU_W_SFU_EXP,
+QPU_W_SFU_LOG,
+};
+uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
+uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
+uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
+uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
+
+for (int j = 0; j < ARRAY_SIZE(specials); j++) {
+if (waddr_add == specials[j])
+accesses++;
+if (waddr_mul == specials[j])
+accesses++;
+}
+
+if (raddr_a == QPU_R_MUTEX_ACQUIRE)
+accesses++;
+if (raddr_b == QPU_R_MUTEX_ACQUIRE)
+accesses++;
+
+/* XXX: semaphore, combined color read/write? */
+switch (QPU_GET_FIELD(inst, QPU_SIG)) {
+case QPU_SIG_COLOR_LOAD:
+case QPU_SIG_COLOR_LOAD_END:
+case QPU_SIG_LOAD_TMU0:
+case QPU_SIG_LOAD_TMU1:
+accesses++;
+}
+
+return accesses;
+}
+
 uint64_t
 qpu_merge_inst(uint64_t a, uint64_t b)
 {
@@ -223,6 +273,9 @@ qpu_merge_inst(uint64_t a, uint64_t b)
 QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
 return 0;
 
+if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))
+return 0;
+
 ok = ok  merge_fields(merge, a, b, QPU_SIG_MASK,
 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
 
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h 
b/src/gallium/drivers/vc4/vc4_qpu.h
index eb06d1a..e1307eb 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -137,6 +137,7 @@ uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond);
 
 bool qpu_waddr_is_tlb(uint32_t waddr);
 bool qpu_inst_is_tlb(uint64_t inst);
+int qpu_num_sf_accesses(uint64_t inst);
 void qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst);
 
 static inline uint64_t
diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c 
b/src/gallium/drivers/vc4/vc4_qpu_validate.c
index 16de82a..ffd1b47 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c
@@ -255,42 +255,7 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
  */
 for (int i = 0; i  num_inst - 1; i++) {
 uint64_t inst = insts[i];
-int accesses = 0;
-static const uint32_t specials[] = {
-QPU_W_TLB_COLOR_MS,
-QPU_W_TLB_COLOR_ALL,
-QPU_W_TLB_Z,
-QPU_W_TMU0_S,
-QPU_W_TMU0_T,
-QPU_W_TMU0_R,
-QPU_W_TMU0_B,
-QPU_W_TMU1_S,
-QPU_W_TMU1_T,
-QPU_W_TMU1_R,
-QPU_W_TMU1_B,
-QPU_W_SFU_RECIP,
-QPU_W_SFU_RECIPSQRT,
-QPU_W_SFU_EXP,
-QPU_W_SFU_LOG,
-};
-
-for (int j = 0; j  ARRAY_SIZE(specials); j++) {
-if (writes_reg(inst, specials[j]))
-accesses++;
-}
-
-if (reads_reg(inst, QPU_R_MUTEX_ACQUIRE))
-accesses++;
-
-/* XXX: semaphore, combined color read/write? */
-switch (QPU_GET_FIELD(inst, QPU_SIG

Mesa (master): vc4: Allow dead code elimination of color reads.

2014-12-05 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: dadc32ac8072cf78b405d1b54414e1f020b0de41
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dadc32ac8072cf78b405d1b54414e1f020b0de41

Author: Eric Anholt e...@anholt.net
Date:   Tue Dec  2 12:58:27 2014 -0800

vc4: Allow dead code elimination of color reads.

This might happen if the blending functions are set up to not actually use
the destination color/alpha, for example.

---

 src/gallium/drivers/vc4/vc4_qir.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_qir.c 
b/src/gallium/drivers/vc4/vc4_qir.c
index cd731bc..c43b9b6 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -80,7 +80,7 @@ static const struct qir_op_info qir_op_info[] = {
 [QOP_TLB_STENCIL_SETUP] = { tlb_stencil_setup, 0, 1, true },
 [QOP_TLB_Z_WRITE] = { tlb_z, 0, 1, true },
 [QOP_TLB_COLOR_WRITE] = { tlb_color, 0, 1, true },
-[QOP_TLB_COLOR_READ] = { tlb_color_read, 1, 0, true },
+[QOP_TLB_COLOR_READ] = { tlb_color_read, 1, 0 },
 [QOP_VARY_ADD_C] = { vary_add_c, 1, 1 },
 
 [QOP_FRAG_X] = { frag_x, 1, 0 },

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add separate write-after-read dependency tracking for pairing.

2014-12-05 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 6f32deb538b1b62ff6d5d1212105bbe8d6adce72
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f32deb538b1b62ff6d5d1212105bbe8d6adce72

Author: Eric Anholt e...@anholt.net
Date:   Tue Dec  2 15:42:58 2014 -0800

vc4: Add separate write-after-read dependency tracking for pairing.

If an operation is the last one to read a register, the instruction
containing it can also include the op that has the next write to that
register.

total instructions in shared programs: 57486 -> 56995 (-0.85%)
instructions in affected programs: 43004 -> 42513 (-1.14%)

---

 src/gallium/drivers/vc4/vc4_qpu_schedule.c |   78 +---
 1 file changed, 58 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 2b0a632..6bba66a 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -40,16 +40,23 @@
 
 static bool debug;
 
+struct schedule_node_child;
+
 struct schedule_node {
 struct simple_node link;
 struct queued_qpu_inst *inst;
-struct schedule_node **children;
+struct schedule_node_child *children;
 uint32_t child_count;
 uint32_t child_array_size;
 uint32_t parent_count;
 uint32_t delay;
 };
 
+struct schedule_node_child {
+struct schedule_node *node;
+bool write_after_read;
+};
+
 /* When walking the instructions in reverse, we need to swap before/after in
  * add_dep().
  */
@@ -71,8 +78,11 @@ struct schedule_state {
 static void
 add_dep(struct schedule_state *state,
 struct schedule_node *before,
-struct schedule_node *after)
+struct schedule_node *after,
+bool write)
 {
+bool write_after_read = !write && state->dir == R;
+
 if (!before || !after)
 return;
 
@@ -85,28 +95,40 @@ add_dep(struct schedule_state *state,
 }
 
 for (int i = 0; i  before-child_count; i++) {
-if (before-children[i] == after)
+if (before-children[i].node == after 
+(before-children[i].write_after_read == 
write_after_read)) {
 return;
+}
 }
 
 if (before-child_array_size = before-child_count) {
 before-child_array_size = MAX2(before-child_array_size * 2, 
16);
 before-children = reralloc(before, before-children,
-struct schedule_node *,
+struct schedule_node_child,
 before-child_array_size);
 }
 
-before-children[before-child_count] = after;
+before-children[before-child_count].node = after;
+before-children[before-child_count].write_after_read =
+write_after_read;
 before-child_count++;
 after-parent_count++;
 }
 
 static void
+add_read_dep(struct schedule_state *state,
+  struct schedule_node *before,
+  struct schedule_node *after)
+{
+add_dep(state, before, after, false);
+}
+
+static void
 add_write_dep(struct schedule_state *state,
   struct schedule_node **before,
   struct schedule_node *after)
 {
-add_dep(state, *before, after);
+add_dep(state, *before, after, true);
 *before = after;
 }
 
@@ -152,9 +174,9 @@ process_raddr_deps(struct schedule_state *state, struct 
schedule_node *n,
 default:
 if (raddr  32) {
 if (is_a)
-add_dep(state, state-last_ra[raddr], n);
+add_read_dep(state, state-last_ra[raddr], n);
 else
-add_dep(state, state-last_rb[raddr], n);
+add_read_dep(state, state-last_rb[raddr], n);
 } else {
 fprintf(stderr, "unknown raddr %d\n", raddr);
 abort();
@@ -186,7 +208,7 @@ process_mux_deps(struct schedule_state *state, struct 
schedule_node *n,
  uint32_t mux)
 {
 if (mux != QPU_MUX_A  mux != QPU_MUX_B)
-add_dep(state, state-last_r[mux], n);
+add_read_dep(state, state-last_r[mux], n);
 }
 
 
@@ -278,7 +300,7 @@ process_cond_deps(struct schedule_state *state, struct 
schedule_node *n,
 case QPU_COND_ALWAYS:
 break;
 default:
-add_dep(state, state-last_sf, n);
+add_read_dep(state, state-last_sf, n);
 break;
 }
 }
@@ -339,7 +361,7 @@ calculate_deps(struct schedule_state *state, struct 
schedule_node *n)
 break;
 
 case QPU_SIG_COLOR_LOAD:
-add_dep(state, state-last_tlb, n);
+add_read_dep(state, state-last_tlb, n

Mesa (master): vc4: Try swapping the regfile A to B to pair instructions.

2014-12-05 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: befdff81423a1b6a05969dfde59bfa9c521c4621
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=befdff81423a1b6a05969dfde59bfa9c521c4621

Author: Eric Anholt e...@anholt.net
Date:   Fri Dec  5 13:23:17 2014 -0800

vc4: Try swapping the regfile A to B to pair instructions.

total instructions in shared programs: 56995 -> 56087 (-1.59%)
instructions in affected programs: 40503 -> 39595 (-2.24%)

---

 src/gallium/drivers/vc4/vc4_qpu.c |   64 +++--
 1 file changed, 62 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu.c 
b/src/gallium/drivers/vc4/vc4_qpu.c
index 0e38890..6daa072 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -290,6 +290,55 @@ qpu_waddr_ignores_pm(uint32_t waddr)
 return false;
 }
 
+static void
+swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)
+{
+uint64_t mux_mask = (uint64_t)0x7 << mux_shift;
+uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift;
+uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift;
+
+if ((*a & mux_mask) == mux_a_val) {
+*a = (*a & ~mux_mask) | mux_b_val;
+*merge = (*merge & ~mux_mask) | mux_b_val;
+}
+}
+
+static bool
+try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)
+{
+uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);
+uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);
+uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);
+uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);
+
+if (raddr_a_b != QPU_R_NOP)
+return false;
+
+switch (raddr_a_a) {
+case QPU_R_UNIF:
+case QPU_R_VARY:
+break;
+default:
+return false;
+}
+
+if (raddr_b_b != QPU_R_NOP 
+raddr_b_b != raddr_a_a)
+return false;
+
+/* Move raddr A to B in instruction a. */
+*a = (*a  ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);
+*a = (*a  ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);
+*merge = ((*merge  ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(raddr_b_a, 
QPU_RADDR_A));
+*merge = ((*merge  ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, 
QPU_RADDR_B));
+swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);
+swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);
+swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);
+swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);
+
+return true;
+}
+
 uint64_t
 qpu_merge_inst(uint64_t a, uint64_t b)
 {
@@ -314,8 +363,19 @@ qpu_merge_inst(uint64_t a, uint64_t b)
 ok = ok  merge_fields(merge, a, b, QPU_SF | QPU_PM,
 ~0);
 
-ok = ok  merge_fields(merge, a, b, QPU_RADDR_A_MASK,
-QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A));
+if (!merge_fields(merge, a, b, QPU_RADDR_A_MASK,
+  QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {
+/* Since we tend to use regfile A by default both for register
+ * allocation and for our special values (uniforms and
+ * varyings), try swapping uniforms and varyings to regfile B
+ * to resolve raddr A conflicts.
+ */
+if (!try_swap_ra_file(merge, a, b) 
+!try_swap_ra_file(merge, b, a)) {
+return 0;
+}
+}
+
 ok = ok  merge_fields(merge, a, b, QPU_RADDR_B_MASK,
 QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Allow pairing of some instructions that disagree about the WS bit.

2014-12-05 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 7d8b79f398f18ed7bb48a74b1b82950e2f08abad
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7d8b79f398f18ed7bb48a74b1b82950e2f08abad

Author: Eric Anholt e...@anholt.net
Date:   Fri Dec  5 12:34:30 2014 -0800

vc4: Allow pairing of some instructions that disagree about the WS bit.

No difference on shader-db because we tend to have a lot of other
conflicts going on as well (like RADDR_A disagreements)

---

 src/gallium/drivers/vc4/vc4_qpu.c |   48 -
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu.c 
b/src/gallium/drivers/vc4/vc4_qpu.c
index fc5d4b0..0e38890 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -259,6 +259,37 @@ qpu_num_sf_accesses(uint64_t inst)
 return accesses;
 }
 
+static bool
+qpu_waddr_ignores_pm(uint32_t waddr)
+{
+switch(waddr) {
+case QPU_W_ACC0:
+case QPU_W_ACC1:
+case QPU_W_ACC2:
+case QPU_W_ACC3:
+case QPU_W_TLB_Z:
+case QPU_W_TLB_COLOR_MS:
+case QPU_W_TLB_COLOR_ALL:
+case QPU_W_TLB_ALPHA_MASK:
+case QPU_W_VPM:
+case QPU_W_SFU_RECIP:
+case QPU_W_SFU_RECIPSQRT:
+case QPU_W_SFU_EXP:
+case QPU_W_SFU_LOG:
+case QPU_W_TMU0_S:
+case QPU_W_TMU0_T:
+case QPU_W_TMU0_R:
+case QPU_W_TMU0_B:
+case QPU_W_TMU1_S:
+case QPU_W_TMU1_T:
+case QPU_W_TMU1_R:
+case QPU_W_TMU1_B:
+return true;
+}
+
+return false;
+}
+
 uint64_t
 qpu_merge_inst(uint64_t a, uint64_t b)
 {
@@ -280,7 +311,7 @@ qpu_merge_inst(uint64_t a, uint64_t b)
 QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
 
 /* Misc fields that have to match exactly. */
-ok = ok  merge_fields(merge, a, b, QPU_SF | QPU_WS | QPU_PM,
+ok = ok  merge_fields(merge, a, b, QPU_SF | QPU_PM,
 ~0);
 
 ok = ok  merge_fields(merge, a, b, QPU_RADDR_A_MASK,
@@ -293,6 +324,21 @@ qpu_merge_inst(uint64_t a, uint64_t b)
 ok = ok  merge_fields(merge, a, b, QPU_WADDR_MUL_MASK,
 QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
 
+/* Allow disagreement on WS (swapping A vs B physical reg file as the
+ * destination for ADD/MUL) if one of the original instructions
+ * ignores it (probably because it's just writing to accumulators).
+ */
+if (qpu_waddr_ignores_pm(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&
+qpu_waddr_ignores_pm(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {
+merge = (merge & ~QPU_WS) | (b & QPU_WS);
+} else if (qpu_waddr_ignores_pm(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&
+   qpu_waddr_ignores_pm(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {
+merge = (merge & ~QPU_WS) | (a & QPU_WS);
+} else {
+if ((a & QPU_WS) != (b & QPU_WS))
+return 0;
+}
+
 if (ok)
 return merge;
 else

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix assertion about SFU versus texturing.

2014-12-01 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: a7b1a93137f4441822760f1cfe261065474163ab
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a7b1a93137f4441822760f1cfe261065474163ab

Author: Eric Anholt e...@anholt.net
Date:   Sun Nov 30 13:25:16 2014 -0800

vc4: Fix assertion about SFU versus texturing.

We're supposed to be checking that nothing else writes r4, which is done
by the TMU result collection signal, not the coordinate setup.

Avoids a regression when QPU instruction scheduling is introduced.

---

 src/gallium/drivers/vc4/vc4_qpu_validate.c |7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c 
b/src/gallium/drivers/vc4/vc4_qpu_validate.c
index 8fe5f41..16de82a 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c
@@ -202,12 +202,13 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
 int last_sfu_inst = -10;
 for (int i = 0; i  num_inst - 1; i++) {
 uint64_t inst = insts[i];
+uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
 
 assert(i - last_sfu_inst  2 ||
(!writes_sfu(inst) 
-!writes_reg(inst, QPU_W_TMU0_S) 
-!writes_reg(inst, QPU_W_TMU1_S) 
-QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_COLOR_LOAD));
+sig != QPU_SIG_LOAD_TMU0 
+sig != QPU_SIG_LOAD_TMU1 
+sig != QPU_SIG_COLOR_LOAD));
 
 if (writes_sfu(inst))
 last_sfu_inst = i;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Drop the explicit scoreboard wait.

2014-12-01 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 6958c404caf3f4b2219ef686e2beeeaf48664905
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6958c404caf3f4b2219ef686e2beeeaf48664905

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 26 12:51:27 2014 -0800

vc4: Drop the explicit scoreboard wait.

This is actually implicitly handled by the TLB operations.

---

 src/gallium/drivers/vc4/vc4_qpu_emit.c |   23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c 
b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 3d7bdb4..e6e97cc 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -130,7 +130,6 @@ static void
 serialize_insts(struct vc4_compile *c)
 {
 int last_sfu_write = -10;
-bool scoreboard_wait_emitted = false;
 
 while (!is_empty_list(c-qpu_inst_list)) {
 struct queued_qpu_inst *q =
@@ -210,22 +209,18 @@ serialize_insts(struct vc4_compile *c)
  *  explicit Wait for Scoreboard signal or an implicit wait
  *  with the first tile-buffer read or write instruction.
  */
-if (!scoreboard_wait_emitted 
-(waddr_a == QPU_W_TLB_Z || waddr_m == QPU_W_TLB_Z ||
- waddr_a == QPU_W_TLB_COLOR_MS ||
- waddr_m == QPU_W_TLB_COLOR_MS ||
- waddr_a == QPU_W_TLB_COLOR_ALL ||
- waddr_m == QPU_W_TLB_COLOR_ALL ||
- QPU_GET_FIELD(q-inst, QPU_SIG) == QPU_SIG_COLOR_LOAD)) {
+if (waddr_a == QPU_W_TLB_Z ||
+waddr_m == QPU_W_TLB_Z ||
+waddr_a == QPU_W_TLB_COLOR_MS ||
+waddr_m == QPU_W_TLB_COLOR_MS ||
+waddr_a == QPU_W_TLB_COLOR_ALL ||
+waddr_m == QPU_W_TLB_COLOR_ALL ||
+QPU_GET_FIELD(q-inst, QPU_SIG) == QPU_SIG_COLOR_LOAD) {
 while (c-qpu_inst_count  3 ||
QPU_GET_FIELD(c-qpu_insts[c-qpu_inst_count - 
1],
  QPU_SIG) != QPU_SIG_NONE) {
 serialize_one_inst(c, qpu_NOP());
 }
-c-qpu_insts[c-qpu_inst_count - 1] =
-qpu_set_sig(c-qpu_insts[c-qpu_inst_count - 
1],
-QPU_SIG_WAIT_FOR_SCOREBOARD);
-scoreboard_wait_emitted = true;
 }
 
 serialize_one_inst(c, q-inst);
@@ -616,6 +611,10 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 serialize_one_inst(c, qpu_NOP());
 }
 
+/* thread end can't have TLB operations */
+if (qpu_inst_is_tlb(c-qpu_insts[c-qpu_inst_count - 1]))
+serialize_one_inst(c, qpu_NOP());
+
 c-qpu_insts[c-qpu_inst_count - 1] =
 qpu_set_sig(c-qpu_insts[c-qpu_inst_count - 1],
 QPU_SIG_PROG_END);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Also deal with VPM reads at thread end.

2014-12-01 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 334036fb640741e51ecc54b823866710e99c853d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=334036fb640741e51ecc54b823866710e99c853d

Author: Eric Anholt e...@anholt.net
Date:   Sun Nov 30 15:13:40 2014 -0800

vc4: Also deal with VPM reads at thread end.

Prevents a regression with QPU scheduling, which happens to put the no-op
reads for unused VPM contents at the end of the program.

---

 src/gallium/drivers/vc4/vc4_qpu_emit.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c 
b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 1d12d11..3d7bdb4 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -596,11 +596,15 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 
 serialize_insts(c);
 
-/* thread end can't have VPM write */
+/* thread end can't have VPM write or read */
 if (QPU_GET_FIELD(c-qpu_insts[c-qpu_inst_count - 1],
   QPU_WADDR_ADD) == QPU_W_VPM ||
 QPU_GET_FIELD(c-qpu_insts[c-qpu_inst_count - 1],
-  QPU_WADDR_MUL) == QPU_W_VPM) {
+  QPU_WADDR_MUL) == QPU_W_VPM ||
+QPU_GET_FIELD(c-qpu_insts[c-qpu_inst_count - 1],
+  QPU_RADDR_A) == QPU_R_VPM ||
+QPU_GET_FIELD(c-qpu_insts[c-qpu_inst_count - 1],
+  QPU_RADDR_B) == QPU_R_VPM) {
 serialize_one_inst(c, qpu_NOP());
 }
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add another check for invalid TLB scoreboard handling.

2014-12-01 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 2d5784c8254b4a0e3e04dd0f1e46ab1eb85612dd
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2d5784c8254b4a0e3e04dd0f1e46ab1eb85612dd

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 26 17:01:59 2014 -0800

vc4: Add another check for invalid TLB scoreboard handling.

This was caught by an assertion in the simulator.

---

 src/gallium/drivers/vc4/vc4_qpu.c  |   23 +++
 src/gallium/drivers/vc4/vc4_qpu.h  |3 +++
 src/gallium/drivers/vc4/vc4_qpu_validate.c |   21 +
 3 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu.c 
b/src/gallium/drivers/vc4/vc4_qpu.c
index a551a0f..093ca07 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -244,3 +244,26 @@ qpu_set_cond_mul(uint64_t inst, uint32_t sig)
 return (inst  ~QPU_COND_MUL_MASK) | QPU_SET_FIELD(sig, QPU_COND_MUL);
 }
 
+bool
+qpu_waddr_is_tlb(uint32_t waddr)
+{
+switch (waddr) {
+case QPU_W_TLB_COLOR_ALL:
+case QPU_W_TLB_COLOR_MS:
+case QPU_W_TLB_Z:
+return true;
+default:
+return false;
+}
+}
+
+bool
+qpu_inst_is_tlb(uint64_t inst)
+{
+uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
+
+return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||
+qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||
+sig == QPU_SIG_COLOR_LOAD ||
+sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
+}
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h 
b/src/gallium/drivers/vc4/vc4_qpu.h
index 2f37108..5f4caab 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -133,6 +133,9 @@ uint64_t qpu_set_sig(uint64_t inst, uint32_t sig);
 uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond);
 uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond);
 
+bool qpu_waddr_is_tlb(uint32_t waddr);
+bool qpu_inst_is_tlb(uint64_t inst);
+
 static inline uint64_t
 qpu_load_imm_f(struct qpu_reg dst, float val)
 {
diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c 
b/src/gallium/drivers/vc4/vc4_qpu_validate.c
index d043733..8fe5f41 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_validate.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c
@@ -91,11 +91,17 @@ writes_sfu(uint64_t inst)
 void
 vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
 {
+bool scoreboard_locked = false;
+
 for (int i = 0; i  num_inst; i++) {
 uint64_t inst = insts[i];
 
-if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END)
+if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) {
+if (qpu_inst_is_tlb(inst))
+scoreboard_locked = true;
+
 continue;
+}
 
 /* The Thread End instruction must not write to either 
physical
  *  regfile A or B.
@@ -103,6 +109,11 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
 assert(QPU_GET_FIELD(inst, QPU_WADDR_ADD) = 32);
 assert(QPU_GET_FIELD(inst, QPU_WADDR_MUL) = 32);
 
+/* Can't trigger an implicit wait on scoreboard in the program
+ * end instruction.
+ */
+assert(!qpu_inst_is_tlb(inst) || scoreboard_locked);
+
 /* Two delay slots will be executed. */
 assert(i + 2 = num_inst);
 
@@ -141,13 +152,7 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
 for (int i = 0; i  2; i++) {
 uint64_t inst = insts[i];
 
-assert(QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_COLOR_LOAD);
-assert(QPU_GET_FIELD(inst, QPU_SIG) !=
-   QPU_SIG_WAIT_FOR_SCOREBOARD);
-assert(!writes_reg(inst, QPU_W_TLB_COLOR_MS));
-assert(!writes_reg(inst, QPU_W_TLB_COLOR_ALL));
-assert(!writes_reg(inst, QPU_W_TLB_Z));
-
+assert(!qpu_inst_is_tlb(inst));
 }
 
 /* If TMU_NOSWAP is written, the write must be three instructions

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Introduce scheduling of QPU instructions.

2014-12-01 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 3fe4d8e1e39b47c9c5c4bfdd87300abd0c336a7e
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=3fe4d8e1e39b47c9c5c4bfdd87300abd0c336a7e

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 26 12:44:19 2014 -0800

vc4: Introduce scheduling of QPU instructions.

This doesn't reschedule much currently, just tries to fit things into the
regfile A/B write-versus-read slots (the cause of the improvements in
shader-db), and hide texture fetch latency by scheduling setup early and
results collection late (haven't performance tested it).  This
infrastructure will be important for doing instruction pairing, though.

shader-db2 results:
total instructions in shared programs: 61874 -> 59583 (-3.70%)
instructions in affected programs: 50677 -> 48386 (-4.52%)

---

 src/gallium/drivers/vc4/Makefile.sources   |1 +
 src/gallium/drivers/vc4/vc4_qir.h  |7 +
 src/gallium/drivers/vc4/vc4_qpu.c  |   12 +
 src/gallium/drivers/vc4/vc4_qpu.h  |3 +
 src/gallium/drivers/vc4/vc4_qpu_emit.c |  132 +-
 src/gallium/drivers/vc4/vc4_qpu_schedule.c |  693 
 6 files changed, 722 insertions(+), 126 deletions(-)

diff --git a/src/gallium/drivers/vc4/Makefile.sources 
b/src/gallium/drivers/vc4/Makefile.sources
index 6ec48ab..6bcb731 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -24,6 +24,7 @@ C_SOURCES := \
vc4_qpu_disasm.c \
vc4_qpu_emit.c \
vc4_qpu.h \
+   vc4_qpu_schedule.c \
vc4_qpu_validate.c \
vc4_query.c \
vc4_register_allocate.c \
diff --git a/src/gallium/drivers/vc4/vc4_qir.h 
b/src/gallium/drivers/vc4/vc4_qir.h
index cb02db5..0b76a2f 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -148,6 +148,11 @@ struct simple_node {
 struct simple_node *prev;
 };
 
+struct queued_qpu_inst {
+struct simple_node link;
+uint64_t inst;
+};
+
 struct qinst {
 struct simple_node link;
 
@@ -368,6 +373,8 @@ bool qir_opt_copy_propagation(struct vc4_compile *c);
 bool qir_opt_cse(struct vc4_compile *c);
 bool qir_opt_dead_code(struct vc4_compile *c);
 
+void qpu_schedule_instructions(struct vc4_compile *c);
+
 #define QIR_ALU0(name)   \
 static inline struct qreg\
 qir_##name(struct vc4_compile *c)\
diff --git a/src/gallium/drivers/vc4/vc4_qpu.c 
b/src/gallium/drivers/vc4/vc4_qpu.c
index 093ca07..723b361 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -22,6 +22,7 @@
  */
 
 #include <stdbool.h>
+#include "vc4_qir.h"
 #include "vc4_qpu.h"
 
 static uint64_t
@@ -267,3 +268,14 @@ qpu_inst_is_tlb(uint64_t inst)
 sig == QPU_SIG_COLOR_LOAD ||
 sig == QPU_SIG_WAIT_FOR_SCOREBOARD);
 }
+
+void
+qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)
+{
+if (c-qpu_inst_count = c-qpu_inst_size) {
+c-qpu_inst_size = MAX2(16, c-qpu_inst_size * 2);
+c-qpu_insts = realloc(c-qpu_insts,
+   c-qpu_inst_size * sizeof(uint64_t));
+}
+c-qpu_insts[c-qpu_inst_count++] = inst;
+}
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h 
b/src/gallium/drivers/vc4/vc4_qpu.h
index 5f4caab..bf41f72 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -30,6 +30,8 @@
 
 #include vc4_qpu_defines.h
 
+struct vc4_compile;
+
 struct qpu_reg {
 enum qpu_mux mux;
 uint8_t addr;
@@ -135,6 +137,7 @@ uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond);
 
 bool qpu_waddr_is_tlb(uint32_t waddr);
 bool qpu_inst_is_tlb(uint64_t inst);
+void qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst);
 
 static inline uint64_t
 qpu_load_imm_f(struct qpu_reg dst, float val)
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c 
b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index e6e97cc..3cb709f 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -41,11 +41,6 @@ vc4_dump_program(struct vc4_compile *c)
 }
 }
 
-struct queued_qpu_inst {
-struct simple_node link;
-uint64_t inst;
-};
-
 static void
 queue(struct vc4_compile *c, uint64_t inst)
 {
@@ -115,121 +110,6 @@ fixup_raddr_conflict(struct vc4_compile *c,
 *src1 = qpu_r3();
 }
 
-static void
-serialize_one_inst(struct vc4_compile *c, uint64_t inst)
-{
-if (c-qpu_inst_count = c-qpu_inst_size) {
-c-qpu_inst_size = MAX2(16, c-qpu_inst_size * 2);
-c-qpu_insts = realloc(c-qpu_insts,
-   c-qpu_inst_size * sizeof(uint64_t));
-}
-c-qpu_insts[c-qpu_inst_count++] = inst;
-}
-
-static void
-serialize_insts(struct vc4_compile *c)
-{
-int last_sfu_write

Mesa (master): vc4: Pair up QPU instructions when scheduling.

2014-12-01 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 29c7cf2b2ba850cf467167548d53383e1338fd5c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=29c7cf2b2ba850cf467167548d53383e1338fd5c

Author: Eric Anholt e...@anholt.net
Date:   Mon Dec  1 11:48:20 2014 -0800

vc4: Pair up QPU instructions when scheduling.

We've got two mostly-independent operations in each QPU instruction, so
try to pack two operations together.  This is fairly naive (doesn't track
read and write separately in instructions, doesn't convert ADD-based MOVs
into MUL-based movs, doesn't reorder across uniform loads), but does show
a decent improvement on shader-db-2.

total instructions in shared programs: 59583 -> 57651 (-3.24%)
instructions in affected programs: 47361 -> 45429 (-4.08%)

---

 src/gallium/drivers/vc4/vc4_qpu.c  |   62 +++---
 src/gallium/drivers/vc4/vc4_qpu.h  |2 +-
 src/gallium/drivers/vc4/vc4_qpu_schedule.c |   79 ++--
 3 files changed, 105 insertions(+), 38 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu.c 
b/src/gallium/drivers/vc4/vc4_qpu.c
index 723b361..54c79e9 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.c
+++ b/src/gallium/drivers/vc4/vc4_qpu.c
@@ -192,36 +192,58 @@ qpu_m_alu2(enum qpu_op_mul op,
 return inst;
 }
 
-static uint64_t
-merge_fields(uint64_t merge,
- uint64_t add, uint64_t mul,
+static bool
+merge_fields(uint64_t *merge,
+ uint64_t a, uint64_t b,
  uint64_t mask, uint64_t ignore)
 {
-if ((add  mask) == ignore)
-return (merge  ~mask) | (mul  mask);
-else if ((mul  mask) == ignore)
-return (merge  ~mask) | (add  mask);
-else {
-assert((add  mask) == (mul  mask));
-return merge;
+if ((a & mask) == ignore) {
+*merge = (*merge & ~mask) | (b & mask);
+} else if ((b & mask) == ignore) {
+*merge = (*merge & ~mask) | (a & mask);
+} else {
+if ((a & mask) != (b & mask))
+return false;
 }
+
+return true;
 }
 
 uint64_t
-qpu_inst(uint64_t add, uint64_t mul)
+qpu_merge_inst(uint64_t a, uint64_t b)
 {
-uint64_t merge = ((add  ~QPU_WADDR_MUL_MASK) |
-  (mul  ~QPU_WADDR_ADD_MASK));
+uint64_t merge = a | b;
+bool ok = true;
+
+if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP 
+QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP)
+return 0;
 
-merge = merge_fields(merge, add, mul, QPU_SIG_MASK,
- QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
+if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP 
+QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)
+return 0;
 
-merge = merge_fields(merge, add, mul, QPU_RADDR_A_MASK,
- QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A));
-merge = merge_fields(merge, add, mul, QPU_RADDR_B_MASK,
- QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
+ok = ok  merge_fields(merge, a, b, QPU_SIG_MASK,
+QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));
 
-return merge;
+/* Misc fields that have to match exactly. */
+ok = ok  merge_fields(merge, a, b, QPU_SF | QPU_WS | QPU_PM,
+~0);
+
+ok = ok  merge_fields(merge, a, b, QPU_RADDR_A_MASK,
+QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A));
+ok = ok  merge_fields(merge, a, b, QPU_RADDR_B_MASK,
+QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));
+
+ok = ok  merge_fields(merge, a, b, QPU_WADDR_ADD_MASK,
+QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));
+ok = ok  merge_fields(merge, a, b, QPU_WADDR_MUL_MASK,
+QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));
+
+if (ok)
+return merge;
+else
+return 0;
 }
 
 uint64_t
diff --git a/src/gallium/drivers/vc4/vc4_qpu.h 
b/src/gallium/drivers/vc4/vc4_qpu.h
index bf41f72..eb06d1a 100644
--- a/src/gallium/drivers/vc4/vc4_qpu.h
+++ b/src/gallium/drivers/vc4/vc4_qpu.h
@@ -129,7 +129,7 @@ uint64_t qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst,
 struct qpu_reg src0, struct qpu_reg src1);
 uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
 struct qpu_reg src0, struct qpu_reg src1);
-uint64_t qpu_inst(uint64_t add, uint64_t mul);
+uint64_t qpu_merge_inst(uint64_t a, uint64_t b);
 uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val);
 uint64_t qpu_set_sig(uint64_t inst, uint32_t sig);
 uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond);
diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c 
b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index f309034..8aa8374 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4

Mesa (master): vc4: Fix some inconsistent indentation.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 52a7cb2ec465497bf6a4e20558340b45fbe7d7c3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=52a7cb2ec465497bf6a4e20558340b45fbe7d7c3

Author: Eric Anholt e...@anholt.net
Date:   Thu Nov 20 19:44:15 2014 -0800

vc4: Fix some inconsistent indentation.

---

 src/gallium/drivers/vc4/vc4_screen.c |   12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_screen.c 
b/src/gallium/drivers/vc4/vc4_screen.c
index 96536ac..abd44ee 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -284,8 +284,8 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, 
unsigned shader,
 return 8;
 else
 return 16;
-   case PIPE_SHADER_CAP_MAX_OUTPUTS:
-   return shader == PIPE_SHADER_FRAGMENT ? 1 : 8;
+case PIPE_SHADER_CAP_MAX_OUTPUTS:
+return shader == PIPE_SHADER_FRAGMENT ? 1 : 8;
 case PIPE_SHADER_CAP_MAX_TEMPS:
 return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */
 case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
@@ -386,12 +386,12 @@ vc4_screen_is_format_supported(struct pipe_screen 
*pscreen,
 retval |= PIPE_BIND_TRANSFER_WRITE;
 
 #if 0
-   if (retval != usage) {
-   fprintf(stderr,
+if (retval != usage) {
+fprintf(stderr,
 not supported: format=%s, target=%d, sample_count=%d, 

 usage=0x%x, retval=0x%x\n, util_format_name(format),
 target, sample_count, usage, retval);
-   }
+}
 #endif
 
 return retval == usage;
@@ -416,7 +416,7 @@ vc4_screen_create(int fd)
 
 vc4_fence_init(screen);
 
-   vc4_debug = debug_get_option_vc4_debug();
+vc4_debug = debug_get_option_vc4_debug();
 if (vc4_debug & VC4_DEBUG_SHADERDB)
 vc4_debug |= VC4_DEBUG_NORAST;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add a note about a piece of errata I've learned about.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: fa74ec7e98edb185806967fb17dd2d885727aa95
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=fa74ec7e98edb185806967fb17dd2d885727aa95

Author: Eric Anholt e...@anholt.net
Date:   Thu Nov 20 19:41:26 2014 -0800

vc4: Add a note about a piece of errata I've learned about.

Right now in my environment I've only got a small CMA area, so this
constraint ends up holding.

---

 src/gallium/drivers/vc4/vc4_draw.c |4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_draw.c 
b/src/gallium/drivers/vc4/vc4_draw.c
index e70506b..2d82462 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -48,6 +48,10 @@ vc4_start_draw(struct vc4_context *vc4)
  * BO allocations align to that anyway), then for some reason the
  * simulator wants an extra page available, even if you have overflow
  * memory set up.
+ *
+ * XXX: The binner only does 28-bit addressing math, so the tile alloc
+ * and tile state should be in the same BO and that BO needs to not
+ * cross a 256MB boundary, somehow.
  */
 uint32_t tile_alloc_size = 32 * tilew * tileh;
 tile_alloc_size = align(tile_alloc_size, 4096);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Don't forget to actually connect the fence code.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 6f4adb7483c86eabfb1b89f19f6b48b4413a79d1
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f4adb7483c86eabfb1b89f19f6b48b4413a79d1

Author: Eric Anholt e...@anholt.net
Date:   Thu Nov 20 19:43:07 2014 -0800

vc4: Don't forget to actually connect the fence code.

I thought I'd tested this.

---

 src/gallium/drivers/vc4/vc4_screen.c |2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_screen.c 
b/src/gallium/drivers/vc4/vc4_screen.c
index c18760c..96536ac 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -414,6 +414,8 @@ vc4_screen_create(int fd)
 
 screen->fd = fd;
 
+vc4_fence_init(screen);
+
vc4_debug = debug_get_option_vc4_debug();
 if (vc4_debug & VC4_DEBUG_SHADERDB)
 vc4_debug |= VC4_DEBUG_NORAST;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): r600: Drop the /* gap */ notes.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 2f01cc8417243884ac4767b0e53a5df20db2dc96
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f01cc8417243884ac4767b0e53a5df20db2dc96

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 13:24:59 2014 -0800

r600: Drop the /* gap */ notes.

These are obviously the gaps already, due to the bare numbers with
unsupported implementations.

This makes inserting new gaps less irritating.

---

 src/gallium/drivers/r600/r600_shader.c |   19 ---
 1 file changed, 19 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index a772dee..29d27ce 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -7210,7 +7210,6 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
-   /* gap */
{22,0, ALU_OP0_NOP, tgsi_unsupported},
{23,0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_FRC,   0, ALU_OP1_FRACT, tgsi_op2},
@@ -7221,7 +7220,6 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_LG2,   0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_POW,   0, ALU_OP0_NOP, tgsi_pow},
{TGSI_OPCODE_XPD,   0, ALU_OP0_NOP, tgsi_xpd},
-   /* gap */
{32,0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ABS,   0, ALU_OP1_MOV, tgsi_op2},
{TGSI_OPCODE_RCC,   0, ALU_OP0_NOP, tgsi_unsupported},
@@ -7278,7 +7276,6 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_NOT,   0, ALU_OP1_NOT_INT, tgsi_op2},
{TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL,   0, ALU_OP2_LSHL_INT, tgsi_op2_trans},
-   /* gap */
{88,0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_AND,   0, ALU_OP2_AND_INT, tgsi_op2},
{TGSI_OPCODE_OR,0, ALU_OP2_OR_INT, tgsi_op2},
@@ -7295,7 +7292,6 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_ENDLOOP,   0, ALU_OP0_NOP, tgsi_endloop},
{TGSI_OPCODE_ENDSUB,0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_TXQ_LZ,0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex},
-   /* gap */
{104,   0, ALU_OP0_NOP, tgsi_unsupported},
{105,   0, ALU_OP0_NOP, tgsi_unsupported},
{106,   0, ALU_OP0_NOP, tgsi_unsupported},
@@ -7306,12 +7302,10 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_FSNE,  0, ALU_OP2_SETNE_DX10, tgsi_op2_swap},
{TGSI_OPCODE_NRM4,  0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_CALLNZ,0, ALU_OP0_NOP, tgsi_unsupported},
-   /* gap */
{114,   0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_BREAKC,0, ALU_OP0_NOP, tgsi_loop_breakc},
{TGSI_OPCODE_KILL_IF,   0, ALU_OP2_KILLGT, tgsi_kill},  /* conditional 
kill */
{TGSI_OPCODE_END,   0, ALU_OP0_NOP, tgsi_end},  /* aka HALT */
-   /* gap */
{118,   0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_F2I,   0, ALU_OP1_FLT_TO_INT, tgsi_op2_trans},
{TGSI_OPCODE_IDIV,  0, ALU_OP0_NOP, tgsi_idiv},
@@ -7415,7 +7409,6 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
-   /* gap */
{22,0, ALU_OP0_NOP, tgsi_unsupported},
{23,0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_FRC,   0, ALU_OP1_FRACT, tgsi_op2},
@@ -7426,7 +7419,6 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_LG2,   0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate},
{TGSI_OPCODE_POW,   0, ALU_OP0_NOP, tgsi_pow},
{TGSI_OPCODE_XPD,   0, ALU_OP0_NOP, tgsi_xpd},
-   /* gap */
{32,0, ALU_OP0_NOP, tgsi_unsupported},
{TGSI_OPCODE_ABS,   0, ALU_OP1_MOV, tgsi_op2},
{TGSI_OPCODE_RCC,   0, ALU_OP0_NOP, tgsi_unsupported},
@@ -7483,7 +7475,6 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_NOT,   0, ALU_OP1_NOT_INT, tgsi_op2},
{TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2},
{TGSI_OPCODE_SHL,   0, ALU_OP2_LSHL_INT, tgsi_op2

Mesa (master): r300: Drop the /* gap */ notes.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 386c3fcb14354b131cd51b902d3aac7c15169658
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=386c3fcb14354b131cd51b902d3aac7c15169658

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 13:28:07 2014 -0800

r300: Drop the /* gap */ notes.

This switch statement's code structure isn't dependent on the numbers of
the opcodes at all.

---

 src/gallium/drivers/r300/r300_tgsi_to_rc.c |3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c 
b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
index 4448f88..7ea9cd2 100644
--- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c
+++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c
@@ -53,7 +53,6 @@ static unsigned translate_opcode(unsigned opcode)
 case TGSI_OPCODE_LRP: return RC_OPCODE_LRP;
 case TGSI_OPCODE_CND: return RC_OPCODE_CND;
  /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */
-/* gap */
 case TGSI_OPCODE_FRC: return RC_OPCODE_FRC;
 case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP;
 case TGSI_OPCODE_FLR: return RC_OPCODE_FLR;
@@ -62,7 +61,6 @@ static unsigned translate_opcode(unsigned opcode)
 case TGSI_OPCODE_LG2: return RC_OPCODE_LG2;
 case TGSI_OPCODE_POW: return RC_OPCODE_POW;
 case TGSI_OPCODE_XPD: return RC_OPCODE_XPD;
-/* gap */
 case TGSI_OPCODE_ABS: return RC_OPCODE_ABS;
  /* case TGSI_OPCODE_RCC: return RC_OPCODE_RCC; */
 case TGSI_OPCODE_DPH: return RC_OPCODE_DPH;
@@ -132,7 +130,6 @@ static unsigned translate_opcode(unsigned opcode)
  /* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */
  /* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */
 case TGSI_OPCODE_NOP: return RC_OPCODE_NOP;
-/* gap */
  /* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */
  /* case TGSI_OPCODE_CALLNZ: return RC_OPCODE_CALLNZ; */
  /* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): nine: Don't reference the dead TGSI_OPCODE_NRM.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 56fd7c63617ae9ec0f795c9685a89f54073cb0ea
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=56fd7c63617ae9ec0f795c9685a89f54073cb0ea

Author: Jose Fonseca jfons...@vmware.com
Date:   Thu Nov 20 14:21:04 2014 -0800

nine: Don't reference the dead TGSI_OPCODE_NRM.

The translation is lowering it to not using TGSI_OPCODE_NRM, anyway.

v2: Extracted from a larger patch by Jose that also dropped DP2A usage.

Reviewed-by: Jose Fonseca jfons...@vmware.com
Reviewed-by: Axel Davy axel.d...@ens.fr
Reviewed-by: David Heidelberg da...@ixit.cz

---

 src/gallium/state_trackers/nine/nine_shader.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/nine/nine_shader.c 
b/src/gallium/state_trackers/nine/nine_shader.c
index 9b324c3..85cc190 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -2316,7 +2316,7 @@ struct sm1_op_info inst_table[] =
 _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */
 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* 
ignore src1,2 */
 _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL),
-_OPI(NRM, NRM, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM 
doesn't fit */
+_OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM 
doesn't fit */
 
 _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)),
 _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)),

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): gallium: Drop unused X2D opcode.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 8c822b1e912de9af6bb16a052bf9f188c2f4a33f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8c822b1e912de9af6bb16a052bf9f188c2f4a33f

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 14:30:03 2014 -0800

gallium: Drop unused X2D opcode.

Nothing in the tree generates it.

Reviewed-by: Jose Fonseca jfons...@vmware.com

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c |1 -
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |6 ---
 src/gallium/auxiliary/tgsi/tgsi_exec.c  |   45 ---
 src/gallium/auxiliary/tgsi/tgsi_info.c  |2 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h|1 -
 src/gallium/docs/source/tgsi.rst|   16 
 src/gallium/drivers/ilo/shader/toy_tgsi.c   |2 -
 src/gallium/drivers/r300/r300_tgsi_to_rc.c  |1 -
 src/gallium/drivers/r600/r600_shader.c  |6 +--
 src/gallium/include/pipe/p_shader_tokens.h  |1 -
 10 files changed, 4 insertions(+), 77 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index 44a44a6..c5d3679 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -211,7 +211,6 @@ lp_build_tgsi_inst_llvm(
case TGSI_OPCODE_UP2US:
case TGSI_OPCODE_UP4B:
case TGSI_OPCODE_UP4UB:
-   case TGSI_OPCODE_X2D:
case TGSI_OPCODE_BRA:
case TGSI_OPCODE_PUSHA:
case TGSI_OPCODE_POPA:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index ed1798d..d1425bb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -792,12 +792,6 @@ lp_emit_instruction_aos(
   return FALSE;
   break;
 
-   case TGSI_OPCODE_X2D:
-  /* deprecated? */
-  assert(0);
-  return FALSE;
-  break;
-
case TGSI_OPCODE_ARR:
   src0 = lp_build_emit_fetch(bld-bld_base, inst, 0, LP_CHAN_ALL);
   dst0 = lp_build_round(bld-bld_base.base, src0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 578d4d8..b191b5c 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2784,47 +2784,6 @@ exec_scs(struct tgsi_exec_machine *mach,
 }
 
 static void
-exec_x2d(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
-   union tgsi_exec_channel r[4];
-   union tgsi_exec_channel d[2];
-
-   fetch_source(mach, r[0], inst-Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
-   fetch_source(mach, r[1], inst-Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
-   if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_XZ) {
-  fetch_source(mach, r[2], inst-Src[2], TGSI_CHAN_X, 
TGSI_EXEC_DATA_FLOAT);
-  micro_mul(r[2], r[2], r[0]);
-  fetch_source(mach, r[3], inst-Src[2], TGSI_CHAN_Y, 
TGSI_EXEC_DATA_FLOAT);
-  micro_mul(r[3], r[3], r[1]);
-  micro_add(r[2], r[2], r[3]);
-  fetch_source(mach, r[3], inst-Src[0], TGSI_CHAN_X, 
TGSI_EXEC_DATA_FLOAT);
-  micro_add(d[0], r[2], r[3]);
-   }
-   if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_YW) {
-  fetch_source(mach, r[2], inst-Src[2], TGSI_CHAN_Z, 
TGSI_EXEC_DATA_FLOAT);
-  micro_mul(r[2], r[2], r[0]);
-  fetch_source(mach, r[3], inst-Src[2], TGSI_CHAN_W, 
TGSI_EXEC_DATA_FLOAT);
-  micro_mul(r[3], r[3], r[1]);
-  micro_add(r[2], r[2], r[3]);
-  fetch_source(mach, r[3], inst-Src[0], TGSI_CHAN_Y, 
TGSI_EXEC_DATA_FLOAT);
-  micro_add(d[1], r[2], r[3]);
-   }
-   if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_X) {
-  store_dest(mach, d[0], inst-Dst[0], inst, TGSI_CHAN_X, 
TGSI_EXEC_DATA_FLOAT);
-   }
-   if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_Y) {
-  store_dest(mach, d[1], inst-Dst[0], inst, TGSI_CHAN_Y, 
TGSI_EXEC_DATA_FLOAT);
-   }
-   if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_Z) {
-  store_dest(mach, d[0], inst-Dst[0], inst, TGSI_CHAN_Z, 
TGSI_EXEC_DATA_FLOAT);
-   }
-   if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_W) {
-  store_dest(mach, d[1], inst-Dst[0], inst, TGSI_CHAN_W, 
TGSI_EXEC_DATA_FLOAT);
-   }
-}
-
-static void
 exec_rfl(struct tgsi_exec_machine *mach,
  const struct tgsi_full_instruction *inst)
 {
@@ -3908,10 +3867,6 @@ exec_instruction(
   assert (0);
   break;
 
-   case TGSI_OPCODE_X2D:
-  exec_x2d(mach, inst);
-  break;
-
case TGSI_OPCODE_ARR:
   exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_FLOAT);
   break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index b94f5ac..94de670 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -96,7 +96,7 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 1, 1, 0, 0, 0, 0, COMP, UP2US, TGSI_OPCODE_UP2US

Mesa (master): gallium: Drop the NRM and NRM4 opcodes.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: d4864cdf15ccd30f0e82d07fd0e9db8a0c115cda
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d4864cdf15ccd30f0e82d07fd0e9db8a0c115cda

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 13:13:59 2014 -0800

gallium: Drop the NRM and NRM4 opcodes.

They weren't generated in tree, and as far as I know all hardware had to
lower it to a DP, RSQ, MUL.

Reviewed-by: Jose Fonseca jfons...@vmware.com

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |5 --
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c |   95 ---
 src/gallium/auxiliary/tgsi/tgsi_exec.c  |   72 -
 src/gallium/auxiliary/tgsi/tgsi_info.c  |4 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h|2 -
 src/gallium/docs/source/tgsi.rst|   34 
 src/gallium/drivers/ilo/shader/toy_tgsi.c   |   89 -
 src/gallium/drivers/r300/r300_tgsi_to_rc.c  |2 -
 src/gallium/drivers/r600/r600_shader.c  |   12 +--
 src/gallium/drivers/svga/svga_tgsi_insn.c   |   38 -
 src/gallium/include/pipe/p_shader_tokens.h  |4 +-
 11 files changed, 10 insertions(+), 347 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index f2fc7b0..7829a7e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -852,11 +852,6 @@ lp_emit_instruction_aos(
   dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS);
   break;
 
-   case TGSI_OPCODE_NRM:
-  /* fall-through */
-   case TGSI_OPCODE_NRM4:
-  return FALSE;
-
case TGSI_OPCODE_DIV:
   assert(0);
   return FALSE;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index 05618bc..76b9d69 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -3507,99 +3507,6 @@ cont_emit(
lp_exec_continue(bld-exec_mask);
 }
 
-/* XXX: Refactor and move it to lp_bld_tgsi_action.c
- *
- * XXX: What do the comments about xmm registers mean?  Maybe they are left 
over
- * from old code, but there is no garauntee that LLVM will use those registers
- * for this code.
- *
- * XXX: There should be no calls to lp_build_emit_fetch in this function.  This
- * should be handled by the emit_data-fetch_args function. */
-static void
-nrm_emit(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   LLVMValueRef tmp0, tmp1;
-   LLVMValueRef tmp4 = NULL;
-   LLVMValueRef tmp5 = NULL;
-   LLVMValueRef tmp6 = NULL;
-   LLVMValueRef tmp7 = NULL;
-   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
-
-   uint dims = (emit_data-inst-Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 
4;
-
-  if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_X) ||
-  TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_Y) ||
-  TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_Z) ||
-  (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_W)  dims == 
4)) {
-
-  /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). */
-
-  /* xmm4 = src.x */
-  /* xmm0 = src.x * src.x */
-  tmp0 = lp_build_emit_fetch(bld-bld_base, emit_data-inst, 0, 
TGSI_CHAN_X);
-  if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_X)) {
- tmp4 = tmp0;
-  }
-  tmp0 = lp_build_mul( bld-bld_base.base, tmp0, tmp0);
-
-  /* xmm5 = src.y */
-  /* xmm0 = xmm0 + src.y * src.y */
-  tmp1 = lp_build_emit_fetch(bld-bld_base, emit_data-inst, 0, 
TGSI_CHAN_Y);
-  if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_Y)) {
- tmp5 = tmp1;
-  }
-  tmp1 = lp_build_mul( bld-bld_base.base, tmp1, tmp1);
-  tmp0 = lp_build_add( bld-bld_base.base, tmp0, tmp1);
-
-  /* xmm6 = src.z */
-  /* xmm0 = xmm0 + src.z * src.z */
-  tmp1 = lp_build_emit_fetch(bld-bld_base, emit_data-inst, 0, 
TGSI_CHAN_Z);
-  if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_Z)) {
- tmp6 = tmp1;
-  }
-  tmp1 = lp_build_mul( bld-bld_base.base, tmp1, tmp1);
-  tmp0 = lp_build_add( bld-bld_base.base, tmp0, tmp1);
-
-  if (dims == 4) {
- /* xmm7 = src.w */
- /* xmm0 = xmm0 + src.w * src.w */
- tmp1 = lp_build_emit_fetch(bld-bld_base, emit_data-inst, 0, 
TGSI_CHAN_W);
- if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_W)) {
-tmp7 = tmp1;
- }
- tmp1 = lp_build_mul( bld-bld_base.base, tmp1, tmp1);
- tmp0 = lp_build_add( bld-bld_base.base, tmp0, tmp1);
-  }
-  /* xmm1 = 1 / sqrt(xmm0) */
-  tmp1 = lp_build_rsqrt( bld-bld_base.base, tmp0);
-   /* dst.x = xmm1 * src.x */
-  if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_X

Mesa (master): ilo: Drop the explicit initialization of gaps in TGSI opcodes.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 7361d5ba63dda35683569e76caa33f886304958f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7361d5ba63dda35683569e76caa33f886304958f

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 13:40:50 2014 -0800

ilo: Drop the explicit initialization of gaps in TGSI opcodes.

The nice thing about the good way of initializing arrays like this is that
you don't need to initialize everything in order, or even everything at
all.  Taking advantage of that only needs a tiny fixup to deal with the
default NULL value of the pointers.

I haven't dropped the initialization of opcodes that exist and are unsupported.

---

 src/gallium/drivers/ilo/shader/toy_tgsi.c |   28 ++--
 1 file changed, 6 insertions(+), 22 deletions(-)

diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c 
b/src/gallium/drivers/ilo/shader/toy_tgsi.c
index 7c74bad..1ba0606 100644
--- a/src/gallium/drivers/ilo/shader/toy_tgsi.c
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c
@@ -853,8 +853,6 @@ static const toy_tgsi_translate 
aos_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_CND]  = aos_CND,
[TGSI_OPCODE_SQRT] = aos_simple,
[TGSI_OPCODE_DP2A] = aos_DP2A,
-   [22]   = aos_unsupported,
-   [23]   = aos_unsupported,
[TGSI_OPCODE_FRC]  = aos_simple,
[TGSI_OPCODE_CLAMP]= aos_CLAMP,
[TGSI_OPCODE_FLR]  = aos_simple,
@@ -863,7 +861,6 @@ static const toy_tgsi_translate 
aos_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_LG2]  = aos_simple,
[TGSI_OPCODE_POW]  = aos_simple,
[TGSI_OPCODE_XPD]  = aos_XPD,
-   [32]   = aos_unsupported,
[TGSI_OPCODE_ABS]  = aos_simple,
[TGSI_OPCODE_RCC]  = aos_unsupported,
[TGSI_OPCODE_DPH]  = aos_simple,
@@ -907,11 +904,8 @@ static const toy_tgsi_translate 
aos_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_BRK]  = aos_BRK,
[TGSI_OPCODE_IF]   = aos_simple,
[TGSI_OPCODE_UIF]  = aos_simple,
-   [76]   = aos_unsupported,
[TGSI_OPCODE_ELSE] = aos_simple,
[TGSI_OPCODE_ENDIF]= aos_simple,
-   [79]   = aos_unsupported,
-   [80]   = aos_unsupported,
[TGSI_OPCODE_PUSHA]= aos_unsupported,
[TGSI_OPCODE_POPA] = aos_unsupported,
[TGSI_OPCODE_CEIL] = aos_CEIL,
@@ -919,7 +913,6 @@ static const toy_tgsi_translate 
aos_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_NOT]  = aos_simple,
[TGSI_OPCODE_TRUNC]= aos_simple,
[TGSI_OPCODE_SHL]  = aos_simple,
-   [88]   = aos_unsupported,
[TGSI_OPCODE_AND]  = aos_simple,
[TGSI_OPCODE_OR]   = aos_simple,
[TGSI_OPCODE_MOD]  = aos_simple,
@@ -935,9 +928,6 @@ static const toy_tgsi_translate 
aos_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_ENDLOOP]  = aos_ENDLOOP,
[TGSI_OPCODE_ENDSUB]   = aos_unsupported,
[TGSI_OPCODE_TXQ_LZ]   = aos_tex,
-   [104]  = aos_unsupported,
-   [105]  = aos_unsupported,
-   [106]  = aos_unsupported,
[TGSI_OPCODE_NOP]  = aos_simple,
[TGSI_OPCODE_FSEQ] = aos_set_on_cond,
[TGSI_OPCODE_FSGE] = aos_set_on_cond,
@@ -948,7 +938,6 @@ static const toy_tgsi_translate 
aos_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_BREAKC]   = aos_unsupported,
[TGSI_OPCODE_KILL_IF]  = aos_simple,
[TGSI_OPCODE_END]  = aos_simple,
-   [118]  = aos_unsupported,
[TGSI_OPCODE_F2I]  = aos_simple,
[TGSI_OPCODE_IDIV] = aos_simple,
[TGSI_OPCODE_IMAX] = aos_simple,
@@ -1469,8 +1458,6 @@ static const toy_tgsi_translate 
soa_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_CND]  = soa_per_channel,
[TGSI_OPCODE_SQRT] = soa_scalar_replicate,
[TGSI_OPCODE_DP2A] = soa_dot_product,
-   [22]   = soa_unsupported,
-   [23]   = soa_unsupported,
[TGSI_OPCODE_FRC]  = soa_per_channel,
[TGSI_OPCODE_CLAMP]= soa_per_channel,
[TGSI_OPCODE_FLR]  = soa_per_channel,
@@ -1479,7 +1466,6 @@ static const toy_tgsi_translate 
soa_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_LG2]  = soa_scalar_replicate,
[TGSI_OPCODE_POW]  = soa_scalar_replicate,
[TGSI_OPCODE_XPD]  = soa_XPD,
-   [32]   = soa_unsupported,
[TGSI_OPCODE_ABS]  = soa_per_channel,
[TGSI_OPCODE_RCC]  = soa_unsupported,
[TGSI_OPCODE_DPH]  = soa_dot_product,
@@ -1523,11 +1509,8 @@ static const toy_tgsi_translate 
soa_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_BRK]  = soa_passthrough,
[TGSI_OPCODE_IF

Mesa (master): mesa: Drop unused SFL/STR opcodes.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 868f95f1da74cf6dd7468cba1b56664aad585ccb
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=868f95f1da74cf6dd7468cba1b56664aad585ccb

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 16:35:05 2014 -0800

mesa: Drop unused SFL/STR opcodes.

They're part of NV_vertex_program2, which I'm pretty sure we're never
going to support.

Reviewed-by: Jose Fonseca jfons...@vmware.com
Reviewed-by: Ian Romanick ian.d.roman...@intel.com

---

 src/mesa/program/prog_execute.c |   12 
 src/mesa/program/prog_instruction.c |2 --
 src/mesa/program/prog_instruction.h |2 --
 3 files changed, 16 deletions(-)

diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index fcc9ed5..e59ae70 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -1279,12 +1279,6 @@ _mesa_execute_program(struct gl_context * ctx,
 }
  }
  break;
-  case OPCODE_SFL: /* set false, operands ignored */
- {
-static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
-store_vector4(inst, machine, result);
- }
- break;
   case OPCODE_SGE: /* set on greater or equal */
  {
 GLfloat a[4], b[4], result[4];
@@ -1395,12 +1389,6 @@ _mesa_execute_program(struct gl_context * ctx,
 store_vector4(inst, machine, result);
  }
  break;
-  case OPCODE_STR: /* set true, operands ignored */
- {
-static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
-store_vector4(inst, machine, result);
- }
- break;
   case OPCODE_SUB:
  {
 GLfloat a[4], b[4], result[4];
diff --git a/src/mesa/program/prog_instruction.c 
b/src/mesa/program/prog_instruction.c
index e2eadc3..abe663d 100644
--- a/src/mesa/program/prog_instruction.c
+++ b/src/mesa/program/prog_instruction.c
@@ -202,7 +202,6 @@ static const struct instruction_info InstInfo[MAX_OPCODE] = 
{
{ OPCODE_RSQ,RSQ, 1, 1 },
{ OPCODE_SCS,SCS, 1, 1 },
{ OPCODE_SEQ,SEQ, 2, 1 },
-   { OPCODE_SFL,SFL, 0, 1 },
{ OPCODE_SGE,SGE, 2, 1 },
{ OPCODE_SGT,SGT, 2, 1 },
{ OPCODE_SIN,SIN, 1, 1 },
@@ -210,7 +209,6 @@ static const struct instruction_info InstInfo[MAX_OPCODE] = 
{
{ OPCODE_SLT,SLT, 2, 1 },
{ OPCODE_SNE,SNE, 2, 1 },
{ OPCODE_SSG,SSG, 1, 1 },
-   { OPCODE_STR,STR, 0, 1 },
{ OPCODE_SUB,SUB, 2, 1 },
{ OPCODE_SWZ,SWZ, 1, 1 },
{ OPCODE_TEX,TEX, 1, 1 },
diff --git a/src/mesa/program/prog_instruction.h 
b/src/mesa/program/prog_instruction.h
index b9604e5..4cca975 100644
--- a/src/mesa/program/prog_instruction.h
+++ b/src/mesa/program/prog_instruction.h
@@ -198,7 +198,6 @@ typedef enum prog_opcode {
OPCODE_RSQ,   /*   XX   X   X X   */
OPCODE_SCS,   /*X X   */
OPCODE_SEQ,   /*2   X X   */
-   OPCODE_SFL,   /*2   X */
OPCODE_SGE,   /*   XX   X   X X   */
OPCODE_SGT,   /*2   X X   */
OPCODE_SIN,   /*X   2   X X   */
@@ -206,7 +205,6 @@ typedef enum prog_opcode {
OPCODE_SLT,   /*   XX   X   X X   */
OPCODE_SNE,   /*2   X X   */
OPCODE_SSG,   /*2 X   */
-   OPCODE_STR,   /*2   X */
OPCODE_SUB,   /*   XX   1.1 X X   */
OPCODE_SWZ,   /*   XX X   */
OPCODE_TEX,   /*X   3   X X   */

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): mesa: Drop unused NV_fragment_program opcodes.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: a3688d686f147f4252d19b298ae26d4ac72c2e08
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a3688d686f147f4252d19b298ae26d4ac72c2e08

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 16:39:49 2014 -0800

mesa: Drop unused NV_fragment_program opcodes.

The extension itself was deleted 2 years ago.  There are still some
prog_instruction opcodes from NV_fp that exist because they're used by
ir_to_mesa.cpp, though.

Reviewed-by: Jose Fonseca jfons...@vmware.com
Reviewed-by: Ian Romanick ian.d.roman...@intel.com

---

 src/mesa/program/prog_execute.c |  144 ---
 src/mesa/program/prog_instruction.c |   10 ---
 src/mesa/program/prog_instruction.h |   10 ---
 src/mesa/program/program_lexer.l|   13 
 4 files changed, 177 deletions(-)

diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c
index e59ae70..650c40f 100644
--- a/src/mesa/program/prog_execute.c
+++ b/src/mesa/program/prog_execute.c
@@ -1119,77 +1119,6 @@ _mesa_execute_program(struct gl_context * ctx,
  break;
   case OPCODE_NOP:
  break;
-  case OPCODE_PK2H:/* pack two 16-bit floats in one 32-bit float */
- {
-GLfloat a[4];
-GLuint result[4];
-GLhalfNV hx, hy;
-fetch_vector4(inst-SrcReg[0], machine, a);
-hx = _mesa_float_to_half(a[0]);
-hy = _mesa_float_to_half(a[1]);
-result[0] =
-result[1] =
-result[2] =
-result[3] = hx | (hy  16);
-store_vector4ui(inst, machine, result);
- }
- break;
-  case OPCODE_PK2US:   /* pack two GLushorts into one 32-bit float */
- {
-GLfloat a[4];
-GLuint result[4], usx, usy;
-fetch_vector4(inst-SrcReg[0], machine, a);
-a[0] = CLAMP(a[0], 0.0F, 1.0F);
-a[1] = CLAMP(a[1], 0.0F, 1.0F);
-usx = F_TO_I(a[0] * 65535.0F);
-usy = F_TO_I(a[1] * 65535.0F);
-result[0] =
-result[1] =
-result[2] =
-result[3] = usx | (usy  16);
-store_vector4ui(inst, machine, result);
- }
- break;
-  case OPCODE_PK4B:/* pack four GLbytes into one 32-bit float */
- {
-GLfloat a[4];
-GLuint result[4], ubx, uby, ubz, ubw;
-fetch_vector4(inst-SrcReg[0], machine, a);
-a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
-a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
-a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
-a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
-ubx = F_TO_I(127.0F * a[0] + 128.0F);
-uby = F_TO_I(127.0F * a[1] + 128.0F);
-ubz = F_TO_I(127.0F * a[2] + 128.0F);
-ubw = F_TO_I(127.0F * a[3] + 128.0F);
-result[0] =
-result[1] =
-result[2] =
-result[3] = ubx | (uby  8) | (ubz  16) | (ubw  24);
-store_vector4ui(inst, machine, result);
- }
- break;
-  case OPCODE_PK4UB:   /* pack four GLubytes into one 32-bit float */
- {
-GLfloat a[4];
-GLuint result[4], ubx, uby, ubz, ubw;
-fetch_vector4(inst-SrcReg[0], machine, a);
-a[0] = CLAMP(a[0], 0.0F, 1.0F);
-a[1] = CLAMP(a[1], 0.0F, 1.0F);
-a[2] = CLAMP(a[2], 0.0F, 1.0F);
-a[3] = CLAMP(a[3], 0.0F, 1.0F);
-ubx = F_TO_I(255.0F * a[0]);
-uby = F_TO_I(255.0F * a[1]);
-ubz = F_TO_I(255.0F * a[2]);
-ubw = F_TO_I(255.0F * a[3]);
-result[0] =
-result[1] =
-result[2] =
-result[3] = ubx | (uby  8) | (ubz  16) | (ubw  24);
-store_vector4ui(inst, machine, result);
- }
- break;
   case OPCODE_POW:
  {
 GLfloat a[4], b[4], result[4];
@@ -1224,20 +1153,6 @@ _mesa_execute_program(struct gl_context * ctx,
 pc = machine-CallStack[--machine-StackDepth] - 1;
  }
  break;
-  case OPCODE_RFL: /* reflection vector */
- {
-GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
-fetch_vector4(inst-SrcReg[0], machine, axis);
-fetch_vector4(inst-SrcReg[1], machine, dir);
-tmpW = DOT3(axis, axis);
-tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
-result[0] = tmpX * axis[0] - dir[0];
-result[1] = tmpX * axis[1] - dir[1];
-result[2] = tmpX * axis[2] - dir[2];
-/* result[3] is never written! XXX enforce in parser! */
-store_vector4(inst, machine, result);
- }
- break;
   case OPCODE_RSQ: /* 1 / sqrt() */
  {
 GLfloat a[4], result[4];
@@ -1562,52 +1477,6 @@ _mesa_execute_program(struct gl_context * ctx

Mesa (master): gallium: Drop the unused RCC opcode.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: de2f8d75db3dad3089c96b65223e47ad3986a25c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=de2f8d75db3dad3089c96b65223e47ad3986a25c

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 13:27:49 2014 -0800

gallium: Drop the unused RCC opcode.

Nothing in the tree generated it.

Reviewed-by: Jose Fonseca jfons...@vmware.com

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c |1 -
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |5 -
 src/gallium/auxiliary/tgsi/tgsi_exec.c  |   20 
 src/gallium/auxiliary/tgsi/tgsi_info.c  |2 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h|1 -
 src/gallium/auxiliary/tgsi/tgsi_util.c  |1 -
 src/gallium/docs/source/tgsi.rst|   11 ---
 src/gallium/drivers/ilo/shader/toy_tgsi.c   |2 --
 src/gallium/drivers/r300/r300_tgsi_to_rc.c  |1 -
 src/gallium/drivers/r600/r600_shader.c  |6 +++---
 src/gallium/include/pipe/p_shader_tokens.h  |2 +-
 11 files changed, 5 insertions(+), 47 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index 51cb54c..4a9ce37 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -207,7 +207,6 @@ lp_build_tgsi_inst_llvm(
/* Ignore deprecated instructions */
switch (inst-Instruction.Opcode) {
 
-   case TGSI_OPCODE_RCC:
case TGSI_OPCODE_UP2H:
case TGSI_OPCODE_UP2US:
case TGSI_OPCODE_UP4B:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 7829a7e..3b9833a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -679,11 +679,6 @@ lp_emit_instruction_aos(
case TGSI_OPCODE_XPD:
   return FALSE;
 
-   case TGSI_OPCODE_RCC:
-  /* deprecated? */
-  assert(0);
-  return FALSE;
-
case TGSI_OPCODE_DPH:
   return FALSE;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index b9a4c7b..b3ea82f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -911,22 +911,6 @@ micro_div(
 }
 
 static void
-micro_rcc(union tgsi_exec_channel *dst,
-  const union tgsi_exec_channel *src)
-{
-   uint i;
-
-   for (i = 0; i  4; i++) {
-  float recip = 1.0f / src-f[i];
-
-  if (recip  0.0f)
- dst-f[i] = CLAMP(recip, 5.42101e-020f, 1.84467e+019f);
-  else
- dst-f[i] = CLAMP(recip, -1.84467e+019f, -5.42101e-020f);
-   }
-}
-
-static void
 micro_lt(
union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src0,
@@ -3799,10 +3783,6 @@ exec_instruction(
   exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_FLOAT);
   break;
 
-   case TGSI_OPCODE_RCC:
-  exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_FLOAT);
-  break;
-
case TGSI_OPCODE_DPH:
   exec_dph(mach, inst);
   break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 6336304..d17426f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -71,7 +71,7 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 1, 2, 0, 0, 0, 0, COMP, XPD, TGSI_OPCODE_XPD },
{ 0, 0, 0, 0, 0, 0, NONE, , 32 },  /* removed */
{ 1, 1, 0, 0, 0, 0, COMP, ABS, TGSI_OPCODE_ABS },
-   { 1, 1, 0, 0, 0, 0, REPL, RCC, TGSI_OPCODE_RCC },
+   { 0, 0, 0, 0, 0, 0, NONE, , 34 },  /* removed */
{ 1, 2, 0, 0, 0, 0, REPL, DPH, TGSI_OPCODE_DPH },
{ 1, 1, 0, 0, 0, 0, REPL, COS, TGSI_OPCODE_COS },
{ 1, 1, 0, 0, 0, 0, COMP, DDX, TGSI_OPCODE_DDX },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h 
b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index 56a7a97..33cf38b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -87,7 +87,6 @@ OP11(LG2)
 OP12(POW)
 OP12(XPD)
 OP11(ABS)
-OP11(RCC)
 OP12(DPH)
 OP11(COS)
 OP11(DDX)
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c 
b/src/gallium/auxiliary/tgsi/tgsi_util.c
index e1cba95..66cb167 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.c
@@ -253,7 +253,6 @@ tgsi_util_get_inst_usage_mask(const struct 
tgsi_full_instruction *inst,
 
case TGSI_OPCODE_EX2:
case TGSI_OPCODE_LG2:
-   case TGSI_OPCODE_RCC:
   read_mask = TGSI_WRITEMASK_X;
   break;
 
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 49de4ca..c912ec5 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -404,17 +404,6 @@ This instruction replicates its result.
   dst.w = |src.w|
 
 
-.. opcode:: RCC - Reciprocal Clamped
-
-This instruction

Mesa (master): nine: Drop use of TGSI_OPCODE_CND.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 925cb75f894c312478e3e2f4124d2913c4d969ab
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=925cb75f894c312478e3e2f4124d2913c4d969ab

Author: Jose Fonseca jfons...@vmware.com
Date:   Thu Nov 20 14:25:30 2014 -0800

nine: Drop use of TGSI_OPCODE_CND.

This was the only state tracker emitting it, and hardware was just having
to lower it anyway (or failing to lower it at all).

v2: Extracted from a larger patch by Jose (which also dropped DP2A), fixed
to actually not reference TGSI_OPCODE_CND.  Change by anholt.

Reviewed-by: Jose Fonseca jfons...@vmware.com
Reviewed-by: Axel Davy axel.d...@ens.fr
Reviewed-by: David Heidelberg da...@ixit.cz

---

 src/gallium/state_trackers/nine/nine_shader.c |   10 +-
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/src/gallium/state_trackers/nine/nine_shader.c 
b/src/gallium/state_trackers/nine/nine_shader.c
index 85cc190..268612e 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -1374,7 +1374,6 @@ DECL_SPECIAL(CND)
 }
 
 cnd = tx_src_param(tx, tx-insn.src[0]);
-#ifdef NINE_TGSI_LAZY_R600
 cgt = tx_scratch(tx);
 
 if (tx-version.major == 1  tx-version.minor  4) {
@@ -1387,13 +1386,6 @@ DECL_SPECIAL(CND)
 ureg_CMP(tx-ureg, dst,
  tx_src_param(tx, tx-insn.src[1]),
  tx_src_param(tx, tx-insn.src[2]), ureg_negate(cnd));
-#else
-if (tx-version.major == 1  tx-version.minor  4)
-cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W);
-ureg_CND(tx-ureg, dst,
- tx_src_param(tx, tx-insn.src[1]),
- tx_src_param(tx, tx-insn.src[2]), cnd);
-#endif
 return D3D_OK;
 }
 
@@ -2356,7 +2348,7 @@ struct sm1_op_info inst_table[] =
 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL),
 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL),
-_OPI(CND,  CND, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
+_OPI(CND,  NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)),
 
 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)),
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): gallium: Drop the unused SFL/STR opcodes.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: ecfe9e2ad2b5f178ef09420f8d95d49937137cd9
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ecfe9e2ad2b5f178ef09420f8d95d49937137cd9

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 14:36:19 2014 -0800

gallium: Drop the unused SFL/STR opcodes.

Nothing generated them.

Reviewed-by: Jose Fonseca jfons...@vmware.com

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |   24 --
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c|8 
 src/gallium/auxiliary/tgsi/tgsi_exec.c |   47 
 src/gallium/auxiliary/tgsi/tgsi_info.c |4 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h   |2 -
 src/gallium/docs/source/tgsi.rst   |   13 --
 src/gallium/drivers/ilo/shader/toy_tgsi.c  |   26 ---
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  |6 ---
 src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c   |6 ---
 src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c   |6 ---
 src/gallium/drivers/r300/r300_tgsi_to_rc.c |2 -
 src/gallium/drivers/r600/r600_shader.c |   12 ++---
 src/gallium/include/pipe/p_shader_tokens.h |4 +-
 13 files changed, 10 insertions(+), 150 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index 722aa9a..d0f1a7c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -691,28 +691,6 @@ const struct lp_build_tgsi_action scs_action = {
scs_emit /* emit */
 };
 
-/* TGSI_OPCODE_SFL */
-
-static void
-sfl_emit(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   emit_data-output[emit_data-chan] = bld_base-base.zero;
-}
-
-/* TGSI_OPCODE_STR */
-
-static void
-str_emit(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   emit_data-output[emit_data-chan] = bld_base-base.one;
-}
-
 /* TGSI_OPCODE_SUB */
 static void
 sub_emit(
@@ -958,8 +936,6 @@ lp_set_default_actions(struct lp_build_tgsi_context * 
bld_base)
bld_base-op_actions[TGSI_OPCODE_MUL].emit = mul_emit;
bld_base-op_actions[TGSI_OPCODE_DIV].emit = fdiv_emit;
bld_base-op_actions[TGSI_OPCODE_RCP].emit = rcp_emit;
-   bld_base-op_actions[TGSI_OPCODE_SFL].emit = sfl_emit;
-   bld_base-op_actions[TGSI_OPCODE_STR].emit = str_emit;
bld_base-op_actions[TGSI_OPCODE_SUB].emit = sub_emit;
 
bld_base-op_actions[TGSI_OPCODE_UARL].emit = mov_emit;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 49ad3b6..aacbeff 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -722,10 +722,6 @@ lp_emit_instruction_aos(
   dst0 = lp_build_select(bld-bld_base.base, tmp0, 
bld-bld_base.base.one, bld-bld_base.base.zero);
   break;
 
-   case TGSI_OPCODE_SFL:
-  dst0 = bld-bld_base.base.zero;
-  break;
-
case TGSI_OPCODE_SGT:
   src0 = lp_build_emit_fetch(bld-bld_base, inst, 0, LP_CHAN_ALL);
   src1 = lp_build_emit_fetch(bld-bld_base, inst, 1, LP_CHAN_ALL);
@@ -753,10 +749,6 @@ lp_emit_instruction_aos(
   dst0 = lp_build_select(bld-bld_base.base, tmp0, 
bld-bld_base.base.one, bld-bld_base.base.zero);
   break;
 
-   case TGSI_OPCODE_STR:
-  dst0 = bld-bld_base.base.one;
-  break;
-
case TGSI_OPCODE_TEX:
   dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE);
   break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index ab13c13..03cb277 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -452,24 +452,6 @@ micro_sne(union tgsi_exec_channel *dst,
 }
 
 static void
-micro_sfl(union tgsi_exec_channel *dst)
-{
-   dst-f[0] = 0.0f;
-   dst-f[1] = 0.0f;
-   dst-f[2] = 0.0f;
-   dst-f[3] = 0.0f;
-}
-
-static void
-micro_str(union tgsi_exec_channel *dst)
-{
-   dst-f[0] = 1.0f;
-   dst-f[1] = 1.0f;
-   dst-f[2] = 1.0f;
-   dst-f[3] = 1.0f;
-}
-
-static void
 micro_trunc(union tgsi_exec_channel *dst,
 const union tgsi_exec_channel *src)
 {
@@ -2446,27 +2428,6 @@ exec_declaration(struct tgsi_exec_machine *mach,
}
 }
 
-
-typedef void (* micro_op)(union tgsi_exec_channel *dst);
-
-static void
-exec_vector(struct tgsi_exec_machine *mach,
-const struct tgsi_full_instruction *inst,
-micro_op op,
-enum tgsi_exec_datatype dst_datatype)
-{
-   unsigned int chan;
-
-   for (chan = 0; chan  TGSI_NUM_CHANNELS; chan++) {
-  if (inst-Dst[0].Register.WriteMask  (1  chan)) {
- union tgsi_exec_channel dst;
-
- op(dst);
- store_dest(mach, dst, inst-Dst[0], inst, chan, dst_datatype

Mesa (master): nine: Don't use the otherwise-dead SFL opcode in an unreachable path.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 7c0acd8535b36077b832e78f1f3a53176e453d74
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c0acd8535b36077b832e78f1f3a53176e453d74

Author: Eric Anholt e...@anholt.net
Date:   Thu Nov 20 14:17:07 2014 -0800

nine: Don't use the otherwise-dead SFL opcode in an unreachable path.

Reviewed-by: Jose Fonseca jfons...@vmware.com
Reviewed-by: Axel Davy axel.d...@ens.fr
Reviewed-by: David Heidelberg da...@ixit.cz

---

 src/gallium/state_trackers/nine/nine_shader.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/state_trackers/nine/nine_shader.c 
b/src/gallium/state_trackers/nine/nine_shader.c
index cc027b4..9b324c3 100644
--- a/src/gallium/state_trackers/nine/nine_shader.c
+++ b/src/gallium/state_trackers/nine/nine_shader.c
@@ -1615,7 +1615,7 @@ sm1_insn_flags_to_tgsi_setop(BYTE flags)
 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE;
 default:
 assert(!invalid comparison flags);
-return TGSI_OPCODE_SFL;
+return TGSI_OPCODE_SGT;
 }
 }
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): gallium: Drop unused BRA opcode.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 00f7002c5c45887b204a3f14b8e3b32472cc39bb
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=00f7002c5c45887b204a3f14b8e3b32472cc39bb

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 14:51:22 2014 -0800

gallium: Drop unused BRA opcode.

Never generated, and implemented in only nvfx vertprog.

Reviewed-by: Jose Fonseca jfons...@vmware.com

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c  |1 -
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c  |6 --
 src/gallium/auxiliary/tgsi/tgsi_exec.c   |4 
 src/gallium/auxiliary/tgsi/tgsi_info.c   |2 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |1 -
 src/gallium/docs/source/tgsi.rst |9 -
 src/gallium/drivers/ilo/shader/toy_tgsi.c|2 --
 src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c |7 ---
 src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c |1 -
 src/gallium/drivers/r300/r300_tgsi_to_rc.c   |1 -
 src/gallium/drivers/r600/r600_shader.c   |6 +++---
 src/gallium/include/pipe/p_shader_tokens.h   |2 +-
 12 files changed, 5 insertions(+), 37 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index c5d3679..e391d8a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -211,7 +211,6 @@ lp_build_tgsi_inst_llvm(
case TGSI_OPCODE_UP2US:
case TGSI_OPCODE_UP4B:
case TGSI_OPCODE_UP4UB:
-   case TGSI_OPCODE_BRA:
case TGSI_OPCODE_PUSHA:
case TGSI_OPCODE_POPA:
case TGSI_OPCODE_SAD:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index aacbeff..9e468f9 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -786,12 +786,6 @@ lp_emit_instruction_aos(
   dst0 = lp_build_round(bld-bld_base.base, src0);
   break;
 
-   case TGSI_OPCODE_BRA:
-  /* deprecated */
-  assert(0);
-  return FALSE;
-  break;
-
case TGSI_OPCODE_CAL:
   return FALSE;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index 03cb277..ec1374a 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3768,10 +3768,6 @@ exec_instruction(
   exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_FLOAT);
   break;
 
-   case TGSI_OPCODE_BRA:
-  assert (0);
-  break;
-
case TGSI_OPCODE_CAL:
   /* skip the call if no execution channels are enabled */
   if (mach-ExecMask) {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index e546816..2ec2853 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -99,7 +99,7 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 0, 1, 0, 0, 0, 1, NONE, , 59 },  /* removed */
{ 0, 1, 0, 0, 0, 1, NONE, , 60 },  /* removed */
{ 1, 1, 0, 0, 0, 0, COMP, ARR, TGSI_OPCODE_ARR },
-   { 0, 1, 0, 0, 0, 0, NONE, BRA, TGSI_OPCODE_BRA },
+   { 0, 1, 0, 0, 0, 1, NONE, , 62 },  /* removed */
{ 0, 0, 0, 1, 0, 0, NONE, CAL, TGSI_OPCODE_CAL },
{ 0, 0, 0, 0, 0, 0, NONE, RET, TGSI_OPCODE_RET },
{ 1, 1, 0, 0, 0, 0, COMP, SSG, TGSI_OPCODE_SSG },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h 
b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index bfa78fc..b8bdba9 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -109,7 +109,6 @@ OP11(UP2US)
 OP11(UP4B)
 OP11(UP4UB)
 OP11(ARR)
-OP01(BRA)
 OP00_LBL(CAL)
 OP00(RET)
 OP11(SSG)
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index 5145332..d4bf9cc 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -868,15 +868,6 @@ This instruction replicates its result.
Considered for removal.
 
 
-.. opcode:: BRA - Branch
-
-  pc = target
-
-.. note::
-
-   Considered for removal.
-
-
 .. opcode:: CALLNZ - Subroutine Call If Not Zero
 
TBD
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c 
b/src/gallium/drivers/ilo/shader/toy_tgsi.c
index 5989fc4..5938de4 100644
--- a/src/gallium/drivers/ilo/shader/toy_tgsi.c
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c
@@ -829,7 +829,6 @@ static const toy_tgsi_translate 
aos_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_UP4B] = aos_unsupported,
[TGSI_OPCODE_UP4UB]= aos_unsupported,
[TGSI_OPCODE_ARR]  = aos_simple,
-   [TGSI_OPCODE_BRA]  = aos_unsupported,
[TGSI_OPCODE_CAL]  = aos_unsupported,
[TGSI_OPCODE_RET]  = aos_unsupported,
[TGSI_OPCODE_SSG]  = aos_set_sign,
@@ -1374,7 +1373,6 @@ static const toy_tgsi_translate 
soa_translate_table

Mesa (master): gallium: Drop the unused RFL opcode.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: dc00b382b58bb3eb94ca393d32bd7eb3bb07d021
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=dc00b382b58bb3eb94ca393d32bd7eb3bb07d021

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 14:32:13 2014 -0800

gallium: Drop the unused RFL opcode.

Reviewed-by: Jose Fonseca jfons...@vmware.com

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c  |3 --
 src/gallium/auxiliary/tgsi/tgsi_exec.c   |   56 --
 src/gallium/auxiliary/tgsi/tgsi_info.c   |2 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |1 -
 src/gallium/docs/source/tgsi.rst |   17 ---
 src/gallium/drivers/ilo/shader/toy_tgsi.c|2 -
 src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c |   13 -
 src/gallium/drivers/r300/r300_tgsi_to_rc.c   |1 -
 src/gallium/drivers/r600/r600_shader.c   |6 +--
 src/gallium/include/pipe/p_shader_tokens.h   |2 +-
 10 files changed, 5 insertions(+), 98 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index d1425bb..49ad3b6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -715,9 +715,6 @@ lp_emit_instruction_aos(
case TGSI_OPCODE_PK4UB:
   return FALSE;
 
-   case TGSI_OPCODE_RFL:
-  return FALSE;
-
case TGSI_OPCODE_SEQ:
   src0 = lp_build_emit_fetch(bld-bld_base, inst, 0, LP_CHAN_ALL);
   src1 = lp_build_emit_fetch(bld-bld_base, inst, 1, LP_CHAN_ALL);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index b191b5c..ab13c13 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2784,58 +2784,6 @@ exec_scs(struct tgsi_exec_machine *mach,
 }
 
 static void
-exec_rfl(struct tgsi_exec_machine *mach,
- const struct tgsi_full_instruction *inst)
-{
-   union tgsi_exec_channel r[9];
-
-   if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_XYZ) {
-  /* r0 = dp3(src0, src0) */
-  fetch_source(mach, r[2], inst-Src[0], TGSI_CHAN_X, 
TGSI_EXEC_DATA_FLOAT);
-  micro_mul(r[0], r[2], r[2]);
-  fetch_source(mach, r[4], inst-Src[0], TGSI_CHAN_Y, 
TGSI_EXEC_DATA_FLOAT);
-  micro_mul(r[8], r[4], r[4]);
-  micro_add(r[0], r[0], r[8]);
-  fetch_source(mach, r[6], inst-Src[0], TGSI_CHAN_Z, 
TGSI_EXEC_DATA_FLOAT);
-  micro_mul(r[8], r[6], r[6]);
-  micro_add(r[0], r[0], r[8]);
-
-  /* r1 = dp3(src0, src1) */
-  fetch_source(mach, r[3], inst-Src[1], TGSI_CHAN_X, 
TGSI_EXEC_DATA_FLOAT);
-  micro_mul(r[1], r[2], r[3]);
-  fetch_source(mach, r[5], inst-Src[1], TGSI_CHAN_Y, 
TGSI_EXEC_DATA_FLOAT);
-  micro_mul(r[8], r[4], r[5]);
-  micro_add(r[1], r[1], r[8]);
-  fetch_source(mach, r[7], inst-Src[1], TGSI_CHAN_Z, 
TGSI_EXEC_DATA_FLOAT);
-  micro_mul(r[8], r[6], r[7]);
-  micro_add(r[1], r[1], r[8]);
-
-  /* r1 = 2 * r1 / r0 */
-  micro_add(r[1], r[1], r[1]);
-  micro_div(r[1], r[1], r[0]);
-
-  if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_X) {
- micro_mul(r[2], r[2], r[1]);
- micro_sub(r[2], r[2], r[3]);
- store_dest(mach, r[2], inst-Dst[0], inst, TGSI_CHAN_X, 
TGSI_EXEC_DATA_FLOAT);
-  }
-  if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_Y) {
- micro_mul(r[4], r[4], r[1]);
- micro_sub(r[4], r[4], r[5]);
- store_dest(mach, r[4], inst-Dst[0], inst, TGSI_CHAN_Y, 
TGSI_EXEC_DATA_FLOAT);
-  }
-  if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_Z) {
- micro_mul(r[6], r[6], r[1]);
- micro_sub(r[6], r[6], r[7]);
- store_dest(mach, r[6], inst-Dst[0], inst, TGSI_CHAN_Z, 
TGSI_EXEC_DATA_FLOAT);
-  }
-   }
-   if (inst-Dst[0].Register.WriteMask  TGSI_WRITEMASK_W) {
-  store_dest(mach, OneVec, inst-Dst[0], inst, TGSI_CHAN_W, 
TGSI_EXEC_DATA_FLOAT);
-   }
-}
-
-static void
 exec_xpd(struct tgsi_exec_machine *mach,
  const struct tgsi_full_instruction *inst)
 {
@@ -3782,10 +3730,6 @@ exec_instruction(
   assert (0);
   break;
 
-   case TGSI_OPCODE_RFL:
-  exec_rfl(mach, inst);
-  break;
-
case TGSI_OPCODE_SEQ:
   exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_FLOAT);
   break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 94de670..bc64505 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -81,7 +81,7 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 1, 1, 0, 0, 0, 0, COMP, PK2US, TGSI_OPCODE_PK2US },
{ 1, 1, 0, 0, 0, 0, COMP, PK4B, TGSI_OPCODE_PK4B },
{ 1, 1, 0, 0, 0, 0, COMP, PK4UB, TGSI_OPCODE_PK4UB },
-   { 1, 2, 0, 0, 0, 0, COMP, RFL, TGSI_OPCODE_RFL },
+   { 0, 1, 0, 0, 0, 1, NONE, , 44 },  /* removed */
{ 1, 2, 0, 0, 0, 0, COMP, SEQ

Mesa (master): gallium: Drop the unused ARA opcode.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: ff886c49555c2033dd5fda50459cafaf16540f86
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ff886c49555c2033dd5fda50459cafaf16540f86

Author: Eric Anholt e...@anholt.net
Date:   Thu Nov 13 10:08:02 2014 -0800

gallium: Drop the unused ARA opcode.

Nothing in the tree generated it.

v2: Only drop ARA, not ARR as well.

Reviewed-by: Jose Fonseca jfons...@vmware.com (v2)

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi.c |1 -
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |6 --
 src/gallium/auxiliary/tgsi/tgsi_exec.c  |4 
 src/gallium/auxiliary/tgsi/tgsi_info.c  |2 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h|1 -
 src/gallium/docs/source/tgsi.rst|8 
 src/gallium/drivers/ilo/shader/toy_tgsi.c   |2 --
 src/gallium/drivers/r300/r300_tgsi_to_rc.c  |1 -
 src/gallium/drivers/r600/r600_shader.c  |6 +++---
 src/gallium/include/pipe/p_shader_tokens.h  |2 +-
 10 files changed, 5 insertions(+), 28 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
index 4a9ce37..44a44a6 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -212,7 +212,6 @@ lp_build_tgsi_inst_llvm(
case TGSI_OPCODE_UP4B:
case TGSI_OPCODE_UP4UB:
case TGSI_OPCODE_X2D:
-   case TGSI_OPCODE_ARA:
case TGSI_OPCODE_BRA:
case TGSI_OPCODE_PUSHA:
case TGSI_OPCODE_POPA:
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 3b9833a..ed1798d 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -798,12 +798,6 @@ lp_emit_instruction_aos(
   return FALSE;
   break;
 
-   case TGSI_OPCODE_ARA:
-  /* deprecated */
-  assert(0);
-  return FALSE;
-  break;
-
case TGSI_OPCODE_ARR:
   src0 = lp_build_emit_fetch(bld-bld_base, inst, 0, LP_CHAN_ALL);
   dst0 = lp_build_round(bld-bld_base.base, src0);
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index b3ea82f..578d4d8 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -3912,10 +3912,6 @@ exec_instruction(
   exec_x2d(mach, inst);
   break;
 
-   case TGSI_OPCODE_ARA:
-  assert (0);
-  break;
-
case TGSI_OPCODE_ARR:
   exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, 
TGSI_EXEC_DATA_FLOAT);
   break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi/tgsi_info.c
index d17426f..b94f5ac 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -97,7 +97,7 @@ static const struct tgsi_opcode_info 
opcode_info[TGSI_OPCODE_LAST] =
{ 1, 1, 0, 0, 0, 0, COMP, UP4B, TGSI_OPCODE_UP4B },
{ 1, 1, 0, 0, 0, 0, COMP, UP4UB, TGSI_OPCODE_UP4UB },
{ 1, 3, 0, 0, 0, 0, COMP, X2D, TGSI_OPCODE_X2D },
-   { 1, 1, 0, 0, 0, 0, COMP, ARA, TGSI_OPCODE_ARA },
+   { 0, 1, 0, 0, 0, 1, NONE, , 60 },  /* removed */
{ 1, 1, 0, 0, 0, 0, COMP, ARR, TGSI_OPCODE_ARR },
{ 0, 1, 0, 0, 0, 0, NONE, BRA, TGSI_OPCODE_BRA },
{ 0, 0, 0, 1, 0, 0, NONE, CAL, TGSI_OPCODE_CAL },
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h 
b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
index 33cf38b..2ac9031 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -112,7 +112,6 @@ OP11(UP2US)
 OP11(UP4B)
 OP11(UP4UB)
 OP13(X2D)
-OP11(ARA)
 OP11(ARR)
 OP01(BRA)
 OP00_LBL(CAL)
diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst
index c912ec5..2138b18 100644
--- a/src/gallium/docs/source/tgsi.rst
+++ b/src/gallium/docs/source/tgsi.rst
@@ -701,14 +701,6 @@ This instruction replicates its result.
Considered for removal.
 
 
-.. opcode:: ARA - Address Register Add
-
-  TBD
-
-.. note::
-
-   Considered for removal.
-
 .. opcode:: ARR - Address Register Load With Round
 
 .. math::
diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c 
b/src/gallium/drivers/ilo/shader/toy_tgsi.c
index 1bf9f21..b71d577 100644
--- a/src/gallium/drivers/ilo/shader/toy_tgsi.c
+++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c
@@ -854,7 +854,6 @@ static const toy_tgsi_translate 
aos_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_UP4B] = aos_unsupported,
[TGSI_OPCODE_UP4UB]= aos_unsupported,
[TGSI_OPCODE_X2D]  = aos_unsupported,
-   [TGSI_OPCODE_ARA]  = aos_unsupported,
[TGSI_OPCODE_ARR]  = aos_simple,
[TGSI_OPCODE_BRA]  = aos_unsupported,
[TGSI_OPCODE_CAL]  = aos_unsupported,
@@ -1404,7 +1403,6 @@ static const toy_tgsi_translate 
soa_translate_table[TGSI_OPCODE_LAST] = {
[TGSI_OPCODE_UP4B] = soa_unsupported

Mesa (master): gallium: Drop the unused CND opcode.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 365a4a3f9a80d1b7a6d030d2921578dfc5c899c6
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=365a4a3f9a80d1b7a6d030d2921578dfc5c899c6

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 14:23:59 2014 -0800

gallium: Drop the unused CND opcode.

Nothing in the tree generates it.

Reviewed-by: Jose Fonseca jfons...@vmware.com

---

 src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c |   19 ---
 src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c|9 -
 src/gallium/auxiliary/tgsi/tgsi_exec.c |   16 
 src/gallium/auxiliary/tgsi/tgsi_info.c |2 +-
 src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h   |1 -
 src/gallium/auxiliary/tgsi/tgsi_util.c |1 -
 src/gallium/docs/source/tgsi.rst   |   13 -
 src/gallium/drivers/ilo/shader/toy_tgsi.c  |   17 -
 src/gallium/drivers/r300/r300_tgsi_to_rc.c |1 -
 src/gallium/drivers/r600/r600_shader.c |6 +++---
 src/gallium/include/pipe/p_shader_tokens.h |2 +-
 11 files changed, 5 insertions(+), 82 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
index d0f1a7c..9cb42b2 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -1057,24 +1057,6 @@ ucmp_emit_cpu(
   cond, emit_data-args[1], emit_data-args[2]);
 }
 
-
-/* TGSI_OPCODE_CND (CPU Only) */
-static void
-cnd_emit_cpu(
-   const struct lp_build_tgsi_action * action,
-   struct lp_build_tgsi_context * bld_base,
-   struct lp_build_emit_data * emit_data)
-{
-   LLVMValueRef half, tmp;
-   half = lp_build_const_vec(bld_base-base.gallivm, bld_base-base.type, 0.5);
-   tmp = lp_build_cmp(bld_base-base, PIPE_FUNC_GREATER,
-  emit_data-args[2], half);
-   emit_data-output[emit_data-chan] = lp_build_select(bld_base-base,
-  tmp,
-  emit_data-args[0],
-  emit_data-args[1]);
-}
-
 /* TGSI_OPCODE_COS (CPU Only) */
 static void
 cos_emit_cpu(
@@ -1821,7 +1803,6 @@ lp_set_default_actions_cpu(
bld_base-op_actions[TGSI_OPCODE_ARL].emit = arl_emit_cpu;
bld_base-op_actions[TGSI_OPCODE_ARR].emit = arr_emit_cpu;
bld_base-op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu;
-   bld_base-op_actions[TGSI_OPCODE_CND].emit = cnd_emit_cpu;
bld_base-op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu;
bld_base-op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu;
bld_base-op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu;
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c 
b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
index 9e468f9..2ef5db1 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c
@@ -620,15 +620,6 @@ lp_emit_instruction_aos(
   dst0 = lp_build_add(bld-bld_base.base, tmp0, src2);
   break;
 
-   case TGSI_OPCODE_CND:
-  src0 = lp_build_emit_fetch(bld-bld_base, inst, 0, LP_CHAN_ALL);
-  src1 = lp_build_emit_fetch(bld-bld_base, inst, 1, LP_CHAN_ALL);
-  src2 = lp_build_emit_fetch(bld-bld_base, inst, 2, LP_CHAN_ALL);
-  tmp1 = lp_build_const_vec(bld-bld_base.base.gallivm, 
bld-bld_base.base.type, 0.5);
-  tmp0 = lp_build_cmp(bld-bld_base.base, PIPE_FUNC_GREATER, src2, tmp1);
-  dst0 = lp_build_select(bld-bld_base.base, tmp0, src0, src1);
-  break;
-
case TGSI_OPCODE_DP2A:
   return FALSE;
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index ec1374a..834568b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -137,18 +137,6 @@ micro_cmp(union tgsi_exec_channel *dst,
 }
 
 static void
-micro_cnd(union tgsi_exec_channel *dst,
-  const union tgsi_exec_channel *src0,
-  const union tgsi_exec_channel *src1,
-  const union tgsi_exec_channel *src2)
-{
-   dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0];
-   dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1];
-   dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2];
-   dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3];
-}
-
-static void
 micro_cos(union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src)
 {
@@ -3603,10 +3591,6 @@ exec_instruction(
   exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_FLOAT);
   break;
 
-   case TGSI_OPCODE_CND:
-  exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_FLOAT);
-  break;
-
case TGSI_OPCODE_SQRT:
   exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_FLOAT);
   break;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c 
b/src/gallium/auxiliary/tgsi

Mesa (master): nouveau: Fix build after STR/BRA opcode dropping.

2014-11-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 93d30ff5d628770fcff5a931401fe76fbd8242df
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=93d30ff5d628770fcff5a931401fe76fbd8242df

Author: Eric Anholt e...@anholt.net
Date:   Mon Nov 24 15:22:25 2014 -0800

nouveau: Fix build after STR/BRA opcode dropping.

I missed these while git grepping for users of the dead opcodes.  Sigh,
macros.

---

 src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp |2 --
 1 file changed, 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index aa7390b..39a70aa 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -538,12 +538,10 @@ static nv50_ir::operation translateOpcode(uint opcode)
NV50_IR_OPCODE_CASE(SIN, SIN);
NV50_IR_OPCODE_CASE(SLE, SET);
NV50_IR_OPCODE_CASE(SNE, SET);
-   NV50_IR_OPCODE_CASE(STR, SET);
NV50_IR_OPCODE_CASE(TEX, TEX);
NV50_IR_OPCODE_CASE(TXD, TXD);
NV50_IR_OPCODE_CASE(TXP, TEX);
 
-   NV50_IR_OPCODE_CASE(BRA, BRA);
NV50_IR_OPCODE_CASE(CAL, CALL);
NV50_IR_OPCODE_CASE(RET, RET);
NV50_IR_OPCODE_CASE(CMP, SLCT);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Update for new kernel ABI with async execution and waits.

2014-11-20 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 21577571b37e68edc0422fbf80932588a4614abc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=21577571b37e68edc0422fbf80932588a4614abc

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 19 17:39:04 2014 -0800

vc4: Update for new kernel ABI with async execution and waits.

Our submits now return immediately and you have to manually wait for
things to complete if you want to (like a normal driver).

---

 src/gallium/drivers/vc4/Makefile.sources |1 +
 src/gallium/drivers/vc4/vc4_bufmgr.c |   65 +-
 src/gallium/drivers/vc4/vc4_bufmgr.h |   10 ++-
 src/gallium/drivers/vc4/vc4_context.c|   10 +++
 src/gallium/drivers/vc4/vc4_context.h|3 +
 src/gallium/drivers/vc4/vc4_drm.h|   38 +++
 src/gallium/drivers/vc4/vc4_fence.c  |  108 ++
 src/gallium/drivers/vc4/vc4_resource.c   |5 +-
 src/gallium/drivers/vc4/vc4_screen.h |   13 
 9 files changed, 250 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/Makefile.sources 
b/src/gallium/drivers/vc4/Makefile.sources
index 2336565..6ec48ab 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -9,6 +9,7 @@ C_SOURCES := \
vc4_draw.c \
vc4_drm.h \
vc4_emit.c \
+   vc4_fence.c \
vc4_formats.c \
vc4_opt_algebraic.c \
vc4_opt_copy_propagation.c \
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c 
b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 33592e8..3b73ac8 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -152,8 +152,57 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
 return true;
 }
 
+bool
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns)
+{
+#ifndef USE_VC4_SIMULATOR
+struct drm_vc4_wait_seqno wait;
+memset(&wait, 0, sizeof(wait));
+wait.seqno = seqno;
+wait.timeout_ns = timeout_ns;
+
+int ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait);
+if (ret == -ETIME) {
+return false;
+} else if (ret != 0) {
+fprintf(stderr, "wait failed\n");
+abort();
+} else {
+screen->finished_seqno = wait.seqno;
+return true;
+}
+#else
+return true;
+#endif
+}
+
+bool
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns)
+{
+#ifndef USE_VC4_SIMULATOR
+struct vc4_screen *screen = bo->screen;
+
+struct drm_vc4_wait_bo wait;
+memset(&wait, 0, sizeof(wait));
+wait.handle = bo->handle;
+wait.timeout_ns = timeout_ns;
+
+int ret = drmIoctl(screen->fd, DRM_IOCTL_VC4_WAIT_BO, &wait);
+if (ret == -ETIME) {
+return false;
+} else if (ret != 0) {
+fprintf(stderr, "wait failed\n");
+abort();
+} else {
+return true;
+}
+#else
+return true;
+#endif
+}
+
 void *
-vc4_bo_map(struct vc4_bo *bo)
+vc4_bo_map_unsynchronized(struct vc4_bo *bo)
 {
 int ret;
 
@@ -179,3 +228,17 @@ vc4_bo_map(struct vc4_bo *bo)
 
 return bo-map;
 }
+
+void *
+vc4_bo_map(struct vc4_bo *bo)
+{
+void *map = vc4_bo_map_unsynchronized(bo);
+
+bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE);
+if (!ok) {
+fprintf(stderr, "BO wait for map failed\n");
+abort();
+}
+
+return map;
+}
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h 
b/src/gallium/drivers/vc4/vc4_bufmgr.h
index 00ea149..4a1d4a4 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -78,9 +78,17 @@ vc4_bo_unreference(struct vc4_bo **bo)
 *bo = NULL;
 }
 
-
 void *
 vc4_bo_map(struct vc4_bo *bo);
 
+void *
+vc4_bo_map_unsynchronized(struct vc4_bo *bo);
+
+bool
+vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns);
+
+bool
+vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns);
+
 #endif /* VC4_BUFMGR_H */
 
diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index a6becaf..bb30c0e 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -322,6 +322,8 @@ vc4_flush(struct pipe_context *pctx)
 }
 }
 
+vc4-last_emit_seqno = submit.seqno;
+
 vc4_reset_cl(vc4-bcl);
 vc4_reset_cl(vc4-rcl);
 vc4_reset_cl(vc4-shader_rec);
@@ -350,7 +352,15 @@ static void
 vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
unsigned flags)
 {
+struct vc4_context *vc4 = vc4_context(pctx);
+
 vc4_flush(pctx);
+
+if (fence) {
+struct vc4_fence *f = vc4_fence_create(vc4-screen,
+   vc4-last_emit_seqno);
+*fence = (struct

Mesa (master): vc4: Emit semaphore instructions for new kernel ABI.

2014-11-18 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 82e919d33bbe508b3e1ba883a01ef2512dbc8f72
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=82e919d33bbe508b3e1ba883a01ef2512dbc8f72

Author: Eric Anholt e...@anholt.net
Date:   Tue Nov 18 12:16:55 2014 -0800

vc4: Emit semaphore instructions for new kernel ABI.

Previously, the kernel would dispatch thread 0, wait, then dispatch thread
1.  By insisting that the thread contents use semaphores in the right
place, the kernel can sleep for longer by dispatching both threads at
once.

---

 src/gallium/drivers/vc4/kernel/vc4_drv.h  |2 +
 src/gallium/drivers/vc4/kernel/vc4_validate.c |   77 -
 src/gallium/drivers/vc4/vc4_context.c |   14 -
 3 files changed, 87 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h 
b/src/gallium/drivers/vc4/kernel/vc4_drv.h
index b0eb3f0..81ffa03 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_drv.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h
@@ -82,6 +82,8 @@ struct exec_info {
bool found_tile_binning_mode_config_packet;
bool found_tile_rendering_mode_config_packet;
bool found_start_tile_binning_packet;
+   bool found_increment_semaphore_packet;
+   bool found_wait_on_semaphore_packet;
uint8_t bin_tiles_x, bin_tiles_y;
uint32_t fb_width, fb_height;
uint32_t tile_alloc_init_block_size;
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c 
b/src/gallium/drivers/vc4/kernel/vc4_validate.c
index 8b04eb9..ba6e46f 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@@ -203,6 +203,18 @@ check_tex_size(struct exec_info *exec, struct 
drm_gem_cma_object *fbo,
 }
 
 static int
+validate_flush_all(VALIDATE_ARGS)
+{
+   if (exec->found_increment_semaphore_packet) {
+   DRM_ERROR("VC4_PACKET_FLUSH_ALL after "
+ "VC4_PACKET_INCREMENT_SEMAPHORE\n");
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int
 validate_start_tile_binning(VALIDATE_ARGS)
 {
if (exec-found_start_tile_binning_packet) {
@@ -220,6 +232,41 @@ validate_start_tile_binning(VALIDATE_ARGS)
 }
 
 static int
+validate_increment_semaphore(VALIDATE_ARGS)
+{
+   if (exec-found_increment_semaphore_packet) {
+   DRM_ERROR(Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n);
+   return -EINVAL;
+   }
+   exec-found_increment_semaphore_packet = true;
+
+   /* Once we've found the semaphore increment, there should be one FLUSH
+* then the end of the command list.  The FLUSH actually triggers the
+* increment, so we only need to make sure there
+*/
+
+   return 0;
+}
+
+static int
+validate_wait_on_semaphore(VALIDATE_ARGS)
+{
+   if (exec-found_wait_on_semaphore_packet) {
+   DRM_ERROR(Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n);
+   return -EINVAL;
+   }
+   exec-found_wait_on_semaphore_packet = true;
+
+   if (!exec-found_increment_semaphore_packet) {
+   DRM_ERROR(VC4_PACKET_WAIT_ON_SEMAPHORE without 
+ VC4_PACKET_INCREMENT_SEMAPHORE\n);
+   return -EINVAL;
+   }
+
+   return 0;
+}
+
+static int
 validate_branch_to_sublist(VALIDATE_ARGS)
 {
struct drm_gem_cma_object *target;
@@ -233,6 +280,11 @@ validate_branch_to_sublist(VALIDATE_ARGS)
return -EINVAL;
}
 
+   if (!exec-found_wait_on_semaphore_packet) {
+   DRM_ERROR(Jumping to tile alloc before binning finished.\n);
+   return -EINVAL;
+   }
+
offset = *(uint32_t *)(untrusted + 0);
if (offset % exec-tile_alloc_init_block_size ||
offset / exec-tile_alloc_init_block_size 
@@ -322,6 +374,11 @@ validate_indexed_prim_list(VALIDATE_ARGS)
uint32_t index_size = (*(uint8_t *)(untrusted + 0)  4) ? 2 : 1;
struct vc4_shader_state *shader_state;
 
+   if (exec-found_increment_semaphore_packet) {
+   DRM_ERROR(Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n);
+   return -EINVAL;
+   }
+
/* Check overflow condition */
if (exec-shader_state_count == 0) {
DRM_ERROR(shader state must precede primitives\n);
@@ -355,6 +412,11 @@ validate_gl_array_primitive(VALIDATE_ARGS)
uint32_t max_index;
struct vc4_shader_state *shader_state;
 
+   if (exec-found_increment_semaphore_packet) {
+   DRM_ERROR(Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n);
+   return -EINVAL;
+   }
+
/* Check overflow condition */
if (exec-shader_state_count == 0) {
DRM_ERROR(shader state must precede primitives\n);
@@ -600,10 +662,10 @@ static const struct cmd_info {
[VC4_PACKET_HALT] = { 1, 1, 1, halt, NULL },
[VC4_PACKET_NOP] = { 1, 1, 1, nop, NULL },
[VC4_PACKET_FLUSH] = { 1, 1, 1, flush, NULL

Mesa (master): vc4: Mark a big array as const.

2014-11-18 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 05f165b62d1adce60b18783407c80b9fa2efa533
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=05f165b62d1adce60b18783407c80b9fa2efa533

Author: Eric Anholt e...@anholt.net
Date:   Wed Oct 29 15:15:29 2014 -0700

vc4: Mark a big array as const.

Drops 1kb of code from this inner loop, in exchange for 2.5k of data.

---

 src/gallium/drivers/vc4/vc4_program.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index ef0d5b8..5fab1de 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1247,7 +1247,7 @@ static void
 emit_tgsi_instruction(struct vc4_compile *c,
   struct tgsi_full_instruction *tgsi_inst)
 {
-struct {
+static const struct {
 enum qop op;
 struct qreg (*func)(struct vc4_compile *c,
 struct tgsi_full_instruction *tgsi_inst,

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Avoid reusing a pointer from c-outputs[] after add_output().

2014-11-12 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: b3d269f5ae1844b542d8e875d7177c5eff3a29f2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3d269f5ae1844b542d8e875d7177c5eff3a29f2

Author: Eric Anholt e...@anholt.net
Date:   Wed Nov 12 14:14:32 2014 -0800

vc4: Avoid reusing a pointer from c-outputs[] after add_output().

add_output() can resize the qreg array, so we might use a stale pointer.

---

 src/gallium/drivers/vc4/vc4_program.c |   11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 1cc6e96..ef0d5b8 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1822,12 +1822,11 @@ emit_stub_vpm_read(struct vc4_compile *c)
 static void
 emit_ucp_clipdistance(struct vc4_compile *c)
 {
-struct qreg *clipvertex;
-
+unsigned cv;
 if (c-output_clipvertex_index != -1)
-clipvertex = c-outputs[c-output_clipvertex_index];
+cv = c-output_clipvertex_index;
 else if (c-output_position_index != -1)
-clipvertex = c-outputs[c-output_position_index];
+cv = c-output_position_index;
 else
 return;
 
@@ -1846,12 +1845,14 @@ emit_ucp_clipdistance(struct vc4_compile *c)
plane,
TGSI_SWIZZLE_X);
 
+
 struct qreg dist = qir_uniform_f(c, 0.0);
 for (int i = 0; i  4; i++) {
+struct qreg pos_chan = c-outputs[cv + i];
 struct qreg ucp =
 add_uniform(c, QUNIFORM_USER_CLIP_PLANE,
 plane * 4 + i);
-dist = qir_FADD(c, dist, qir_FMUL(c, clipvertex[i], 
ucp));
+dist = qir_FADD(c, dist, qir_FMUL(c, pos_chan, ucp));
 }
 
 c-outputs[output_index] = dist;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix assumption of TGSI OUT[0] being POSITION in the VS.

2014-11-12 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: acc1cca7ae35e9e7fb55b4c05fd80564253e1634
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=acc1cca7ae35e9e7fb55b4c05fd80564253e1634

Author: Eric Anholt e...@anholt.net
Date:   Fri Nov  7 14:26:32 2014 -0800

vc4: Fix assumption of TGSI OUT[0] being POSITION in the VS.

All the shaders we've received so far had this be the case, but with
nir-to-tgsi that changed.

I might decide to make nir-to-tgsi keep the outputs in the same order, for
debugging sanity, but I'm not sure.

---

 src/gallium/drivers/vc4/vc4_program.c |   10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 72bbcd8..1cc6e96 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1751,7 +1751,7 @@ emit_scaled_viewport_write(struct vc4_compile *c, struct 
qreg rcp_w)
 
 xyi[i] = qir_FTOI(c, qir_FMUL(c,
   qir_FMUL(c,
-   c-outputs[i],
+   
c-outputs[c-output_position_index + i],
scale),
   rcp_w));
 }
@@ -1766,7 +1766,7 @@ emit_zs_write(struct vc4_compile *c, struct qreg rcp_w)
 struct qreg zoffset = add_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);
 
 qir_VPM_WRITE(c, qir_FMUL(c, qir_FADD(c, qir_FMUL(c,
-  c-outputs[2],
+  
c-outputs[c-output_position_index + 2],
   zscale),
   zoffset),
   rcp_w));
@@ -1863,7 +1863,7 @@ emit_vert_end(struct vc4_compile *c,
   struct vc4_varying_semantic *fs_inputs,
   uint32_t num_fs_inputs)
 {
-struct qreg rcp_w = qir_RCP(c, c-outputs[3]);
+struct qreg rcp_w = qir_RCP(c, c-outputs[c-output_position_index + 
3]);
 
 emit_stub_vpm_read(c);
 emit_ucp_clipdistance(c);
@@ -1900,12 +1900,12 @@ emit_vert_end(struct vc4_compile *c,
 static void
 emit_coord_end(struct vc4_compile *c)
 {
-struct qreg rcp_w = qir_RCP(c, c-outputs[3]);
+struct qreg rcp_w = qir_RCP(c, c-outputs[c-output_position_index + 
3]);
 
 emit_stub_vpm_read(c);
 
 for (int i = 0; i  4; i++)
-qir_VPM_WRITE(c, c-outputs[i]);
+qir_VPM_WRITE(c, c-outputs[c-output_position_index + i]);
 
 emit_scaled_viewport_write(c, rcp_w);
 emit_zs_write(c, rcp_w);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Don't forget to validate code that's got PROG_END on it.

2014-10-28 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 8911879dec564ab2ef343d58e6de4fd558e35c3d
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8911879dec564ab2ef343d58e6de4fd558e35c3d

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 24 20:50:20 2014 +0100

vc4: Don't forget to validate code that's got PROG_END on it.

This signal doesn't terminate the program now, it terminates the program
soon.  So you have to actually validate the code in the instruction.

---

 src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c |   11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c 
b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
index 56c4a17..fdce033 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
@@ -261,6 +261,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
case QPU_SIG_COLOR_LOAD:
case QPU_SIG_LOAD_TMU0:
case QPU_SIG_LOAD_TMU1:
+   case QPU_SIG_PROG_END:
if (!check_instruction_writes(inst, validated_shader,
  validation_state)) {
DRM_ERROR(Bad write at ip %d\n, ip);
@@ -270,6 +271,11 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
if (!check_instruction_reads(inst, validated_shader))
goto fail;
 
+   if (sig == QPU_SIG_PROG_END) {
+   found_shader_end = true;
+   shader_end_ip = ip;
+   }
+
break;
 
case QPU_SIG_LOAD_IMM:
@@ -280,11 +286,6 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
}
break;
 
-   case QPU_SIG_PROG_END:
-   found_shader_end = true;
-   shader_end_ip = ip;
-   break;
-
default:
DRM_ERROR(Unsupported QPU signal %d at 
  instruction %d\n, sig, ip);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add support for ARL and indirect register access on TGSI_FILE_CONSTANT.

2014-10-28 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: f87c7008958cdb095efa1cfb29ca8f3c9b9066e4
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=f87c7008958cdb095efa1cfb29ca8f3c9b9066e4

Author: Eric Anholt e...@anholt.net
Date:   Wed Oct  1 18:27:24 2014 -0700

vc4: Add support for ARL and indirect register access on TGSI_FILE_CONSTANT.

Fixes 14 ARB_vp tests (which had no lowering done), and should improve
performance of indirect uniform array access in GLSL.

---

 src/gallium/drivers/vc4/kernel/vc4_drv.h   |1 +
 src/gallium/drivers/vc4/kernel/vc4_validate.c  |   20 ++-
 .../drivers/vc4/kernel/vc4_validate_shaders.c  |  188 +---
 src/gallium/drivers/vc4/vc4_context.h  |   23 +++
 src/gallium/drivers/vc4/vc4_opt_dead_code.c|3 +-
 src/gallium/drivers/vc4/vc4_program.c  |  152 +++-
 src/gallium/drivers/vc4/vc4_qir.c  |1 +
 src/gallium/drivers/vc4/vc4_qir.h  |   45 +
 src/gallium/drivers/vc4/vc4_qpu_emit.c |5 +
 src/gallium/drivers/vc4/vc4_screen.c   |3 +-
 10 files changed, 407 insertions(+), 34 deletions(-)

diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h 
b/src/gallium/drivers/vc4/kernel/vc4_drv.h
index 45d9c40..b0eb3f0 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_drv.h
+++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h
@@ -128,6 +128,7 @@ struct exec_info {
  * Setup) for definitions of the texture parameters.
  */
 struct vc4_texture_sample_info {
+   bool is_direct;
uint32_t p_offset[4];
 };
 
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c 
b/src/gallium/drivers/vc4/kernel/vc4_validate.c
index 977e071..8b04eb9 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@@ -767,6 +767,23 @@ reloc_tex(struct exec_info *exec,
uint32_t cube_map_stride = 0;
enum vc4_texture_data_type type;
 
+   if (!vc4_use_bo(exec, texture_handle_index, VC4_MODE_RENDER, &tex))
+   return false;
+
+   if (sample->is_direct) {
+   uint32_t remaining_size = tex->base.size - p0;
+   if (p0 > tex->base.size - 4) {
+   DRM_ERROR("UBO offset greater than UBO size\n");
+   return false;
+   }
+   if (p1 > remaining_size - 4) {
+   DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
+   return false;
+   }
+   *validated_p0 = tex->paddr + p0;
+   return true;
+   }
+
if (width == 0)
width = 2048;
if (height == 0)
@@ -832,9 +849,6 @@ reloc_tex(struct exec_info *exec,
tiling_format = VC4_TILING_FORMAT_T;
}
 
-   if (!vc4_use_bo(exec, texture_handle_index, VC4_MODE_RENDER, tex))
-   return false;
-
if (!check_tex_size(exec, tex, offset + cube_map_stride * 5,
tiling_format, width, height, cpp)) {
return false;
diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c 
b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
index 03c7f23..e797c59 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c
@@ -51,8 +51,39 @@
 struct vc4_shader_validation_state {
struct vc4_texture_sample_info tmu_setup[2];
int tmu_write_count[2];
+
+   /* For registers that were last written to by a MIN instruction with
+* one argument being a uniform, the address of the uniform.
+* Otherwise, ~0.
+*
+* This is used for the validation of direct address memory reads.
+*/
+   uint32_t live_clamp_offsets[32 + 32 + 4];
 };
 
+static uint32_t
+waddr_to_live_reg_index(uint32_t waddr, bool is_b)
+{
+   if (waddr < 32) {
+   if (is_b)
+   return 32 + waddr;
+   else
+   return waddr;
+   } else if (waddr <= QPU_W_ACC3) {
+
+   return 64 + waddr - QPU_W_ACC0;
+   } else {
+   return ~0;
+   }
+}
+
+static bool
+is_tmu_submit(uint32_t waddr)
+{
+   return (waddr == QPU_W_TMU0_S ||
+   waddr == QPU_W_TMU1_S);
+}
+
 static bool
 is_tmu_write(uint32_t waddr)
 {
@@ -75,24 +106,86 @@ record_validated_texture_sample(struct 
vc4_validated_shader_info *validated_shad
if (!temp_samples)
return false;
 
-   memcpy(temp_samples[s].p_offset,
-  validation_state-tmu_setup[tmu].p_offset,
-  validation_state-tmu_write_count[tmu] * sizeof(uint32_t));
-   for (i = validation_state-tmu_write_count[tmu]; i  4; i++)
-   temp_samples[s].p_offset[i] = ~0;
+   memcpy(temp_samples[s],
+  validation_state-tmu_setup[tmu],
+  sizeof(*temp_samples));
 
validated_shader

Mesa (master): vc4: Add .dir-locals.el for kernel style in the kernel code.

2014-10-28 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: fc1eb614a70a777be0f4f8ada194bab53105999b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=fc1eb614a70a777be0f4f8ada194bab53105999b

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 24 20:49:27 2014 +0100

vc4: Add .dir-locals.el for kernel style in the kernel code.

---

 src/gallium/drivers/vc4/kernel/.dir-locals.el |   12 
 1 file changed, 12 insertions(+)

diff --git a/src/gallium/drivers/vc4/kernel/.dir-locals.el 
b/src/gallium/drivers/vc4/kernel/.dir-locals.el
new file mode 100644
index 000..2e58e90
--- /dev/null
+++ b/src/gallium/drivers/vc4/kernel/.dir-locals.el
@@ -0,0 +1,12 @@
+((nil
+  (indent-tabs-mode . t)
+  (tab-width . 8)
+  (c-basic-offset . 8)
+  (c-file-style . stroustrup)
+  (fill-column . 78)
+  (eval . (progn
+   (c-set-offset 'innamespace '0)
+   (c-set-offset 'inline-open '0)))
+  )
+ (makefile-mode (indent-tabs-mode . t))
+ )

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Allow dead code elimination of unused varyings.

2014-10-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 52824811b9c0a9bb78a40fcb43af00b315f612d0
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=52824811b9c0a9bb78a40fcb43af00b315f612d0

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 24 15:03:04 2014 +0100

vc4: Allow dead code elimination of unused varyings.

total instructions in shared programs: 39022 - 37341 (-4.31%)
instructions in affected programs: 26979 - 25298 (-6.23%)
total uniforms in shared programs: 11242 - 10523 (-6.40%)
uniforms in affected programs: 5836 - 5117 (-12.32%)

---

 src/gallium/drivers/vc4/vc4_opt_cse.c   |2 +-
 src/gallium/drivers/vc4/vc4_opt_dead_code.c |2 +-
 src/gallium/drivers/vc4/vc4_program.c   |   15 +++
 src/gallium/drivers/vc4/vc4_qir.c   |   15 +--
 src/gallium/drivers/vc4/vc4_qir.h   |2 +-
 5 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c 
b/src/gallium/drivers/vc4/vc4_opt_cse.c
index d3ef910..bebfb652 100644
--- a/src/gallium/drivers/vc4/vc4_opt_cse.c
+++ b/src/gallium/drivers/vc4/vc4_opt_cse.c
@@ -132,7 +132,7 @@ qir_opt_cse(struct vc4_compile *c)
 foreach_s(node, t, c-instructions) {
 struct qinst *inst = (struct qinst *)node;
 
-if (qir_has_side_effects(inst)) {
+if (qir_has_side_effects(c, inst)) {
 if (inst-op == QOP_TLB_DISCARD_SETUP)
 last_sf = NULL;
 continue;
diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c 
b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
index f08818a..d958dcb 100644
--- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c
+++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c
@@ -63,7 +63,7 @@ qir_opt_dead_code(struct vc4_compile *c)
 
 if (inst-dst.file == QFILE_TEMP 
 !used[inst-dst.index] 
-(!qir_has_side_effects(inst) ||
+(!qir_has_side_effects(c, inst) ||
  inst-op == QOP_TEX_RESULT)) {
 if (inst-op == QOP_TEX_RESULT) {
 dce_tex = true;
diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 01941f8..0674e4f 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2018,6 +2018,18 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum 
qstage stage,
 
 shader-program_id = vc4-next_compiled_program_id++;
 if (stage == QSTAGE_FRAG) {
+bool input_live[c-num_input_semantics];
+struct simple_node *node;
+
+memset(input_live, 0, sizeof(input_live));
+foreach(node, c-instructions) {
+struct qinst *inst = (struct qinst *)node;
+for (int i = 0; i  qir_get_op_nsrc(inst-op); i++) {
+if (inst-src[i].file == QFILE_VARY)
+input_live[inst-src[i].index] = true;
+}
+}
+
 shader-input_semantics = ralloc_array(shader,
struct 
vc4_varying_semantic,
c-num_input_semantics);
@@ -2025,6 +2037,9 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum 
qstage stage,
 for (int i = 0; i  c-num_input_semantics; i++) {
 struct vc4_varying_semantic *sem = 
c-input_semantics[i];
 
+if (!input_live[i])
+continue;
+
 /* Skip non-VS-output inputs. */
 if (sem-semantic == (uint8_t)~0)
 continue;
diff --git a/src/gallium/drivers/vc4/vc4_qir.c 
b/src/gallium/drivers/vc4/vc4_qir.c
index 9c7c15e..a7a4d96 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -122,12 +122,23 @@ qir_get_op_nsrc(enum qop qop)
 abort();
 }
 
+/**
+ * Returns whether the instruction has any side effects that must be
+ * preserved.
+ */
 bool
-qir_has_side_effects(struct qinst *inst)
+qir_has_side_effects(struct vc4_compile *c, struct qinst *inst)
 {
+/* We can dead-code eliminate varyings, because we only tell the VS
+ * about the live ones at the end.  But we have to preserve the
+ * point/line coordinates reads, because they're generated by
+ * fixed-function hardware.
+ */
 for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
-if (inst->src[i].file == QFILE_VARY)
+if (inst->src[i].file == QFILE_VARY &&
+c->input_semantics[inst->src[i].index].semantic == 0xff) {
 return true;
+}
 }
 
 return qir_op_info[inst-op].has_side_effects;
diff

Mesa (master): vc4: When asked to discard-map a whole resource, discard it.

2014-10-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 18ccda7b86b8f7ab7466265aefb3f3e773f4a757
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=18ccda7b86b8f7ab7466265aefb3f3e773f4a757

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 24 16:50:37 2014 +0100

vc4: When asked to discard-map a whole resource, discard it.

This saves a bunch of extra flushes when texsubimaging a whole texture
that's been used for rendering, or subdataing a whole BO.  In particular,
this massively reduces the runtime of piglit texture-packed-formats (when
the probes have been moved out of the inner loop).

---

 src/gallium/drivers/vc4/vc4_resource.c |   42 +---
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_resource.c 
b/src/gallium/drivers/vc4/vc4_resource.c
index 62667bf..b02e289 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -34,6 +34,20 @@
 #include vc4_tiling.h
 
 static void
+vc4_resource_bo_alloc(struct vc4_resource *rsc)
+{
+struct pipe_resource *prsc = &rsc->base.b;
+struct pipe_screen *pscreen = prsc->screen;
+
+vc4_bo_unreference(&rsc->bo);
+rsc->bo = vc4_bo_alloc(vc4_screen(pscreen),
+   rsc->slices[0].offset +
+   rsc->slices[0].size +
+   rsc->cube_map_stride * (prsc->array_size - 1),
+   "resource");
+}
+
+static void
 vc4_resource_transfer_unmap(struct pipe_context *pctx,
 struct pipe_transfer *ptrans)
 {
@@ -75,14 +89,19 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
 char *buf;
 
 if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
-uint32_t size = rsc->bo->size;
-vc4_bo_unreference(&rsc->bo);
-rsc->bo = vc4_bo_alloc(vc4->screen, size, "resource");
-}
-
-if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
-if (vc4_cl_references_bo(pctx, rsc->bo))
-vc4_flush(pctx);
+vc4_resource_bo_alloc(rsc);
+} else if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
+if (vc4_cl_references_bo(pctx, rsc->bo)) {
+if ((usage & PIPE_TRANSFER_DISCARD_RANGE) &&
+prsc->last_level == 0 &&
+prsc->width0 == box->width &&
+prsc->height0 == box->height &&
+prsc->depth0 == box->depth) {
+vc4_resource_bo_alloc(rsc);
+} else {
+vc4_flush(pctx);
+}
+}
 }
 
 if (usage  PIPE_TRANSFER_WRITE)
@@ -324,12 +343,7 @@ vc4_resource_create(struct pipe_screen *pscreen,
 }
 
 vc4_setup_slices(rsc);
-
-rsc-bo = vc4_bo_alloc(vc4_screen(pscreen),
-   rsc-slices[0].offset +
-   rsc-slices[0].size +
-   rsc-cube_map_stride * (prsc-array_size - 1),
-   resource);
+vc4_resource_bo_alloc(rsc);
 if (!rsc-bo)
 goto fail;
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add debug output to match shaderdb info to program dumps.

2014-10-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 5d32e263357e562779bfc0d2af712d4c7538a32b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d32e263357e562779bfc0d2af712d4c7538a32b

Author: Eric Anholt e...@anholt.net
Date:   Wed Oct 22 18:02:18 2014 +0100

vc4: Add debug output to match shaderdb info to program dumps.

I'm going to be using VC4_DEBUG=shaderdb,norast to do shaderdb stats, but
when debugging regressions, I want to match shaderdb output to shader
disassembly.

---

 src/gallium/drivers/vc4/vc4_context.h  |5 +
 src/gallium/drivers/vc4/vc4_program.c  |   24 ++--
 src/gallium/drivers/vc4/vc4_qir.h  |3 +++
 src/gallium/drivers/vc4/vc4_qpu_emit.c |4 +++-
 4 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.h 
b/src/gallium/drivers/vc4/vc4_context.h
index 45dfa02..d0b280a 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -79,6 +79,10 @@ struct vc4_shader_uniform_info {
 };
 
 struct vc4_uncompiled_shader {
+/** A name for this program, so you can track it in shader-db output. 
*/
+uint32_t program_id;
+/** How many variants of this program were compiled, for shader-db. */
+uint32_t compiled_variant_count;
 struct pipe_shader_state base;
 const struct tgsi_token *twoside_tokens;
 };
@@ -183,6 +187,7 @@ struct vc4_context {
 struct primconvert_context *primconvert;
 
 struct util_hash_table *fs_cache, *vs_cache;
+uint32_t next_uncompiled_program_id;
 uint64_t next_compiled_program_id;
 
 struct ra_regs *regs;
diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index c6d9fb3..01941f8 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1827,6 +1827,8 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum 
qstage stage,
 
 c-stage = stage;
 c-shader_state = key-shader_state-base;
+c-program_id = key-shader_state-program_id;
+c-variant_id = key-shader_state-compiled_variant_count++;
 
 c-key = key;
 switch (stage) {
@@ -1874,7 +1876,9 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum 
qstage stage,
 assert(ret == TGSI_PARSE_OK);
 
 if (vc4_debug  VC4_DEBUG_TGSI) {
-fprintf(stderr, TGSI:\n);
+fprintf(stderr, %s prog %d/%d TGSI:\n,
+qir_get_stage_name(c-stage),
+c-program_id, c-variant_id);
 tgsi_dump(tokens, 0);
 }
 
@@ -1918,17 +1922,23 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum 
qstage stage,
 qir_optimize(c);
 
 if (vc4_debug  VC4_DEBUG_QIR) {
-fprintf(stderr, QIR:\n);
+fprintf(stderr, %s prog %d/%d QIR:\n,
+qir_get_stage_name(c-stage),
+c-program_id, c-variant_id);
 qir_dump(c);
 }
 qir_reorder_uniforms(c);
 vc4_generate_code(vc4, c);
 
 if (vc4_debug  VC4_DEBUG_SHADERDB) {
-fprintf(stderr, SHADER-DB: %s: %d instructions\n,
-qir_get_stage_name(c-stage), c-qpu_inst_count);
-fprintf(stderr, SHADER-DB: %s: %d uniforms\n,
-qir_get_stage_name(c-stage), c-num_uniforms);
+fprintf(stderr, SHADER-DB: %s prog %d/%d: %d instructions\n,
+qir_get_stage_name(c-stage),
+c-program_id, c-variant_id,
+c-qpu_inst_count);
+fprintf(stderr, SHADER-DB: %s prog %d/%d: %d uniforms\n,
+qir_get_stage_name(c-stage),
+c-program_id, c-variant_id,
+c-num_uniforms);
 }
 
 return c;
@@ -1938,6 +1948,7 @@ static void *
 vc4_shader_state_create(struct pipe_context *pctx,
 const struct pipe_shader_state *cso)
 {
+struct vc4_context *vc4 = vc4_context(pctx);
 struct vc4_uncompiled_shader *so = 
CALLOC_STRUCT(vc4_uncompiled_shader);
 if (!so)
 return NULL;
@@ -1961,6 +1972,7 @@ vc4_shader_state_create(struct pipe_context *pctx,
 so-base.tokens = tgsi_transform_lowering(lowering_config, 
cso-tokens, info);
 if (!so-base.tokens)
 so-base.tokens = tgsi_dup_tokens(cso-tokens);
+so-program_id = vc4-next_uncompiled_program_id++;
 
 return so;
 }
diff --git a/src/gallium/drivers/vc4/vc4_qir.h 
b/src/gallium/drivers/vc4/vc4_qir.h
index b95dbc3..c2f83a7 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -289,6 +289,9 @@ struct vc4_compile {
 uint32_t qpu_inst_count;
 uint32_t qpu_inst_size;
 uint32_t num_inputs;
+
+uint32_t program_id;
+uint32_t variant_id

Mesa (master): vc4: Refactor flushing before mapping a BO.

2014-10-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: a71c3b885a532016aa426b5bb753291cffe39a44
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a71c3b885a532016aa426b5bb753291cffe39a44

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 24 16:45:04 2014 +0100

vc4: Refactor flushing before mapping a BO.

I'm going to want to make some other decisions here before flushing.

---

 src/gallium/drivers/vc4/vc4_context.c  |   17 -
 src/gallium/drivers/vc4/vc4_context.h  |2 +-
 src/gallium/drivers/vc4/vc4_resource.c |6 --
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index 87f0251..b1f0f35 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -350,13 +350,13 @@ vc4_pipe_flush(struct pipe_context *pctx, struct 
pipe_fence_handle **fence,
  *
  * This helps avoid flushing the command buffers when unnecessary.
  */
-void
-vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo)
+bool
+vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo)
 {
 struct vc4_context *vc4 = vc4_context(pctx);
 
 if (!vc4-needs_flush)
-return;
+return false;
 
 /* Walk all the referenced BOs in the drawing command list to see if
  * they match.
@@ -365,8 +365,7 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo 
*bo)
 for (int i = 0; i  (vc4-bo_handles.next -
  vc4-bo_handles.base) / 4; i++) {
 if (referenced_bos[i] == bo) {
-vc4_flush(pctx);
-return;
+return true;
 }
 }
 
@@ -377,8 +376,7 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo 
*bo)
 if (csurf) {
 struct vc4_resource *ctex = vc4_resource(csurf-base.texture);
 if (ctex-bo == bo) {
-vc4_flush(pctx);
-return;
+return true;
 }
 }
 
@@ -387,10 +385,11 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo 
*bo)
 struct vc4_resource *ztex =
 vc4_resource(zsurf-base.texture);
 if (ztex-bo == bo) {
-vc4_flush(pctx);
-return;
+return true;
 }
 }
+
+return false;
 }
 
 static void
diff --git a/src/gallium/drivers/vc4/vc4_context.h 
b/src/gallium/drivers/vc4/vc4_context.h
index d0b280a..9eaff8f 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -278,7 +278,7 @@ void vc4_write_uniforms(struct vc4_context *vc4,
 struct vc4_texture_stateobj *texstate);
 
 void vc4_flush(struct pipe_context *pctx);
-void vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo);
+bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo);
 void vc4_emit_state(struct pipe_context *pctx);
 void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c);
 struct qpu_reg *vc4_register_allocate(struct vc4_context *vc4, struct 
vc4_compile *c);
diff --git a/src/gallium/drivers/vc4/vc4_resource.c 
b/src/gallium/drivers/vc4/vc4_resource.c
index c198ab9..62667bf 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -80,8 +80,10 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
 rsc-bo = vc4_bo_alloc(vc4-screen, size, resource);
 }
 
-if (!(usage  PIPE_TRANSFER_UNSYNCHRONIZED))
-vc4_flush_for_bo(pctx, rsc-bo);
+if (!(usage  PIPE_TRANSFER_UNSYNCHRONIZED)) {
+if (vc4_cl_references_bo(pctx, rsc-bo))
+vc4_flush(pctx);
+}
 
 if (usage  PIPE_TRANSFER_WRITE)
 rsc-writes++;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Reuse uniform_data/contents indices when making uniforms.

2014-10-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 8c7ac377b7a859705479a0b421d1dacc53ca240a
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=8c7ac377b7a859705479a0b421d1dacc53ca240a

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 24 17:16:59 2014 +0100

vc4: Reuse uniform_data/contents indices when making uniforms.

This allows vc4_opt_cse.c to CSE-away operations involving the same
uniform values.

total instructions in shared programs: 37341 -> 36906 (-1.16%)
instructions in affected programs: 10233 -> 9798 (-4.25%)
total uniforms in shared programs: 10523 -> 10320 (-1.93%)
uniforms in affected programs: 2467 -> 2264 (-8.23%)

---

 src/gallium/drivers/vc4/vc4_program.c |7 +++
 1 file changed, 7 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 0674e4f..c6b7edb 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -114,6 +114,13 @@ add_uniform(struct vc4_compile *c,
 enum quniform_contents contents,
 uint32_t data)
 {
+for (int i = 0; i  c-num_uniforms; i++) {
+if (c-uniform_contents[i] == contents 
+c-uniform_data[i] == data) {
+return (struct qreg) { QFILE_UNIF, i };
+}
+}
+
 uint32_t uniform = c-num_uniforms++;
 struct qreg u = { QFILE_UNIF, uniform };
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Translate 4-byte index buffers to 2 bytes.

2014-10-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 6212d2402df4ad0658cbb98ce889e35ef5f32fa3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6212d2402df4ad0658cbb98ce889e35ef5f32fa3

Author: Eric Anholt e...@anholt.net
Date:   Sat Oct 18 12:50:05 2014 +0100

vc4: Translate 4-byte index buffers to 2 bytes.

Fixes assertion failures in 14 piglit tests (half of which now pass).

---

 src/gallium/drivers/vc4/vc4_draw.c |   14 ++
 src/gallium/drivers/vc4/vc4_resource.c |   46 
 src/gallium/drivers/vc4/vc4_resource.h |   17 +++-
 src/gallium/drivers/vc4/vc4_state.c|   25 ++---
 4 files changed, 92 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_draw.c 
b/src/gallium/drivers/vc4/vc4_draw.c
index f53caf7..e70506b 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -209,19 +209,23 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info)
  */
 if (info-indexed) {
 struct vc4_resource *rsc = vc4_resource(vc4-indexbuf.buffer);
-
-assert(vc4-indexbuf.index_size == 1 ||
-   vc4-indexbuf.index_size == 2);
+uint32_t offset = vc4-indexbuf.offset;
+uint32_t index_size = vc4-indexbuf.index_size;
+if (rsc-shadow_parent) {
+vc4_update_shadow_index_buffer(pctx, vc4-indexbuf);
+offset = 0;
+index_size = 2;
+}
 
 cl_start_reloc(vc4-bcl, 1);
 cl_u8(vc4-bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE);
 cl_u8(vc4-bcl,
   info-mode |
-  (vc4-indexbuf.index_size == 2 ?
+  (index_size == 2 ?
VC4_INDEX_BUFFER_U16:
VC4_INDEX_BUFFER_U8));
 cl_u32(vc4-bcl, info-count);
-cl_reloc(vc4, vc4-bcl, rsc-bo, vc4-indexbuf.offset);
+cl_reloc(vc4, vc4-bcl, rsc-bo, offset);
 cl_u32(vc4-bcl, max_index);
 } else {
 cl_u8(vc4-bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE);
diff --git a/src/gallium/drivers/vc4/vc4_resource.c 
b/src/gallium/drivers/vc4/vc4_resource.c
index 803d357..c198ab9 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -512,6 +512,52 @@ vc4_update_shadow_baselevel_texture(struct pipe_context 
*pctx,
 shadow-writes = orig-writes;
 }
 
+/**
+ * Converts a 4-byte index buffer to 2 bytes.
+ *
+ * Since GLES2 only has support for 1 and 2-byte indices, the hardware doesn't
+ * include 4-byte index support, and we have to shrink it down.
+ *
+ * There's no fallback support for when indices end up being larger than 2^16,
+ * though it will at least assertion fail.  Also, if the original index data
+ * was in user memory, it would be nice to not have uploaded it to a VBO
+ * before translating.
+ */
+void
+vc4_update_shadow_index_buffer(struct pipe_context *pctx,
+   const struct pipe_index_buffer *ib)
+{
+struct vc4_resource *shadow = vc4_resource(ib-buffer);
+struct vc4_resource *orig = vc4_resource(shadow-shadow_parent);
+uint32_t count = shadow-base.b.width0 / 2;
+
+if (shadow-writes == orig-writes)
+return;
+
+struct pipe_transfer *src_transfer;
+uint32_t *src = pipe_buffer_map_range(pctx, orig-base.b,
+  ib-offset,
+  count * 4,
+  PIPE_TRANSFER_READ, 
src_transfer);
+
+struct pipe_transfer *dst_transfer;
+uint16_t *dst = pipe_buffer_map_range(pctx, shadow-base.b,
+  0,
+  count * 2,
+  PIPE_TRANSFER_WRITE, 
dst_transfer);
+
+for (int i = 0; i  count; i++) {
+uint32_t src_index = src[i];
+assert(src_index = 0x);
+dst[i] = src_index;
+}
+
+pctx-transfer_unmap(pctx, dst_transfer);
+pctx-transfer_unmap(pctx, src_transfer);
+
+shadow-writes = orig-writes;
+}
+
 void
 vc4_resource_screen_init(struct pipe_screen *pscreen)
 {
diff --git a/src/gallium/drivers/vc4/vc4_resource.h 
b/src/gallium/drivers/vc4/vc4_resource.h
index 7cc16a4..90b58e1 100644
--- a/src/gallium/drivers/vc4/vc4_resource.h
+++ b/src/gallium/drivers/vc4/vc4_resource.h
@@ -67,9 +67,22 @@ struct vc4_resource {
  *
  * This is used to track when we need to update this shadow resource
  * from its parent in the case of GL_TEXTURE_BASE_LEVEL (which we
- * can't support in hardware).
+ * can't support in hardware) or GL_UNSIGNED_INTEGER index buffers

Mesa (master): vc4: Apply a Newton-Raphson step to improve RSQ

2014-10-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 15eb4c59f6504473989e6a064fda11d6c009ed8f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=15eb4c59f6504473989e6a064fda11d6c009ed8f

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 17 15:28:02 2014 +0100

vc4: Apply a Newton-Raphson step to improve RSQ

Fixes all the piglit built-in-functions/*sqrt tests, among others.

---

 src/gallium/drivers/vc4/vc4_program.c |   22 --
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 0046b22..66dff97 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -313,6 +313,25 @@ tgsi_to_qir_rcp(struct vc4_compile *c,
 }
 
 static struct qreg
+tgsi_to_qir_rsq(struct vc4_compile *c,
+struct tgsi_full_instruction *tgsi_inst,
+enum qop op, struct qreg *src, int i)
+{
+struct qreg x = src[0 * 4 + 0];
+struct qreg r = qir_RSQ(c, x);
+
+/* Apply a Newton-Raphson step to improve the accuracy. */
+r = qir_FMUL(c, r, qir_FSUB(c,
+qir_uniform_f(c, 1.5),
+qir_FMUL(c,
+ qir_uniform_f(c, 0.5),
+ qir_FMUL(c, x,
+  qir_FMUL(c, r, r);
+
+return r;
+}
+
+static struct qreg
 qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
 {
 struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92));
@@ -1165,7 +1184,6 @@ emit_tgsi_instruction(struct vc4_compile *c,
 [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
 [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },
 
-[TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
 [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
 [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
 [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
@@ -1182,7 +1200,7 @@ emit_tgsi_instruction(struct vc4_compile *c,
 [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
 [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp },
-[TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_scalar },
+[TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_rsq },
 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar },
 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar },
 [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp },

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: correctly include the source files

2014-10-24 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 79d09a4b125e745cc89f9bca336619cbd44d9f95
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=79d09a4b125e745cc89f9bca336619cbd44d9f95

Author: Emil Velikov emil.l.veli...@gmail.com
Date:   Tue Oct 14 16:10:50 2014 +0100

vc4: correctly include the source files

The kernel files are built into a separate static library and
all the functions that require it are already wrapped in ifdef
USE_VC4_SIMULATOR. Don't forget the header file :)

Signed-off-by: Emil Velikov emil.l.veli...@gmail.com
Reviewed-by: Eric Anholt e...@anholt.net

---

 src/gallium/drivers/vc4/Makefile.sources|3 ---
 src/gallium/drivers/vc4/kernel/Makefile.sources |1 +
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/Makefile.sources 
b/src/gallium/drivers/vc4/Makefile.sources
index 1befb9f..2336565 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -36,7 +36,4 @@ C_SOURCES := \
vc4_state.c \
vc4_tiling.c \
vc4_tiling.h \
-   kernel/vc4_gem.c \
-   kernel/vc4_validate.c \
-   kernel/vc4_validate_shaders.c \
$()
diff --git a/src/gallium/drivers/vc4/kernel/Makefile.sources 
b/src/gallium/drivers/vc4/kernel/Makefile.sources
index d1d70dd..7d17a89 100644
--- a/src/gallium/drivers/vc4/kernel/Makefile.sources
+++ b/src/gallium/drivers/vc4/kernel/Makefile.sources
@@ -1,4 +1,5 @@
 C_SOURCES := \
+   vc4_drv.h \
vc4_gem.c \
vc4_validate.c \
vc4_validate_shaders.c \

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Don't look at back stencil state unless two-sided stencil is enabled.

2014-10-21 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 48f6351940ff62c29fff618cec722e845acc86d5
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=48f6351940ff62c29fff618cec722e845acc86d5

Author: Eric Anholt e...@anholt.net
Date:   Mon Oct 20 22:53:07 2014 +0100

vc4: Don't look at back stencil state unless two-sided stencil is enabled.

Fixes regressions in the next bugfix, because gallium util stuff leaves
the back stencil state as 0 if !back-enabled.

---

 src/gallium/drivers/vc4/vc4_state.c |8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_state.c 
b/src/gallium/drivers/vc4/vc4_state.c
index 31f2424..99b5b3c 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -203,12 +203,16 @@ vc4_create_depth_stencil_alpha_state(struct pipe_context 
*pctx,
 
 uint8_t front_writemask_bits =
 tlb_stencil_setup_writemask(front-writemask);
-uint8_t back_writemask_bits =
-tlb_stencil_setup_writemask(back-writemask);
+uint8_t back_writemask = front-writemask;
+uint8_t back_writemask_bits = front_writemask_bits;
 
 so-stencil_uniforms[0] =
 tlb_stencil_setup_bits(front, front_writemask_bits);
 if (back-enabled) {
+back_writemask = back-writemask;
+back_writemask_bits =
+tlb_stencil_setup_writemask(back-writemask);
+
 so-stencil_uniforms[0] |= (1  30);
 so-stencil_uniforms[1] =
 tlb_stencil_setup_bits(back, 
back_writemask_bits);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix stencil writemask handling.

2014-10-21 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: cc298023c9b302a7a24ee01fe2da1c93f5b44666
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cc298023c9b302a7a24ee01fe2da1c93f5b44666

Author: Eric Anholt e...@anholt.net
Date:   Mon Oct 20 21:14:57 2014 +0100

vc4: Fix stencil writemask handling.

If the writemask doesn't compress, then we want to put in the uncompressed
writemask, not the compressed writemask failure value (all-on).

Fixes glean's stencil2 and fbo-clear-formats on stencil.

---

 src/gallium/drivers/vc4/vc4_state.c |4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_state.c 
b/src/gallium/drivers/vc4/vc4_state.c
index 99b5b3c..73d 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -223,8 +223,8 @@ vc4_create_depth_stencil_alpha_state(struct pipe_context 
*pctx,
 
 if (front_writemask_bits == 0xff ||
 back_writemask_bits == 0xff) {
-so-stencil_uniforms[2] = (front_writemask_bits |
-   (back_writemask_bits  8));
+so-stencil_uniforms[2] = (front-writemask |
+   (back_writemask  8));
 }
 }
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix SRC_ALPHA_SATURATE blending.

2014-10-21 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: ef280c95f2623357452f5ca8e65367b7ac30699f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef280c95f2623357452f5ca8e65367b7ac30699f

Author: Eric Anholt e...@anholt.net
Date:   Tue Oct 21 15:46:48 2014 +0100

vc4: Fix SRC_ALPHA_SATURATE blending.

Fixes glean blendFunc.

---

 src/gallium/drivers/vc4/vc4_program.c |   14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 7a2a975..c6d9fb3 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1308,9 +1308,17 @@ vc4_blend_channel(struct vc4_compile *c,
 case PIPE_BLENDFACTOR_DST_COLOR:
 return qir_FMUL(c, val, dst[channel]);
 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
-return qir_FMIN(c, src[3], qir_FSUB(c,
-qir_uniform_f(c, 1.0),
-dst[3]));
+if (channel != 3) {
+return qir_FMUL(c,
+val,
+qir_FMIN(c,
+ src[3],
+ qir_FSUB(c,
+  qir_uniform_f(c, 
1.0),
+  dst[3])));
+} else {
+return val;
+}
 case PIPE_BLENDFACTOR_CONST_COLOR:
 return qir_FMUL(c, val,
 get_temp_for_uniform(c,

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add support for rebasing texture levels so firstlevel == 0.

2014-10-19 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 572fba95e4cd85aebdde9bd757c17f719af2af04
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=572fba95e4cd85aebdde9bd757c17f719af2af04

Author: Eric Anholt e...@anholt.net
Date:   Thu Oct  2 22:14:03 2014 -0700

vc4: Add support for rebasing texture levels so firstlevel == 0.

GLES2 doesn't have GL_TEXTURE_BASE_LEVEL, so the hardware doesn't.  Fixes
piglit levelclamp, tex-miplevel-selection, and texture-storage/2D mipmap
rendering.

---

 src/gallium/drivers/vc4/vc4_context.c  |6 ++
 src/gallium/drivers/vc4/vc4_program.c  |3 ++-
 src/gallium/drivers/vc4/vc4_resource.c |   37 +++-
 src/gallium/drivers/vc4/vc4_resource.h |   14 
 src/gallium/drivers/vc4/vc4_state.c|   26 +-
 5 files changed, 83 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index cc57486..87f0251 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -253,6 +253,12 @@ vc4_setup_rcl(struct vc4_context *vc4)
 assert(!coords_emitted);
 }
 }
+
+if (vc4-resolve  PIPE_CLEAR_COLOR0)
+ctex-writes++;
+
+if (vc4-resolve  (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))
+ztex-writes++;
 }
 
 void
diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 66dff97..7a2a975 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2265,7 +2265,8 @@ write_texture_p0(struct vc4_context *vc4,
 
 cl_reloc(vc4, vc4-uniforms, rsc-bo,
  VC4_SET_FIELD(rsc-slices[0].offset  12, VC4_TEX_P0_OFFSET) 
|
- VC4_SET_FIELD(texture-u.tex.last_level, VC4_TEX_P0_MIPLVLS) |
+ VC4_SET_FIELD(texture-u.tex.last_level -
+   texture-u.tex.first_level, VC4_TEX_P0_MIPLVLS) 
|
  VC4_SET_FIELD(texture-target == PIPE_TEXTURE_CUBE,
VC4_TEX_P0_CMMODE) |
  VC4_SET_FIELD(rsc-vc4_format  7, VC4_TEX_P0_TYPE));
diff --git a/src/gallium/drivers/vc4/vc4_resource.c 
b/src/gallium/drivers/vc4/vc4_resource.c
index 7006af3..803d357 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -83,6 +83,9 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
 if (!(usage  PIPE_TRANSFER_UNSYNCHRONIZED))
 vc4_flush_for_bo(pctx, rsc-bo);
 
+if (usage  PIPE_TRANSFER_WRITE)
+rsc-writes++;
+
 trans = util_slab_alloc(vc4-transfer_pool);
 if (!trans)
 return NULL;
@@ -168,6 +171,7 @@ vc4_resource_destroy(struct pipe_screen *pscreen,
  struct pipe_resource *prsc)
 {
 struct vc4_resource *rsc = vc4_resource(prsc);
+pipe_resource_reference(rsc-shadow_parent, NULL);
 vc4_bo_unreference(rsc-bo);
 free(rsc);
 }
@@ -297,7 +301,7 @@ get_resource_texture_format(struct pipe_resource *prsc)
 return format;
 }
 
-static struct pipe_resource *
+struct pipe_resource *
 vc4_resource_create(struct pipe_screen *pscreen,
 const struct pipe_resource *tmpl)
 {
@@ -478,6 +482,37 @@ vc4_blit(struct pipe_context *pctx, const struct 
pipe_blit_info *blit_info)
 }
 
 void
+vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
+struct pipe_sampler_view *view)
+{
+struct vc4_resource *shadow = vc4_resource(view-texture);
+struct vc4_resource *orig = vc4_resource(shadow-shadow_parent);
+assert(orig);
+
+if (shadow-writes == orig-writes)
+return;
+
+for (int i = 0; i = shadow-base.b.last_level; i++) {
+struct pipe_box box = {
+.x = 0,
+.y = 0,
+.z = 0,
+.width = u_minify(shadow-base.b.width0, i),
+.height = u_minify(shadow-base.b.height0, i),
+.depth = 1,
+};
+
+util_resource_copy_region(pctx,
+  shadow-base.b, i, 0, 0, 0,
+  orig-base.b,
+  view-u.tex.first_level + i,
+  box);
+}
+
+shadow-writes = orig-writes;
+}
+
+void
 vc4_resource_screen_init(struct pipe_screen *pscreen)
 {
 pscreen-resource_create = vc4_resource_create;
diff --git a/src/gallium/drivers/vc4/vc4_resource.h 
b/src/gallium/drivers/vc4/vc4_resource.h
index 01f481d..7cc16a4 100644
--- a/src/gallium/drivers/vc4/vc4_resource.h
+++ b/src/gallium/drivers/vc4/vc4_resource.h
@@ -61,6 +61,16 @@ struct vc4_resource {
 bool tiled;
 /** One

Mesa (master): vc4: Apply a Newton-Raphson step to improve RCP.

2014-10-18 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 1fc124b80f228319ded06f80a51681c75dc0a4f3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1fc124b80f228319ded06f80a51681c75dc0a4f3

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 17 14:01:15 2014 +0100

vc4: Apply a Newton-Raphson step to improve RCP.

Fixes all the piglit floating-point *-op-div tests, among others.

---

 src/gallium/drivers/vc4/vc4_program.c |   18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 70a2b86..0046b22 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -297,6 +297,22 @@ tgsi_to_qir_scalar(struct vc4_compile *c,
 }
 
 static struct qreg
+tgsi_to_qir_rcp(struct vc4_compile *c,
+struct tgsi_full_instruction *tgsi_inst,
+enum qop op, struct qreg *src, int i)
+{
+struct qreg x = src[0 * 4 + 0];
+struct qreg r = qir_RCP(c, x);
+
+/* Apply a Newton-Raphson step to improve the accuracy. */
+r = qir_FMUL(c, r, qir_FSUB(c,
+qir_uniform_f(c, 2.0),
+qir_FMUL(c, x, r)));
+
+return r;
+}
+
+static struct qreg
 qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
 {
 struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92));
@@ -1165,7 +1181,7 @@ emit_tgsi_instruction(struct vc4_compile *c,
 
 [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
-[TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_scalar },
+[TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp },
 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_scalar },
 [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar },
 [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar },

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add a little bit more packet parsing to make dump reading easier.

2014-10-18 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 0fdc5111b4e659de8258ae8f3eb8e33ef466beb3
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=0fdc5111b4e659de8258ae8f3eb8e33ef466beb3

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 17 15:04:27 2014 +0100

vc4: Add a little bit more packet parsing to make dump reading easier.

Probably should have done this *before* staring at all those render lists
today.

---

 src/gallium/drivers/vc4/vc4_cl_dump.c |  133 -
 1 file changed, 114 insertions(+), 19 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c 
b/src/gallium/drivers/vc4/vc4_cl_dump.c
index a55c04f..6c38021 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -25,11 +25,101 @@
 #include util/macros.h
 #include vc4_context.h
 
-#define PACKET(name, size) [name] = { #name, size }
+#define dump_VC4_PACKET_LINE_WIDTH dump_float
+#define dump_VC4_PACKET_POINT_SIZE dump_float
+
+static void
+dump_float(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+void *f = cl + offset;
+
+fprintf(stderr, 0x%08x 0x%08x:  %f (0x%08x)\n,
+offset, hw_offset, *(float *)f, *(uint32_t *)f);
+}
+
+static void
+dump_VC4_PACKET_BRANCH_TO_SUB_LIST(void *cl, uint32_t offset, uint32_t 
hw_offset)
+{
+uint32_t *addr = cl + offset;
+
+fprintf(stderr, 0x%08x 0x%08x:  addr 0x%08x\n,
+offset, hw_offset, *addr);
+}
+
+static void
+dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+uint32_t *bits = cl + offset;
+
+fprintf(stderr, 0x%08x 0x%08x:  bits 0x%08x\n,
+offset, hw_offset, *bits);
+}
+
+static void
+dump_VC4_PACKET_VIEWPORT_OFFSET(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+uint16_t *o = cl + offset;
+
+fprintf(stderr, 0x%08x 0x%08x:  %f, %f (0x%04x, 0x%04x)\n,
+offset, hw_offset,
+o[0] / 16.0, o[1] / 16.0,
+o[0], o[1]);
+}
+
+static void
+dump_VC4_PACKET_CLIPPER_XY_SCALING(void *cl, uint32_t offset, uint32_t 
hw_offset)
+{
+uint32_t *scale = cl + offset;
+
+fprintf(stderr, 0x%08x 0x%08x:  %f, %f (%f, %f, 0x%08x, 
0x%08x)\n,
+offset, hw_offset,
+uif(scale[0]) / 16.0, uif(scale[1]) / 16.0,
+uif(scale[0]), uif(scale[1]),
+scale[0], scale[1]);
+}
+
+static void
+dump_VC4_PACKET_CLIPPER_Z_SCALING(void *cl, uint32_t offset, uint32_t 
hw_offset)
+{
+uint32_t *translate = cl + offset;
+uint32_t *scale = cl + offset + 8;
+
+fprintf(stderr, 0x%08x 0x%08x:  %f, %f (0x%08x, 0x%08x)\n,
+offset, hw_offset,
+uif(translate[0]), uif(translate[1]),
+translate[0], translate[1]);
+
+fprintf(stderr, 0x%08x 0x%08x:  %f, %f (0x%08x, 0x%08x)\n,
+offset + 8, hw_offset + 8,
+uif(scale[0]), uif(scale[1]),
+scale[0], scale[1]);
+}
+
+static void
+dump_VC4_PACKET_TILE_COORDINATES(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+uint8_t *tilecoords = cl + offset;
+
+fprintf(stderr, 0x%08x 0x%08x:  %d, %d\n,
+offset, hw_offset, tilecoords[0], tilecoords[1]);
+}
+
+static void
+dump_VC4_PACKET_GEM_HANDLES(void *cl, uint32_t offset, uint32_t hw_offset)
+{
+uint32_t *handles = cl + offset;
+
+fprintf(stderr, 0x%08x 0x%08x:  handle 0: %d, handle 1: %d\n,
+offset, hw_offset, handles[0], handles[1]);
+}
+
+#define PACKET_DUMP(name, size) [name] = { #name, size, dump_##name }
+#define PACKET(name, size) [name] = { #name, size, NULL }
 
 static const struct packet_info {
 const char *name;
 uint8_t size;
+void (*dump_func)(void *cl, uint32_t offset, uint32_t hw_offset);
 } packet_info[] = {
 PACKET(VC4_PACKET_HALT, 1),
 PACKET(VC4_PACKET_NOP, 1),
@@ -41,7 +131,7 @@ static const struct packet_info {
 PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 1),
 
 PACKET(VC4_PACKET_BRANCH, 5),
-PACKET(VC4_PACKET_BRANCH_TO_SUB_LIST, 5),
+PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST, 5),
 
 PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 1),
 PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 1),
@@ -63,23 +153,23 @@ static const struct packet_info {
 PACKET(VC4_PACKET_VG_SHADER_STATE, 5),
 
 PACKET(VC4_PACKET_CONFIGURATION_BITS, 4),
-PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, 5),
-PACKET(VC4_PACKET_POINT_SIZE, 5),
-PACKET(VC4_PACKET_LINE_WIDTH, 5),
+PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS, 5),
+PACKET_DUMP(VC4_PACKET_POINT_SIZE, 5),
+PACKET_DUMP(VC4_PACKET_LINE_WIDTH, 5),
 PACKET(VC4_PACKET_RHT_X_BOUNDARY, 3),
 PACKET(VC4_PACKET_DEPTH_OFFSET, 5),
 PACKET(VC4_PACKET_CLIP_WINDOW, 9),
-PACKET(VC4_PACKET_VIEWPORT_OFFSET, 5),
+PACKET_DUMP

Mesa (master): vc4: Add some comments about state management.

2014-10-17 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 000976ed9926266b52827108c666d4c05e4f4a35
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=000976ed9926266b52827108c666d4c05e4f4a35

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 17 09:40:12 2014 +0100

vc4: Add some comments about state management.

---

 src/gallium/drivers/vc4/vc4_context.c |6 ++
 src/gallium/drivers/vc4/vc4_draw.c|5 +
 2 files changed, 11 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index bcbb004..205f81d 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -319,6 +319,12 @@ vc4_flush(struct pipe_context *pctx)
 
 vc4-needs_flush = false;
 vc4-draw_call_queued = false;
+
+/* We have no hardware context saved between our draw calls, so we
+ * need to flag the next draw as needing all state emitted.  Emitting
+ * all state at the start of our draws is also what ensures that we
+ * return to the state we need after a previous tile has finished.
+ */
 vc4-dirty = ~0;
 vc4-resolve = 0;
 vc4-cleared = 0;
diff --git a/src/gallium/drivers/vc4/vc4_draw.c 
b/src/gallium/drivers/vc4/vc4_draw.c
index 0938a76..f3c225d 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -78,6 +78,11 @@ vc4_start_draw(struct vc4_context *vc4)
   VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 |
   VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32);
 
+/* START_TILE_BINNING resets the statechange counters in the hardware,
+ * which are what is used when a primitive is binned to a tile to
+ * figure out what new state packets need to be written to that tile's
+ * command list.
+ */
 cl_u8(vc4-bcl, VC4_PACKET_START_TILE_BINNING);
 
 vc4-needs_flush = true;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Set the primitive list format at the start of rendering.

2014-10-17 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: afc3aa373d45775d08babffa49b566f952689efc
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=afc3aa373d45775d08babffa49b566f952689efc

Author: Eric Anholt e...@anholt.net
Date:   Thu Oct 16 10:17:57 2014 +0100

vc4: Set the primitive list format at the start of rendering.

The other driver does this manually before calling into each tile, but we
can just let it get binned into the tiles (saving repeated kernel
validation on the packet).

Fixes simulator assertion failures on polygon-mode and non-auto texwrap.

---

 src/gallium/drivers/vc4/vc4_draw.c   |9 +
 src/gallium/drivers/vc4/vc4_packet.h |6 ++
 2 files changed, 15 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_draw.c 
b/src/gallium/drivers/vc4/vc4_draw.c
index f3c225d..f53caf7 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -85,6 +85,15 @@ vc4_start_draw(struct vc4_context *vc4)
  */
 cl_u8(vc4-bcl, VC4_PACKET_START_TILE_BINNING);
 
+/* Reset the current compressed primitives format.  This gets modified
+ * by VC4_PACKET_GL_INDEXED_PRIMITIVE and
+ * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start
+ * of every tile.
+ */
+cl_u8(vc4-bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT);
+cl_u8(vc4-bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX |
+  VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES));
+
 vc4-needs_flush = true;
 vc4-draw_call_queued = true;
 }
diff --git a/src/gallium/drivers/vc4/vc4_packet.h 
b/src/gallium/drivers/vc4/vc4_packet.h
index e9abfd1..181f2e0 100644
--- a/src/gallium/drivers/vc4/vc4_packet.h
+++ b/src/gallium/drivers/vc4/vc4_packet.h
@@ -234,6 +234,12 @@ enum vc4_packet {
 #define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT(1  1)
 #define VC4_RENDER_CONFIG_MS_MODE_4X   (1  0)
 
+#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1  4)
+#define VC4_PRIMITIVE_LIST_FORMAT_32_XY(3  4)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS  (0  0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES   (1  0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES   (2  0)
+#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3  0)
 
 enum vc4_texture_data_type {
 VC4_TEXTURE_TYPE_RGBA = 0,

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Make sure there's exactly 1 tile store per tile coords packet.

2014-10-17 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 135287db175de9496b76f8edce04871ca6444d72
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=135287db175de9496b76f8edce04871ca6444d72

Author: Eric Anholt e...@anholt.net
Date:   Thu Oct 16 10:42:04 2014 +0100

vc4: Make sure there's exactly 1 tile store per tile coords packet.

It's not documented that I can see, but the other driver does it (check
vg_hw_4.c), and one of the HW guys confirmed that you really do need to do
it.

---

 src/gallium/drivers/vc4/vc4_context.c |   79 ++---
 1 file changed, 64 insertions(+), 15 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index 72ac344..bcbb004 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -36,6 +36,49 @@
 #include vc4_context.h
 #include vc4_resource.h
 
+/**
+ * Emits a no-op STORE_TILE_BUFFER_GENERAL.
+ *
+ * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of
+ * some sort before another load is triggered.
+ */
+static void
+vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted)
+{
+if (!*coords_emitted)
+return;
+
+cl_u8(vc4-rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+cl_u8(vc4-rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
+cl_u8(vc4-rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR |
+  VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR |
+  VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR));
+cl_u32(vc4-rcl, 0); /* no address, since we're in None mode */
+
+*coords_emitted = false;
+}
+
+/**
+ * Emits a PACKET_TILE_COORDINATES if one isn't already pending.
+ *
+ * The tile coordinates packet triggers a pending load if there is one, are
+ * used for clipping during rendering, and determine where loads/stores happen
+ * relative to their base address.
+ */
+static void
+vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y,
+   bool *coords_emitted)
+{
+if (*coords_emitted)
+return;
+
+cl_u8(vc4-rcl, VC4_PACKET_TILE_COORDINATES);
+cl_u8(vc4-rcl, x);
+cl_u8(vc4-rcl, y);
+
+*coords_emitted = true;
+}
+
 static void
 vc4_setup_rcl(struct vc4_context *vc4)
 {
@@ -116,9 +159,12 @@ vc4_setup_rcl(struct vc4_context *vc4)
 bool coords_emitted = false;
 
 /* Note that the load doesn't actually occur until the
- * tile coords packet is processed.
+ * tile coords packet is processed, and only one load
+ * may be outstanding at a time.
  */
 if (resolve_uncleared  PIPE_CLEAR_COLOR) {
+vc4_store_before_load(vc4, coords_emitted);
+
 cl_start_reloc(vc4-rcl, 1);
 cl_u8(vc4-rcl, 
VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
 cl_u8(vc4-rcl,
@@ -132,13 +178,12 @@ vc4_setup_rcl(struct vc4_context *vc4)
 cl_reloc(vc4, vc4-rcl, ctex-bo,
  csurf-offset);
 
-cl_u8(vc4-rcl, VC4_PACKET_TILE_COORDINATES);
-cl_u8(vc4-rcl, x);
-cl_u8(vc4-rcl, y);
-coords_emitted = true;
+vc4_tile_coordinates(vc4, x, y, 
coords_emitted);
 }
 
 if (resolve_uncleared  (PIPE_CLEAR_DEPTH | 
PIPE_CLEAR_STENCIL)) {
+vc4_store_before_load(vc4, coords_emitted);
+
 cl_start_reloc(vc4-rcl, 1);
 cl_u8(vc4-rcl, 
VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
 cl_u8(vc4-rcl,
@@ -149,21 +194,14 @@ vc4_setup_rcl(struct vc4_context *vc4)
 cl_reloc(vc4, vc4-rcl, ztex-bo,
  zsurf-offset);
 
-cl_u8(vc4-rcl, VC4_PACKET_TILE_COORDINATES);
-cl_u8(vc4-rcl, x);
-cl_u8(vc4-rcl, y);
-coords_emitted = true;
+vc4_tile_coordinates(vc4, x, y, 
coords_emitted);
 }
 
 /* Clipping depends on tile coordinates having been
  * emitted, so make sure it's happened even if
  * everything was cleared to start.
  */
-if (!coords_emitted) {
-cl_u8(vc4-rcl, VC4_PACKET_TILE_COORDINATES);
-cl_u8(vc4-rcl, x);
-cl_u8(vc4-rcl

Mesa (master): vc4: Replace the FLUSH_ALL with FLUSH.

2014-10-17 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 895c90410314103814ca4d2684f94463bd8f243f
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=895c90410314103814ca4d2684f94463bd8f243f

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 17 09:42:35 2014 +0100

vc4: Replace the FLUSH_ALL with FLUSH.

We don't need to emit all of our current state at the end of each bin
list.  We're going to be smashing it all at the start of the next tile's
bin list, anyway.

---

 src/gallium/drivers/vc4/vc4_context.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index 205f81d..7779c46 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -263,7 +263,9 @@ vc4_flush(struct pipe_context *pctx)
 if (!vc4-needs_flush)
 return;
 
-cl_u8(vc4-bcl, VC4_PACKET_FLUSH_ALL);
+/* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */
+cl_u8(vc4-bcl, VC4_PACKET_FLUSH);
+
 cl_u8(vc4-bcl, VC4_PACKET_NOP);
 cl_u8(vc4-bcl, VC4_PACKET_HALT);
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Make some assertions about how many flushes/EOFs the simulator sees.

2014-10-17 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 9ebfb3014ea6d2bde9d4322f2537f21be97b280b
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=9ebfb3014ea6d2bde9d4322f2537f21be97b280b

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 17 09:43:54 2014 +0100

vc4: Make some assertions about how many flushes/EOFs the simulator sees.

This caught the previous commit's bug in the kernel validator.

---

 src/gallium/drivers/vc4/vc4_cl.h|2 +-
 src/gallium/drivers/vc4/vc4_cl_dump.c   |8 
 src/gallium/drivers/vc4/vc4_context.c   |4 ++--
 src/gallium/drivers/vc4/vc4_simulator.c |   21 +++--
 4 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index 2cdd77d..634a4b0 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -43,7 +43,7 @@ struct vc4_cl {
 void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl);
 void vc4_grow_cl(struct vc4_cl *cl);
 void vc4_reset_cl(struct vc4_cl *cl);
-void vc4_dump_cl(struct vc4_cl *cl, bool is_render);
+void vc4_dump_cl(void *cl, uint32_t size, bool is_render);
 uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo);
 
 static inline void
diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c 
b/src/gallium/drivers/vc4/vc4_cl_dump.c
index 40bcf01..a55c04f 100644
--- a/src/gallium/drivers/vc4/vc4_cl_dump.c
+++ b/src/gallium/drivers/vc4/vc4_cl_dump.c
@@ -83,12 +83,12 @@ static const struct packet_info {
 };
 
 void
-vc4_dump_cl(struct vc4_cl *cl, bool is_render)
+vc4_dump_cl(void *cl, uint32_t size, bool is_render)
 {
 uint32_t offset = 0, hw_offset = 0;
-uint8_t *cmds = cl-base;
+uint8_t *cmds = cl;
 
-while (offset  cl-end - cl-base) {
+while (offset  size) {
 uint8_t header = cmds[offset];
 
 if (header  ARRAY_SIZE(packet_info) ||
@@ -105,7 +105,7 @@ vc4_dump_cl(struct vc4_cl *cl, bool is_render)
 header, p-name);
 
 for (uint32_t i = 1; i  p-size; i++) {
-if (offset + i = cl-end - cl-base) {
+if (offset + i = size) {
 fprintf(stderr, 0x%08x 0x%08x: CL 
overflow!\n,
 offset + i, hw_offset + i);
 return;
diff --git a/src/gallium/drivers/vc4/vc4_context.c 
b/src/gallium/drivers/vc4/vc4_context.c
index 7779c46..cc57486 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -273,9 +273,9 @@ vc4_flush(struct pipe_context *pctx)
 
 if (vc4_debug  VC4_DEBUG_CL) {
 fprintf(stderr, BCL:\n);
-vc4_dump_cl(vc4-bcl, false);
+vc4_dump_cl(vc4-bcl.base, vc4-bcl.end - vc4-bcl.base, 
false);
 fprintf(stderr, RCL:\n);
-vc4_dump_cl(vc4-rcl, true);
+vc4_dump_cl(vc4-rcl.base, vc4-rcl.end - vc4-rcl.base, true);
 }
 
 struct drm_vc4_submit_cl submit;
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c 
b/src/gallium/drivers/vc4/vc4_simulator.c
index 34262f5..1040ae8 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -108,6 +108,7 @@ vc4_simulator_unpin_bos(struct exec_info *exec)
 int
 vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
 {
+struct vc4_screen *screen = vc4-screen;
 struct vc4_surface *csurf = vc4_surface(vc4-framebuffer.cbufs[0]);
 struct vc4_resource *ctex = csurf ? vc4_resource(csurf-base.texture) 
: NULL;
 uint32_t winsys_stride = ctex ? ctex-bo-simulator_winsys_stride : 0;
@@ -149,8 +150,24 @@ vc4_simulator_flush(struct vc4_context *vc4, struct 
drm_vc4_submit_cl *args)
 if (ret)
 return ret;
 
-simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
-simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
+int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
+if (bfc != 1) {
+fprintf(stderr, Binning returned %d flushes, should be 1.\n,
+bfc);
+fprintf(stderr, Relocated binning command list:\n);
+vc4_dump_cl(screen-simulator_mem_base + exec.ct0ca,
+exec.ct0ea - exec.ct0ca, false);
+abort();
+}
+int rfc = simpenrose_do_rendering(exec.ct1ca, exec.ct1ea);
+if (rfc != 1) {
+fprintf(stderr, Rendering returned %d frames, should be 1.\n,
+rfc);
+fprintf(stderr, Relocated render command list:\n);
+vc4_dump_cl(screen-simulator_mem_base + exec.ct1ca,
+exec.ct1ea - exec.ct1ca, true);
+abort();
+}
 
 ret = vc4_simulator_unpin_bos(exec);
 if (ret

Mesa (master): vc4: Fix accidental dropping of the low bits of the store tilebuffer packet.

2014-10-17 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 1f7048419ed6ad4d25e89efa885fdc58d36c4213
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=1f7048419ed6ad4d25e89efa885fdc58d36c4213

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 17 12:14:11 2014 +0100

vc4: Fix accidental dropping of the low bits of the store tilebuffer packet.

Notably this included the EOF flag (the other bits are the full buffer
dump selection, but we don't do full dumps), which caused the kernel
checking for frame completion to trigger.

---

 src/gallium/drivers/vc4/kernel/vc4_validate.c |8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c 
b/src/gallium/drivers/vc4/kernel/vc4_validate.c
index fe2cd44..86b8fa5 100644
--- a/src/gallium/drivers/vc4/kernel/vc4_validate.c
+++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c
@@ -268,7 +268,7 @@ validate_loadstore_tile_buffer_general(VALIDATE_ARGS)
uint32_t packet_b1 = *(uint8_t *)(untrusted + 1);
struct drm_gem_cma_object *fbo;
uint32_t buffer_type = packet_b0  0xf;
-   uint32_t offset, cpp;
+   uint32_t untrusted_address, offset, cpp;
 
switch (buffer_type) {
case VC4_LOADSTORE_TILE_BUFFER_NONE:
@@ -295,7 +295,8 @@ validate_loadstore_tile_buffer_general(VALIDATE_ARGS)
if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, fbo))
return -EINVAL;
 
-   offset = *(uint32_t *)(untrusted + 2)  ~0xf;
+   untrusted_address = *(uint32_t *)(untrusted + 2);
+   offset = untrusted_address  ~0xf;
 
if (!check_tex_size(exec, fbo, offset,
((packet_b0 
@@ -305,7 +306,8 @@ validate_loadstore_tile_buffer_general(VALIDATE_ARGS)
return -EINVAL;
}
 
-   *(uint32_t *)(validated + 2) = offset + fbo-paddr;
+   *(uint32_t *)(validated + 2) = (offset + fbo-paddr +
+   (untrusted_address  0xf));
 
return 0;
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Fix the uniform debug output.

2014-10-15 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 57de9bbb632ed7639d07d37965dcee5a1fe3bf30
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=57de9bbb632ed7639d07d37965dcee5a1fe3bf30

Author: Eric Anholt e...@anholt.net
Date:   Wed Oct 15 16:16:09 2014 +0100

vc4: Fix the uniform debug output.

I dropped the shader index when moving to the compiled shader struct, but
didn't update the format string here.

---

 src/gallium/drivers/vc4/vc4_program.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index f4b723a..70a2b86 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2466,7 +2466,7 @@ vc4_write_uniforms(struct vc4_context *vc4, struct 
vc4_compiled_shader *shader,
 }
 #if 0
 uint32_t written_val = *(uint32_t *)(vc4-uniforms.next - 4);
-fprintf(stderr, %p/%d: %d: 0x%08x (%f)\n,
+fprintf(stderr, %p: %d / 0x%08x (%f)\n,
 shader, i, written_val, uif(written_val));
 #endif
 }

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add support for user clip plane and gl_ClipVertex.

2014-10-15 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 201d4c0b2a6f7f0c1d59c4fd5cce4916fc48a2d2
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=201d4c0b2a6f7f0c1d59c4fd5cce4916fc48a2d2

Author: Eric Anholt e...@anholt.net
Date:   Wed Oct 15 15:25:57 2014 +0100

vc4: Add support for user clip plane and gl_ClipVertex.

Fixes about 15 piglit tests about interpolation and clipping.

---

 src/gallium/drivers/vc4/vc4_context.h |2 +
 src/gallium/drivers/vc4/vc4_program.c |   84 +++--
 src/gallium/drivers/vc4/vc4_qir.c |2 +
 src/gallium/drivers/vc4/vc4_qir.h |3 ++
 src/gallium/drivers/vc4/vc4_state.c   |4 +-
 5 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.h 
b/src/gallium/drivers/vc4/vc4_context.h
index 56cfc7b..45dfa02 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -58,6 +58,7 @@
 #define VC4_DIRTY_SCISSOR   (1  17)
 #define VC4_DIRTY_FLAT_SHADE_FLAGS (1  18)
 #define VC4_DIRTY_PRIM_MODE (1  19)
+#define VC4_DIRTY_CLIP  (1  20)
 
 #define VC4_SHADER_DIRTY_VP (1  0)
 #define VC4_SHADER_DIRTY_FP (1  1)
@@ -207,6 +208,7 @@ struct vc4_context {
 unsigned sample_mask;
 struct pipe_framebuffer_state framebuffer;
 struct pipe_poly_stipple stipple;
+struct pipe_clip_state clip;
 struct pipe_viewport_state viewport;
 struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
 struct vc4_vertexbuf_stateobj vertexbuf;
diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index a79e354..f4b723a 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -53,6 +53,7 @@ struct vc4_key {
 unsigned wrap_t:3;
 uint8_t swizzle[4];
 } tex[VC4_MAX_TEXTURE_SAMPLERS];
+uint8_t ucp_enables;
 };
 
 struct vc4_fs_key {
@@ -1097,6 +1098,9 @@ emit_tgsi_declaration(struct vc4_compile *c,
 case TGSI_SEMANTIC_POSITION:
 c-output_position_index = decl-Range.First * 4;
 break;
+case TGSI_SEMANTIC_CLIPVERTEX:
+c-output_clipvertex_index = decl-Range.First * 4;
+break;
 case TGSI_SEMANTIC_COLOR:
 c-output_color_index = decl-Range.First * 4;
 break;
@@ -1398,6 +1402,28 @@ vc4_blend(struct vc4_compile *c, struct qreg *result,
 }
 
 static void
+clip_distance_discard(struct vc4_compile *c)
+{
+for (int i = 0; i  PIPE_MAX_CLIP_PLANES; i++) {
+if (!(c-key-ucp_enables  (1  i)))
+continue;
+
+struct qreg dist = emit_fragment_varying(c,
+ 
TGSI_SEMANTIC_CLIPDIST,
+ i,
+ TGSI_SWIZZLE_X);
+
+qir_SF(c, dist);
+
+if (c-discard.file == QFILE_NULL)
+c-discard = qir_uniform_f(c, 0.0);
+
+c-discard = qir_SEL_X_Y_NS(c, qir_uniform_f(c, 1.0),
+c-discard);
+}
+}
+
+static void
 alpha_test_discard(struct vc4_compile *c)
 {
 struct qreg src_alpha;
@@ -1456,6 +1482,7 @@ alpha_test_discard(struct vc4_compile *c)
 static void
 emit_frag_end(struct vc4_compile *c)
 {
+clip_distance_discard(c);
 alpha_test_discard(c);
 
 enum pipe_format color_format = c-fs_key-color_format;
@@ -1655,6 +1682,45 @@ emit_stub_vpm_read(struct vc4_compile *c)
 }
 
 static void
+emit_ucp_clipdistance(struct vc4_compile *c)
+{
+struct qreg *clipvertex;
+
+if (c-output_clipvertex_index != -1)
+clipvertex = c-outputs[c-output_clipvertex_index];
+else if (c-output_position_index != -1)
+clipvertex = c-outputs[c-output_position_index];
+else
+return;
+
+for (int plane = 0; plane  PIPE_MAX_CLIP_PLANES; plane++) {
+if (!(c-key-ucp_enables  (1  plane)))
+continue;
+
+/* Pick the next outputs[] that hasn't been written to, since
+ * there are no other program writes left to be processed at
+ * this point.  If something had been declared but not written
+ * (like a w component), we'll just smash over the top of it.
+ */
+uint32_t output_index = c-num_outputs++;
+add_output(c, output_index,
+   TGSI_SEMANTIC_CLIPDIST,
+   plane,
+   TGSI_SWIZZLE_X);
+
+struct qreg dist = qir_uniform_f(c, 0.0);
+for (int i = 0; i  4; i++) {
+struct

Mesa (master): vc4: Move the output semantics setup to a helper.

2014-10-15 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 6a0bf67048d508f907db6bb05e5e367308c21511
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=6a0bf67048d508f907db6bb05e5e367308c21511

Author: Eric Anholt e...@anholt.net
Date:   Wed Oct 15 16:39:54 2014 +0100

vc4: Move the output semantics setup to a helper.

I want to reuse it elsewhere to set up outputs that aren't in the TGSI.

---

 src/gallium/drivers/vc4/vc4_program.c |   44 +
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index c603425..a79e354 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1021,6 +1021,29 @@ emit_face_input(struct vc4_compile *c, int attr)
 }
 
 static void
+add_output(struct vc4_compile *c,
+   uint32_t decl_offset,
+   uint8_t semantic_name,
+   uint8_t semantic_index,
+   uint8_t semantic_swizzle)
+{
+uint32_t old_array_size = c-outputs_array_size;
+resize_qreg_array(c, c-outputs, c-outputs_array_size,
+  decl_offset + 1);
+
+if (old_array_size != c-outputs_array_size) {
+c-output_semantics = reralloc(c,
+   c-output_semantics,
+   struct vc4_varying_semantic,
+   c-outputs_array_size);
+}
+
+c-output_semantics[decl_offset].semantic = semantic_name;
+c-output_semantics[decl_offset].index = semantic_index;
+c-output_semantics[decl_offset].swizzle = semantic_swizzle;
+}
+
+static void
 emit_tgsi_declaration(struct vc4_compile *c,
   struct tgsi_full_declaration *decl)
 {
@@ -1062,23 +1085,12 @@ emit_tgsi_declaration(struct vc4_compile *c,
 break;
 
 case TGSI_FILE_OUTPUT: {
-uint32_t old_array_size = c-outputs_array_size;
-resize_qreg_array(c, c-outputs, c-outputs_array_size,
-  (decl-Range.Last + 1) * 4);
-
-if (old_array_size != c-outputs_array_size) {
-c-output_semantics = reralloc(c,
-   c-output_semantics,
-   struct 
vc4_varying_semantic,
-   c-outputs_array_size);
-}
-
-struct vc4_varying_semantic *sem =
-c-output_semantics[decl-Range.First * 4];
 for (int i = 0; i  4; i++) {
-sem[i].semantic = decl-Semantic.Name;
-sem[i].index = decl-Semantic.Index;
-sem[i].swizzle = i;
+add_output(c,
+   decl-Range.First * 4 + i,
+   decl-Semantic.Name,
+   decl-Semantic.Index,
+   i);
 }
 
 switch (decl-Semantic.Name) {

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Add support for having 0 vertex elements used.

2014-10-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: b5fc9d5664d08d2e47ae89bf580e43732346a694
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=b5fc9d5664d08d2e47ae89bf580e43732346a694

Author: Eric Anholt e...@anholt.net
Date:   Thu Sep 25 14:57:01 2014 -0700

vc4: Add support for having 0 vertex elements used.

You have to load at least 1, according to the simulator.  Fixes 4 piglit
tests and even more ES2 conformance tests.

---

 src/gallium/drivers/vc4/vc4_draw.c|   27 +--
 src/gallium/drivers/vc4/vc4_program.c |   26 ++
 2 files changed, 47 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_draw.c 
b/src/gallium/drivers/vc4/vc4_draw.c
index 1a0c0dc..0938a76 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -120,8 +120,12 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info)
vc4-constbuf[PIPE_SHADER_VERTEX],
vc4-verttex);
 
+/* The simulator throws a fit if VS or CS don't read an attribute, so
+ * we emit a dummy read.
+ */
+uint32_t num_elements_emit = MAX2(vtx-num_elements, 1);
 /* Emit the shader record. */
-cl_start_shader_reloc(vc4-shader_rec, 3 + vtx-num_elements);
+cl_start_shader_reloc(vc4-shader_rec, 3 + num_elements_emit);
 cl_u16(vc4-shader_rec,
VC4_SHADER_FLAG_ENABLE_CLIPPING |
((info-mode == PIPE_PRIM_POINTS 
@@ -133,14 +137,14 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info)
 cl_u32(vc4-shader_rec, 0); /* UBO offset written by kernel */
 
 cl_u16(vc4-shader_rec, 0); /* vs num uniforms */
-cl_u8(vc4-shader_rec, (1  vtx-num_elements) - 1); /* vs attribute 
array bitfield */
-cl_u8(vc4-shader_rec, 16 * vtx-num_elements); /* vs total attribute 
size */
+cl_u8(vc4-shader_rec, (1  num_elements_emit) - 1); /* vs attribute 
array bitfield */
+cl_u8(vc4-shader_rec, 16 * num_elements_emit); /* vs total attribute 
size */
 cl_reloc(vc4, vc4-shader_rec, vc4-prog.vs-bo, 0);
 cl_u32(vc4-shader_rec, 0); /* UBO offset written by kernel */
 
 cl_u16(vc4-shader_rec, 0); /* cs num uniforms */
-cl_u8(vc4-shader_rec, (1  vtx-num_elements) - 1); /* cs attribute 
array bitfield */
-cl_u8(vc4-shader_rec, 16 * vtx-num_elements); /* cs total attribute 
size */
+cl_u8(vc4-shader_rec, (1  num_elements_emit) - 1); /* cs attribute 
array bitfield */
+cl_u8(vc4-shader_rec, 16 * num_elements_emit); /* cs total attribute 
size */
 cl_reloc(vc4, vc4-shader_rec, vc4-prog.cs-bo, 0);
 cl_u32(vc4-shader_rec, 0); /* UBO offset written by kernel */
 
@@ -167,13 +171,24 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info)
 }
 }
 
+if (vtx-num_elements == 0) {
+assert(num_elements_emit == 1);
+struct vc4_bo *bo = vc4_bo_alloc(vc4-screen, 4096, scratch 
VBO);
+cl_reloc(vc4, vc4-shader_rec, bo, 0);
+cl_u8(vc4-shader_rec, 16 - 1); /* element size */
+cl_u8(vc4-shader_rec, 0); /* stride */
+cl_u8(vc4-shader_rec, 0); /* VS VPM offset */
+cl_u8(vc4-shader_rec, 0); /* CS VPM offset */
+vc4_bo_unreference(bo);
+}
+
 /* the actual draw call. */
 cl_u8(vc4-bcl, VC4_PACKET_GL_SHADER_STATE);
 assert(vtx-num_elements = 8);
 /* Note that number of attributes == 0 in the packet means 8
  * attributes.  This field also contains the offset into shader_rec.
  */
-cl_u32(vc4-bcl, vtx-num_elements  0x7);
+cl_u32(vc4-bcl, num_elements_emit  0x7);
 
 /* Note that the primitive type fields match with OpenGL/gallium
  * definitions, up to but not including QUADS.
diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 3056c67..c603425 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1619,6 +1619,29 @@ emit_point_size_write(struct vc4_compile *c)
 qir_VPM_WRITE(c, point_size);
 }
 
+/**
+ * Emits a VPM read of the stub vertex attribute set up by vc4_draw.c.
+ *
+ * The simulator insists that there be at least one vertex attribute, so
+ * vc4_draw.c will emit one if it wouldn't have otherwise.  The simulator also
+ * insists that all vertex attributes loaded get read by the VS/CS, so we have
+ * to consume it here.
+ */
+static void
+emit_stub_vpm_read(struct vc4_compile *c)
+{
+if (c-num_inputs)
+return;
+
+for (int i = 0; i  4; i++) {
+qir_emit(c, qir_inst(QOP_VPM_READ,
+ qir_get_temp(c),
+ c-undef,
+ c

Mesa (master): vc4: Fix render target NPOT alignment at small miplevels.

2014-10-14 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: a2d8b6dbd5359e5dc930e22ac21a92bf20587401
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2d8b6dbd5359e5dc930e22ac21a92bf20587401

Author: Eric Anholt e...@anholt.net
Date:   Tue Oct 14 14:28:14 2014 +0100

vc4: Fix render target NPOT alignment at small miplevels.

The texturing hardware takes the POT level 0 width/height and minifies
those.  This is different from what we were doing, for example, for
273-wide's level 5: POT(273 >> 5) == 8, while POT(273) >> 5 == 16.

Fixes piglit-depthstencil-render-miplevels 273.

---

 src/gallium/drivers/vc4/vc4_state.c |   15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_state.c 
b/src/gallium/drivers/vc4/vc4_state.c
index 2a123eb..7ccffeb 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -400,9 +400,18 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
  * framebuffer.  Note that if the z/color buffers were mismatched
  * sizes, we wouldn't be able to do this.
  */
-if ((cso-cbufs[0]  cso-cbufs[0]-u.tex.level) ||
- (cso-zsbuf  cso-zsbuf-u.tex.level)) {
-cso-width = util_next_power_of_two(cso-width);
+if (cso-cbufs[0]  cso-cbufs[0]-u.tex.level) {
+struct vc4_resource *rsc =
+vc4_resource(cso-cbufs[0]-texture);
+cso-width =
+(rsc-slices[cso-cbufs[0]-u.tex.level].stride /
+ rsc-cpp);
+} else if (cso-zsbuf  cso-zsbuf-u.tex.level){
+struct vc4_resource *rsc =
+vc4_resource(cso-zsbuf-texture);
+cso-width =
+(rsc-slices[cso-zsbuf-u.tex.level].stride /
+ rsc-cpp);
 }
 
 vc4-dirty |= VC4_DIRTY_FRAMEBUFFER;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Match VS outputs to FS inputs.

2014-10-13 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 5d72a1c95662109b1338605da83329dd25e00859
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d72a1c95662109b1338605da83329dd25e00859

Author: Eric Anholt e...@anholt.net
Date:   Mon Oct 13 08:24:57 2014 +0100

vc4: Match VS outputs to FS inputs.

If the VS doesn't output a value that the FS needs, we still need to read
the right contents for the remaining FS inputs, by emitting padding.  And
if the VS outputs something the FS doesn't need, we shouldn't put it in
the VPM at all (so the code producing it can get DCEed).

Fixes 77 piglit tests.

---

 src/gallium/drivers/vc4/vc4_context.h |   10 +++
 src/gallium/drivers/vc4/vc4_program.c |  113 -
 src/gallium/drivers/vc4/vc4_qir.h |   30 -
 3 files changed, 135 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.h 
b/src/gallium/drivers/vc4/vc4_context.h
index 31dec04..56cfc7b 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -83,6 +83,7 @@ struct vc4_uncompiled_shader {
 };
 
 struct vc4_compiled_shader {
+uint64_t program_id;
 struct vc4_bo *bo;
 
 struct vc4_shader_uniform_info uniforms;
@@ -91,6 +92,14 @@ struct vc4_compiled_shader {
 uint32_t color_inputs;
 
 uint8_t num_inputs;
+
+/**
+ * Array of the meanings of the VPM inputs this shader needs.
+ *
+ * It doesn't include those that aren't part of the VPM, like
+ * point/line coordinates.
+ */
+struct vc4_varying_semantic *input_semantics;
 };
 
 struct vc4_program_stateobj {
@@ -173,6 +182,7 @@ struct vc4_context {
 struct primconvert_context *primconvert;
 
 struct util_hash_table *fs_cache, *vs_cache;
+uint64_t next_compiled_program_id;
 
 struct ra_regs *regs;
 unsigned int reg_class_any;
diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index b7c51a7..455b5d7 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -75,6 +75,13 @@ struct vc4_fs_key {
 
 struct vc4_vs_key {
 struct vc4_key base;
+
+/**
+ * This is a proxy for the array of FS input semantics, which is
+ * larger than we would want to put in the key.
+ */
+uint64_t compiled_fs_id;
+
 enum pipe_format attr_formats[8];
 bool is_coord;
 bool per_vertex_point_size;
@@ -948,13 +955,28 @@ emit_point_coord_input(struct vc4_compile *c, int attr)
 }
 
 static struct qreg
-emit_fragment_varying(struct vc4_compile *c, int index)
+emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
+  uint8_t index, uint8_t swizzle)
 {
+uint32_t i = c-num_input_semantics++;
 struct qreg vary = {
 QFILE_VARY,
-index
+i
 };
 
+if (c-num_input_semantics = c-input_semantics_array_size) {
+c-input_semantics_array_size =
+MAX2(4, c-input_semantics_array_size * 2);
+
+c-input_semantics = reralloc(c, c-input_semantics,
+  struct vc4_varying_semantic,
+  c-input_semantics_array_size);
+}
+
+c-input_semantics[i].semantic = semantic;
+c-input_semantics[i].index = index;
+c-input_semantics[i].swizzle = swizzle;
+
 return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
 }
 
@@ -964,12 +986,11 @@ emit_fragment_input(struct vc4_compile *c, int attr,
 {
 for (int i = 0; i  4; i++) {
 c-inputs[attr * 4 + i] =
-emit_fragment_varying(c, attr * 4 + i);
+emit_fragment_varying(c,
+  decl-Semantic.Name,
+  decl-Semantic.Index,
+  i);
 c-num_inputs++;
-
-if (decl-Semantic.Name == TGSI_SEMANTIC_COLOR ||
-decl-Semantic.Name == TGSI_SEMANTIC_BCOLOR)
-c-color_inputs |= 1  i;
 }
 }
 
@@ -1027,10 +1048,26 @@ emit_tgsi_declaration(struct vc4_compile *c,
 }
 break;
 
-case TGSI_FILE_OUTPUT:
+case TGSI_FILE_OUTPUT: {
+uint32_t old_array_size = c-outputs_array_size;
 resize_qreg_array(c, c-outputs, c-outputs_array_size,
   (decl-Range.Last + 1) * 4);
 
+if (old_array_size != c-outputs_array_size) {
+c-output_semantics = reralloc(c,
+   c-output_semantics,
+   struct 
vc4_varying_semantic

Mesa (master): vc4: Add support for the TXL opcode.

2014-10-13 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: e1d1c396265ce3b363e99422b46275275723ee21
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1d1c396265ce3b363e99422b46275275723ee21

Author: Eric Anholt e...@anholt.net
Date:   Mon Oct 13 14:38:10 2014 +0100

vc4: Add support for the TXL opcode.

There's a bit at the bottom of cube map stride (which has some formatting
bugs in the docs) which flips the bias coordinate to being an absolute
LOD.

---

 src/gallium/drivers/vc4/vc4_program.c |   20 +++-
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 1bbdba5..3056c67 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -528,6 +528,7 @@ tgsi_to_qir_tex(struct vc4_compile *c,
 struct qreg t = src[0 * 4 + 1];
 struct qreg r = src[0 * 4 + 2];
 uint32_t unit = tgsi_inst-Src[1].Register.Index;
+bool is_txl = tgsi_inst-Instruction.Opcode == TGSI_OPCODE_TXL;
 
 struct qreg proj = c-undef;
 if (tgsi_inst-Instruction.Opcode == TGSI_OPCODE_TXP) {
@@ -561,6 +562,13 @@ tgsi_to_qir_tex(struct vc4_compile *c,
 }
 
 if (tgsi_inst-Texture.Texture == TGSI_TEXTURE_CUBE ||
+tgsi_inst-Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
+is_txl) {
+texture_u[2] = add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P2,
+   unit | (is_txl  16));
+}
+
+if (tgsi_inst-Texture.Texture == TGSI_TEXTURE_CUBE ||
tgsi_inst-Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) {
 struct qreg ma = qir_FMAXABS(c, qir_FMAXABS(c, s, t), r);
 struct qreg rcp_ma = qir_RCP(c, ma);
@@ -568,8 +576,6 @@ tgsi_to_qir_tex(struct vc4_compile *c,
 t = qir_FMUL(c, t, rcp_ma);
 r = qir_FMUL(c, r, rcp_ma);
 
-texture_u[2] = add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P2, 
unit);
-
 qir_TEX_R(c, r, texture_u[next_texture_u++]);
 } else if (c-key-tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
c-key-tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP ||
@@ -591,7 +597,8 @@ tgsi_to_qir_tex(struct vc4_compile *c,
 
 qir_TEX_T(c, t, texture_u[next_texture_u++]);
 
-if (tgsi_inst-Instruction.Opcode == TGSI_OPCODE_TXB)
+if (tgsi_inst-Instruction.Opcode == TGSI_OPCODE_TXB ||
+tgsi_inst-Instruction.Opcode == TGSI_OPCODE_TXL)
 qir_TEX_B(c, src[0 * 4 + 3], texture_u[next_texture_u++]);
 
 qir_TEX_S(c, s, texture_u[next_texture_u++]);
@@ -1175,6 +1182,7 @@ emit_tgsi_instruction(struct vc4_compile *c,
 case TGSI_OPCODE_TEX:
 case TGSI_OPCODE_TXP:
 case TGSI_OPCODE_TXB:
+case TGSI_OPCODE_TXL:
 tgsi_to_qir_tex(c, tgsi_inst,
 op_trans[tgsi_op].op, src_regs);
 return;
@@ -2163,15 +2171,17 @@ write_texture_p1(struct vc4_context *vc4,
 static void
 write_texture_p2(struct vc4_context *vc4,
  struct vc4_texture_stateobj *texstate,
- uint32_t unit)
+ uint32_t data)
 {
+uint32_t unit = data  0x;
 struct pipe_sampler_view *texture = texstate-textures[unit];
 struct vc4_resource *rsc = vc4_resource(texture-texture);
 
 cl_u32(vc4-uniforms,
VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
  VC4_TEX_P2_PTYPE) |
-   VC4_SET_FIELD(rsc-cube_map_stride  12, VC4_TEX_P2_CMST));
+   VC4_SET_FIELD(rsc-cube_map_stride  12, VC4_TEX_P2_CMST) |
+   VC4_SET_FIELD((data  16)  1, VC4_TEX_P2_BSLOD));
 }
 
 

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Improve the accuracy of SIN and COS.

2014-10-13 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 5bc91b6e322354d0964c07375c9a3a28b0083a38
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=5bc91b6e322354d0964c07375c9a3a28b0083a38

Author: Eric Anholt e...@anholt.net
Date:   Mon Oct 13 14:11:28 2014 +0100

vc4: Improve the accuracy of SIN and COS.

This gets them to pass glsl-sin/cos.  There was an obvious problem that I
was using the FRC code on the scaled input value, which means that we had
a range in [0, 1], while our Taylor series is most accurate across [-0.5, 0.5].
We can just slide things over, but that means flipping the sign of the
coefficients.  After that, it was just a matter of stuffing more
coefficients in.

---

 src/gallium/drivers/vc4/vc4_program.c |   28 +---
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index 455b5d7..1bbdba5 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -766,10 +766,11 @@ tgsi_to_qir_sin(struct vc4_compile *c,
 enum qop op, struct qreg *src, int i)
 {
 float coeff[] = {
-2.0 * M_PI,
--pow(2.0 * M_PI, 3) / (3 * 2 * 1),
-pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
--pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
+-2.0 * M_PI,
+pow(2.0 * M_PI, 3) / (3 * 2 * 1),
+-pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
+pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
+-pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
 };
 
 struct qreg scaled_x =
@@ -777,8 +778,9 @@ tgsi_to_qir_sin(struct vc4_compile *c,
  src[0 * 4 + 0],
  qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
 
-
-struct qreg x = tgsi_to_qir_frc(c, NULL, 0, scaled_x, 0);
+struct qreg x = qir_FADD(c,
+ tgsi_to_qir_frc(c, NULL, 0, scaled_x, 0),
+ qir_uniform_f(c, -0.5));
 struct qreg x2 = qir_FMUL(c, x, x);
 struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
 for (int i = 1; i  ARRAY_SIZE(coeff); i++) {
@@ -799,16 +801,20 @@ tgsi_to_qir_cos(struct vc4_compile *c,
 enum qop op, struct qreg *src, int i)
 {
 float coeff[] = {
-1.0f,
--pow(2.0 * M_PI, 2) / (2 * 1),
-pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
--pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
+-1.0f,
+pow(2.0 * M_PI, 2) / (2 * 1),
+-pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1),
+pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1),
+-pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
+pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
 };
 
 struct qreg scaled_x =
 qir_FMUL(c, src[0 * 4 + 0],
  qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));
-struct qreg x_frac = tgsi_to_qir_frc(c, NULL, 0, scaled_x, 0);
+struct qreg x_frac = qir_FADD(c,
+  tgsi_to_qir_frc(c, NULL, 0, scaled_x, 
0),
+  qir_uniform_f(c, -0.5));
 
 struct qreg sum = qir_uniform_f(c, coeff[0]);
 struct qreg x2 = qir_FMUL(c, x_frac, x_frac);

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Write the VPM read setup multiple times to queue all the inputs.

2014-10-13 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 615bbf0ca641d356d975f12a5491f2fd56549ed8
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=615bbf0ca641d356d975f12a5491f2fd56549ed8

Author: Eric Anholt e...@anholt.net
Date:   Mon Oct 13 16:20:01 2014 +0100

vc4: Write the VPM read setup multiple times to queue all the inputs.

There's a 4-element fifo, and the size (number of dwords per vertex) field
is just 4 bits.

Fixes glsl-routing on sim.

---

 src/gallium/drivers/vc4/vc4_qpu_emit.c |   21 ++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c 
b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 397e6f2..99e634e 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -238,15 +238,30 @@ vc4_generate_code(struct vc4_context *vc4, struct 
vc4_compile *c)
 {
 struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c);
 bool discard = false;
+uint32_t inputs_remaining = c-num_inputs;
+uint32_t vpm_read_fifo_count = 0;
+uint32_t vpm_read_offset = 0;
 
 make_empty_list(c-qpu_inst_list);
 
 switch (c-stage) {
 case QSTAGE_VERT:
 case QSTAGE_COORD:
-queue(c, qpu_load_imm_ui(qpu_vrsetup(),
- (0x1a00 +
-  0x0010 * c-num_inputs)));
+/* There's a 4-entry FIFO for VPMVCD reads, each of which can
+ * load up to 16 dwords (4 vec4s) per vertex.
+ */
+while (inputs_remaining) {
+uint32_t num_entries = MIN2(inputs_remaining, 16);
+queue(c, qpu_load_imm_ui(qpu_vrsetup(),
+ vpm_read_offset |
+ 0x1a00 |
+ ((num_entries  0xf)  20)));
+inputs_remaining -= num_entries;
+vpm_read_offset += num_entries;
+vpm_read_fifo_count++;
+}
+assert(vpm_read_fifo_count = 4);
+
 queue(c, qpu_load_imm_ui(qpu_vwsetup(), 0x1a00));
 break;
 case QSTAGE_FRAG:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Don't look up the compiled shaders unless state has changed.

2014-10-10 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: d09509da2aa5ff709c48bdc4163a2c57811c70ab
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=d09509da2aa5ff709c48bdc4163a2c57811c70ab

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 10 14:17:15 2014 +0200

vc4: Don't look up the compiled shaders unless state has changed.

Improves simulated norast performance on a little benchmark by 38.0965%
+/- 3.27534% (n=11).

---

 src/gallium/drivers/vc4/vc4_context.h |3 +++
 src/gallium/drivers/vc4/vc4_draw.c|5 +
 src/gallium/drivers/vc4/vc4_program.c |   20 
 3 files changed, 28 insertions(+)

diff --git a/src/gallium/drivers/vc4/vc4_context.h 
b/src/gallium/drivers/vc4/vc4_context.h
index da07cfa..31dec04 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -57,6 +57,7 @@
 #define VC4_DIRTY_INDEXBUF  (1  16)
 #define VC4_DIRTY_SCISSOR   (1  17)
 #define VC4_DIRTY_FLAT_SHADE_FLAGS (1  18)
+#define VC4_DIRTY_PRIM_MODE (1  19)
 
 #define VC4_SHADER_DIRTY_VP (1  0)
 #define VC4_SHADER_DIRTY_FP (1  1)
@@ -177,6 +178,8 @@ struct vc4_context {
 unsigned int reg_class_any;
 unsigned int reg_class_a;
 
+uint8_t prim_mode;
+
 /** @{ Current pipeline state objects */
 struct pipe_scissor_state scissor;
 struct pipe_blend_state *blend;
diff --git a/src/gallium/drivers/vc4/vc4_draw.c 
b/src/gallium/drivers/vc4/vc4_draw.c
index bc08da3..1a0c0dc 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -99,6 +99,11 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info)
 struct vc4_vertex_stateobj *vtx = vc4-vtx;
 struct vc4_vertexbuf_stateobj *vertexbuf = vc4-vertexbuf;
 
+if (vc4-prim_mode != info-mode) {
+vc4-prim_mode = info-mode;
+vc4-dirty |= VC4_DIRTY_PRIM_MODE;
+}
+
 vc4_start_draw(vc4);
 vc4_update_compiled_shaders(vc4, info-mode);
 
diff --git a/src/gallium/drivers/vc4/vc4_program.c 
b/src/gallium/drivers/vc4/vc4_program.c
index bf76acf..b22426c 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1792,6 +1792,17 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t 
prim_mode)
 struct vc4_fs_key local_key;
 struct vc4_fs_key *key = local_key;
 
+if (!(vc4-dirty  (VC4_DIRTY_PRIM_MODE |
+VC4_DIRTY_BLEND |
+VC4_DIRTY_FRAMEBUFFER |
+VC4_DIRTY_ZSA |
+VC4_DIRTY_RASTERIZER |
+VC4_DIRTY_FRAGTEX |
+VC4_DIRTY_TEXSTATE |
+VC4_DIRTY_PROG))) {
+return;
+}
+
 memset(key, 0, sizeof(*key));
 vc4_setup_shared_key(key-base, vc4-fragtex);
 key-base.shader_state = vc4-prog.bind_fs;
@@ -1840,6 +1851,15 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t 
prim_mode)
 struct vc4_vs_key local_key;
 struct vc4_vs_key *key = local_key;
 
+if (!(vc4-dirty  (VC4_DIRTY_PRIM_MODE |
+VC4_DIRTY_RASTERIZER |
+VC4_DIRTY_VERTTEX |
+VC4_DIRTY_TEXSTATE |
+VC4_DIRTY_VTXSTATE |
+VC4_DIRTY_PROG))) {
+return;
+}
+
 memset(key, 0, sizeof(*key));
 vc4_setup_shared_key(key-base, vc4-verttex);
 key-base.shader_state = vc4-prog.bind_vs;

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Actually clear the context's dirty flags.

2014-10-10 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: c6f50c4086a96c3a8d84522ed7ee5c75a8b0868c
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=c6f50c4086a96c3a8d84522ed7ee5c75a8b0868c

Author: Eric Anholt e...@anholt.net
Date:   Fri Oct 10 14:24:06 2014 +0200

vc4: Actually clear the context's dirty flags.

I was trying to skip state updates when !dirty, and suspiciously
everything was always dirty.

---

 src/gallium/drivers/vc4/vc4_draw.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/src/gallium/drivers/vc4/vc4_draw.c 
b/src/gallium/drivers/vc4/vc4_draw.c
index 2cac7ef..bc08da3 100644
--- a/src/gallium/drivers/vc4/vc4_draw.c
+++ b/src/gallium/drivers/vc4/vc4_draw.c
@@ -103,6 +103,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct 
pipe_draw_info *info)
 vc4_update_compiled_shaders(vc4, info-mode);
 
 vc4_emit_state(pctx);
+vc4-dirty = 0;
 
 vc4_write_uniforms(vc4, vc4-prog.fs,
vc4-constbuf[PIPE_SHADER_FRAGMENT],

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

Mesa (master): vc4: Optimize the other case of SEL_X_Y with a 0 -> SEL_X_0(a).

2014-10-10 Thread Eric Anholt

Module: Mesa
Branch: master
Commit: 7c474f9f2e5e3161ad27129844139ee14d916726
URL:
http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c474f9f2e5e3161ad27129844139ee14d916726

Author: Eric Anholt e...@anholt.net
Date:   Thu Oct  9 09:40:51 2014 +0200

vc4: Optimize the other case of SEL_X_Y with a 0 -> SEL_X_0(a).

Cleans up some output to be more obvious in a piglit test I'm looking at.

---

 src/gallium/drivers/vc4/vc4_opt_algebraic.c |   24 +++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c 
b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
index c114458..ec526fb 100644
--- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c
+++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c
@@ -174,7 +174,10 @@ qir_opt_algebraic(struct vc4_compile *c)
  */
 replace_with_mov(c, inst, inst-src[1]);
 progress = true;
-} else if (is_zero(c, defs, inst-src[1])) {
+break;
+}
+
+if (is_zero(c, defs, inst-src[1])) {
 /* Replace references to a 0 uniform value
  * with the SEL_X_0 equivalent.
  */
@@ -183,7 +186,26 @@ qir_opt_algebraic(struct vc4_compile *c)
 inst-src[1] = c-undef;
 progress = true;
 dump_to(c, inst);
+break;
 }
+
+if (is_zero(c, defs, inst-src[0])) {
+/* Replace references to a 0 uniform value
+ * with the SEL_X_0 equivalent, flipping the
+ * condition being evaluated since the operand
+ * order is flipped.
+ */
+dump_from(c, inst);
+inst-op -= QOP_SEL_X_Y_ZS;
+inst-op ^= 1;
+inst-op += QOP_SEL_X_0_ZS;
+inst-src[0] = inst-src[1];
+inst-src[1] = c-undef;
+progress = true;
+dump_to(c, inst);
+break;
+}
+
 break;
 
 case QOP_FSUB:

___
mesa-commit mailing list
mesa-commit@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-commit

< 6 7 8 9 10 11 12 13 14 15 >

1001 - 1100 of 4718 matches

Mail list logo