Mesa (master): vc4: Fix leak of the compiled shader programs in the cache.
Module: Mesa Branch: master Commit: 80ed075e6033eba68b034fbd748da4e0b82a27f4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=80ed075e6033eba68b034fbd748da4e0b82a27f4 Author: Eric Anholt e...@anholt.net Date: Sun Dec 14 20:29:10 2014 -0800 vc4: Fix leak of the compiled shader programs in the cache. --- src/gallium/drivers/vc4/vc4_context.c |2 ++ src/gallium/drivers/vc4/vc4_context.h |1 + src/gallium/drivers/vc4/vc4_program.c | 21 + 3 files changed, 24 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index b26c071..3535ebb 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -431,6 +431,8 @@ vc4_context_destroy(struct pipe_context *pctx) util_slab_destroy(vc4-transfer_pool); +vc4_program_fini(pctx); + ralloc_free(vc4); } diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index ba92cb3..962abbf 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -293,6 +293,7 @@ struct pipe_context *vc4_context_create(struct pipe_screen *pscreen, void vc4_draw_init(struct pipe_context *pctx); void vc4_state_init(struct pipe_context *pctx); void vc4_program_init(struct pipe_context *pctx); +void vc4_program_fini(struct pipe_context *pctx); void vc4_query_init(struct pipe_context *pctx); void vc4_simulator_init(struct vc4_screen *screen); int vc4_simulator_flush(struct vc4_context *vc4, diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 007c181..3af738f 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2777,3 +2777,24 @@ vc4_program_init(struct pipe_context *pctx) vc4-vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash, vs_cache_compare); } + +void +vc4_program_fini(struct pipe_context *pctx) +{ +struct vc4_context *vc4 = vc4_context(pctx); + +struct hash_entry *entry; 
+hash_table_foreach(vc4-fs_cache, entry) { +struct vc4_compiled_shader *shader = entry-data; +vc4_bo_unreference(shader-bo); +ralloc_free(shader); +_mesa_hash_table_remove(vc4-fs_cache, entry); +} + +hash_table_foreach(vc4-vs_cache, entry) { +struct vc4_compiled_shader *shader = entry-data; +vc4_bo_unreference(shader-bo); +ralloc_free(shader); +_mesa_hash_table_remove(vc4-vs_cache, entry); +} +} ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix leak of a copy of the scheduled QPU instructions.
Module: Mesa Branch: master Commit: 4da9e3d80556253a05179c398ffb1c3120fa3089 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=4da9e3d80556253a05179c398ffb1c3120fa3089 Author: Eric Anholt e...@anholt.net Date: Sun Dec 14 20:28:13 2014 -0800 vc4: Fix leak of a copy of the scheduled QPU instructions. They're copied into a vc4_bo after compiling is done. --- src/gallium/drivers/vc4/vc4_qpu.c |5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c index faf8790..83bf105 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.c +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -22,6 +22,7 @@ */ #include stdbool.h +#include util/ralloc.h #include vc4_qir.h #include vc4_qpu.h @@ -460,8 +461,8 @@ qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst) { if (c-qpu_inst_count = c-qpu_inst_size) { c-qpu_inst_size = MAX2(16, c-qpu_inst_size * 2); -c-qpu_insts = realloc(c-qpu_insts, - c-qpu_inst_size * sizeof(uint64_t)); +c-qpu_insts = reralloc(c, c-qpu_insts, +uint64_t, c-qpu_inst_size); } c-qpu_insts[c-qpu_inst_count++] = inst; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix leaks of the CL contents.
Module: Mesa Branch: master Commit: 667719fcb2296d73e1897d4071da6dd30b2cc6ac URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=667719fcb2296d73e1897d4071da6dd30b2cc6ac Author: Eric Anholt e...@anholt.net Date: Sun Dec 14 20:41:25 2014 -0800 vc4: Fix leaks of the CL contents. --- src/gallium/drivers/vc4/vc4_cl.c |5 - src/gallium/drivers/vc4/vc4_context.c |2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_cl.c b/src/gallium/drivers/vc4/vc4_cl.c index 29b956e..36dd28c 100644 --- a/src/gallium/drivers/vc4/vc4_cl.c +++ b/src/gallium/drivers/vc4/vc4_cl.c @@ -22,11 +22,14 @@ */ #include util/u_math.h +#include util/ralloc.h #include vc4_context.h void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl) { +cl-base = ralloc_size(vc4, 1); +cl-end = cl-next = cl-base; } void @@ -35,7 +38,7 @@ vc4_grow_cl(struct vc4_cl *cl) uint32_t size = MAX2((cl-end - cl-base) * 2, 4096); uint32_t offset = cl-next -cl-base; -cl-base = realloc(cl-base, size); +cl-base = reralloc(ralloc_parent(cl-base), cl-base, uint8_t, size); cl-end = cl-base + size; cl-next = cl-base + offset; } diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index fd65f43..e49d654 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -472,7 +472,9 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv) vc4_init_cl(vc4, vc4-bcl); vc4_init_cl(vc4, vc4-rcl); vc4_init_cl(vc4, vc4-shader_rec); +vc4_init_cl(vc4, vc4-uniforms); vc4_init_cl(vc4, vc4-bo_handles); +vc4_init_cl(vc4, vc4-bo_pointers); vc4-dirty = ~0; vc4-fd = screen-fd; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix leak of vc4_bos stashed in the context.
Module: Mesa Branch: master Commit: 1f1ca8b2ea80f6b538b7f7c0de2ebe8eba862edc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1f1ca8b2ea80f6b538b7f7c0de2ebe8eba862edc Author: Eric Anholt e...@anholt.net Date: Sun Dec 14 20:39:49 2014 -0800 vc4: Fix leak of vc4_bos stashed in the context. --- src/gallium/drivers/vc4/vc4_context.c |5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 3535ebb..fd65f43 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -431,6 +431,11 @@ vc4_context_destroy(struct pipe_context *pctx) util_slab_destroy(vc4-transfer_pool); +pipe_surface_reference(vc4-framebuffer.cbufs[0], NULL); +pipe_surface_reference(vc4-framebuffer.zsbuf, NULL); +vc4_bo_unreference(vc4-tile_alloc); +vc4_bo_unreference(vc4-tile_state); + vc4_program_fini(pctx); ralloc_free(vc4); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): hash_table: Fix compiler warnings from the renaming.
Module: Mesa Branch: master Commit: 6c3115af852834476a451688734c07f0dbe13ec9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6c3115af852834476a451688734c07f0dbe13ec9 Author: Eric Anholt e...@anholt.net Date: Sun Dec 14 20:21:32 2014 -0800 hash_table: Fix compiler warnings from the renaming. Not sure how we both missed this. None of the callers were using the return value, though. --- src/util/hash_table.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/hash_table.c b/src/util/hash_table.c index e85ebe3..0ad0383 100644 --- a/src/util/hash_table.c +++ b/src/util/hash_table.c @@ -330,7 +330,7 @@ struct hash_entry * _mesa_hash_table_insert(struct hash_table *ht, const void *key, void *data) { assert(ht->key_hash_function); - hash_table_insert(ht, ht->key_hash_function(key), key, data); + return hash_table_insert(ht, ht->key_hash_function(key), key, data); } struct hash_entry * @@ -338,7 +338,7 @@ _mesa_hash_table_insert_with_hash(struct hash_table *ht, uint32_t hash, const void *key, void *data) { assert(ht->key_hash_function == NULL || hash == ht->key_hash_function(key)); - hash_table_insert(ht, hash, key, data); + return hash_table_insert(ht, hash, key, data); } /** ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix a leak of the simulator's exec BO's actual vc4_bo.
Module: Mesa Branch: master Commit: f519c3bff14dfc4555c511a4d3709affa056d688 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f519c3bff14dfc4555c511a4d3709affa056d688 Author: Eric Anholt e...@anholt.net Date: Sat Dec 13 16:37:28 2014 -0800 vc4: Fix a leak of the simulator's exec BO's actual vc4_bo. --- src/gallium/drivers/vc4/vc4_simulator.c |1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 1040ae8..bb4a669 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -173,6 +173,7 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) if (ret) return ret; +vc4_bo_unreference(&exec.exec_bo->bo); free(exec.exec_bo); if (ctex && ctex->bo->simulator_winsys_map) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Switch to using the util/ hash table.
Module: Mesa Branch: master Commit: 5c9b8eace2b7e92899aae2cad131c0ca05b389e7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5c9b8eace2b7e92899aae2cad131c0ca05b389e7 Author: Eric Anholt e...@anholt.net Date: Sun Dec 14 19:48:51 2014 -0800 vc4: Switch to using the util/ hash table. No performance difference on a microbenchmark with norast that should hit it enough to have mattered, n=220. --- src/gallium/drivers/vc4/vc4_context.h |2 +- src/gallium/drivers/vc4/vc4_program.c | 85 + 2 files changed, 33 insertions(+), 54 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 207a7b4..ba92cb3 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -209,7 +209,7 @@ struct vc4_context { struct primconvert_context *primconvert; -struct util_hash_table *fs_cache, *vs_cache; +struct hash_table *fs_cache, *vs_cache; uint32_t next_uncompiled_program_id; uint64_t next_compiled_program_id; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index b48c2c4..007c181 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -25,7 +25,6 @@ #include inttypes.h #include pipe/p_state.h #include util/u_format.h -#include util/u_hash_table.h #include util/u_hash.h #include util/u_memory.h #include util/u_pack_color.h @@ -2164,7 +2163,7 @@ static struct vc4_compiled_shader * vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, struct vc4_key *key) { -struct util_hash_table *ht; +struct hash_table *ht; uint32_t key_size; if (stage == QSTAGE_FRAG) { ht = vc4-fs_cache; @@ -2175,9 +2174,9 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, } struct vc4_compiled_shader *shader; -shader = util_hash_table_get(ht, key); -if (shader) -return shader; +struct hash_entry *entry = _mesa_hash_table_search(ht, key); +if (entry) +return entry-data; struct vc4_compile *c = 
vc4_shader_tgsi_to_qir(vc4, stage, key); shader = rzalloc(NULL, struct vc4_compiled_shader); @@ -2257,7 +2256,7 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, struct vc4_key *dup_key; dup_key = malloc(key_size); memcpy(dup_key, key, key_size); -util_hash_table_set(ht, dup_key, shader); +_mesa_hash_table_insert(ht, dup_key, shader); return shader; } @@ -2389,65 +2388,43 @@ vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode) vc4_update_compiled_vs(vc4, prim_mode); } -static unsigned -fs_cache_hash(void *key) +static uint32_t +fs_cache_hash(const void *key) { return _mesa_hash_data(key, sizeof(struct vc4_fs_key)); } -static unsigned -vs_cache_hash(void *key) +static uint32_t +vs_cache_hash(const void *key) { return _mesa_hash_data(key, sizeof(struct vc4_vs_key)); } -static int -fs_cache_compare(void *key1, void *key2) +static bool +fs_cache_compare(const void *key1, const void *key2) { -return memcmp(key1, key2, sizeof(struct vc4_fs_key)); +return memcmp(key1, key2, sizeof(struct vc4_fs_key)) == 0; } -static int -vs_cache_compare(void *key1, void *key2) +static bool +vs_cache_compare(const void *key1, const void *key2) { -return memcmp(key1, key2, sizeof(struct vc4_vs_key)); +return memcmp(key1, key2, sizeof(struct vc4_vs_key)) == 0; } -struct delete_state { -struct vc4_context *vc4; -struct vc4_uncompiled_shader *shader_state; -}; - -static enum pipe_error -fs_delete_from_cache(void *in_key, void *in_value, void *data) -{ -struct delete_state *del = data; -struct vc4_fs_key *key = in_key; -struct vc4_compiled_shader *shader = in_value; - -if (key-base.shader_state == data) { -util_hash_table_remove(del-vc4-fs_cache, key); -vc4_bo_unreference(shader-bo); -ralloc_free(shader); -} - -return 0; -} - -static enum pipe_error -vs_delete_from_cache(void *in_key, void *in_value, void *data) +static void +delete_from_cache_if_matches(struct hash_table *ht, + struct hash_entry *entry, + struct vc4_uncompiled_shader *so) { -struct 
delete_state *del = data; -struct vc4_vs_key *key = in_key; -struct vc4_compiled_shader *shader = in_value; +struct vc4_key *key = entry-data; -if (key-base.shader_state == data) { -util_hash_table_remove(del-vc4-vs_cache, key); +if (key-shader_state == so) { +struct vc4_compiled_shader *shader = entry-data
Mesa (master): vc4: Fix leak of simulator memory on screen cleanup.
Module: Mesa Branch: master Commit: c84306fdc2f2f1aefd7c526e92d66fafda5b306c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c84306fdc2f2f1aefd7c526e92d66fafda5b306c Author: Eric Anholt e...@anholt.net Date: Sat Dec 13 16:40:54 2014 -0800 vc4: Fix leak of simulator memory on screen cleanup. --- src/gallium/drivers/vc4/vc4_screen.c|5 +++-- src/gallium/drivers/vc4/vc4_simulator.c |4 +++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 18451bd..7a1840a 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -30,6 +30,7 @@ #include util/u_debug.h #include util/u_memory.h #include util/u_format.h +#include util/ralloc.h #include vc4_screen.h #include vc4_context.h @@ -75,7 +76,7 @@ vc4_screen_get_vendor(struct pipe_screen *pscreen) static void vc4_screen_destroy(struct pipe_screen *pscreen) { -free(pscreen); +ralloc_free(pscreen); } static int @@ -402,7 +403,7 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen, struct pipe_screen * vc4_screen_create(int fd) { -struct vc4_screen *screen = CALLOC_STRUCT(vc4_screen); +struct vc4_screen *screen = ralloc(NULL, struct vc4_screen); struct pipe_screen *pscreen; pscreen = screen-base; diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index bb4a669..ee4ad96 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -24,6 +24,7 @@ #ifdef USE_VC4_SIMULATOR #include util/u_memory.h +#include util/ralloc.h #include vc4_screen.h #include vc4_context.h @@ -191,7 +192,8 @@ void vc4_simulator_init(struct vc4_screen *screen) { screen-simulator_mem_size = 256 * 1024 * 1024; -screen-simulator_mem_base = malloc(screen-simulator_mem_size); +screen-simulator_mem_base = ralloc_size(screen, + screen-simulator_mem_size); /* We supply our own memory so that we can have more aperture * available (256MB instead 
of simpenrose's default 64MB). ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix leaks of the compiled shaders' keys.
Module: Mesa Branch: master Commit: e108442bb10088607f2a67b648c42c0dece2e595 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e108442bb10088607f2a67b648c42c0dece2e595 Author: Eric Anholt e...@anholt.net Date: Sun Dec 14 20:50:15 2014 -0800 vc4: Fix leaks of the compiled shaders' keys. --- src/gallium/drivers/vc4/vc4_program.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 3af738f..781e7e2 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2254,7 +2254,7 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, qir_compile_destroy(c); struct vc4_key *dup_key; -dup_key = malloc(key_size); +dup_key = ralloc_size(shader, key_size); memcpy(dup_key, key, key_size); _mesa_hash_table_insert(ht, dup_key, shader); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix referencing of sync objects.
Module: Mesa Branch: master Commit: e5eaf8ec600b73de7f52555ce1c9a74883dba941 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e5eaf8ec600b73de7f52555ce1c9a74883dba941 Author: Eric Anholt e...@anholt.net Date: Fri Dec 12 00:06:21 2014 -0800 vc4: Fix referencing of sync objects. While the pipe_reference_* helpers set the pointer, a bare pipe_reference doesn't. Fixes 5 ARB_sync tests. --- src/gallium/drivers/vc4/vc4_fence.c |1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/vc4/vc4_fence.c b/src/gallium/drivers/vc4/vc4_fence.c index c081d51..01906cd 100644 --- a/src/gallium/drivers/vc4/vc4_fence.c +++ b/src/gallium/drivers/vc4/vc4_fence.c @@ -56,6 +56,7 @@ vc4_fence_reference(struct pipe_screen *pscreen, if (pipe_reference(&(*p)->reference, &f->reference)) { free(old); } +*p = f; } static boolean ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix a memory leak in setting up QPU instructions for scheduling.
Module: Mesa Branch: master Commit: 87db578268012ffc7ca09b737441490144340c6e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=87db578268012ffc7ca09b737441490144340c6e Author: Eric Anholt e...@anholt.net Date: Thu Dec 11 19:56:42 2014 -0800 vc4: Fix a memory leak in setting up QPU instructions for scheduling. --- src/gallium/drivers/vc4/vc4_qpu_emit.c |3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index f2620c0..c4359e7 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -26,6 +26,7 @@ #include "vc4_context.h" #include "vc4_qir.h" #include "vc4_qpu.h" +#include "util/ralloc.h" static void vc4_dump_program(struct vc4_compile *c) @@ -44,7 +45,7 @@ vc4_dump_program(struct vc4_compile *c) static void queue(struct vc4_compile *c, uint64_t inst) { -struct queued_qpu_inst *q = calloc(1, sizeof(*q)); +struct queued_qpu_inst *q = rzalloc(c, struct queued_qpu_inst); q->inst = inst; insert_at_tail(&c->qpu_inst_list, &q->link); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Don't throw out the index offset in the shadow index buffer path.
Module: Mesa Branch: master Commit: d78eb5752838979057a8a1210d6951c084904c0f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d78eb5752838979057a8a1210d6951c084904c0f Author: Eric Anholt e...@anholt.net Date: Thu Dec 11 20:34:06 2014 -0800 vc4: Don't throw out the index offset in the shadow index buffer path. When we upload shadow indices at draw time, we need the source offset. Fixes the piglit draw-elements test. --- src/gallium/drivers/vc4/vc4_state.c |3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 73d..630ac4d 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -300,12 +300,11 @@ vc4_set_index_buffer(struct pipe_context *pctx, pipe_resource_reference(&vc4->indexbuf.buffer, NULL); vc4->indexbuf.buffer = pshadow; vc4->indexbuf.index_size = 2; -vc4->indexbuf.offset = 0; } else { pipe_resource_reference(&vc4->indexbuf.buffer, ib->buffer); vc4->indexbuf.index_size = ib->index_size; -vc4->indexbuf.offset = ib->offset; } +vc4->indexbuf.offset = ib->offset; } else { pipe_resource_reference(&vc4->indexbuf.buffer, NULL); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix triangle-guardband-viewport piglit test.
Module: Mesa Branch: master Commit: 0ae5e002e090c5e1bd237fa3f23549f9ab105a44 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0ae5e002e090c5e1bd237fa3f23549f9ab105a44 Author: Eric Anholt e...@anholt.net Date: Thu Dec 11 20:11:21 2014 -0800 vc4: Fix triangle-guardband-viewport piglit test. The original Broadcom driver also did this with the viewport. --- src/gallium/drivers/vc4/vc4_emit.c | 19 ++- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_emit.c b/src/gallium/drivers/vc4/vc4_emit.c index 8df7073..6856441 100644 --- a/src/gallium/drivers/vc4/vc4_emit.c +++ b/src/gallium/drivers/vc4/vc4_emit.c @@ -28,12 +28,21 @@ vc4_emit_state(struct pipe_context *pctx) { struct vc4_context *vc4 = vc4_context(pctx); -if (vc4->dirty & VC4_DIRTY_SCISSOR) { +if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT)) { +float *vpscale = vc4->viewport.scale; +float *vptranslate = vc4->viewport.translate; +float vp_minx = -fabs(vpscale[0]) + vptranslate[0]; +float vp_maxx = fabs(vpscale[0]) + vptranslate[0]; +float vp_miny = -fabs(vpscale[1]) + vptranslate[1]; +float vp_maxy = fabs(vpscale[1]) + vptranslate[1]; +uint32_t minx = MAX2(vc4->scissor.minx, vp_minx); +uint32_t miny = MAX2(vc4->scissor.miny, vp_miny); + cl_u8(&vc4->bcl, VC4_PACKET_CLIP_WINDOW); -cl_u16(&vc4->bcl, vc4->scissor.minx); -cl_u16(&vc4->bcl, vc4->scissor.miny); -cl_u16(&vc4->bcl, vc4->scissor.maxx - vc4->scissor.minx); -cl_u16(&vc4->bcl, vc4->scissor.maxy - vc4->scissor.miny); +cl_u16(&vc4->bcl, minx); +cl_u16(&vc4->bcl, miny); +cl_u16(&vc4->bcl, MIN2(vc4->scissor.maxx, vp_maxx) - minx); +cl_u16(&vc4->bcl, MIN2(vc4->scissor.maxy, vp_maxy) - miny); } if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Drop redundant index size setting.
Module: Mesa Branch: master Commit: 5b3c0d999c38ef51ece422010cdb836fbbaffe0e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5b3c0d999c38ef51ece422010cdb836fbbaffe0e Author: Eric Anholt e...@anholt.net Date: Thu Dec 11 20:34:57 2014 -0800 vc4: Drop redundant index size setting. This is already done at set_index_buffer() time. --- src/gallium/drivers/vc4/vc4_draw.c |1 - 1 file changed, 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 2d82462..79d7d73 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -218,7 +218,6 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) if (rsc->shadow_parent) { vc4_update_shadow_index_buffer(pctx, &vc4->indexbuf); offset = 0; -index_size = 2; } cl_start_reloc(&vc4->bcl, 1); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Consider FS backface color loads as color inputs as well.
Module: Mesa Branch: master Commit: 92b85fba89851b85e7a50b1f6f90c05ecaacdcd5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=92b85fba89851b85e7a50b1f6f90c05ecaacdcd5 Author: Eric Anholt e...@anholt.net Date: Thu Dec 11 21:28:12 2014 -0800 vc4: Consider FS backface color loads as color inputs as well. This fixes flatshading of backface color in 4 of the piglit interpolation tests. --- src/gallium/drivers/vc4/vc4_program.c |5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index cea2a49..b48c2c4 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2210,8 +2210,11 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, if (sem->semantic == (uint8_t)~0) continue; -if (sem->semantic == TGSI_SEMANTIC_COLOR) +if (sem->semantic == TGSI_SEMANTIC_COLOR || +sem->semantic == TGSI_SEMANTIC_BCOLOR) { shader->color_inputs |= (1 << shader->num_inputs); +} + shader->input_semantics[shader->num_inputs] = *sem; shader->num_inputs++; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Prioritize allocating accumulators to short-lived values.
Module: Mesa Branch: master Commit: ab1b1fa6fbd72b05c48f83c9df5036c2bfe893a3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ab1b1fa6fbd72b05c48f83c9df5036c2bfe893a3 Author: Eric Anholt e...@anholt.net Date: Mon Dec 8 17:43:29 2014 -0800 vc4: Prioritize allocating accumulators to short-lived values. The register allocator walks from the end of the nodes array looking for trivially-allocatable things to put on the stack, meaning (assuming everything is trivially colorable and gets put on the stack in a single pass) the low node numbers get allocated first. The things allocated first happen to get the lower-numbered registers, which is to say the fast accumulators that can be paired more easily. When we previously made the nodes match the temporary register numbers, we'd end up putting the shader inputs (VS or FS) in the accumulators, which are often long-lived values. By prioritizing the shortest-lived values for allocation, we can get a lot more instructions that involve accumulators, and thus fewer conflicts for raddr and WS. total instructions in shared programs: 52870 - 46428 (-12.18%) instructions in affected programs: 52260 - 45818 (-12.33%) --- src/gallium/drivers/vc4/vc4_register_allocate.c | 73 ++- 1 file changed, 59 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index b62669f..3001900 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -139,6 +139,20 @@ vc4_alloc_reg_set(struct vc4_context *vc4) ra_set_finalize(vc4-regs, NULL); } +struct node_to_temp_map { +uint32_t temp; +uint32_t priority; +}; + +static int +node_to_temp_priority(const void *in_a, const void *in_b) +{ +const struct node_to_temp_map *a = in_a; +const struct node_to_temp_map *b = in_b; + +return a-priority - b-priority; +} + /** * Returns a mapping from QFILE_TEMP indices to struct qpu_regs. 
* @@ -148,6 +162,8 @@ struct qpu_reg * vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) { struct simple_node *node; +struct node_to_temp_map map[c-num_temps]; +uint32_t temp_to_node[c-num_temps]; uint32_t def[c-num_temps]; uint32_t use[c-num_temps]; struct qpu_reg *temp_registers = calloc(c-num_temps, @@ -166,11 +182,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) struct ra_graph *g = ra_alloc_interference_graph(vc4-regs, c-num_temps); -for (uint32_t i = 0; i c-num_temps; i++) +for (uint32_t i = 0; i c-num_temps; i++) { ra_set_node_class(g, i, vc4-reg_class_any); +} -/* Compute the live ranges so we can figure out interference, and - * figure out our register classes and preallocated registers. +/* Compute the live ranges so we can figure out interference. */ uint32_t ip = 0; foreach(node, c-instructions) { @@ -188,27 +204,54 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) switch (inst-op) { case QOP_FRAG_Z: +case QOP_FRAG_W: +/* The payload registers have values implicitly loaded + * at the start of the program. + */ def[inst-dst.index] = 0; -ra_set_node_reg(g, inst-dst.index, +break; +default: +break; +} + +ip++; +} + +for (uint32_t i = 0; i c-num_temps; i++) { +map[i].temp = i; +map[i].priority = use[i] - def[i]; +} +qsort(map, c-num_temps, sizeof(map[0]), node_to_temp_priority); +for (uint32_t i = 0; i c-num_temps; i++) { +temp_to_node[map[i].temp] = i; +} + +/* Figure out our register classes and preallocated registers*/ +foreach(node, c-instructions) { +struct qinst *inst = (struct qinst *)node; + +switch (inst-op) { +case QOP_FRAG_Z: +ra_set_node_reg(g, temp_to_node[inst-dst.index], AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2 + 1); break; case QOP_FRAG_W: -def[inst-dst.index] = 0; -ra_set_node_reg(g, inst-dst.index, +ra_set_node_reg(g, temp_to_node[inst-dst.index], AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2); break; case QOP_TEX_RESULT: case QOP_TLB_COLOR_READ: assert
Mesa (master): vc4: Reserve rb31 instead of r3 for raddr conflict spills.
Module: Mesa Branch: master Commit: 8420a956924c720b3c4932a577623f836758c21c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8420a956924c720b3c4932a577623f836758c21c Author: Eric Anholt e...@anholt.net Date: Mon Dec 8 16:52:53 2014 -0800 vc4: Reserve rb31 instead of r3 for raddr conflict spills. This increases the cost of a raddr b conflict spill (save r3 to rb31, move src1 to r3, move rb31 back to r3 when done, instead of just move src1 to r3), but on average thanks to instruction pairing it's more worthwhile to have another accumulator. total instructions in shared programs: 46428 - 46171 (-0.55%) instructions in affected programs: 38030 - 37773 (-0.68%) --- src/gallium/drivers/vc4/vc4_qpu_emit.c | 50 +++ src/gallium/drivers/vc4/vc4_register_allocate.c |6 +-- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 856f844..f2620c0 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -93,21 +93,41 @@ swap_file(struct qpu_reg *src) * In that case, we need to move one to a temporary that can be used in the * instruction, instead. */ -static void +static bool fixup_raddr_conflict(struct vc4_compile *c, - struct qpu_reg *src0, struct qpu_reg *src1) + struct qpu_reg dst, + struct qpu_reg *src0, struct qpu_reg *src1, + bool r3_live) { if ((src0-mux != QPU_MUX_A src0-mux != QPU_MUX_B) || src0-mux != src1-mux || src0-addr == src1-addr) { -return; +return false; } if (swap_file(src0) || swap_file(src1)) -return; +return false; + +if (src0-mux == QPU_MUX_A) { +/* If we're conflicting over the A regfile, then we can just + * use the reserved rb31. + */ +queue(c, qpu_a_MOV(qpu_rb(31), *src1)); +*src1 = qpu_rb(31); +return false; +} else { +/* Otherwise, we need a non-B regfile. So, we spill r3 out to + * rb31, then store our desired value in r3, and tell the + * caller to put rb31 back into r3 when we're done. 
+ */ +if (r3_live) +queue(c, qpu_a_MOV(qpu_rb(31), qpu_r3())); +queue(c, qpu_a_MOV(qpu_r3(), *src1)); + +*src1 = qpu_r3(); -queue(c, qpu_a_MOV(qpu_r3(), *src1)); -*src1 = qpu_r3(); +return r3_live dst.mux != QPU_MUX_R3; +} } void @@ -118,6 +138,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) uint32_t inputs_remaining = c-num_inputs; uint32_t vpm_read_fifo_count = 0; uint32_t vpm_read_offset = 0; +bool written_r3 = false; +bool needs_restore; make_empty_list(c-qpu_inst_list); @@ -416,8 +438,12 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; case QOP_TEX_DIRECT: -fixup_raddr_conflict(c, src[0], src[1]); +needs_restore = fixup_raddr_conflict(c, dst, + src[0], src[1], + written_r3); queue(c, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S), src[0], src[1])); +if (needs_restore) +queue(c, qpu_a_MOV(qpu_r3(), qpu_rb(31))); break; case QOP_TEX_RESULT: @@ -477,7 +503,9 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) if (qir_get_op_nsrc(qinst-op) == 1) src[1] = src[0]; -fixup_raddr_conflict(c, src[0], src[1]); +needs_restore = fixup_raddr_conflict(c, dst, + src[0], src[1], + written_r3); if (translate[qinst-op].is_mul) { queue(c, qpu_m_alu2(translate[qinst-op].op, @@ -488,8 +516,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) dst, src[0], src[1])); } +if (needs_restore) +queue(c, qpu_a_MOV(qpu_r3(), qpu_rb(31))); + break; } + +if (dst.mux == QPU_MUX_R3) +written_r3 = true
Mesa (master): vc4: Populate the delay field better, and schedule high delay first.
Module: Mesa Branch: master Commit: c5b544403fbc955dd441fb5a2e11f0de2a75e9e4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c5b544403fbc955dd441fb5a2e11f0de2a75e9e4 Author: Eric Anholt e...@anholt.net Date: Tue Dec 9 14:05:52 2014 -0800 vc4: Populate the delay field better, and schedule high delay first. This is a standard scheduling heuristic, and clearly helps. total instructions in shared programs: 46418 - 44467 (-4.20%) instructions in affected programs: 42531 - 40580 (-4.59%) --- src/gallium/drivers/vc4/vc4_qpu_schedule.c | 50 +++- 1 file changed, 49 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index 8df816f..c733e6e 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -49,7 +49,19 @@ struct schedule_node { uint32_t child_count; uint32_t child_array_size; uint32_t parent_count; + +/** + * Minimum number of cycles from scheduling this instruction until the + * end of the program, based on the slowest dependency chain through + * the children. + */ uint32_t delay; + +/** + * cycles between this instruction being scheduled and when its result + * can be consumed. + */ +uint32_t latency; }; struct schedule_node_child { @@ -548,6 +560,13 @@ choose_instruction_to_schedule(struct choose_scoreboard *scoreboard, } else if (prio chosen_prio) { continue; } + +if (n-delay chosen-delay) { +chosen = n; +chosen_prio = prio; +} else if (n-delay chosen-delay) { +continue; +} } return chosen; @@ -612,7 +631,7 @@ compute_delay(struct schedule_node *n) if (!n-children[i].node-delay) compute_delay(n-children[i].node); n-delay = MAX2(n-delay, -n-children[i].node-delay + 1); +n-children[i].node-delay + n-latency); } } } @@ -734,6 +753,33 @@ schedule_instructions(struct vc4_compile *c, struct simple_node *schedule_list) } } +static uint32_t waddr_latency(uint32_t waddr) +{ +if (waddr 32) +return 2; + +/* Some huge number, really. 
*/ +if (waddr = QPU_W_TMU0_S waddr = QPU_W_TMU1_B) +return 10; + +switch(waddr) { +case QPU_W_SFU_RECIP: +case QPU_W_SFU_RECIPSQRT: +case QPU_W_SFU_EXP: +case QPU_W_SFU_LOG: +return 3; +default: +return 1; +} +} + +static uint32_t +instruction_latency(uint64_t inst) +{ +return MAX2(waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_ADD)), +waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_MUL))); +} + void qpu_schedule_instructions(struct vc4_compile *c) { @@ -761,6 +807,8 @@ qpu_schedule_instructions(struct vc4_compile *c) struct schedule_node *n = rzalloc(mem_ctx, struct schedule_node); n-inst = inst; +n-latency = instruction_latency(inst-inst); + remove_from_list(inst-link); insert_at_tail(schedule_list, n-link); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Mark VPM read setup as impacting VPM reads, not writes.
Module: Mesa Branch: master Commit: f431b4f110946a0f6e3822f870fbcd3d23c8317a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f431b4f110946a0f6e3822f870fbcd3d23c8317a Author: Eric Anholt e...@anholt.net Date: Tue Dec 9 14:20:54 2014 -0800 vc4: Mark VPM read setup as impacting VPM reads, not writes. Fixes assertion failures if we adjust scheduling priorities to emphasize VPM reads more. --- src/gallium/drivers/vc4/vc4_qpu_schedule.c |8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index 6bba66a..4bb9b3a 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -260,10 +260,16 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n, break; case QPU_W_VPM: -case QPU_W_VPMVCD_SETUP: add_write_dep(state, &state->last_vpm, n); break; +case QPU_W_VPMVCD_SETUP: +if (is_a) +add_write_dep(state, &state->last_vpm_read, n); +else +add_write_dep(state, &state->last_vpm, n); +break; + case QPU_W_SFU_RECIP: case QPU_W_SFU_RECIPSQRT: case QPU_W_SFU_EXP: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Refuse to merge instructions involving 32-bit immediate loads.
Module: Mesa Branch: master Commit: cff8c96a0d418f41e00aa97a13dc55e3ed213eb7 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cff8c96a0d418f41e00aa97a13dc55e3ed213eb7 Author: Eric Anholt e...@anholt.net Date: Tue Dec 9 16:34:37 2014 -0800 vc4: Refuse to merge instructions involving 32-bit immediate loads. An immediate load overwrites the mul and add operations, so you can't merge with them. --- src/gallium/drivers/vc4/vc4_qpu.c |5 + 1 file changed, 5 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c index 6daa072..faf8790 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.c +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -356,6 +356,11 @@ qpu_merge_inst(uint64_t a, uint64_t b) if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b)) return 0; +if (QPU_GET_FIELD(a, QPU_SIG) == QPU_SIG_LOAD_IMM || +QPU_GET_FIELD(b, QPU_SIG) == QPU_SIG_LOAD_IMM) { +return 0; +} + ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK, QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Skip raddr dependencies for 32-bit immediate loads.
Module: Mesa Branch: master Commit: 45a89237711acff7ee31c854361f8f580ccdcc9f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=45a89237711acff7ee31c854361f8f580ccdcc9f Author: Eric Anholt e...@anholt.net Date: Tue Dec 9 14:23:39 2014 -0800 vc4: Skip raddr dependencies for 32-bit immediate loads. These don't have raddr fields. --- src/gallium/drivers/vc4/vc4_qpu_schedule.c |7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index 4bb9b3a..8df816f 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -334,8 +334,11 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) uint32_t mul_b = QPU_GET_FIELD(inst, QPU_MUL_B); uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); -process_raddr_deps(state, n, raddr_a, true); -process_raddr_deps(state, n, raddr_b, false); +if (sig != QPU_SIG_LOAD_IMM) { +process_raddr_deps(state, n, raddr_a, true); +process_raddr_deps(state, n, raddr_b, false); +} + if (add_op != QPU_A_NOP) { process_mux_deps(state, n, add_a); process_mux_deps(state, n, add_b); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Do QPU scheduling across uniform loads.
Module: Mesa Branch: master Commit: 8812dc503eb48bac5b9c9b5740f76025c046f90d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8812dc503eb48bac5b9c9b5740f76025c046f90d Author: Eric Anholt e...@anholt.net Date: Tue Dec 9 18:54:29 2014 -0800 vc4: Do QPU scheduling across uniform loads. This means another pass of reordering the uniform data store, but it lets us pair up a lot more instructions. total instructions in shared programs: 44639 - 43176 (-3.28%) instructions in affected programs: 36938 - 35475 (-3.96%) --- src/gallium/drivers/vc4/vc4_qpu_schedule.c | 88 +++- 1 file changed, 60 insertions(+), 28 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index c733e6e..0700b0d 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -62,6 +62,12 @@ struct schedule_node { * can be consumed. */ uint32_t latency; + +/** + * Which uniform from uniform_data[] this instruction read, or -1 if + * not reading a uniform. 
+ */ +int uniform; }; struct schedule_node_child { @@ -80,7 +86,6 @@ struct schedule_state { struct schedule_node *last_rb[32]; struct schedule_node *last_sf; struct schedule_node *last_vpm_read; -struct schedule_node *last_unif_read; struct schedule_node *last_tmu_write; struct schedule_node *last_tlb; struct schedule_node *last_vpm; @@ -174,9 +179,6 @@ process_raddr_deps(struct schedule_state *state, struct schedule_node *n, break; case QPU_R_UNIF: -add_write_dep(state, state-last_unif_read, n); -break; - case QPU_R_NOP: case QPU_R_ELEM_QPU: case QPU_R_XY_PIXEL_COORD: @@ -215,6 +217,18 @@ is_tmu_write(uint32_t waddr) } } +static bool +reads_uniform(uint64_t inst) +{ +if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_LOAD_IMM) +return false; + +return (QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_UNIF || +QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_UNIF || +is_tmu_write(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) || +is_tmu_write(QPU_GET_FIELD(inst, QPU_WADDR_MUL))); +} + static void process_mux_deps(struct schedule_state *state, struct schedule_node *n, uint32_t mux) @@ -224,17 +238,6 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n, } -static bool -is_direct_tmu_read(uint64_t inst) -{ -/* If it's a direct read, we happen to structure the code such that - * there's an explicit uniform read in the instruction (for kernel - * texture reloc processing). - */ -return (QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_UNIF || -QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_UNIF); -} - static void process_waddr_deps(struct schedule_state *state, struct schedule_node *n, uint32_t waddr, bool is_add) @@ -250,14 +253,6 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n, } } else if (is_tmu_write(waddr)) { add_write_dep(state, state-last_tmu_write, n); - -/* There is an implicit uniform read in texture ops in - * hardware, unless this is a direct-addressed uniform read, - * so we need to keep it in the same order as the other - * uniforms. 
- */ -if (!is_direct_tmu_read(n-inst-inst)) -add_write_dep(state, state-last_unif_read, n); } else if (qpu_waddr_is_tlb(waddr)) { add_write_dep(state, state-last_tlb, n); } else { @@ -509,7 +504,7 @@ get_instruction_priority(uint64_t inst) static struct schedule_node * choose_instruction_to_schedule(struct choose_scoreboard *scoreboard, struct simple_node *schedule_list, - uint64_t prev_inst) + struct schedule_node *prev_inst) { struct schedule_node *chosen = NULL; struct simple_node *node; @@ -537,8 +532,11 @@ choose_instruction_to_schedule(struct choose_scoreboard *scoreboard, /* If we're trying to pair with another instruction, check * that they're compatible. */ -if (prev_inst != 0) { -inst = qpu_merge_inst(prev_inst, inst); +if (prev_inst) { +if (prev_inst-uniform != -1 n-uniform != -1) +continue; + +inst = qpu_merge_inst(prev_inst-inst-inst, inst); if (!inst) continue; } @@ -668,6 +666,17
Mesa (master): vc4: Drop dependency on r3 for color packing.
Module: Mesa Branch: master Commit: 24c5ab7bbbd2a4e9207c6cba66945f72ca5c7a3e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=24c5ab7bbbd2a4e9207c6cba66945f72ca5c7a3e Author: Eric Anholt e...@anholt.net Date: Sun Sep 7 14:38:24 2014 -0700 vc4: Drop dependency on r3 for color packing. We can avoid it by carefully ordering the packing. This is important as a step in giving r3 to the register allocator. total instructions in shared programs: 56087 - 55957 (-0.23%) instructions in affected programs: 18368 - 18238 (-0.71%) --- src/gallium/drivers/vc4/vc4_qpu_emit.c | 31 +++ 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 3cb709f..856f844 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -296,17 +296,40 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) break; -case QOP_PACK_COLORS: +case QOP_PACK_COLORS: { +/* We have to be careful not to start writing over one + * of our source values when incrementally writing the + * destination. So, if the dst is one of the srcs, we + * pack that one first (and we pack 4 channels at once + * for the first pack). 
+ */ +struct qpu_reg first_pack = src[0]; +for (int i = 0; i 4; i++) { +if (src[i].mux == dst.mux +src[i].addr == dst.addr) { +first_pack = dst; +break; +} +} +queue(c, qpu_m_MOV(dst, first_pack)); +*last_inst(c) |= QPU_PM; +*last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_, + QPU_PACK); + for (int i = 0; i 4; i++) { -queue(c, qpu_m_MOV(qpu_r3(), src[i])); +if (src[i].mux == first_pack.mux +src[i].addr == first_pack.addr) { +continue; +} + +queue(c, qpu_m_MOV(dst, src[i])); *last_inst(c) |= QPU_PM; *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A + i, QPU_PACK); } -queue(c, qpu_a_MOV(dst, qpu_r3())); - break; +} case QOP_FRAG_X: queue(c, qpu_a_ITOF(dst, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Interleave register allocation from regfile A and B.
Module: Mesa Branch: master Commit: 70dd3df344ddeb4b6d0f2e990dd1afaf4e46e39f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=70dd3df344ddeb4b6d0f2e990dd1afaf4e46e39f Author: Eric Anholt e...@anholt.net Date: Fri Dec 5 17:08:28 2014 -0800 vc4: Interleave register allocation from regfile A and B. The register allocator prefers low-index registers from vc4_regs[] in the configuration we're using, which is good because it means we prioritize allocating the accumulators (which are faster). On the other hand, it was causing raddr conflicts because everything beyond r0-r2 ended up in regfile A until you got massive register pressure. By interleaving, we end up getting more instruction pairing from getting non-conflicting raddrs and QPU_WSes. total instructions in shared programs: 55957 - 52719 (-5.79%) instructions in affected programs: 46855 - 43617 (-6.91%) --- src/gallium/drivers/vc4/vc4_register_allocate.c | 77 +++ 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_register_allocate.c b/src/gallium/drivers/vc4/vc4_register_allocate.c index 72f5271..b62669f 100644 --- a/src/gallium/drivers/vc4/vc4_register_allocate.c +++ b/src/gallium/drivers/vc4/vc4_register_allocate.c @@ -36,80 +36,79 @@ static const struct qpu_reg vc4_regs[] = { { QPU_MUX_R3, 0}, { QPU_MUX_R4, 0}, QPU_R(A, 0), -QPU_R(A, 1), -QPU_R(A, 2), -QPU_R(A, 3), -QPU_R(A, 4), -QPU_R(A, 5), -QPU_R(A, 6), -QPU_R(A, 7), -QPU_R(A, 8), -QPU_R(A, 9), -QPU_R(A, 10), -QPU_R(A, 11), -QPU_R(A, 12), -QPU_R(A, 13), -QPU_R(A, 14), -QPU_R(A, 15), -QPU_R(A, 16), -QPU_R(A, 17), -QPU_R(A, 18), -QPU_R(A, 19), -QPU_R(A, 20), -QPU_R(A, 21), -QPU_R(A, 22), -QPU_R(A, 23), -QPU_R(A, 24), -QPU_R(A, 25), -QPU_R(A, 26), -QPU_R(A, 27), -QPU_R(A, 28), -QPU_R(A, 29), -QPU_R(A, 30), -QPU_R(A, 31), QPU_R(B, 0), +QPU_R(A, 1), QPU_R(B, 1), +QPU_R(A, 2), QPU_R(B, 2), +QPU_R(A, 3), QPU_R(B, 3), +QPU_R(A, 4), QPU_R(B, 4), +QPU_R(A, 5), QPU_R(B, 5), +QPU_R(A, 6), QPU_R(B, 6), +QPU_R(A, 7), QPU_R(B, 
7), +QPU_R(A, 8), QPU_R(B, 8), +QPU_R(A, 9), QPU_R(B, 9), +QPU_R(A, 10), QPU_R(B, 10), +QPU_R(A, 11), QPU_R(B, 11), +QPU_R(A, 12), QPU_R(B, 12), +QPU_R(A, 13), QPU_R(B, 13), +QPU_R(A, 14), QPU_R(B, 14), +QPU_R(A, 15), QPU_R(B, 15), +QPU_R(A, 16), QPU_R(B, 16), +QPU_R(A, 17), QPU_R(B, 17), +QPU_R(A, 18), QPU_R(B, 18), +QPU_R(A, 19), QPU_R(B, 19), +QPU_R(A, 20), QPU_R(B, 20), +QPU_R(A, 21), QPU_R(B, 21), +QPU_R(A, 22), QPU_R(B, 22), +QPU_R(A, 23), QPU_R(B, 23), +QPU_R(A, 24), QPU_R(B, 24), +QPU_R(A, 25), QPU_R(B, 25), +QPU_R(A, 26), QPU_R(B, 26), +QPU_R(A, 27), QPU_R(B, 27), +QPU_R(A, 28), QPU_R(B, 28), +QPU_R(A, 29), QPU_R(B, 29), +QPU_R(A, 30), QPU_R(B, 30), +QPU_R(A, 31), QPU_R(B, 31), }; #define ACC_INDEX 0 -#define A_INDEX (ACC_INDEX + 5) -#define B_INDEX (A_INDEX + 32) +#define AB_INDEX (ACC_INDEX + 5) static void vc4_alloc_reg_set(struct vc4_context *vc4) { -assert(vc4_regs[A_INDEX].addr == 0); -assert(vc4_regs[B_INDEX].addr == 0); -STATIC_ASSERT(ARRAY_SIZE(vc4_regs) == B_INDEX + 32); +assert(vc4_regs[AB_INDEX].addr == 0); +assert(vc4_regs[AB_INDEX + 1].addr == 0); +STATIC_ASSERT(ARRAY_SIZE(vc4_regs) == AB_INDEX + 64); if (vc4-regs) return; @@ -134,7 +133,7 @@ vc4_alloc_reg_set(struct vc4_context *vc4) } vc4-reg_class_a = ra_alloc_reg_class(vc4-regs); -for (uint32_t i = A_INDEX; i A_INDEX + 32; i++) +for (uint32_t i = AB_INDEX; i AB_INDEX + 64; i += 2) ra_class_add_reg(vc4-regs, vc4-reg_class_a, i); ra_set_finalize(vc4-regs, NULL); @@ -191,13 +190,13 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c) case QOP_FRAG_Z: def[inst-dst.index] = 0; ra_set_node_reg(g, inst-dst.index, -B_INDEX + QPU_R_FRAG_PAYLOAD_ZW); +AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2 + 1); break; case QOP_FRAG_W: def[inst-dst.index] = 0
Mesa (master): vc4: Add support for GL 1.0 logic ops.
Module: Mesa Branch: master Commit: dfbf58c439870d46abcc8868b8ca145318aee125 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dfbf58c439870d46abcc8868b8ca145318aee125 Author: Eric Anholt e...@anholt.net Date: Mon Dec 8 12:40:58 2014 -0800 vc4: Add support for GL 1.0 logic ops. --- src/gallium/drivers/vc4/vc4_program.c | 62 +++-- 1 file changed, 60 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index d21522f..cea2a49 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -69,6 +69,7 @@ struct vc4_fs_key { bool point_coord_upper_left; bool light_twoside; uint8_t alpha_test_func; +uint8_t logicop_func; uint32_t point_sprite_mask; struct pipe_rt_blend_state blend; @@ -1629,6 +1630,46 @@ alpha_test_discard(struct vc4_compile *c) } } +static struct qreg +vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst) +{ +switch (c-fs_key-logicop_func) { +case PIPE_LOGICOP_CLEAR: +return qir_uniform_f(c, 0.0); +case PIPE_LOGICOP_NOR: +return qir_NOT(c, qir_OR(c, src, dst)); +case PIPE_LOGICOP_AND_INVERTED: +return qir_AND(c, qir_NOT(c, src), dst); +case PIPE_LOGICOP_COPY_INVERTED: +return qir_NOT(c, src); +case PIPE_LOGICOP_AND_REVERSE: +return qir_AND(c, src, qir_NOT(c, dst)); +case PIPE_LOGICOP_INVERT: +return qir_NOT(c, dst); +case PIPE_LOGICOP_XOR: +return qir_XOR(c, src, dst); +case PIPE_LOGICOP_NAND: +return qir_NOT(c, qir_AND(c, src, dst)); +case PIPE_LOGICOP_AND: +return qir_AND(c, src, dst); +case PIPE_LOGICOP_EQUIV: +return qir_NOT(c, qir_XOR(c, src, dst)); +case PIPE_LOGICOP_NOOP: +return dst; +case PIPE_LOGICOP_OR_INVERTED: +return qir_OR(c, qir_NOT(c, src), dst); +case PIPE_LOGICOP_OR_REVERSE: +return qir_OR(c, src, qir_NOT(c, dst)); +case PIPE_LOGICOP_OR: +return qir_OR(c, src, dst); +case PIPE_LOGICOP_SET: +return qir_uniform_ui(c, ~0); +case PIPE_LOGICOP_COPY: +default: +return src; +} +} + static void emit_frag_end(struct 
vc4_compile *c) { @@ -1640,8 +1681,11 @@ emit_frag_end(struct vc4_compile *c) struct qreg tlb_read_color[4] = { c-undef, c-undef, c-undef, c-undef }; struct qreg dst_color[4] = { c-undef, c-undef, c-undef, c-undef }; struct qreg linear_dst_color[4] = { c-undef, c-undef, c-undef, c-undef }; +struct qreg packed_dst_color = c-undef; + if (c-fs_key-blend.blend_enable || -c-fs_key-blend.colormask != 0xf) { +c-fs_key-blend.colormask != 0xf || +c-fs_key-logicop_func != PIPE_LOGICOP_COPY) { struct qreg r4 = qir_TLB_COLOR_READ(c); for (int i = 0; i 4; i++) tlb_read_color[i] = qir_R4_UNPACK(c, r4, i); @@ -1656,6 +1700,11 @@ emit_frag_end(struct vc4_compile *c) linear_dst_color[i] = dst_color[i]; } } + +/* Save the packed value for logic ops. Can't reuse r4 + * becuase other things might smash it (like sRGB) + */ +packed_dst_color = qir_MOV(c, r4); } struct qreg blend_color[4]; @@ -1748,6 +1797,11 @@ emit_frag_end(struct vc4_compile *c) packed_color = qir_uniform_ui(c, 0); } + +if (c-fs_key-logicop_func != PIPE_LOGICOP_COPY) { +packed_color = vc4_logicop(c, packed_color, packed_dst_color); +} + qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c-undef, packed_color, c-undef)); } @@ -2254,7 +2308,11 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode) key-is_lines = (prim_mode = PIPE_PRIM_LINES prim_mode = PIPE_PRIM_LINE_STRIP); key-blend = vc4-blend-rt[0]; - +if (vc4-blend-logicop_enable) { +key-logicop_func = vc4-blend-logicop_func; +} else { +key-logicop_func = PIPE_LOGICOP_COPY; +} if (vc4-framebuffer.cbufs[0]) key-color_format = vc4-framebuffer.cbufs[0]-format; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix decision for whether the MIN operation writes to the B regfile.
Module: Mesa Branch: master Commit: 46741c1b872e47a76c152b0c36064f465da6e001 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=46741c1b872e47a76c152b0c36064f465da6e001 Author: Eric Anholt e...@anholt.net Date: Mon Dec 8 11:27:50 2014 -0800 vc4: Fix decision for whether the MIN operation writes to the B regfile. --- src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c |6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c index e797c59..f5e152b 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c @@ -222,7 +222,7 @@ check_register_write(uint64_t inst, uint32_t waddr = (is_mul ? QPU_GET_FIELD(inst, QPU_WADDR_MUL) : QPU_GET_FIELD(inst, QPU_WADDR_ADD)); - bool is_b = is_mul != ((inst & QPU_PM) != 0); + bool is_b = is_mul != ((inst & QPU_WS) != 0); uint32_t live_reg_index; switch (waddr) { @@ -298,7 +298,7 @@ track_live_clamps(uint64_t inst, uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); - bool pm = inst & QPU_PM; + bool is_b = inst & QPU_WS; uint32_t live_reg_index; if (QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_MIN) @@ -309,7 +309,7 @@ track_live_clamps(uint64_t inst, return; } - live_reg_index = waddr_to_live_reg_index(waddr_add, pm); + live_reg_index = waddr_to_live_reg_index(waddr_add, is_b); if (live_reg_index != ~0) { validation_state->live_clamp_offsets[live_reg_index] = validated_shader->uniforms_size; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add support for TGSI_OPCODE_UCMP.
Module: Mesa Branch: master Commit: 5045d8ca421168a8ba2eb861298fc67be4f1eb03 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5045d8ca421168a8ba2eb861298fc67be4f1eb03 Author: Eric Anholt e...@anholt.net Date: Mon Dec 8 11:57:15 2014 -0800 vc4: Add support for TGSI_OPCODE_UCMP. This is being emitted now from st_glsl_to_tgsi.cpp. --- src/gallium/drivers/vc4/vc4_program.c | 12 1 file changed, 12 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 5fab1de..d21522f 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -569,6 +569,17 @@ tgsi_to_qir_cmp(struct vc4_compile *c, } static struct qreg +tgsi_to_qir_ucmp(struct vc4_compile *c, + struct tgsi_full_instruction *tgsi_inst, + enum qop op, struct qreg *src, int i) +{ +qir_SF(c, src[0 * 4 + i]); +return qir_SEL_X_Y_ZC(c, + src[1 * 4 + i], + src[2 * 4 + i]); +} + +static struct qreg tgsi_to_qir_mad(struct vc4_compile *c, struct tgsi_full_instruction *tgsi_inst, enum qop op, struct qreg *src, int i) @@ -1292,6 +1303,7 @@ emit_tgsi_instruction(struct vc4_compile *c, [TGSI_OPCODE_ISLT] = { 0, tgsi_to_qir_islt }, [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp }, +[TGSI_OPCODE_UCMP] = { 0, tgsi_to_qir_ucmp }, [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad }, [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp }, [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_rsq }, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add a debug flag for waiting for sync on submit.
Module: Mesa Branch: master Commit: 34cf86bdc48e1b350437fe831fedd4632f4e06c2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=34cf86bdc48e1b350437fe831fedd4632f4e06c2 Author: Eric Anholt e...@anholt.net Date: Tue Dec 2 13:18:56 2014 -0800 vc4: Add a debug flag for waiting for sync on submit. This is nice when you're tracking down which command list is hanging the GPU. --- src/gallium/drivers/vc4/vc4_context.c |8 src/gallium/drivers/vc4/vc4_screen.c |2 ++ src/gallium/drivers/vc4/vc4_screen.h |1 + 3 files changed, 11 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index bb30c0e..b26c071 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -324,6 +324,14 @@ vc4_flush(struct pipe_context *pctx) vc4-last_emit_seqno = submit.seqno; +if (vc4_debug VC4_DEBUG_ALWAYS_SYNC) { +if (!vc4_wait_seqno(vc4-screen, vc4-last_emit_seqno, +PIPE_TIMEOUT_INFINITE)) { +fprintf(stderr, Wait failed.\n); +abort(); +} +} + vc4_reset_cl(vc4-bcl); vc4_reset_cl(vc4-rcl); vc4_reset_cl(vc4-shader_rec); diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index abd44ee..18451bd 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -52,6 +52,8 @@ static const struct debug_named_value debug_options[] = { Skip actual hardware execution of commands }, { always_flush, VC4_DEBUG_ALWAYS_FLUSH, Flush after each draw call }, +{ always_sync, VC4_DEBUG_ALWAYS_SYNC, + Wait for finish after each flush }, { NULL } }; diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h index ba07490..3819bda 100644 --- a/src/gallium/drivers/vc4/vc4_screen.h +++ b/src/gallium/drivers/vc4/vc4_screen.h @@ -37,6 +37,7 @@ struct vc4_bo; #define VC4_DEBUG_PERF 0x0020 #define VC4_DEBUG_NORAST0x0040 #define VC4_DEBUG_ALWAYS_FLUSH 0x0080 +#define VC4_DEBUG_ALWAYS_SYNC 0x0100 #define VC4_MAX_MIP_LEVELS 12 #define 
VC4_MAX_TEXTURE_SAMPLERS 16 ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix inverted priority of instructions for QPU scheduling.
Module: Mesa Branch: master Commit: 042962df2d058c4dd4e45b7deaa3b4519141758e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=042962df2d058c4dd4e45b7deaa3b4519141758e Author: Eric Anholt e...@anholt.net Date: Tue Dec 2 16:31:29 2014 -0800 vc4: Fix inverted priority of instructions for QPU scheduling. We were scheduling TLB operations as early as possible, and texture setup as late as possible. When I introduced prioritization, I visually inspected that an independent operation got moved above texture results collection, which tricked me into thinking it was working (but it was just because texture setup was being pushed late). total instructions in shared programs: 57651 - 57486 (-0.29%) instructions in affected programs: 18532 - 18367 (-0.89%) --- src/gallium/drivers/vc4/vc4_qpu_schedule.c | 20 ++-- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index 8aa8374..2b0a632 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -439,24 +439,24 @@ get_instruction_priority(uint64_t inst) uint32_t baseline_score; uint32_t next_score = 0; -/* Schedule texture read setup early to hide their latency better. */ -if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul)) +/* Schedule TLB operations as late as possible, to get more + * parallelism between shaders. + */ +if (qpu_inst_is_tlb(inst)) return next_score; next_score++; -/* Default score for things that aren't otherwise special. */ -baseline_score = next_score; -next_score++; - /* Schedule texture read results collection late to hide latency. */ if (sig == QPU_SIG_LOAD_TMU0 || sig == QPU_SIG_LOAD_TMU1) return next_score; next_score++; -/* Schedule TLB operations as late as possible, to get more - * parallelism between shaders. - */ -if (qpu_inst_is_tlb(inst)) +/* Default score for things that aren't otherwise special. 
*/ +baseline_score = next_score; +next_score++; + +/* Schedule texture read setup early to hide their latency better. */ +if (is_tmu_write(waddr_add) || is_tmu_write(waddr_mul)) return next_score; next_score++; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Refuse to merge two ops that both access shared functions.
Module: Mesa Branch: master Commit: bd4057a5d74fd1801c55ee98346af9c1095d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=bd4057a5d74fd1801c55ee98346af9c1095d Author: Eric Anholt e...@anholt.net Date: Tue Dec 2 16:23:40 2014 -0800 vc4: Refuse to merge two ops that both access shared functions. Avoids assertion failures in vc4_qpu_validate.c if we happen to find the right set of operations available. --- src/gallium/drivers/vc4/vc4_qpu.c | 53 src/gallium/drivers/vc4/vc4_qpu.h |1 + src/gallium/drivers/vc4/vc4_qpu_validate.c | 37 +-- 3 files changed, 55 insertions(+), 36 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c index 54c79e9..fc5d4b0 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.c +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -209,6 +209,56 @@ merge_fields(uint64_t *merge, return true; } +int +qpu_num_sf_accesses(uint64_t inst) +{ +int accesses = 0; +static const uint32_t specials[] = { +QPU_W_TLB_COLOR_MS, +QPU_W_TLB_COLOR_ALL, +QPU_W_TLB_Z, +QPU_W_TMU0_S, +QPU_W_TMU0_T, +QPU_W_TMU0_R, +QPU_W_TMU0_B, +QPU_W_TMU1_S, +QPU_W_TMU1_T, +QPU_W_TMU1_R, +QPU_W_TMU1_B, +QPU_W_SFU_RECIP, +QPU_W_SFU_RECIPSQRT, +QPU_W_SFU_EXP, +QPU_W_SFU_LOG, +}; +uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); +uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); +uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); +uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); + +for (int j = 0; j ARRAY_SIZE(specials); j++) { +if (waddr_add == specials[j]) +accesses++; +if (waddr_mul == specials[j]) +accesses++; +} + +if (raddr_a == QPU_R_MUTEX_ACQUIRE) +accesses++; +if (raddr_b == QPU_R_MUTEX_ACQUIRE) +accesses++; + +/* XXX: semaphore, combined color read/write? 
*/ +switch (QPU_GET_FIELD(inst, QPU_SIG)) { +case QPU_SIG_COLOR_LOAD: +case QPU_SIG_COLOR_LOAD_END: +case QPU_SIG_LOAD_TMU0: +case QPU_SIG_LOAD_TMU1: +accesses++; +} + +return accesses; +} + uint64_t qpu_merge_inst(uint64_t a, uint64_t b) { @@ -223,6 +273,9 @@ qpu_merge_inst(uint64_t a, uint64_t b) QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) return 0; +if (qpu_num_sf_accesses(a) qpu_num_sf_accesses(b)) +return 0; + ok = ok merge_fields(merge, a, b, QPU_SIG_MASK, QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h index eb06d1a..e1307eb 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.h +++ b/src/gallium/drivers/vc4/vc4_qpu.h @@ -137,6 +137,7 @@ uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond); bool qpu_waddr_is_tlb(uint32_t waddr); bool qpu_inst_is_tlb(uint64_t inst); +int qpu_num_sf_accesses(uint64_t inst); void qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst); static inline uint64_t diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c b/src/gallium/drivers/vc4/vc4_qpu_validate.c index 16de82a..ffd1b47 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_validate.c +++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c @@ -255,42 +255,7 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst) */ for (int i = 0; i num_inst - 1; i++) { uint64_t inst = insts[i]; -int accesses = 0; -static const uint32_t specials[] = { -QPU_W_TLB_COLOR_MS, -QPU_W_TLB_COLOR_ALL, -QPU_W_TLB_Z, -QPU_W_TMU0_S, -QPU_W_TMU0_T, -QPU_W_TMU0_R, -QPU_W_TMU0_B, -QPU_W_TMU1_S, -QPU_W_TMU1_T, -QPU_W_TMU1_R, -QPU_W_TMU1_B, -QPU_W_SFU_RECIP, -QPU_W_SFU_RECIPSQRT, -QPU_W_SFU_EXP, -QPU_W_SFU_LOG, -}; - -for (int j = 0; j ARRAY_SIZE(specials); j++) { -if (writes_reg(inst, specials[j])) -accesses++; -} - -if (reads_reg(inst, QPU_R_MUTEX_ACQUIRE)) -accesses++; - -/* XXX: semaphore, combined color read/write? */ -switch (QPU_GET_FIELD(inst, QPU_SIG
Mesa (master): vc4: Allow dead code elimination of color reads.
Module: Mesa Branch: master Commit: dadc32ac8072cf78b405d1b54414e1f020b0de41 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dadc32ac8072cf78b405d1b54414e1f020b0de41 Author: Eric Anholt e...@anholt.net Date: Tue Dec 2 12:58:27 2014 -0800 vc4: Allow dead code elimination of color reads. This might happen if the blending functions are set up to not actually use the destination color/alpha, for example. --- src/gallium/drivers/vc4/vc4_qir.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index cd731bc..c43b9b6 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -80,7 +80,7 @@ static const struct qir_op_info qir_op_info[] = { [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true }, [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true }, [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true }, -[QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0, true }, +[QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 }, [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 }, [QOP_FRAG_X] = { "frag_x", 1, 0 }, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add separate write-after-read dependency tracking for pairing.
Module: Mesa Branch: master Commit: 6f32deb538b1b62ff6d5d1212105bbe8d6adce72 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f32deb538b1b62ff6d5d1212105bbe8d6adce72 Author: Eric Anholt e...@anholt.net Date: Tue Dec 2 15:42:58 2014 -0800 vc4: Add separate write-after-read dependency tracking for pairing. If an operation is the last one to read a register, the instruction containing it can also include the op that has the next write to that register. total instructions in shared programs: 57486 - 56995 (-0.85%) instructions in affected programs: 43004 - 42513 (-1.14%) --- src/gallium/drivers/vc4/vc4_qpu_schedule.c | 78 +--- 1 file changed, 58 insertions(+), 20 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index 2b0a632..6bba66a 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c @@ -40,16 +40,23 @@ static bool debug; +struct schedule_node_child; + struct schedule_node { struct simple_node link; struct queued_qpu_inst *inst; -struct schedule_node **children; +struct schedule_node_child *children; uint32_t child_count; uint32_t child_array_size; uint32_t parent_count; uint32_t delay; }; +struct schedule_node_child { +struct schedule_node *node; +bool write_after_read; +}; + /* When walking the instructions in reverse, we need to swap before/after in * add_dep(). 
*/ @@ -71,8 +78,11 @@ struct schedule_state { static void add_dep(struct schedule_state *state, struct schedule_node *before, -struct schedule_node *after) +struct schedule_node *after, +bool write) { +bool write_after_read = !write state-dir == R; + if (!before || !after) return; @@ -85,28 +95,40 @@ add_dep(struct schedule_state *state, } for (int i = 0; i before-child_count; i++) { -if (before-children[i] == after) +if (before-children[i].node == after +(before-children[i].write_after_read == write_after_read)) { return; +} } if (before-child_array_size = before-child_count) { before-child_array_size = MAX2(before-child_array_size * 2, 16); before-children = reralloc(before, before-children, -struct schedule_node *, +struct schedule_node_child, before-child_array_size); } -before-children[before-child_count] = after; +before-children[before-child_count].node = after; +before-children[before-child_count].write_after_read = +write_after_read; before-child_count++; after-parent_count++; } static void +add_read_dep(struct schedule_state *state, + struct schedule_node *before, + struct schedule_node *after) +{ +add_dep(state, before, after, false); +} + +static void add_write_dep(struct schedule_state *state, struct schedule_node **before, struct schedule_node *after) { -add_dep(state, *before, after); +add_dep(state, *before, after, true); *before = after; } @@ -152,9 +174,9 @@ process_raddr_deps(struct schedule_state *state, struct schedule_node *n, default: if (raddr 32) { if (is_a) -add_dep(state, state-last_ra[raddr], n); +add_read_dep(state, state-last_ra[raddr], n); else -add_dep(state, state-last_rb[raddr], n); +add_read_dep(state, state-last_rb[raddr], n); } else { fprintf(stderr, unknown raddr %d\n, raddr); abort(); @@ -186,7 +208,7 @@ process_mux_deps(struct schedule_state *state, struct schedule_node *n, uint32_t mux) { if (mux != QPU_MUX_A mux != QPU_MUX_B) -add_dep(state, state-last_r[mux], n); +add_read_dep(state, state-last_r[mux], n); } @@ -278,7 
+300,7 @@ process_cond_deps(struct schedule_state *state, struct schedule_node *n, case QPU_COND_ALWAYS: break; default: -add_dep(state, state-last_sf, n); +add_read_dep(state, state-last_sf, n); break; } } @@ -339,7 +361,7 @@ calculate_deps(struct schedule_state *state, struct schedule_node *n) break; case QPU_SIG_COLOR_LOAD: -add_dep(state, state-last_tlb, n); +add_read_dep(state, state-last_tlb, n
Mesa (master): vc4: Try swapping the regfile A to B to pair instructions.
Module: Mesa Branch: master Commit: befdff81423a1b6a05969dfde59bfa9c521c4621 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=befdff81423a1b6a05969dfde59bfa9c521c4621 Author: Eric Anholt e...@anholt.net Date: Fri Dec 5 13:23:17 2014 -0800 vc4: Try swapping the regfile A to B to pair instructions. total instructions in shared programs: 56995 - 56087 (-1.59%) instructions in affected programs: 40503 - 39595 (-2.24%) --- src/gallium/drivers/vc4/vc4_qpu.c | 64 +++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c index 0e38890..6daa072 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.c +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -290,6 +290,55 @@ qpu_waddr_ignores_pm(uint32_t waddr) return false; } +static void +swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift) +{ +uint64_t mux_mask = (uint64_t)0x7 mux_shift; +uint64_t mux_a_val = (uint64_t)QPU_MUX_A mux_shift; +uint64_t mux_b_val = (uint64_t)QPU_MUX_B mux_shift; + +if ((*a mux_mask) == mux_a_val) { +*a = (*a ~mux_mask) | mux_b_val; +*merge = (*merge ~mux_mask) | mux_b_val; +} +} + +static bool +try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b) +{ +uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A); +uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B); +uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A); +uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B); + +if (raddr_a_b != QPU_R_NOP) +return false; + +switch (raddr_a_a) { +case QPU_R_UNIF: +case QPU_R_VARY: +break; +default: +return false; +} + +if (raddr_b_b != QPU_R_NOP +raddr_b_b != raddr_a_a) +return false; + +/* Move raddr A to B in instruction a. 
*/ +*a = (*a ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A); +*a = (*a ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B); +*merge = ((*merge ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(raddr_b_a, QPU_RADDR_A)); +*merge = ((*merge ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B)); +swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT); +swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT); +swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT); +swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT); + +return true; +} + uint64_t qpu_merge_inst(uint64_t a, uint64_t b) { @@ -314,8 +363,19 @@ qpu_merge_inst(uint64_t a, uint64_t b) ok = ok merge_fields(merge, a, b, QPU_SF | QPU_PM, ~0); -ok = ok merge_fields(merge, a, b, QPU_RADDR_A_MASK, -QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A)); +if (!merge_fields(merge, a, b, QPU_RADDR_A_MASK, + QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) { +/* Since we tend to use regfile A by default both for register + * allocation and for our special values (uniforms and + * varyings), try swapping uniforms and varyings to regfile B + * to resolve raddr A conflicts. + */ +if (!try_swap_ra_file(merge, a, b) +!try_swap_ra_file(merge, b, a)) { +return 0; +} +} + ok = ok merge_fields(merge, a, b, QPU_RADDR_B_MASK, QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B)); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Allow pairing of some instructions that disagree about the WS bit.
Module: Mesa Branch: master Commit: 7d8b79f398f18ed7bb48a74b1b82950e2f08abad URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7d8b79f398f18ed7bb48a74b1b82950e2f08abad Author: Eric Anholt e...@anholt.net Date: Fri Dec 5 12:34:30 2014 -0800 vc4: Allow pairing of some instructions that disagree about the WS bit. No difference on shader-db because we tend to have a lot of other conflicts going on as well (like RADDR_A disagreements) --- src/gallium/drivers/vc4/vc4_qpu.c | 48 - 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c index fc5d4b0..0e38890 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.c +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -259,6 +259,37 @@ qpu_num_sf_accesses(uint64_t inst) return accesses; } +static bool +qpu_waddr_ignores_pm(uint32_t waddr) +{ +switch(waddr) { +case QPU_W_ACC0: +case QPU_W_ACC1: +case QPU_W_ACC2: +case QPU_W_ACC3: +case QPU_W_TLB_Z: +case QPU_W_TLB_COLOR_MS: +case QPU_W_TLB_COLOR_ALL: +case QPU_W_TLB_ALPHA_MASK: +case QPU_W_VPM: +case QPU_W_SFU_RECIP: +case QPU_W_SFU_RECIPSQRT: +case QPU_W_SFU_EXP: +case QPU_W_SFU_LOG: +case QPU_W_TMU0_S: +case QPU_W_TMU0_T: +case QPU_W_TMU0_R: +case QPU_W_TMU0_B: +case QPU_W_TMU1_S: +case QPU_W_TMU1_T: +case QPU_W_TMU1_R: +case QPU_W_TMU1_B: +return true; +} + +return false; +} + uint64_t qpu_merge_inst(uint64_t a, uint64_t b) { @@ -280,7 +311,7 @@ qpu_merge_inst(uint64_t a, uint64_t b) QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); /* Misc fields that have to match exactly. 
*/ -ok = ok merge_fields(merge, a, b, QPU_SF | QPU_WS | QPU_PM, +ok = ok merge_fields(merge, a, b, QPU_SF | QPU_PM, ~0); ok = ok merge_fields(merge, a, b, QPU_RADDR_A_MASK, @@ -293,6 +324,21 @@ qpu_merge_inst(uint64_t a, uint64_t b) ok = ok merge_fields(merge, a, b, QPU_WADDR_MUL_MASK, QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL)); +/* Allow disagreement on WS (swapping A vs B physical reg file as the + * destination for ADD/MUL) if one of the original instructions + * ignores it (probably because it's just writing to accumulators). + */ +if (qpu_waddr_ignores_pm(QPU_GET_FIELD(a, QPU_WADDR_ADD)) +qpu_waddr_ignores_pm(QPU_GET_FIELD(a, QPU_WADDR_MUL))) { +merge = (merge ~QPU_WS) | (b QPU_WS); +} else if (qpu_waddr_ignores_pm(QPU_GET_FIELD(b, QPU_WADDR_ADD)) + qpu_waddr_ignores_pm(QPU_GET_FIELD(b, QPU_WADDR_MUL))) { +merge = (merge ~QPU_WS) | (a QPU_WS); +} else { +if ((a QPU_WS) != (b QPU_WS)) +return 0; +} + if (ok) return merge; else ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix assertion about SFU versus texturing.
Module: Mesa Branch: master Commit: a7b1a93137f4441822760f1cfe261065474163ab URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a7b1a93137f4441822760f1cfe261065474163ab Author: Eric Anholt e...@anholt.net Date: Sun Nov 30 13:25:16 2014 -0800 vc4: Fix assertion about SFU versus texturing. We're supposed to be checking that nothing else writes r4, which is done by the TMU result collection signal, not the coordinate setup. Avoids a regression when QPU instruction scheduling is introduced. --- src/gallium/drivers/vc4/vc4_qpu_validate.c |7 --- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c b/src/gallium/drivers/vc4/vc4_qpu_validate.c index 8fe5f41..16de82a 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_validate.c +++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c @@ -202,12 +202,13 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst) int last_sfu_inst = -10; for (int i = 0; i < num_inst - 1; i++) { uint64_t inst = insts[i]; +uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); assert(i - last_sfu_inst > 2 || (!writes_sfu(inst) && -!writes_reg(inst, QPU_W_TMU0_S) && -!writes_reg(inst, QPU_W_TMU1_S) && -QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_COLOR_LOAD)); +sig != QPU_SIG_LOAD_TMU0 && +sig != QPU_SIG_LOAD_TMU1 && +sig != QPU_SIG_COLOR_LOAD)); if (writes_sfu(inst)) last_sfu_inst = i; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Drop the explicit scoreboard wait.
Module: Mesa Branch: master Commit: 6958c404caf3f4b2219ef686e2beeeaf48664905 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6958c404caf3f4b2219ef686e2beeeaf48664905 Author: Eric Anholt e...@anholt.net Date: Wed Nov 26 12:51:27 2014 -0800 vc4: Drop the explicit scoreboard wait. This is actually implicitly handled by the TLB operations. --- src/gallium/drivers/vc4/vc4_qpu_emit.c | 23 +++ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 3d7bdb4..e6e97cc 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -130,7 +130,6 @@ static void serialize_insts(struct vc4_compile *c) { int last_sfu_write = -10; -bool scoreboard_wait_emitted = false; while (!is_empty_list(c-qpu_inst_list)) { struct queued_qpu_inst *q = @@ -210,22 +209,18 @@ serialize_insts(struct vc4_compile *c) * explicit Wait for Scoreboard signal or an implicit wait * with the first tile-buffer read or write instruction. 
*/ -if (!scoreboard_wait_emitted -(waddr_a == QPU_W_TLB_Z || waddr_m == QPU_W_TLB_Z || - waddr_a == QPU_W_TLB_COLOR_MS || - waddr_m == QPU_W_TLB_COLOR_MS || - waddr_a == QPU_W_TLB_COLOR_ALL || - waddr_m == QPU_W_TLB_COLOR_ALL || - QPU_GET_FIELD(q-inst, QPU_SIG) == QPU_SIG_COLOR_LOAD)) { +if (waddr_a == QPU_W_TLB_Z || +waddr_m == QPU_W_TLB_Z || +waddr_a == QPU_W_TLB_COLOR_MS || +waddr_m == QPU_W_TLB_COLOR_MS || +waddr_a == QPU_W_TLB_COLOR_ALL || +waddr_m == QPU_W_TLB_COLOR_ALL || +QPU_GET_FIELD(q-inst, QPU_SIG) == QPU_SIG_COLOR_LOAD) { while (c-qpu_inst_count 3 || QPU_GET_FIELD(c-qpu_insts[c-qpu_inst_count - 1], QPU_SIG) != QPU_SIG_NONE) { serialize_one_inst(c, qpu_NOP()); } -c-qpu_insts[c-qpu_inst_count - 1] = -qpu_set_sig(c-qpu_insts[c-qpu_inst_count - 1], -QPU_SIG_WAIT_FOR_SCOREBOARD); -scoreboard_wait_emitted = true; } serialize_one_inst(c, q-inst); @@ -616,6 +611,10 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) serialize_one_inst(c, qpu_NOP()); } +/* thread end can't have TLB operations */ +if (qpu_inst_is_tlb(c-qpu_insts[c-qpu_inst_count - 1])) +serialize_one_inst(c, qpu_NOP()); + c-qpu_insts[c-qpu_inst_count - 1] = qpu_set_sig(c-qpu_insts[c-qpu_inst_count - 1], QPU_SIG_PROG_END); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Also deal with VPM reads at thread end.
Module: Mesa Branch: master Commit: 334036fb640741e51ecc54b823866710e99c853d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=334036fb640741e51ecc54b823866710e99c853d Author: Eric Anholt e...@anholt.net Date: Sun Nov 30 15:13:40 2014 -0800 vc4: Also deal with VPM reads at thread end. Prevents a regression with QPU scheduling, which happens to put the no-op reads for unused VPM contents end up at the end of the program. --- src/gallium/drivers/vc4/vc4_qpu_emit.c |8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 1d12d11..3d7bdb4 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -596,11 +596,15 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) serialize_insts(c); -/* thread end can't have VPM write */ +/* thread end can't have VPM write or read */ if (QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1], QPU_WADDR_ADD) == QPU_W_VPM || QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1], - QPU_WADDR_MUL) == QPU_W_VPM) { + QPU_WADDR_MUL) == QPU_W_VPM || +QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1], + QPU_RADDR_A) == QPU_R_VPM || +QPU_GET_FIELD(c->qpu_insts[c->qpu_inst_count - 1], + QPU_RADDR_B) == QPU_R_VPM) { serialize_one_inst(c, qpu_NOP()); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add another check for invalid TLB scoreboard handling.
Module: Mesa Branch: master Commit: 2d5784c8254b4a0e3e04dd0f1e46ab1eb85612dd URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2d5784c8254b4a0e3e04dd0f1e46ab1eb85612dd Author: Eric Anholt e...@anholt.net Date: Wed Nov 26 17:01:59 2014 -0800 vc4: Add another check for invalid TLB scoreboard handling. This was caught by an assertion in the simulator. --- src/gallium/drivers/vc4/vc4_qpu.c | 23 +++ src/gallium/drivers/vc4/vc4_qpu.h |3 +++ src/gallium/drivers/vc4/vc4_qpu_validate.c | 21 + 3 files changed, 39 insertions(+), 8 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c index a551a0f..093ca07 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.c +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -244,3 +244,26 @@ qpu_set_cond_mul(uint64_t inst, uint32_t sig) return (inst ~QPU_COND_MUL_MASK) | QPU_SET_FIELD(sig, QPU_COND_MUL); } +bool +qpu_waddr_is_tlb(uint32_t waddr) +{ +switch (waddr) { +case QPU_W_TLB_COLOR_ALL: +case QPU_W_TLB_COLOR_MS: +case QPU_W_TLB_Z: +return true; +default: +return false; +} +} + +bool +qpu_inst_is_tlb(uint64_t inst) +{ +uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); + +return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) || +qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) || +sig == QPU_SIG_COLOR_LOAD || +sig == QPU_SIG_WAIT_FOR_SCOREBOARD); +} diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h index 2f37108..5f4caab 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.h +++ b/src/gallium/drivers/vc4/vc4_qpu.h @@ -133,6 +133,9 @@ uint64_t qpu_set_sig(uint64_t inst, uint32_t sig); uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond); uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond); +bool qpu_waddr_is_tlb(uint32_t waddr); +bool qpu_inst_is_tlb(uint64_t inst); + static inline uint64_t qpu_load_imm_f(struct qpu_reg dst, float val) { diff --git a/src/gallium/drivers/vc4/vc4_qpu_validate.c b/src/gallium/drivers/vc4/vc4_qpu_validate.c index d043733..8fe5f41 100644 --- 
a/src/gallium/drivers/vc4/vc4_qpu_validate.c +++ b/src/gallium/drivers/vc4/vc4_qpu_validate.c @@ -91,11 +91,17 @@ writes_sfu(uint64_t inst) void vc4_qpu_validate(uint64_t *insts, uint32_t num_inst) { +bool scoreboard_locked = false; + for (int i = 0; i num_inst; i++) { uint64_t inst = insts[i]; -if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) +if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) { +if (qpu_inst_is_tlb(inst)) +scoreboard_locked = true; + continue; +} /* The Thread End instruction must not write to either physical * regfile A or B. @@ -103,6 +109,11 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst) assert(QPU_GET_FIELD(inst, QPU_WADDR_ADD) = 32); assert(QPU_GET_FIELD(inst, QPU_WADDR_MUL) = 32); +/* Can't trigger an implicit wait on scoreboard in the program + * end instruction. + */ +assert(!qpu_inst_is_tlb(inst) || scoreboard_locked); + /* Two delay slots will be executed. */ assert(i + 2 = num_inst); @@ -141,13 +152,7 @@ vc4_qpu_validate(uint64_t *insts, uint32_t num_inst) for (int i = 0; i 2; i++) { uint64_t inst = insts[i]; -assert(QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_COLOR_LOAD); -assert(QPU_GET_FIELD(inst, QPU_SIG) != - QPU_SIG_WAIT_FOR_SCOREBOARD); -assert(!writes_reg(inst, QPU_W_TLB_COLOR_MS)); -assert(!writes_reg(inst, QPU_W_TLB_COLOR_ALL)); -assert(!writes_reg(inst, QPU_W_TLB_Z)); - +assert(!qpu_inst_is_tlb(inst)); } /* If TMU_NOSWAP is written, the write must be three instructions ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Introduce scheduling of QPU instructions.
Module: Mesa Branch: master Commit: 3fe4d8e1e39b47c9c5c4bfdd87300abd0c336a7e URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=3fe4d8e1e39b47c9c5c4bfdd87300abd0c336a7e Author: Eric Anholt e...@anholt.net Date: Wed Nov 26 12:44:19 2014 -0800 vc4: Introduce scheduling of QPU instructions. This doesn't reschedule much currently, just tries to fit things into the regfile A/B write-versus-read slots (the cause of the improvements in shader-db), and hide texture fetch latency by scheduling setup early and results collection late (haven't performance tested it). This infrastructure will be important for doing instruction pairing, though. shader-db2 results: total instructions in shared programs: 61874 - 59583 (-3.70%) instructions in affected programs: 50677 - 48386 (-4.52%) --- src/gallium/drivers/vc4/Makefile.sources |1 + src/gallium/drivers/vc4/vc4_qir.h |7 + src/gallium/drivers/vc4/vc4_qpu.c | 12 + src/gallium/drivers/vc4/vc4_qpu.h |3 + src/gallium/drivers/vc4/vc4_qpu_emit.c | 132 +- src/gallium/drivers/vc4/vc4_qpu_schedule.c | 693 6 files changed, 722 insertions(+), 126 deletions(-) diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index 6ec48ab..6bcb731 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -24,6 +24,7 @@ C_SOURCES := \ vc4_qpu_disasm.c \ vc4_qpu_emit.c \ vc4_qpu.h \ + vc4_qpu_schedule.c \ vc4_qpu_validate.c \ vc4_query.c \ vc4_register_allocate.c \ diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index cb02db5..0b76a2f 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -148,6 +148,11 @@ struct simple_node { struct simple_node *prev; }; +struct queued_qpu_inst { +struct simple_node link; +uint64_t inst; +}; + struct qinst { struct simple_node link; @@ -368,6 +373,8 @@ bool qir_opt_copy_propagation(struct vc4_compile *c); bool qir_opt_cse(struct vc4_compile *c); bool 
qir_opt_dead_code(struct vc4_compile *c); +void qpu_schedule_instructions(struct vc4_compile *c); + #define QIR_ALU0(name) \ static inline struct qreg\ qir_##name(struct vc4_compile *c)\ diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c index 093ca07..723b361 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.c +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -22,6 +22,7 @@ */ #include stdbool.h +#include vc4_qir.h #include vc4_qpu.h static uint64_t @@ -267,3 +268,14 @@ qpu_inst_is_tlb(uint64_t inst) sig == QPU_SIG_COLOR_LOAD || sig == QPU_SIG_WAIT_FOR_SCOREBOARD); } + +void +qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst) +{ +if (c-qpu_inst_count = c-qpu_inst_size) { +c-qpu_inst_size = MAX2(16, c-qpu_inst_size * 2); +c-qpu_insts = realloc(c-qpu_insts, + c-qpu_inst_size * sizeof(uint64_t)); +} +c-qpu_insts[c-qpu_inst_count++] = inst; +} diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h index 5f4caab..bf41f72 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.h +++ b/src/gallium/drivers/vc4/vc4_qpu.h @@ -30,6 +30,8 @@ #include vc4_qpu_defines.h +struct vc4_compile; + struct qpu_reg { enum qpu_mux mux; uint8_t addr; @@ -135,6 +137,7 @@ uint64_t qpu_set_cond_mul(uint64_t inst, uint32_t cond); bool qpu_waddr_is_tlb(uint32_t waddr); bool qpu_inst_is_tlb(uint64_t inst); +void qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst); static inline uint64_t qpu_load_imm_f(struct qpu_reg dst, float val) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index e6e97cc..3cb709f 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -41,11 +41,6 @@ vc4_dump_program(struct vc4_compile *c) } } -struct queued_qpu_inst { -struct simple_node link; -uint64_t inst; -}; - static void queue(struct vc4_compile *c, uint64_t inst) { @@ -115,121 +110,6 @@ fixup_raddr_conflict(struct vc4_compile *c, *src1 = qpu_r3(); } -static void 
-serialize_one_inst(struct vc4_compile *c, uint64_t inst) -{ -if (c-qpu_inst_count = c-qpu_inst_size) { -c-qpu_inst_size = MAX2(16, c-qpu_inst_size * 2); -c-qpu_insts = realloc(c-qpu_insts, - c-qpu_inst_size * sizeof(uint64_t)); -} -c-qpu_insts[c-qpu_inst_count++] = inst; -} - -static void -serialize_insts(struct vc4_compile *c) -{ -int last_sfu_write
Mesa (master): vc4: Pair up QPU instructions when scheduling.
Module: Mesa Branch: master Commit: 29c7cf2b2ba850cf467167548d53383e1338fd5c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=29c7cf2b2ba850cf467167548d53383e1338fd5c Author: Eric Anholt e...@anholt.net Date: Mon Dec 1 11:48:20 2014 -0800 vc4: Pair up QPU instructions when scheduling. We've got two mostly-independent operations in each QPU instruction, so try to pack two operations together. This is fairly naive (doesn't track read and write separately in instructions, doesn't convert ADD-based MOVs into MUL-based movs, doesn't reorder across uniform loads), but does show a decent improvement on shader-db-2. total instructions in shared programs: 59583 - 57651 (-3.24%) instructions in affected programs: 47361 - 45429 (-4.08%) --- src/gallium/drivers/vc4/vc4_qpu.c | 62 +++--- src/gallium/drivers/vc4/vc4_qpu.h |2 +- src/gallium/drivers/vc4/vc4_qpu_schedule.c | 79 ++-- 3 files changed, 105 insertions(+), 38 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu.c b/src/gallium/drivers/vc4/vc4_qpu.c index 723b361..54c79e9 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.c +++ b/src/gallium/drivers/vc4/vc4_qpu.c @@ -192,36 +192,58 @@ qpu_m_alu2(enum qpu_op_mul op, return inst; } -static uint64_t -merge_fields(uint64_t merge, - uint64_t add, uint64_t mul, +static bool +merge_fields(uint64_t *merge, + uint64_t a, uint64_t b, uint64_t mask, uint64_t ignore) { -if ((add mask) == ignore) -return (merge ~mask) | (mul mask); -else if ((mul mask) == ignore) -return (merge ~mask) | (add mask); -else { -assert((add mask) == (mul mask)); -return merge; +if ((a mask) == ignore) { +*merge = (*merge ~mask) | (b mask); +} else if ((b mask) == ignore) { +*merge = (*merge ~mask) | (a mask); +} else { +if ((a mask) != (b mask)) +return false; } + +return true; } uint64_t -qpu_inst(uint64_t add, uint64_t mul) +qpu_merge_inst(uint64_t a, uint64_t b) { -uint64_t merge = ((add ~QPU_WADDR_MUL_MASK) | - (mul ~QPU_WADDR_ADD_MASK)); +uint64_t merge = a | b; +bool ok = true; + +if 
(QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP +QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) +return 0; -merge = merge_fields(merge, add, mul, QPU_SIG_MASK, - QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); +if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP +QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP) +return 0; -merge = merge_fields(merge, add, mul, QPU_RADDR_A_MASK, - QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A)); -merge = merge_fields(merge, add, mul, QPU_RADDR_B_MASK, - QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B)); +ok = ok merge_fields(merge, a, b, QPU_SIG_MASK, +QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG)); -return merge; +/* Misc fields that have to match exactly. */ +ok = ok merge_fields(merge, a, b, QPU_SF | QPU_WS | QPU_PM, +~0); + +ok = ok merge_fields(merge, a, b, QPU_RADDR_A_MASK, +QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A)); +ok = ok merge_fields(merge, a, b, QPU_RADDR_B_MASK, +QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B)); + +ok = ok merge_fields(merge, a, b, QPU_WADDR_ADD_MASK, +QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD)); +ok = ok merge_fields(merge, a, b, QPU_WADDR_MUL_MASK, +QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL)); + +if (ok) +return merge; +else +return 0; } uint64_t diff --git a/src/gallium/drivers/vc4/vc4_qpu.h b/src/gallium/drivers/vc4/vc4_qpu.h index bf41f72..eb06d1a 100644 --- a/src/gallium/drivers/vc4/vc4_qpu.h +++ b/src/gallium/drivers/vc4/vc4_qpu.h @@ -129,7 +129,7 @@ uint64_t qpu_a_alu2(enum qpu_op_add op, struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1); uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1); -uint64_t qpu_inst(uint64_t add, uint64_t mul); +uint64_t qpu_merge_inst(uint64_t a, uint64_t b); uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val); uint64_t qpu_set_sig(uint64_t inst, uint32_t sig); uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond); diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c index f309034..8aa8374 100644 --- 
a/src/gallium/drivers/vc4/vc4_qpu_schedule.c +++ b/src/gallium/drivers/vc4
Mesa (master): vc4: Fix some inconsistent indentation.
Module: Mesa Branch: master Commit: 52a7cb2ec465497bf6a4e20558340b45fbe7d7c3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=52a7cb2ec465497bf6a4e20558340b45fbe7d7c3 Author: Eric Anholt e...@anholt.net Date: Thu Nov 20 19:44:15 2014 -0800 vc4: Fix some inconsistent indentation. --- src/gallium/drivers/vc4/vc4_screen.c | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index 96536ac..abd44ee 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -284,8 +284,8 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, return 8; else return 16; - case PIPE_SHADER_CAP_MAX_OUTPUTS: - return shader == PIPE_SHADER_FRAGMENT ? 1 : 8; +case PIPE_SHADER_CAP_MAX_OUTPUTS: +return shader == PIPE_SHADER_FRAGMENT ? 1 : 8; case PIPE_SHADER_CAP_MAX_TEMPS: return 256; /* GL_MAX_PROGRAM_TEMPORARIES_ARB */ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: @@ -386,12 +386,12 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen, retval |= PIPE_BIND_TRANSFER_WRITE; #if 0 - if (retval != usage) { - fprintf(stderr, +if (retval != usage) { +fprintf(stderr, not supported: format=%s, target=%d, sample_count=%d, usage=0x%x, retval=0x%x\n, util_format_name(format), target, sample_count, usage, retval); - } +} #endif return retval == usage; @@ -416,7 +416,7 @@ vc4_screen_create(int fd) vc4_fence_init(screen); - vc4_debug = debug_get_option_vc4_debug(); +vc4_debug = debug_get_option_vc4_debug(); if (vc4_debug VC4_DEBUG_SHADERDB) vc4_debug |= VC4_DEBUG_NORAST; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add a note about a piece of errata I've learned about.
Module: Mesa Branch: master Commit: fa74ec7e98edb185806967fb17dd2d885727aa95 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fa74ec7e98edb185806967fb17dd2d885727aa95 Author: Eric Anholt e...@anholt.net Date: Thu Nov 20 19:41:26 2014 -0800 vc4: Add a note about a piece of errata I've learned about. Right now in my environment I've only got a small CMA area, so this constraint ends up holding. --- src/gallium/drivers/vc4/vc4_draw.c |4 1 file changed, 4 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index e70506b..2d82462 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -48,6 +48,10 @@ vc4_start_draw(struct vc4_context *vc4) * BO allocations align to that anyway), then for some reason the * simulator wants an extra page available, even if you have overflow * memory set up. + * + * XXX: The binner only does 28-bit addressing math, so the tile alloc + * and tile state should be in the same BO and that BO needs to not + * cross a 256MB boundary, somehow. */ uint32_t tile_alloc_size = 32 * tilew * tileh; tile_alloc_size = align(tile_alloc_size, 4096); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Don't forget to actually connect the fence code.
Module: Mesa Branch: master Commit: 6f4adb7483c86eabfb1b89f19f6b48b4413a79d1 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6f4adb7483c86eabfb1b89f19f6b48b4413a79d1 Author: Eric Anholt e...@anholt.net Date: Thu Nov 20 19:43:07 2014 -0800 vc4: Don't forget to actually connect the fence code. I thought I'd tested this. --- src/gallium/drivers/vc4/vc4_screen.c |2 ++ 1 file changed, 2 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c index c18760c..96536ac 100644 --- a/src/gallium/drivers/vc4/vc4_screen.c +++ b/src/gallium/drivers/vc4/vc4_screen.c @@ -414,6 +414,8 @@ vc4_screen_create(int fd) screen->fd = fd; +vc4_fence_init(screen); + vc4_debug = debug_get_option_vc4_debug(); if (vc4_debug & VC4_DEBUG_SHADERDB) vc4_debug |= VC4_DEBUG_NORAST; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): r600: Drop the /* gap */ notes.
Module: Mesa Branch: master Commit: 2f01cc8417243884ac4767b0e53a5df20db2dc96 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2f01cc8417243884ac4767b0e53a5df20db2dc96 Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 13:24:59 2014 -0800 r600: Drop the /* gap */ notes. These are obviously the gaps already, due to the bare numbers with unsupported implementations. This makes inserting new gaps less irritating. --- src/gallium/drivers/r600/r600_shader.c | 19 --- 1 file changed, 19 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index a772dee..29d27ce 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -7210,7 +7210,6 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ {22,0, ALU_OP0_NOP, tgsi_unsupported}, {23,0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, @@ -7221,7 +7220,6 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow}, {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, - /* gap */ {32,0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported}, @@ -7278,7 +7276,6 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, tgsi_op2}, {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2_trans}, - /* gap */ {88,0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_AND, 0, ALU_OP2_AND_INT, tgsi_op2}, {TGSI_OPCODE_OR,0, ALU_OP2_OR_INT, tgsi_op2}, @@ -7295,7 +7292,6 @@ static struct 
r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_ENDLOOP, 0, ALU_OP0_NOP, tgsi_endloop}, {TGSI_OPCODE_ENDSUB,0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_TXQ_LZ,0, FETCH_OP_GET_TEXTURE_RESINFO, tgsi_tex}, - /* gap */ {104, 0, ALU_OP0_NOP, tgsi_unsupported}, {105, 0, ALU_OP0_NOP, tgsi_unsupported}, {106, 0, ALU_OP0_NOP, tgsi_unsupported}, @@ -7306,12 +7302,10 @@ static struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[] = { {TGSI_OPCODE_FSNE, 0, ALU_OP2_SETNE_DX10, tgsi_op2_swap}, {TGSI_OPCODE_NRM4, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_CALLNZ,0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ {114, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_BREAKC,0, ALU_OP0_NOP, tgsi_loop_breakc}, {TGSI_OPCODE_KILL_IF, 0, ALU_OP2_KILLGT, tgsi_kill}, /* conditional kill */ {TGSI_OPCODE_END, 0, ALU_OP0_NOP, tgsi_end}, /* aka HALT */ - /* gap */ {118, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_F2I, 0, ALU_OP1_FLT_TO_INT, tgsi_op2_trans}, {TGSI_OPCODE_IDIV, 0, ALU_OP0_NOP, tgsi_idiv}, @@ -7415,7 +7409,6 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_CND, 0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_SQRT, 0, ALU_OP1_SQRT_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_DP2A, 0, ALU_OP0_NOP, tgsi_unsupported}, - /* gap */ {22,0, ALU_OP0_NOP, tgsi_unsupported}, {23,0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_FRC, 0, ALU_OP1_FRACT, tgsi_op2}, @@ -7426,7 +7419,6 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_LG2, 0, ALU_OP1_LOG_IEEE, tgsi_trans_srcx_replicate}, {TGSI_OPCODE_POW, 0, ALU_OP0_NOP, tgsi_pow}, {TGSI_OPCODE_XPD, 0, ALU_OP0_NOP, tgsi_xpd}, - /* gap */ {32,0, ALU_OP0_NOP, tgsi_unsupported}, {TGSI_OPCODE_ABS, 0, ALU_OP1_MOV, tgsi_op2}, {TGSI_OPCODE_RCC, 0, ALU_OP0_NOP, tgsi_unsupported}, @@ -7483,7 +7475,6 @@ static struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] = { {TGSI_OPCODE_NOT, 0, ALU_OP1_NOT_INT, 
tgsi_op2}, {TGSI_OPCODE_TRUNC, 0, ALU_OP1_TRUNC, tgsi_op2}, {TGSI_OPCODE_SHL, 0, ALU_OP2_LSHL_INT, tgsi_op2
Mesa (master): r300: Drop the /* gap */ notes.
Module: Mesa Branch: master Commit: 386c3fcb14354b131cd51b902d3aac7c15169658 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=386c3fcb14354b131cd51b902d3aac7c15169658 Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 13:28:07 2014 -0800 r300: Drop the /* gap */ notes. This switch statement's code structure isn't dependent on the numbers of the opcodes at all. --- src/gallium/drivers/r300/r300_tgsi_to_rc.c |3 --- 1 file changed, 3 deletions(-) diff --git a/src/gallium/drivers/r300/r300_tgsi_to_rc.c b/src/gallium/drivers/r300/r300_tgsi_to_rc.c index 4448f88..7ea9cd2 100644 --- a/src/gallium/drivers/r300/r300_tgsi_to_rc.c +++ b/src/gallium/drivers/r300/r300_tgsi_to_rc.c @@ -53,7 +53,6 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_LRP: return RC_OPCODE_LRP; case TGSI_OPCODE_CND: return RC_OPCODE_CND; /* case TGSI_OPCODE_DP2A: return RC_OPCODE_DP2A; */ -/* gap */ case TGSI_OPCODE_FRC: return RC_OPCODE_FRC; case TGSI_OPCODE_CLAMP: return RC_OPCODE_CLAMP; case TGSI_OPCODE_FLR: return RC_OPCODE_FLR; @@ -62,7 +61,6 @@ static unsigned translate_opcode(unsigned opcode) case TGSI_OPCODE_LG2: return RC_OPCODE_LG2; case TGSI_OPCODE_POW: return RC_OPCODE_POW; case TGSI_OPCODE_XPD: return RC_OPCODE_XPD; -/* gap */ case TGSI_OPCODE_ABS: return RC_OPCODE_ABS; /* case TGSI_OPCODE_RCC: return RC_OPCODE_RCC; */ case TGSI_OPCODE_DPH: return RC_OPCODE_DPH; @@ -132,7 +130,6 @@ static unsigned translate_opcode(unsigned opcode) /* case TGSI_OPCODE_ENDLOOP2: return RC_OPCODE_ENDLOOP2; */ /* case TGSI_OPCODE_ENDSUB: return RC_OPCODE_ENDSUB; */ case TGSI_OPCODE_NOP: return RC_OPCODE_NOP; -/* gap */ /* case TGSI_OPCODE_NRM4: return RC_OPCODE_NRM4; */ /* case TGSI_OPCODE_CALLNZ: return RC_OPCODE_CALLNZ; */ /* case TGSI_OPCODE_BREAKC: return RC_OPCODE_BREAKC; */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): nine: Don't reference the dead TGSI_OPCODE_NRM.
Module: Mesa Branch: master Commit: 56fd7c63617ae9ec0f795c9685a89f54073cb0ea URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=56fd7c63617ae9ec0f795c9685a89f54073cb0ea Author: Jose Fonseca jfons...@vmware.com Date: Thu Nov 20 14:21:04 2014 -0800 nine: Don't reference the dead TGSI_OPCODE_NRM. The translation is lowering it to not using TGSI_OPCODE_NRM, anyway. v2: Extracted from a larger patch by Jose that also dropped DP2A usage. Reviewed-by: Jose Fonseca jfons...@vmware.com Reviewed-by: Axel Davy axel.d...@ens.fr Reviewed-by: David Heidelberg da...@ixit.cz --- src/gallium/state_trackers/nine/nine_shader.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 9b324c3..85cc190 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -2316,7 +2316,7 @@ struct sm1_op_info inst_table[] = _OPI(CRS, XPD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* XXX: .w */ _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */ _OPI(ABS, ABS, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), -_OPI(NRM, NRM, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */ +_OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */ _OPI(SINCOS, SCS, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)), _OPI(SINCOS, SCS, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)), ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: Drop unused X2D opcode.
Module: Mesa Branch: master Commit: 8c822b1e912de9af6bb16a052bf9f188c2f4a33f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8c822b1e912de9af6bb16a052bf9f188c2f4a33f Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 14:30:03 2014 -0800 gallium: Drop unused X2D opcode. Nothing in the tree generates it. Reviewed-by: Jose Fonseca jfons...@vmware.com --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c |1 - src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |6 --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 45 --- src/gallium/auxiliary/tgsi/tgsi_info.c |2 +- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h|1 - src/gallium/docs/source/tgsi.rst| 16 src/gallium/drivers/ilo/shader/toy_tgsi.c |2 - src/gallium/drivers/r300/r300_tgsi_to_rc.c |1 - src/gallium/drivers/r600/r600_shader.c |6 +-- src/gallium/include/pipe/p_shader_tokens.h |1 - 10 files changed, 4 insertions(+), 77 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index 44a44a6..c5d3679 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -211,7 +211,6 @@ lp_build_tgsi_inst_llvm( case TGSI_OPCODE_UP2US: case TGSI_OPCODE_UP4B: case TGSI_OPCODE_UP4UB: - case TGSI_OPCODE_X2D: case TGSI_OPCODE_BRA: case TGSI_OPCODE_PUSHA: case TGSI_OPCODE_POPA: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index ed1798d..d1425bb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -792,12 +792,6 @@ lp_emit_instruction_aos( return FALSE; break; - case TGSI_OPCODE_X2D: - /* deprecated? 
*/ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_ARR: src0 = lp_build_emit_fetch(bld-bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_round(bld-bld_base.base, src0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 578d4d8..b191b5c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2784,47 +2784,6 @@ exec_scs(struct tgsi_exec_machine *mach, } static void -exec_x2d(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[4]; - union tgsi_exec_channel d[2]; - - fetch_source(mach, r[0], inst-Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - fetch_source(mach, r[1], inst-Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_XZ) { - fetch_source(mach, r[2], inst-Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(r[2], r[2], r[0]); - fetch_source(mach, r[3], inst-Src[2], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_mul(r[3], r[3], r[1]); - micro_add(r[2], r[2], r[3]); - fetch_source(mach, r[3], inst-Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_add(d[0], r[2], r[3]); - } - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_YW) { - fetch_source(mach, r[2], inst-Src[2], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - micro_mul(r[2], r[2], r[0]); - fetch_source(mach, r[3], inst-Src[2], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); - micro_mul(r[3], r[3], r[1]); - micro_add(r[2], r[2], r[3]); - fetch_source(mach, r[3], inst-Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_add(d[1], r[2], r[3]); - } - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_X) { - store_dest(mach, d[0], inst-Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - } - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_Y) { - store_dest(mach, d[1], inst-Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - } - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_Z) { - store_dest(mach, d[0], inst-Dst[0], inst, TGSI_CHAN_Z, 
TGSI_EXEC_DATA_FLOAT); - } - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_W) { - store_dest(mach, d[1], inst-Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); - } -} - -static void exec_rfl(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { @@ -3908,10 +3867,6 @@ exec_instruction( assert (0); break; - case TGSI_OPCODE_X2D: - exec_x2d(mach, inst); - break; - case TGSI_OPCODE_ARR: exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index b94f5ac..94de670 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -96,7 +96,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, COMP, UP2US, TGSI_OPCODE_UP2US
Mesa (master): gallium: Drop the NRM and NRM4 opcodes.
Module: Mesa Branch: master Commit: d4864cdf15ccd30f0e82d07fd0e9db8a0c115cda URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d4864cdf15ccd30f0e82d07fd0e9db8a0c115cda Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 13:13:59 2014 -0800 gallium: Drop the NRM and NRM4 opcodes. They weren't generated in tree, and as far as I know all hardware had to lower it to a DP, RSQ, MUL. Reviewed-by: Jose Fonseca jfons...@vmware.com --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |5 -- src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 95 --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 72 - src/gallium/auxiliary/tgsi/tgsi_info.c |4 +- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h|2 - src/gallium/docs/source/tgsi.rst| 34 src/gallium/drivers/ilo/shader/toy_tgsi.c | 89 - src/gallium/drivers/r300/r300_tgsi_to_rc.c |2 - src/gallium/drivers/r600/r600_shader.c | 12 +-- src/gallium/drivers/svga/svga_tgsi_insn.c | 38 - src/gallium/include/pipe/p_shader_tokens.h |4 +- 11 files changed, 10 insertions(+), 347 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index f2fc7b0..7829a7e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -852,11 +852,6 @@ lp_emit_instruction_aos( dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_LOD_BIAS); break; - case TGSI_OPCODE_NRM: - /* fall-through */ - case TGSI_OPCODE_NRM4: - return FALSE; - case TGSI_OPCODE_DIV: assert(0); return FALSE; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index 05618bc..76b9d69 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -3507,99 +3507,6 @@ cont_emit( lp_exec_continue(bld-exec_mask); } -/* XXX: Refactor and move it to lp_bld_tgsi_action.c - * - * XXX: What do the comments about xmm registers mean? 
Maybe they are left over - * from old code, but there is no garauntee that LLVM will use those registers - * for this code. - * - * XXX: There should be no calls to lp_build_emit_fetch in this function. This - * should be handled by the emit_data-fetch_args function. */ -static void -nrm_emit( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - LLVMValueRef tmp0, tmp1; - LLVMValueRef tmp4 = NULL; - LLVMValueRef tmp5 = NULL; - LLVMValueRef tmp6 = NULL; - LLVMValueRef tmp7 = NULL; - struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); - - uint dims = (emit_data-inst-Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; - - if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_X) || - TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_Y) || - TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_Z) || - (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_W) dims == 4)) { - - /* NOTE: Cannot use xmm regs 2/3 here (see emit_rsqrt() above). 
*/ - - /* xmm4 = src.x */ - /* xmm0 = src.x * src.x */ - tmp0 = lp_build_emit_fetch(bld-bld_base, emit_data-inst, 0, TGSI_CHAN_X); - if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_X)) { - tmp4 = tmp0; - } - tmp0 = lp_build_mul( bld-bld_base.base, tmp0, tmp0); - - /* xmm5 = src.y */ - /* xmm0 = xmm0 + src.y * src.y */ - tmp1 = lp_build_emit_fetch(bld-bld_base, emit_data-inst, 0, TGSI_CHAN_Y); - if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_Y)) { - tmp5 = tmp1; - } - tmp1 = lp_build_mul( bld-bld_base.base, tmp1, tmp1); - tmp0 = lp_build_add( bld-bld_base.base, tmp0, tmp1); - - /* xmm6 = src.z */ - /* xmm0 = xmm0 + src.z * src.z */ - tmp1 = lp_build_emit_fetch(bld-bld_base, emit_data-inst, 0, TGSI_CHAN_Z); - if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_Z)) { - tmp6 = tmp1; - } - tmp1 = lp_build_mul( bld-bld_base.base, tmp1, tmp1); - tmp0 = lp_build_add( bld-bld_base.base, tmp0, tmp1); - - if (dims == 4) { - /* xmm7 = src.w */ - /* xmm0 = xmm0 + src.w * src.w */ - tmp1 = lp_build_emit_fetch(bld-bld_base, emit_data-inst, 0, TGSI_CHAN_W); - if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_W)) { -tmp7 = tmp1; - } - tmp1 = lp_build_mul( bld-bld_base.base, tmp1, tmp1); - tmp0 = lp_build_add( bld-bld_base.base, tmp0, tmp1); - } - /* xmm1 = 1 / sqrt(xmm0) */ - tmp1 = lp_build_rsqrt( bld-bld_base.base, tmp0); - /* dst.x = xmm1 * src.x */ - if (TGSI_IS_DST0_CHANNEL_ENABLED(emit_data-inst, TGSI_CHAN_X
Mesa (master): ilo: Drop the explicit initialization of gaps in TGSI opcodes.
Module: Mesa Branch: master Commit: 7361d5ba63dda35683569e76caa33f886304958f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7361d5ba63dda35683569e76caa33f886304958f Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 13:40:50 2014 -0800 ilo: Drop the explicit intialization of gaps in TGSI opcodes. The nice thing about the good way of initializing arrays like this is that you don't need to initialize everything in order, or even everything at all. Taking advantage of that only needs a tiny fixup to deal with the default NULL value of the pointers. I haven't dropped the initialization of opcodes that exist and are unsupported. --- src/gallium/drivers/ilo/shader/toy_tgsi.c | 28 ++-- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c index 7c74bad..1ba0606 100644 --- a/src/gallium/drivers/ilo/shader/toy_tgsi.c +++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c @@ -853,8 +853,6 @@ static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_CND] = aos_CND, [TGSI_OPCODE_SQRT] = aos_simple, [TGSI_OPCODE_DP2A] = aos_DP2A, - [22] = aos_unsupported, - [23] = aos_unsupported, [TGSI_OPCODE_FRC] = aos_simple, [TGSI_OPCODE_CLAMP]= aos_CLAMP, [TGSI_OPCODE_FLR] = aos_simple, @@ -863,7 +861,6 @@ static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_LG2] = aos_simple, [TGSI_OPCODE_POW] = aos_simple, [TGSI_OPCODE_XPD] = aos_XPD, - [32] = aos_unsupported, [TGSI_OPCODE_ABS] = aos_simple, [TGSI_OPCODE_RCC] = aos_unsupported, [TGSI_OPCODE_DPH] = aos_simple, @@ -907,11 +904,8 @@ static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_BRK] = aos_BRK, [TGSI_OPCODE_IF] = aos_simple, [TGSI_OPCODE_UIF] = aos_simple, - [76] = aos_unsupported, [TGSI_OPCODE_ELSE] = aos_simple, [TGSI_OPCODE_ENDIF]= aos_simple, - [79] = aos_unsupported, - [80] = aos_unsupported, [TGSI_OPCODE_PUSHA]= aos_unsupported, 
[TGSI_OPCODE_POPA] = aos_unsupported, [TGSI_OPCODE_CEIL] = aos_CEIL, @@ -919,7 +913,6 @@ static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_NOT] = aos_simple, [TGSI_OPCODE_TRUNC]= aos_simple, [TGSI_OPCODE_SHL] = aos_simple, - [88] = aos_unsupported, [TGSI_OPCODE_AND] = aos_simple, [TGSI_OPCODE_OR] = aos_simple, [TGSI_OPCODE_MOD] = aos_simple, @@ -935,9 +928,6 @@ static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_ENDLOOP] = aos_ENDLOOP, [TGSI_OPCODE_ENDSUB] = aos_unsupported, [TGSI_OPCODE_TXQ_LZ] = aos_tex, - [104] = aos_unsupported, - [105] = aos_unsupported, - [106] = aos_unsupported, [TGSI_OPCODE_NOP] = aos_simple, [TGSI_OPCODE_FSEQ] = aos_set_on_cond, [TGSI_OPCODE_FSGE] = aos_set_on_cond, @@ -948,7 +938,6 @@ static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_BREAKC] = aos_unsupported, [TGSI_OPCODE_KILL_IF] = aos_simple, [TGSI_OPCODE_END] = aos_simple, - [118] = aos_unsupported, [TGSI_OPCODE_F2I] = aos_simple, [TGSI_OPCODE_IDIV] = aos_simple, [TGSI_OPCODE_IMAX] = aos_simple, @@ -1469,8 +1458,6 @@ static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_CND] = soa_per_channel, [TGSI_OPCODE_SQRT] = soa_scalar_replicate, [TGSI_OPCODE_DP2A] = soa_dot_product, - [22] = soa_unsupported, - [23] = soa_unsupported, [TGSI_OPCODE_FRC] = soa_per_channel, [TGSI_OPCODE_CLAMP]= soa_per_channel, [TGSI_OPCODE_FLR] = soa_per_channel, @@ -1479,7 +1466,6 @@ static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_LG2] = soa_scalar_replicate, [TGSI_OPCODE_POW] = soa_scalar_replicate, [TGSI_OPCODE_XPD] = soa_XPD, - [32] = soa_unsupported, [TGSI_OPCODE_ABS] = soa_per_channel, [TGSI_OPCODE_RCC] = soa_unsupported, [TGSI_OPCODE_DPH] = soa_dot_product, @@ -1523,11 +1509,8 @@ static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_BRK] = soa_passthrough, [TGSI_OPCODE_IF
Mesa (master): mesa: Drop unused SFL/STR opcodes.
Module: Mesa Branch: master Commit: 868f95f1da74cf6dd7468cba1b56664aad585ccb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=868f95f1da74cf6dd7468cba1b56664aad585ccb Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 16:35:05 2014 -0800 mesa: Drop unused SFL/STR opcodes. They're part of NV_vertex_program2, which I'm pretty sure we're never going to support. Reviewed-by: Jose Fonseca jfons...@vmware.com Reviewed-by: Ian Roamnick ian.d.roman...@intel.com --- src/mesa/program/prog_execute.c | 12 src/mesa/program/prog_instruction.c |2 -- src/mesa/program/prog_instruction.h |2 -- 3 files changed, 16 deletions(-) diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index fcc9ed5..e59ae70 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -1279,12 +1279,6 @@ _mesa_execute_program(struct gl_context * ctx, } } break; - case OPCODE_SFL: /* set false, operands ignored */ - { -static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; -store_vector4(inst, machine, result); - } - break; case OPCODE_SGE: /* set on greater or equal */ { GLfloat a[4], b[4], result[4]; @@ -1395,12 +1389,6 @@ _mesa_execute_program(struct gl_context * ctx, store_vector4(inst, machine, result); } break; - case OPCODE_STR: /* set true, operands ignored */ - { -static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F }; -store_vector4(inst, machine, result); - } - break; case OPCODE_SUB: { GLfloat a[4], b[4], result[4]; diff --git a/src/mesa/program/prog_instruction.c b/src/mesa/program/prog_instruction.c index e2eadc3..abe663d 100644 --- a/src/mesa/program/prog_instruction.c +++ b/src/mesa/program/prog_instruction.c @@ -202,7 +202,6 @@ static const struct instruction_info InstInfo[MAX_OPCODE] = { { OPCODE_RSQ,RSQ, 1, 1 }, { OPCODE_SCS,SCS, 1, 1 }, { OPCODE_SEQ,SEQ, 2, 1 }, - { OPCODE_SFL,SFL, 0, 1 }, { OPCODE_SGE,SGE, 2, 1 }, { OPCODE_SGT,SGT, 2, 1 }, { OPCODE_SIN,SIN, 1, 1 }, @@ -210,7 +209,6 @@ static const struct 
instruction_info InstInfo[MAX_OPCODE] = { { OPCODE_SLT,SLT, 2, 1 }, { OPCODE_SNE,SNE, 2, 1 }, { OPCODE_SSG,SSG, 1, 1 }, - { OPCODE_STR,STR, 0, 1 }, { OPCODE_SUB,SUB, 2, 1 }, { OPCODE_SWZ,SWZ, 1, 1 }, { OPCODE_TEX,TEX, 1, 1 }, diff --git a/src/mesa/program/prog_instruction.h b/src/mesa/program/prog_instruction.h index b9604e5..4cca975 100644 --- a/src/mesa/program/prog_instruction.h +++ b/src/mesa/program/prog_instruction.h @@ -198,7 +198,6 @@ typedef enum prog_opcode { OPCODE_RSQ, /* XX X X X */ OPCODE_SCS, /*X X */ OPCODE_SEQ, /*2 X X */ - OPCODE_SFL, /*2 X */ OPCODE_SGE, /* XX X X X */ OPCODE_SGT, /*2 X X */ OPCODE_SIN, /*X 2 X X */ @@ -206,7 +205,6 @@ typedef enum prog_opcode { OPCODE_SLT, /* XX X X X */ OPCODE_SNE, /*2 X X */ OPCODE_SSG, /*2 X */ - OPCODE_STR, /*2 X */ OPCODE_SUB, /* XX 1.1 X X */ OPCODE_SWZ, /* XX X */ OPCODE_TEX, /*X 3 X X */ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): mesa: Drop unused NV_fragment_program opcodes.
Module: Mesa Branch: master Commit: a3688d686f147f4252d19b298ae26d4ac72c2e08 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a3688d686f147f4252d19b298ae26d4ac72c2e08 Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 16:39:49 2014 -0800 mesa: Drop unused NV_fragment_program opcodes. The extension itself was deleted 2 years ago. There are still some prog_instruction opcodes from NV_fp that exist because they're used by ir_to_mesa.cpp, though. Reviewed-by: Jose Fonseca jfons...@vmware.com Reviewed-by: Ian Roamnick ian.d.roman...@intel.com --- src/mesa/program/prog_execute.c | 144 --- src/mesa/program/prog_instruction.c | 10 --- src/mesa/program/prog_instruction.h | 10 --- src/mesa/program/program_lexer.l| 13 4 files changed, 177 deletions(-) diff --git a/src/mesa/program/prog_execute.c b/src/mesa/program/prog_execute.c index e59ae70..650c40f 100644 --- a/src/mesa/program/prog_execute.c +++ b/src/mesa/program/prog_execute.c @@ -1119,77 +1119,6 @@ _mesa_execute_program(struct gl_context * ctx, break; case OPCODE_NOP: break; - case OPCODE_PK2H:/* pack two 16-bit floats in one 32-bit float */ - { -GLfloat a[4]; -GLuint result[4]; -GLhalfNV hx, hy; -fetch_vector4(inst-SrcReg[0], machine, a); -hx = _mesa_float_to_half(a[0]); -hy = _mesa_float_to_half(a[1]); -result[0] = -result[1] = -result[2] = -result[3] = hx | (hy 16); -store_vector4ui(inst, machine, result); - } - break; - case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */ - { -GLfloat a[4]; -GLuint result[4], usx, usy; -fetch_vector4(inst-SrcReg[0], machine, a); -a[0] = CLAMP(a[0], 0.0F, 1.0F); -a[1] = CLAMP(a[1], 0.0F, 1.0F); -usx = F_TO_I(a[0] * 65535.0F); -usy = F_TO_I(a[1] * 65535.0F); -result[0] = -result[1] = -result[2] = -result[3] = usx | (usy 16); -store_vector4ui(inst, machine, result); - } - break; - case OPCODE_PK4B:/* pack four GLbytes into one 32-bit float */ - { -GLfloat a[4]; -GLuint result[4], ubx, uby, ubz, ubw; -fetch_vector4(inst-SrcReg[0], machine, a); -a[0] = CLAMP(a[0], 
-128.0F / 127.0F, 1.0F); -a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F); -a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F); -a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F); -ubx = F_TO_I(127.0F * a[0] + 128.0F); -uby = F_TO_I(127.0F * a[1] + 128.0F); -ubz = F_TO_I(127.0F * a[2] + 128.0F); -ubw = F_TO_I(127.0F * a[3] + 128.0F); -result[0] = -result[1] = -result[2] = -result[3] = ubx | (uby 8) | (ubz 16) | (ubw 24); -store_vector4ui(inst, machine, result); - } - break; - case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */ - { -GLfloat a[4]; -GLuint result[4], ubx, uby, ubz, ubw; -fetch_vector4(inst-SrcReg[0], machine, a); -a[0] = CLAMP(a[0], 0.0F, 1.0F); -a[1] = CLAMP(a[1], 0.0F, 1.0F); -a[2] = CLAMP(a[2], 0.0F, 1.0F); -a[3] = CLAMP(a[3], 0.0F, 1.0F); -ubx = F_TO_I(255.0F * a[0]); -uby = F_TO_I(255.0F * a[1]); -ubz = F_TO_I(255.0F * a[2]); -ubw = F_TO_I(255.0F * a[3]); -result[0] = -result[1] = -result[2] = -result[3] = ubx | (uby 8) | (ubz 16) | (ubw 24); -store_vector4ui(inst, machine, result); - } - break; case OPCODE_POW: { GLfloat a[4], b[4], result[4]; @@ -1224,20 +1153,6 @@ _mesa_execute_program(struct gl_context * ctx, pc = machine-CallStack[--machine-StackDepth] - 1; } break; - case OPCODE_RFL: /* reflection vector */ - { -GLfloat axis[4], dir[4], result[4], tmpX, tmpW; -fetch_vector4(inst-SrcReg[0], machine, axis); -fetch_vector4(inst-SrcReg[1], machine, dir); -tmpW = DOT3(axis, axis); -tmpX = (2.0F * DOT3(axis, dir)) / tmpW; -result[0] = tmpX * axis[0] - dir[0]; -result[1] = tmpX * axis[1] - dir[1]; -result[2] = tmpX * axis[2] - dir[2]; -/* result[3] is never written! XXX enforce in parser! */ -store_vector4(inst, machine, result); - } - break; case OPCODE_RSQ: /* 1 / sqrt() */ { GLfloat a[4], result[4]; @@ -1562,52 +1477,6 @@ _mesa_execute_program(struct gl_context * ctx
Mesa (master): gallium: Drop the unused RCC opcode.
Module: Mesa Branch: master Commit: de2f8d75db3dad3089c96b65223e47ad3986a25c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=de2f8d75db3dad3089c96b65223e47ad3986a25c Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 13:27:49 2014 -0800 gallium: Drop the unused RCC opcode. Nothing in the tree generated it. Reviewed-by: Jose Fonseca jfons...@vmware.com --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c |1 - src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |5 - src/gallium/auxiliary/tgsi/tgsi_exec.c | 20 src/gallium/auxiliary/tgsi/tgsi_info.c |2 +- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h|1 - src/gallium/auxiliary/tgsi/tgsi_util.c |1 - src/gallium/docs/source/tgsi.rst| 11 --- src/gallium/drivers/ilo/shader/toy_tgsi.c |2 -- src/gallium/drivers/r300/r300_tgsi_to_rc.c |1 - src/gallium/drivers/r600/r600_shader.c |6 +++--- src/gallium/include/pipe/p_shader_tokens.h |2 +- 11 files changed, 5 insertions(+), 47 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index 51cb54c..4a9ce37 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -207,7 +207,6 @@ lp_build_tgsi_inst_llvm( /* Ignore deprecated instructions */ switch (inst-Instruction.Opcode) { - case TGSI_OPCODE_RCC: case TGSI_OPCODE_UP2H: case TGSI_OPCODE_UP2US: case TGSI_OPCODE_UP4B: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 7829a7e..3b9833a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -679,11 +679,6 @@ lp_emit_instruction_aos( case TGSI_OPCODE_XPD: return FALSE; - case TGSI_OPCODE_RCC: - /* deprecated? 
*/ - assert(0); - return FALSE; - case TGSI_OPCODE_DPH: return FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index b9a4c7b..b3ea82f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -911,22 +911,6 @@ micro_div( } static void -micro_rcc(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ - uint i; - - for (i = 0; i 4; i++) { - float recip = 1.0f / src-f[i]; - - if (recip 0.0f) - dst-f[i] = CLAMP(recip, 5.42101e-020f, 1.84467e+019f); - else - dst-f[i] = CLAMP(recip, -1.84467e+019f, -5.42101e-020f); - } -} - -static void micro_lt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src0, @@ -3799,10 +3783,6 @@ exec_instruction( exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_RCC: - exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_DPH: exec_dph(mach, inst); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 6336304..d17426f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -71,7 +71,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, 0, 0, 0, 0, COMP, XPD, TGSI_OPCODE_XPD }, { 0, 0, 0, 0, 0, 0, NONE, , 32 }, /* removed */ { 1, 1, 0, 0, 0, 0, COMP, ABS, TGSI_OPCODE_ABS }, - { 1, 1, 0, 0, 0, 0, REPL, RCC, TGSI_OPCODE_RCC }, + { 0, 0, 0, 0, 0, 0, NONE, , 34 }, /* removed */ { 1, 2, 0, 0, 0, 0, REPL, DPH, TGSI_OPCODE_DPH }, { 1, 1, 0, 0, 0, 0, REPL, COS, TGSI_OPCODE_COS }, { 1, 1, 0, 0, 0, 0, COMP, DDX, TGSI_OPCODE_DDX }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index 56a7a97..33cf38b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -87,7 +87,6 @@ OP11(LG2) 
OP12(POW) OP12(XPD) OP11(ABS) -OP11(RCC) OP12(DPH) OP11(COS) OP11(DDX) diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index e1cba95..66cb167 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -253,7 +253,6 @@ tgsi_util_get_inst_usage_mask(const struct tgsi_full_instruction *inst, case TGSI_OPCODE_EX2: case TGSI_OPCODE_LG2: - case TGSI_OPCODE_RCC: read_mask = TGSI_WRITEMASK_X; break; diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 49de4ca..c912ec5 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -404,17 +404,6 @@ This instruction replicates its result. dst.w = |src.w| -.. opcode:: RCC - Reciprocal Clamped - -This instruction
Mesa (master): nine: Drop use of TGSI_OPCODE_CND.
Module: Mesa Branch: master Commit: 925cb75f894c312478e3e2f4124d2913c4d969ab URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=925cb75f894c312478e3e2f4124d2913c4d969ab Author: Jose Fonseca jfons...@vmware.com Date: Thu Nov 20 14:25:30 2014 -0800 nine: Drop use of TGSI_OPCODE_CND. This was the only state tracker emitting it, and hardware was just having to lower it anyway (or failing to lower it at all). v2: Extracted from a larger patch by Jose (which also dropped DP2A), fixed to actually not reference TGSI_OPCODE_CND. Change by anholt. Reviewed-by: Jose Fonseca jfons...@vmware.com Reviewed-by: Axel Davy axel.d...@ens.fr Reviewed-by: David Heidelberg da...@ixit.cz --- src/gallium/state_trackers/nine/nine_shader.c | 10 +- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index 85cc190..268612e 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -1374,7 +1374,6 @@ DECL_SPECIAL(CND) } cnd = tx_src_param(tx, tx-insn.src[0]); -#ifdef NINE_TGSI_LAZY_R600 cgt = tx_scratch(tx); if (tx-version.major == 1 tx-version.minor 4) { @@ -1387,13 +1386,6 @@ DECL_SPECIAL(CND) ureg_CMP(tx-ureg, dst, tx_src_param(tx, tx-insn.src[1]), tx_src_param(tx, tx-insn.src[2]), ureg_negate(cnd)); -#else -if (tx-version.major == 1 tx-version.minor 4) -cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W); -ureg_CND(tx-ureg, dst, - tx_src_param(tx, tx-insn.src[1]), - tx_src_param(tx, tx-insn.src[2]), cnd); -#endif return D3D_OK; } @@ -2356,7 +2348,7 @@ struct sm1_op_info inst_table[] = _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL), _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), -_OPI(CND, CND, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)), +_OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)), _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 
1, 0, SPECIAL(DEF)), ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: Drop the unused SFL/STR opcodes.
Module: Mesa Branch: master Commit: ecfe9e2ad2b5f178ef09420f8d95d49937137cd9 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ecfe9e2ad2b5f178ef09420f8d95d49937137cd9 Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 14:36:19 2014 -0800 gallium: Drop the unused SFL/STR opcodes. Nothing generated them. Reviewed-by: Jose Fonseca jfons...@vmware.com --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 24 -- src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c|8 src/gallium/auxiliary/tgsi/tgsi_exec.c | 47 src/gallium/auxiliary/tgsi/tgsi_info.c |4 +- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |2 - src/gallium/docs/source/tgsi.rst | 13 -- src/gallium/drivers/ilo/shader/toy_tgsi.c | 26 --- .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp |6 --- src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c |6 --- src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c |6 --- src/gallium/drivers/r300/r300_tgsi_to_rc.c |2 - src/gallium/drivers/r600/r600_shader.c | 12 ++--- src/gallium/include/pipe/p_shader_tokens.h |4 +- 13 files changed, 10 insertions(+), 150 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index 722aa9a..d0f1a7c 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -691,28 +691,6 @@ const struct lp_build_tgsi_action scs_action = { scs_emit /* emit */ }; -/* TGSI_OPCODE_SFL */ - -static void -sfl_emit( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - emit_data-output[emit_data-chan] = bld_base-base.zero; -} - -/* TGSI_OPCODE_STR */ - -static void -str_emit( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - emit_data-output[emit_data-chan] = bld_base-base.one; -} - /* TGSI_OPCODE_SUB */ static void sub_emit( @@ -958,8 +936,6 @@ 
lp_set_default_actions(struct lp_build_tgsi_context * bld_base) bld_base-op_actions[TGSI_OPCODE_MUL].emit = mul_emit; bld_base-op_actions[TGSI_OPCODE_DIV].emit = fdiv_emit; bld_base-op_actions[TGSI_OPCODE_RCP].emit = rcp_emit; - bld_base-op_actions[TGSI_OPCODE_SFL].emit = sfl_emit; - bld_base-op_actions[TGSI_OPCODE_STR].emit = str_emit; bld_base-op_actions[TGSI_OPCODE_SUB].emit = sub_emit; bld_base-op_actions[TGSI_OPCODE_UARL].emit = mov_emit; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 49ad3b6..aacbeff 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -722,10 +722,6 @@ lp_emit_instruction_aos( dst0 = lp_build_select(bld-bld_base.base, tmp0, bld-bld_base.base.one, bld-bld_base.base.zero); break; - case TGSI_OPCODE_SFL: - dst0 = bld-bld_base.base.zero; - break; - case TGSI_OPCODE_SGT: src0 = lp_build_emit_fetch(bld-bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(bld-bld_base, inst, 1, LP_CHAN_ALL); @@ -753,10 +749,6 @@ lp_emit_instruction_aos( dst0 = lp_build_select(bld-bld_base.base, tmp0, bld-bld_base.base.one, bld-bld_base.base.zero); break; - case TGSI_OPCODE_STR: - dst0 = bld-bld_base.base.one; - break; - case TGSI_OPCODE_TEX: dst0 = emit_tex(bld, inst, LP_BLD_TEX_MODIFIER_NONE); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index ab13c13..03cb277 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -452,24 +452,6 @@ micro_sne(union tgsi_exec_channel *dst, } static void -micro_sfl(union tgsi_exec_channel *dst) -{ - dst-f[0] = 0.0f; - dst-f[1] = 0.0f; - dst-f[2] = 0.0f; - dst-f[3] = 0.0f; -} - -static void -micro_str(union tgsi_exec_channel *dst) -{ - dst-f[0] = 1.0f; - dst-f[1] = 1.0f; - dst-f[2] = 1.0f; - dst-f[3] = 1.0f; -} - -static void micro_trunc(union tgsi_exec_channel *dst, const union tgsi_exec_channel 
*src) { @@ -2446,27 +2428,6 @@ exec_declaration(struct tgsi_exec_machine *mach, } } - -typedef void (* micro_op)(union tgsi_exec_channel *dst); - -static void -exec_vector(struct tgsi_exec_machine *mach, -const struct tgsi_full_instruction *inst, -micro_op op, -enum tgsi_exec_datatype dst_datatype) -{ - unsigned int chan; - - for (chan = 0; chan TGSI_NUM_CHANNELS; chan++) { - if (inst-Dst[0].Register.WriteMask (1 chan)) { - union tgsi_exec_channel dst; - - op(dst); - store_dest(mach, dst, inst-Dst[0], inst, chan, dst_datatype
Mesa (master): nine: Don't use the otherwise-dead SFL opcode in an unreachable path.
Module: Mesa Branch: master Commit: 7c0acd8535b36077b832e78f1f3a53176e453d74 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c0acd8535b36077b832e78f1f3a53176e453d74 Author: Eric Anholt e...@anholt.net Date: Thu Nov 20 14:17:07 2014 -0800 nine: Don't use the otherwise-dead SFL opcode in an unreachable path. Reviewed-by: Jose Fonseca jfons...@vmware.com Reviewed-by: Axel Davy axel.d...@ens.fr Reviewed-by: David Heidelberg da...@ixit.cz --- src/gallium/state_trackers/nine/nine_shader.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/state_trackers/nine/nine_shader.c b/src/gallium/state_trackers/nine/nine_shader.c index cc027b4..9b324c3 100644 --- a/src/gallium/state_trackers/nine/nine_shader.c +++ b/src/gallium/state_trackers/nine/nine_shader.c @@ -1615,7 +1615,7 @@ sm1_insn_flags_to_tgsi_setop(BYTE flags) case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE; default: assert(!invalid comparison flags); -return TGSI_OPCODE_SFL; +return TGSI_OPCODE_SGT; } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): gallium: Drop unused BRA opcode.
Module: Mesa Branch: master Commit: 00f7002c5c45887b204a3f14b8e3b32472cc39bb URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=00f7002c5c45887b204a3f14b8e3b32472cc39bb Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 14:51:22 2014 -0800 gallium: Drop unused BRA opcode. Never generated, and implemented in only nvfx vertprog. Reviewed-by: Jose Fonseca jfons...@vmware.com --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c |1 - src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |6 -- src/gallium/auxiliary/tgsi/tgsi_exec.c |4 src/gallium/auxiliary/tgsi/tgsi_info.c |2 +- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |1 - src/gallium/docs/source/tgsi.rst |9 - src/gallium/drivers/ilo/shader/toy_tgsi.c|2 -- src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c |7 --- src/gallium/drivers/nouveau/nv30/nvfx_vertprog.c |1 - src/gallium/drivers/r300/r300_tgsi_to_rc.c |1 - src/gallium/drivers/r600/r600_shader.c |6 +++--- src/gallium/include/pipe/p_shader_tokens.h |2 +- 12 files changed, 5 insertions(+), 37 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index c5d3679..e391d8a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -211,7 +211,6 @@ lp_build_tgsi_inst_llvm( case TGSI_OPCODE_UP2US: case TGSI_OPCODE_UP4B: case TGSI_OPCODE_UP4UB: - case TGSI_OPCODE_BRA: case TGSI_OPCODE_PUSHA: case TGSI_OPCODE_POPA: case TGSI_OPCODE_SAD: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index aacbeff..9e468f9 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -786,12 +786,6 @@ lp_emit_instruction_aos( dst0 = lp_build_round(bld-bld_base.base, src0); break; - case TGSI_OPCODE_BRA: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_CAL: return FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c 
b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 03cb277..ec1374a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3768,10 +3768,6 @@ exec_instruction( exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_BRA: - assert (0); - break; - case TGSI_OPCODE_CAL: /* skip the call if no execution channels are enabled */ if (mach-ExecMask) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index e546816..2ec2853 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -99,7 +99,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, 0, 1, NONE, , 59 }, /* removed */ { 0, 1, 0, 0, 0, 1, NONE, , 60 }, /* removed */ { 1, 1, 0, 0, 0, 0, COMP, ARR, TGSI_OPCODE_ARR }, - { 0, 1, 0, 0, 0, 0, NONE, BRA, TGSI_OPCODE_BRA }, + { 0, 1, 0, 0, 0, 1, NONE, , 62 }, /* removed */ { 0, 0, 0, 1, 0, 0, NONE, CAL, TGSI_OPCODE_CAL }, { 0, 0, 0, 0, 0, 0, NONE, RET, TGSI_OPCODE_RET }, { 1, 1, 0, 0, 0, 0, COMP, SSG, TGSI_OPCODE_SSG }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index bfa78fc..b8bdba9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -109,7 +109,6 @@ OP11(UP2US) OP11(UP4B) OP11(UP4UB) OP11(ARR) -OP01(BRA) OP00_LBL(CAL) OP00(RET) OP11(SSG) diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index 5145332..d4bf9cc 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -868,15 +868,6 @@ This instruction replicates its result. Considered for removal. -.. opcode:: BRA - Branch - - pc = target - -.. note:: - - Considered for removal. - - .. 
opcode:: CALLNZ - Subroutine Call If Not Zero TBD diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c index 5989fc4..5938de4 100644 --- a/src/gallium/drivers/ilo/shader/toy_tgsi.c +++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c @@ -829,7 +829,6 @@ static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_UP4B] = aos_unsupported, [TGSI_OPCODE_UP4UB]= aos_unsupported, [TGSI_OPCODE_ARR] = aos_simple, - [TGSI_OPCODE_BRA] = aos_unsupported, [TGSI_OPCODE_CAL] = aos_unsupported, [TGSI_OPCODE_RET] = aos_unsupported, [TGSI_OPCODE_SSG] = aos_set_sign, @@ -1374,7 +1373,6 @@ static const toy_tgsi_translate soa_translate_table
Mesa (master): gallium: Drop the unused RFL opcode.
Module: Mesa Branch: master Commit: dc00b382b58bb3eb94ca393d32bd7eb3bb07d021 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=dc00b382b58bb3eb94ca393d32bd7eb3bb07d021 Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 14:32:13 2014 -0800 gallium: Drop the unused RFL opcode. Reviewed-by: Jose Fonseca jfons...@vmware.com --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |3 -- src/gallium/auxiliary/tgsi/tgsi_exec.c | 56 -- src/gallium/auxiliary/tgsi/tgsi_info.c |2 +- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |1 - src/gallium/docs/source/tgsi.rst | 17 --- src/gallium/drivers/ilo/shader/toy_tgsi.c|2 - src/gallium/drivers/nouveau/nv30/nvfx_fragprog.c | 13 - src/gallium/drivers/r300/r300_tgsi_to_rc.c |1 - src/gallium/drivers/r600/r600_shader.c |6 +-- src/gallium/include/pipe/p_shader_tokens.h |2 +- 10 files changed, 5 insertions(+), 98 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index d1425bb..49ad3b6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -715,9 +715,6 @@ lp_emit_instruction_aos( case TGSI_OPCODE_PK4UB: return FALSE; - case TGSI_OPCODE_RFL: - return FALSE; - case TGSI_OPCODE_SEQ: src0 = lp_build_emit_fetch(bld-bld_base, inst, 0, LP_CHAN_ALL); src1 = lp_build_emit_fetch(bld-bld_base, inst, 1, LP_CHAN_ALL); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index b191b5c..ab13c13 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2784,58 +2784,6 @@ exec_scs(struct tgsi_exec_machine *mach, } static void -exec_rfl(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - union tgsi_exec_channel r[9]; - - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_XYZ) { - /* r0 = dp3(src0, src0) */ - fetch_source(mach, r[2], inst-Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(r[0], r[2], r[2]); 
- fetch_source(mach, r[4], inst-Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_mul(r[8], r[4], r[4]); - micro_add(r[0], r[0], r[8]); - fetch_source(mach, r[6], inst-Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - micro_mul(r[8], r[6], r[6]); - micro_add(r[0], r[0], r[8]); - - /* r1 = dp3(src0, src1) */ - fetch_source(mach, r[3], inst-Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - micro_mul(r[1], r[2], r[3]); - fetch_source(mach, r[5], inst-Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - micro_mul(r[8], r[4], r[5]); - micro_add(r[1], r[1], r[8]); - fetch_source(mach, r[7], inst-Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - micro_mul(r[8], r[6], r[7]); - micro_add(r[1], r[1], r[8]); - - /* r1 = 2 * r1 / r0 */ - micro_add(r[1], r[1], r[1]); - micro_div(r[1], r[1], r[0]); - - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_X) { - micro_mul(r[2], r[2], r[1]); - micro_sub(r[2], r[2], r[3]); - store_dest(mach, r[2], inst-Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); - } - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_Y) { - micro_mul(r[4], r[4], r[1]); - micro_sub(r[4], r[4], r[5]); - store_dest(mach, r[4], inst-Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); - } - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_Z) { - micro_mul(r[6], r[6], r[1]); - micro_sub(r[6], r[6], r[7]); - store_dest(mach, r[6], inst-Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); - } - } - if (inst-Dst[0].Register.WriteMask TGSI_WRITEMASK_W) { - store_dest(mach, OneVec, inst-Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); - } -} - -static void exec_xpd(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { @@ -3782,10 +3730,6 @@ exec_instruction( assert (0); break; - case TGSI_OPCODE_RFL: - exec_rfl(mach, inst); - break; - case TGSI_OPCODE_SEQ: exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index 94de670..bc64505 100644 
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -81,7 +81,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, COMP, PK2US, TGSI_OPCODE_PK2US }, { 1, 1, 0, 0, 0, 0, COMP, PK4B, TGSI_OPCODE_PK4B }, { 1, 1, 0, 0, 0, 0, COMP, PK4UB, TGSI_OPCODE_PK4UB }, - { 1, 2, 0, 0, 0, 0, COMP, RFL, TGSI_OPCODE_RFL }, + { 0, 1, 0, 0, 0, 1, NONE, , 44 }, /* removed */ { 1, 2, 0, 0, 0, 0, COMP, SEQ
Mesa (master): gallium: Drop the unused ARA opcode.
Module: Mesa Branch: master Commit: ff886c49555c2033dd5fda50459cafaf16540f86 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ff886c49555c2033dd5fda50459cafaf16540f86 Author: Eric Anholt e...@anholt.net Date: Thu Nov 13 10:08:02 2014 -0800 gallium: Drop the unused ARA opcode. Nothing in the tree generated it. v2: Only drop ARA, not ARR as well. Reviewed-by: Jose Fonseca jfons...@vmware.com (v2) --- src/gallium/auxiliary/gallivm/lp_bld_tgsi.c |1 - src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c |6 -- src/gallium/auxiliary/tgsi/tgsi_exec.c |4 src/gallium/auxiliary/tgsi/tgsi_info.c |2 +- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h|1 - src/gallium/docs/source/tgsi.rst|8 src/gallium/drivers/ilo/shader/toy_tgsi.c |2 -- src/gallium/drivers/r300/r300_tgsi_to_rc.c |1 - src/gallium/drivers/r600/r600_shader.c |6 +++--- src/gallium/include/pipe/p_shader_tokens.h |2 +- 10 files changed, 5 insertions(+), 28 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c index 4a9ce37..44a44a6 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c @@ -212,7 +212,6 @@ lp_build_tgsi_inst_llvm( case TGSI_OPCODE_UP4B: case TGSI_OPCODE_UP4UB: case TGSI_OPCODE_X2D: - case TGSI_OPCODE_ARA: case TGSI_OPCODE_BRA: case TGSI_OPCODE_PUSHA: case TGSI_OPCODE_POPA: diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 3b9833a..ed1798d 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -798,12 +798,6 @@ lp_emit_instruction_aos( return FALSE; break; - case TGSI_OPCODE_ARA: - /* deprecated */ - assert(0); - return FALSE; - break; - case TGSI_OPCODE_ARR: src0 = lp_build_emit_fetch(bld-bld_base, inst, 0, LP_CHAN_ALL); dst0 = lp_build_round(bld-bld_base.base, src0); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 
b3ea82f..578d4d8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -3912,10 +3912,6 @@ exec_instruction( exec_x2d(mach, inst); break; - case TGSI_OPCODE_ARA: - assert (0); - break; - case TGSI_OPCODE_ARR: exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index d17426f..b94f5ac 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -97,7 +97,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, 0, 0, COMP, UP4B, TGSI_OPCODE_UP4B }, { 1, 1, 0, 0, 0, 0, COMP, UP4UB, TGSI_OPCODE_UP4UB }, { 1, 3, 0, 0, 0, 0, COMP, X2D, TGSI_OPCODE_X2D }, - { 1, 1, 0, 0, 0, 0, COMP, ARA, TGSI_OPCODE_ARA }, + { 0, 1, 0, 0, 0, 1, NONE, , 60 }, /* removed */ { 1, 1, 0, 0, 0, 0, COMP, ARR, TGSI_OPCODE_ARR }, { 0, 1, 0, 0, 0, 0, NONE, BRA, TGSI_OPCODE_BRA }, { 0, 0, 0, 1, 0, 0, NONE, CAL, TGSI_OPCODE_CAL }, diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h index 33cf38b..2ac9031 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h +++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h @@ -112,7 +112,6 @@ OP11(UP2US) OP11(UP4B) OP11(UP4UB) OP13(X2D) -OP11(ARA) OP11(ARR) OP01(BRA) OP00_LBL(CAL) diff --git a/src/gallium/docs/source/tgsi.rst b/src/gallium/docs/source/tgsi.rst index c912ec5..2138b18 100644 --- a/src/gallium/docs/source/tgsi.rst +++ b/src/gallium/docs/source/tgsi.rst @@ -701,14 +701,6 @@ This instruction replicates its result. Considered for removal. -.. opcode:: ARA - Address Register Add - - TBD - -.. note:: - - Considered for removal. - .. opcode:: ARR - Address Register Load With Round .. 
math:: diff --git a/src/gallium/drivers/ilo/shader/toy_tgsi.c b/src/gallium/drivers/ilo/shader/toy_tgsi.c index 1bf9f21..b71d577 100644 --- a/src/gallium/drivers/ilo/shader/toy_tgsi.c +++ b/src/gallium/drivers/ilo/shader/toy_tgsi.c @@ -854,7 +854,6 @@ static const toy_tgsi_translate aos_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_UP4B] = aos_unsupported, [TGSI_OPCODE_UP4UB]= aos_unsupported, [TGSI_OPCODE_X2D] = aos_unsupported, - [TGSI_OPCODE_ARA] = aos_unsupported, [TGSI_OPCODE_ARR] = aos_simple, [TGSI_OPCODE_BRA] = aos_unsupported, [TGSI_OPCODE_CAL] = aos_unsupported, @@ -1404,7 +1403,6 @@ static const toy_tgsi_translate soa_translate_table[TGSI_OPCODE_LAST] = { [TGSI_OPCODE_UP4B] = soa_unsupported
Mesa (master): gallium: Drop the unused CND opcode.
Module: Mesa Branch: master Commit: 365a4a3f9a80d1b7a6d030d2921578dfc5c899c6 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=365a4a3f9a80d1b7a6d030d2921578dfc5c899c6 Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 14:23:59 2014 -0800 gallium: Drop the unused CND opcode. Nothing in the tree generates it. Reviewed-by: Jose Fonseca jfons...@vmware.com --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c | 19 --- src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c|9 - src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 src/gallium/auxiliary/tgsi/tgsi_info.c |2 +- src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h |1 - src/gallium/auxiliary/tgsi/tgsi_util.c |1 - src/gallium/docs/source/tgsi.rst | 13 - src/gallium/drivers/ilo/shader/toy_tgsi.c | 17 - src/gallium/drivers/r300/r300_tgsi_to_rc.c |1 - src/gallium/drivers/r600/r600_shader.c |6 +++--- src/gallium/include/pipe/p_shader_tokens.h |2 +- 11 files changed, 5 insertions(+), 82 deletions(-) diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c index d0f1a7c..9cb42b2 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c @@ -1057,24 +1057,6 @@ ucmp_emit_cpu( cond, emit_data-args[1], emit_data-args[2]); } - -/* TGSI_OPCODE_CND (CPU Only) */ -static void -cnd_emit_cpu( - const struct lp_build_tgsi_action * action, - struct lp_build_tgsi_context * bld_base, - struct lp_build_emit_data * emit_data) -{ - LLVMValueRef half, tmp; - half = lp_build_const_vec(bld_base-base.gallivm, bld_base-base.type, 0.5); - tmp = lp_build_cmp(bld_base-base, PIPE_FUNC_GREATER, - emit_data-args[2], half); - emit_data-output[emit_data-chan] = lp_build_select(bld_base-base, - tmp, - emit_data-args[0], - emit_data-args[1]); -} - /* TGSI_OPCODE_COS (CPU Only) */ static void cos_emit_cpu( @@ -1821,7 +1803,6 @@ lp_set_default_actions_cpu( bld_base-op_actions[TGSI_OPCODE_ARL].emit = arl_emit_cpu; 
bld_base-op_actions[TGSI_OPCODE_ARR].emit = arr_emit_cpu; bld_base-op_actions[TGSI_OPCODE_CEIL].emit = ceil_emit_cpu; - bld_base-op_actions[TGSI_OPCODE_CND].emit = cnd_emit_cpu; bld_base-op_actions[TGSI_OPCODE_COS].emit = cos_emit_cpu; bld_base-op_actions[TGSI_OPCODE_CMP].emit = cmp_emit_cpu; bld_base-op_actions[TGSI_OPCODE_DIV].emit = div_emit_cpu; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 9e468f9..2ef5db1 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -620,15 +620,6 @@ lp_emit_instruction_aos( dst0 = lp_build_add(bld-bld_base.base, tmp0, src2); break; - case TGSI_OPCODE_CND: - src0 = lp_build_emit_fetch(bld-bld_base, inst, 0, LP_CHAN_ALL); - src1 = lp_build_emit_fetch(bld-bld_base, inst, 1, LP_CHAN_ALL); - src2 = lp_build_emit_fetch(bld-bld_base, inst, 2, LP_CHAN_ALL); - tmp1 = lp_build_const_vec(bld-bld_base.base.gallivm, bld-bld_base.base.type, 0.5); - tmp0 = lp_build_cmp(bld-bld_base.base, PIPE_FUNC_GREATER, src2, tmp1); - dst0 = lp_build_select(bld-bld_base.base, tmp0, src0, src1); - break; - case TGSI_OPCODE_DP2A: return FALSE; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index ec1374a..834568b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -137,18 +137,6 @@ micro_cmp(union tgsi_exec_channel *dst, } static void -micro_cnd(union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2) -{ - dst-f[0] = src2-f[0] 0.5f ? src0-f[0] : src1-f[0]; - dst-f[1] = src2-f[1] 0.5f ? src0-f[1] : src1-f[1]; - dst-f[2] = src2-f[2] 0.5f ? src0-f[2] : src1-f[2]; - dst-f[3] = src2-f[3] 0.5f ? 
src0-f[3] : src1-f[3]; -} - -static void micro_cos(union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { @@ -3603,10 +3591,6 @@ exec_instruction( exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; - case TGSI_OPCODE_CND: - exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); - break; - case TGSI_OPCODE_SQRT: exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi
Mesa (master): nouveau: Fix build after STR/BRA opcode dropping.
Module: Mesa Branch: master Commit: 93d30ff5d628770fcff5a931401fe76fbd8242df URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=93d30ff5d628770fcff5a931401fe76fbd8242df Author: Eric Anholt e...@anholt.net Date: Mon Nov 24 15:22:25 2014 -0800 nouveau: Fix build after STR/BRA opcode dropping. I missed these while git grepping for users of the dead opcodes. Sigh, macros. --- src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp |2 -- 1 file changed, 2 deletions(-) diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp index aa7390b..39a70aa 100644 --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp @@ -538,12 +538,10 @@ static nv50_ir::operation translateOpcode(uint opcode) NV50_IR_OPCODE_CASE(SIN, SIN); NV50_IR_OPCODE_CASE(SLE, SET); NV50_IR_OPCODE_CASE(SNE, SET); - NV50_IR_OPCODE_CASE(STR, SET); NV50_IR_OPCODE_CASE(TEX, TEX); NV50_IR_OPCODE_CASE(TXD, TXD); NV50_IR_OPCODE_CASE(TXP, TEX); - NV50_IR_OPCODE_CASE(BRA, BRA); NV50_IR_OPCODE_CASE(CAL, CALL); NV50_IR_OPCODE_CASE(RET, RET); NV50_IR_OPCODE_CASE(CMP, SLCT); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Update for new kernel ABI with async execution and waits.
Module: Mesa Branch: master Commit: 21577571b37e68edc0422fbf80932588a4614abc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=21577571b37e68edc0422fbf80932588a4614abc Author: Eric Anholt e...@anholt.net Date: Wed Nov 19 17:39:04 2014 -0800 vc4: Update for new kernel ABI with async execution and waits. Our submits now return immediately and you have to manually wait for things to complete if you want to (like a normal driver). --- src/gallium/drivers/vc4/Makefile.sources |1 + src/gallium/drivers/vc4/vc4_bufmgr.c | 65 +- src/gallium/drivers/vc4/vc4_bufmgr.h | 10 ++- src/gallium/drivers/vc4/vc4_context.c| 10 +++ src/gallium/drivers/vc4/vc4_context.h|3 + src/gallium/drivers/vc4/vc4_drm.h| 38 +++ src/gallium/drivers/vc4/vc4_fence.c | 108 ++ src/gallium/drivers/vc4/vc4_resource.c |5 +- src/gallium/drivers/vc4/vc4_screen.h | 13 9 files changed, 250 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index 2336565..6ec48ab 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -9,6 +9,7 @@ C_SOURCES := \ vc4_draw.c \ vc4_drm.h \ vc4_emit.c \ + vc4_fence.c \ vc4_formats.c \ vc4_opt_algebraic.c \ vc4_opt_copy_propagation.c \ diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c index 33592e8..3b73ac8 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.c +++ b/src/gallium/drivers/vc4/vc4_bufmgr.c @@ -152,8 +152,57 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name) return true; } +bool +vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns) +{ +#ifndef USE_VC4_SIMULATOR +struct drm_vc4_wait_seqno wait; +memset(wait, 0, sizeof(wait)); +wait.seqno = seqno; +wait.timeout_ns = timeout_ns; + +int ret = drmIoctl(screen-fd, DRM_IOCTL_VC4_WAIT_SEQNO, wait); +if (ret == -ETIME) { +return false; +} else if (ret != 0) { +fprintf(stderr, wait failed\n); +abort(); +} else { +screen-finished_seqno = 
wait.seqno; +return true; +} +#else +return true; +#endif +} + +bool +vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns) +{ +#ifndef USE_VC4_SIMULATOR +struct vc4_screen *screen = bo-screen; + +struct drm_vc4_wait_bo wait; +memset(wait, 0, sizeof(wait)); +wait.handle = bo-handle; +wait.timeout_ns = timeout_ns; + +int ret = drmIoctl(screen-fd, DRM_IOCTL_VC4_WAIT_BO, wait); +if (ret == -ETIME) { +return false; +} else if (ret != 0) { +fprintf(stderr, wait failed\n); +abort(); +} else { +return true; +} +#else +return true; +#endif +} + void * -vc4_bo_map(struct vc4_bo *bo) +vc4_bo_map_unsynchronized(struct vc4_bo *bo) { int ret; @@ -179,3 +228,17 @@ vc4_bo_map(struct vc4_bo *bo) return bo-map; } + +void * +vc4_bo_map(struct vc4_bo *bo) +{ +void *map = vc4_bo_map_unsynchronized(bo); + +bool ok = vc4_bo_wait(bo, PIPE_TIMEOUT_INFINITE); +if (!ok) { +fprintf(stderr, BO wait for map failed\n); +abort(); +} + +return map; +} diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h index 00ea149..4a1d4a4 100644 --- a/src/gallium/drivers/vc4/vc4_bufmgr.h +++ b/src/gallium/drivers/vc4/vc4_bufmgr.h @@ -78,9 +78,17 @@ vc4_bo_unreference(struct vc4_bo **bo) *bo = NULL; } - void * vc4_bo_map(struct vc4_bo *bo); +void * +vc4_bo_map_unsynchronized(struct vc4_bo *bo); + +bool +vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns); + +bool +vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns); + #endif /* VC4_BUFMGR_H */ diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index a6becaf..bb30c0e 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -322,6 +322,8 @@ vc4_flush(struct pipe_context *pctx) } } +vc4-last_emit_seqno = submit.seqno; + vc4_reset_cl(vc4-bcl); vc4_reset_cl(vc4-rcl); vc4_reset_cl(vc4-shader_rec); @@ -350,7 +352,15 @@ static void vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, unsigned flags) { 
+struct vc4_context *vc4 = vc4_context(pctx); + vc4_flush(pctx); + +if (fence) { +struct vc4_fence *f = vc4_fence_create(vc4-screen, + vc4-last_emit_seqno); +*fence = (struct
Mesa (master): vc4: Emit semaphore instructions for new kernel ABI.
Module: Mesa Branch: master Commit: 82e919d33bbe508b3e1ba883a01ef2512dbc8f72 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=82e919d33bbe508b3e1ba883a01ef2512dbc8f72 Author: Eric Anholt e...@anholt.net Date: Tue Nov 18 12:16:55 2014 -0800 vc4: Emit semaphore instructions for new kernel ABI. Previously, the kernel would dispatch thread 0, wait, then dispatch thread 1. By insisting that the thread contents use semaphores in the right place, the kernel can sleep for longer by dispatching both threads at once. --- src/gallium/drivers/vc4/kernel/vc4_drv.h |2 + src/gallium/drivers/vc4/kernel/vc4_validate.c | 77 - src/gallium/drivers/vc4/vc4_context.c | 14 - 3 files changed, 87 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index b0eb3f0..81ffa03 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -82,6 +82,8 @@ struct exec_info { bool found_tile_binning_mode_config_packet; bool found_tile_rendering_mode_config_packet; bool found_start_tile_binning_packet; + bool found_increment_semaphore_packet; + bool found_wait_on_semaphore_packet; uint8_t bin_tiles_x, bin_tiles_y; uint32_t fb_width, fb_height; uint32_t tile_alloc_init_block_size; diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 8b04eb9..ba6e46f 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -203,6 +203,18 @@ check_tex_size(struct exec_info *exec, struct drm_gem_cma_object *fbo, } static int +validate_flush_all(VALIDATE_ARGS) +{ + if (exec-found_increment_semaphore_packet) { + DRM_ERROR(VC4_PACKET_FLUSH_ALL after + VC4_PACKET_INCREMENT_SEMAPHORE\n); + return -EINVAL; + } + + return 0; +} + +static int validate_start_tile_binning(VALIDATE_ARGS) { if (exec-found_start_tile_binning_packet) { @@ -220,6 +232,41 @@ validate_start_tile_binning(VALIDATE_ARGS) } 
static int +validate_increment_semaphore(VALIDATE_ARGS) +{ + if (exec-found_increment_semaphore_packet) { + DRM_ERROR(Duplicate VC4_PACKET_INCREMENT_SEMAPHORE\n); + return -EINVAL; + } + exec-found_increment_semaphore_packet = true; + + /* Once we've found the semaphore increment, there should be one FLUSH +* then the end of the command list. The FLUSH actually triggers the +* increment, so we only need to make sure there +*/ + + return 0; +} + +static int +validate_wait_on_semaphore(VALIDATE_ARGS) +{ + if (exec-found_wait_on_semaphore_packet) { + DRM_ERROR(Duplicate VC4_PACKET_WAIT_ON_SEMAPHORE\n); + return -EINVAL; + } + exec-found_wait_on_semaphore_packet = true; + + if (!exec-found_increment_semaphore_packet) { + DRM_ERROR(VC4_PACKET_WAIT_ON_SEMAPHORE without + VC4_PACKET_INCREMENT_SEMAPHORE\n); + return -EINVAL; + } + + return 0; +} + +static int validate_branch_to_sublist(VALIDATE_ARGS) { struct drm_gem_cma_object *target; @@ -233,6 +280,11 @@ validate_branch_to_sublist(VALIDATE_ARGS) return -EINVAL; } + if (!exec-found_wait_on_semaphore_packet) { + DRM_ERROR(Jumping to tile alloc before binning finished.\n); + return -EINVAL; + } + offset = *(uint32_t *)(untrusted + 0); if (offset % exec-tile_alloc_init_block_size || offset / exec-tile_alloc_init_block_size @@ -322,6 +374,11 @@ validate_indexed_prim_list(VALIDATE_ARGS) uint32_t index_size = (*(uint8_t *)(untrusted + 0) 4) ? 
2 : 1; struct vc4_shader_state *shader_state; + if (exec-found_increment_semaphore_packet) { + DRM_ERROR(Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n); + return -EINVAL; + } + /* Check overflow condition */ if (exec-shader_state_count == 0) { DRM_ERROR(shader state must precede primitives\n); @@ -355,6 +412,11 @@ validate_gl_array_primitive(VALIDATE_ARGS) uint32_t max_index; struct vc4_shader_state *shader_state; + if (exec-found_increment_semaphore_packet) { + DRM_ERROR(Drawing after VC4_PACKET_INCREMENT_SEMAPHORE\n); + return -EINVAL; + } + /* Check overflow condition */ if (exec-shader_state_count == 0) { DRM_ERROR(shader state must precede primitives\n); @@ -600,10 +662,10 @@ static const struct cmd_info { [VC4_PACKET_HALT] = { 1, 1, 1, halt, NULL }, [VC4_PACKET_NOP] = { 1, 1, 1, nop, NULL }, [VC4_PACKET_FLUSH] = { 1, 1, 1, flush, NULL
Mesa (master): vc4: Mark a big array as const.
Module: Mesa Branch: master Commit: 05f165b62d1adce60b18783407c80b9fa2efa533 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=05f165b62d1adce60b18783407c80b9fa2efa533 Author: Eric Anholt e...@anholt.net Date: Wed Oct 29 15:15:29 2014 -0700 vc4: Mark a big array as const. Drops 1kb of code from this inner loop, in exchange for 2.5k of data. --- src/gallium/drivers/vc4/vc4_program.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index ef0d5b8..5fab1de 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1247,7 +1247,7 @@ static void emit_tgsi_instruction(struct vc4_compile *c, struct tgsi_full_instruction *tgsi_inst) { -struct { +static const struct { enum qop op; struct qreg (*func)(struct vc4_compile *c, struct tgsi_full_instruction *tgsi_inst, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Avoid reusing a pointer from c->outputs[] after add_output().
Module: Mesa Branch: master Commit: b3d269f5ae1844b542d8e875d7177c5eff3a29f2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b3d269f5ae1844b542d8e875d7177c5eff3a29f2 Author: Eric Anholt e...@anholt.net Date: Wed Nov 12 14:14:32 2014 -0800 vc4: Avoid reusing a pointer from c-outputs[] after add_output(). add_output() can resize the qreg array, so we might use a stale pointer. --- src/gallium/drivers/vc4/vc4_program.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 1cc6e96..ef0d5b8 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1822,12 +1822,11 @@ emit_stub_vpm_read(struct vc4_compile *c) static void emit_ucp_clipdistance(struct vc4_compile *c) { -struct qreg *clipvertex; - +unsigned cv; if (c-output_clipvertex_index != -1) -clipvertex = c-outputs[c-output_clipvertex_index]; +cv = c-output_clipvertex_index; else if (c-output_position_index != -1) -clipvertex = c-outputs[c-output_position_index]; +cv = c-output_position_index; else return; @@ -1846,12 +1845,14 @@ emit_ucp_clipdistance(struct vc4_compile *c) plane, TGSI_SWIZZLE_X); + struct qreg dist = qir_uniform_f(c, 0.0); for (int i = 0; i 4; i++) { +struct qreg pos_chan = c-outputs[cv + i]; struct qreg ucp = add_uniform(c, QUNIFORM_USER_CLIP_PLANE, plane * 4 + i); -dist = qir_FADD(c, dist, qir_FMUL(c, clipvertex[i], ucp)); +dist = qir_FADD(c, dist, qir_FMUL(c, pos_chan, ucp)); } c-outputs[output_index] = dist; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix assumption of TGSI OUT[0] being POSITION in the VS.
Module: Mesa Branch: master Commit: acc1cca7ae35e9e7fb55b4c05fd80564253e1634 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=acc1cca7ae35e9e7fb55b4c05fd80564253e1634 Author: Eric Anholt e...@anholt.net Date: Fri Nov 7 14:26:32 2014 -0800 vc4: Fix assumption of TGSI OUT[0] being POSITION in the VS. All the shaders we've received so far had this be the case, but with nir-to-tgsi that changed. I might decide to make nir-to-tgsi keep the outputs in the same order, for debugging sanity, but I'm not sure. --- src/gallium/drivers/vc4/vc4_program.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 72bbcd8..1cc6e96 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1751,7 +1751,7 @@ emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w) xyi[i] = qir_FTOI(c, qir_FMUL(c, qir_FMUL(c, - c-outputs[i], + c-outputs[c-output_position_index + i], scale), rcp_w)); } @@ -1766,7 +1766,7 @@ emit_zs_write(struct vc4_compile *c, struct qreg rcp_w) struct qreg zoffset = add_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0); qir_VPM_WRITE(c, qir_FMUL(c, qir_FADD(c, qir_FMUL(c, - c-outputs[2], + c-outputs[c-output_position_index + 2], zscale), zoffset), rcp_w)); @@ -1863,7 +1863,7 @@ emit_vert_end(struct vc4_compile *c, struct vc4_varying_semantic *fs_inputs, uint32_t num_fs_inputs) { -struct qreg rcp_w = qir_RCP(c, c-outputs[3]); +struct qreg rcp_w = qir_RCP(c, c-outputs[c-output_position_index + 3]); emit_stub_vpm_read(c); emit_ucp_clipdistance(c); @@ -1900,12 +1900,12 @@ emit_vert_end(struct vc4_compile *c, static void emit_coord_end(struct vc4_compile *c) { -struct qreg rcp_w = qir_RCP(c, c-outputs[3]); +struct qreg rcp_w = qir_RCP(c, c-outputs[c-output_position_index + 3]); emit_stub_vpm_read(c); for (int i = 0; i 4; i++) -qir_VPM_WRITE(c, c-outputs[i]); +qir_VPM_WRITE(c, c-outputs[c-output_position_index + i]); 
emit_scaled_viewport_write(c, rcp_w); emit_zs_write(c, rcp_w); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Don't forget to validate code that's got PROG_END on it.
Module: Mesa Branch: master Commit: 8911879dec564ab2ef343d58e6de4fd558e35c3d URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8911879dec564ab2ef343d58e6de4fd558e35c3d Author: Eric Anholt e...@anholt.net Date: Fri Oct 24 20:50:20 2014 +0100 vc4: Don't forget to validate code that's got PROG_END on it. This signal doesn't terminate the program now, it terminates the program soon. So you have to actually validate the code in the instruction. --- src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c index 56c4a17..fdce033 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c @@ -261,6 +261,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj, case QPU_SIG_COLOR_LOAD: case QPU_SIG_LOAD_TMU0: case QPU_SIG_LOAD_TMU1: + case QPU_SIG_PROG_END: if (!check_instruction_writes(inst, validated_shader, validation_state)) { DRM_ERROR(Bad write at ip %d\n, ip); @@ -270,6 +271,11 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj, if (!check_instruction_reads(inst, validated_shader)) goto fail; + if (sig == QPU_SIG_PROG_END) { + found_shader_end = true; + shader_end_ip = ip; + } + break; case QPU_SIG_LOAD_IMM: @@ -280,11 +286,6 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj, } break; - case QPU_SIG_PROG_END: - found_shader_end = true; - shader_end_ip = ip; - break; - default: DRM_ERROR(Unsupported QPU signal %d at instruction %d\n, sig, ip); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add support for ARL and indirect register access on TGSI_FILE_CONSTANT.
Module: Mesa Branch: master Commit: f87c7008958cdb095efa1cfb29ca8f3c9b9066e4 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f87c7008958cdb095efa1cfb29ca8f3c9b9066e4 Author: Eric Anholt e...@anholt.net Date: Wed Oct 1 18:27:24 2014 -0700 vc4: Add support for ARL and indirect register access on TGSI_FILE_CONSTANT. Fixes 14 ARB_vp tests (which had no lowering done), and should improve performance of indirect uniform array access in GLSL. --- src/gallium/drivers/vc4/kernel/vc4_drv.h |1 + src/gallium/drivers/vc4/kernel/vc4_validate.c | 20 ++- .../drivers/vc4/kernel/vc4_validate_shaders.c | 188 +--- src/gallium/drivers/vc4/vc4_context.h | 23 +++ src/gallium/drivers/vc4/vc4_opt_dead_code.c|3 +- src/gallium/drivers/vc4/vc4_program.c | 152 +++- src/gallium/drivers/vc4/vc4_qir.c |1 + src/gallium/drivers/vc4/vc4_qir.h | 45 + src/gallium/drivers/vc4/vc4_qpu_emit.c |5 + src/gallium/drivers/vc4/vc4_screen.c |3 +- 10 files changed, 407 insertions(+), 34 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_drv.h b/src/gallium/drivers/vc4/kernel/vc4_drv.h index 45d9c40..b0eb3f0 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_drv.h +++ b/src/gallium/drivers/vc4/kernel/vc4_drv.h @@ -128,6 +128,7 @@ struct exec_info { * Setup) for definitions of the texture parameters. 
*/ struct vc4_texture_sample_info { + bool is_direct; uint32_t p_offset[4]; }; diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index 977e071..8b04eb9 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -767,6 +767,23 @@ reloc_tex(struct exec_info *exec, uint32_t cube_map_stride = 0; enum vc4_texture_data_type type; + if (!vc4_use_bo(exec, texture_handle_index, VC4_MODE_RENDER, tex)) + return false; + + if (sample-is_direct) { + uint32_t remaining_size = tex-base.size - p0; + if (p0 tex-base.size - 4) { + DRM_ERROR(UBO offset greater than UBO size\n); + return false; + } + if (p1 remaining_size - 4) { + DRM_ERROR(UBO clamp would allow reads outside of UBO\n); + return false; + } + *validated_p0 = tex-paddr + p0; + return true; + } + if (width == 0) width = 2048; if (height == 0) @@ -832,9 +849,6 @@ reloc_tex(struct exec_info *exec, tiling_format = VC4_TILING_FORMAT_T; } - if (!vc4_use_bo(exec, texture_handle_index, VC4_MODE_RENDER, tex)) - return false; - if (!check_tex_size(exec, tex, offset + cube_map_stride * 5, tiling_format, width, height, cpp)) { return false; diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c index 03c7f23..e797c59 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate_shaders.c @@ -51,8 +51,39 @@ struct vc4_shader_validation_state { struct vc4_texture_sample_info tmu_setup[2]; int tmu_write_count[2]; + + /* For registers that were last written to by a MIN instruction with +* one argument being a uniform, the address of the uniform. +* Otherwise, ~0. +* +* This is used for the validation of direct address memory reads. 
+*/ + uint32_t live_clamp_offsets[32 + 32 + 4]; }; +static uint32_t +waddr_to_live_reg_index(uint32_t waddr, bool is_b) +{ + if (waddr 32) { + if (is_b) + return 32 + waddr; + else + return waddr; + } else if (waddr = QPU_W_ACC3) { + + return 64 + waddr - QPU_W_ACC0; + } else { + return ~0; + } +} + +static bool +is_tmu_submit(uint32_t waddr) +{ + return (waddr == QPU_W_TMU0_S || + waddr == QPU_W_TMU1_S); +} + static bool is_tmu_write(uint32_t waddr) { @@ -75,24 +106,86 @@ record_validated_texture_sample(struct vc4_validated_shader_info *validated_shad if (!temp_samples) return false; - memcpy(temp_samples[s].p_offset, - validation_state-tmu_setup[tmu].p_offset, - validation_state-tmu_write_count[tmu] * sizeof(uint32_t)); - for (i = validation_state-tmu_write_count[tmu]; i 4; i++) - temp_samples[s].p_offset[i] = ~0; + memcpy(temp_samples[s], + validation_state-tmu_setup[tmu], + sizeof(*temp_samples)); validated_shader
Mesa (master): vc4: Add .dir-locals.el for kernel style in the kernel code.
Module: Mesa Branch: master Commit: fc1eb614a70a777be0f4f8ada194bab53105999b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=fc1eb614a70a777be0f4f8ada194bab53105999b Author: Eric Anholt e...@anholt.net Date: Fri Oct 24 20:49:27 2014 +0100 vc4: Add .dir-locals.el for kernel style in the kernel code. --- src/gallium/drivers/vc4/kernel/.dir-locals.el | 12 1 file changed, 12 insertions(+) diff --git a/src/gallium/drivers/vc4/kernel/.dir-locals.el b/src/gallium/drivers/vc4/kernel/.dir-locals.el new file mode 100644 index 000..2e58e90 --- /dev/null +++ b/src/gallium/drivers/vc4/kernel/.dir-locals.el @@ -0,0 +1,12 @@ +((nil + (indent-tabs-mode . t) + (tab-width . 8) + (c-basic-offset . 8) + (c-file-style . stroustrup) + (fill-column . 78) + (eval . (progn + (c-set-offset 'innamespace '0) + (c-set-offset 'inline-open '0))) + ) + (makefile-mode (indent-tabs-mode . t)) + ) ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Allow dead code elimination of unused varyings.
Module: Mesa Branch: master Commit: 52824811b9c0a9bb78a40fcb43af00b315f612d0 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=52824811b9c0a9bb78a40fcb43af00b315f612d0 Author: Eric Anholt e...@anholt.net Date: Fri Oct 24 15:03:04 2014 +0100 vc4: Allow dead code elimination of unused varyings. total instructions in shared programs: 39022 - 37341 (-4.31%) instructions in affected programs: 26979 - 25298 (-6.23%) total uniforms in shared programs: 11242 - 10523 (-6.40%) uniforms in affected programs: 5836 - 5117 (-12.32%) --- src/gallium/drivers/vc4/vc4_opt_cse.c |2 +- src/gallium/drivers/vc4/vc4_opt_dead_code.c |2 +- src/gallium/drivers/vc4/vc4_program.c | 15 +++ src/gallium/drivers/vc4/vc4_qir.c | 15 +-- src/gallium/drivers/vc4/vc4_qir.h |2 +- 5 files changed, 31 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_opt_cse.c b/src/gallium/drivers/vc4/vc4_opt_cse.c index d3ef910..bebfb652 100644 --- a/src/gallium/drivers/vc4/vc4_opt_cse.c +++ b/src/gallium/drivers/vc4/vc4_opt_cse.c @@ -132,7 +132,7 @@ qir_opt_cse(struct vc4_compile *c) foreach_s(node, t, c-instructions) { struct qinst *inst = (struct qinst *)node; -if (qir_has_side_effects(inst)) { +if (qir_has_side_effects(c, inst)) { if (inst-op == QOP_TLB_DISCARD_SETUP) last_sf = NULL; continue; diff --git a/src/gallium/drivers/vc4/vc4_opt_dead_code.c b/src/gallium/drivers/vc4/vc4_opt_dead_code.c index f08818a..d958dcb 100644 --- a/src/gallium/drivers/vc4/vc4_opt_dead_code.c +++ b/src/gallium/drivers/vc4/vc4_opt_dead_code.c @@ -63,7 +63,7 @@ qir_opt_dead_code(struct vc4_compile *c) if (inst-dst.file == QFILE_TEMP !used[inst-dst.index] -(!qir_has_side_effects(inst) || +(!qir_has_side_effects(c, inst) || inst-op == QOP_TEX_RESULT)) { if (inst-op == QOP_TEX_RESULT) { dce_tex = true; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 01941f8..0674e4f 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ 
-2018,6 +2018,18 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, shader-program_id = vc4-next_compiled_program_id++; if (stage == QSTAGE_FRAG) { +bool input_live[c-num_input_semantics]; +struct simple_node *node; + +memset(input_live, 0, sizeof(input_live)); +foreach(node, c-instructions) { +struct qinst *inst = (struct qinst *)node; +for (int i = 0; i qir_get_op_nsrc(inst-op); i++) { +if (inst-src[i].file == QFILE_VARY) +input_live[inst-src[i].index] = true; +} +} + shader-input_semantics = ralloc_array(shader, struct vc4_varying_semantic, c-num_input_semantics); @@ -2025,6 +2037,9 @@ vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, for (int i = 0; i c-num_input_semantics; i++) { struct vc4_varying_semantic *sem = c-input_semantics[i]; +if (!input_live[i]) +continue; + /* Skip non-VS-output inputs. */ if (sem-semantic == (uint8_t)~0) continue; diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c index 9c7c15e..a7a4d96 100644 --- a/src/gallium/drivers/vc4/vc4_qir.c +++ b/src/gallium/drivers/vc4/vc4_qir.c @@ -122,12 +122,23 @@ qir_get_op_nsrc(enum qop qop) abort(); } +/** + * Returns whether the instruction has any side effects that must be + * preserved. + */ bool -qir_has_side_effects(struct qinst *inst) +qir_has_side_effects(struct vc4_compile *c, struct qinst *inst) { +/* We can dead-code eliminate varyings, because we only tell the VS + * about the live ones at the end. But we have to preserve the + * point/line coordinates reads, because they're generated by + * fixed-function hardware. + */ for (int i = 0; i qir_get_op_nsrc(inst-op); i++) { -if (inst-src[i].file == QFILE_VARY) +if (inst-src[i].file == QFILE_VARY +c-input_semantics[inst-src[i].index].semantic == 0xff) { return true; +} } return qir_op_info[inst-op].has_side_effects; diff
Mesa (master): vc4: When asked to discard-map a whole resource, discard it.
Module: Mesa Branch: master Commit: 18ccda7b86b8f7ab7466265aefb3f3e773f4a757 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=18ccda7b86b8f7ab7466265aefb3f3e773f4a757 Author: Eric Anholt e...@anholt.net Date: Fri Oct 24 16:50:37 2014 +0100 vc4: When asked to discard-map a whole resource, discard it. This saves a bunch of extra flushes when texsubimaging a whole texture that's been used for rendering, or subdataing a whole BO. In particular, this massively reduces the runtime of piglit texture-packed-formats (when the probes have been moved out of the inner loop). --- src/gallium/drivers/vc4/vc4_resource.c | 42 +--- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 62667bf..b02e289 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -34,6 +34,20 @@ #include vc4_tiling.h static void +vc4_resource_bo_alloc(struct vc4_resource *rsc) +{ +struct pipe_resource *prsc = rsc-base.b; +struct pipe_screen *pscreen = prsc-screen; + +vc4_bo_unreference(rsc-bo); +rsc-bo = vc4_bo_alloc(vc4_screen(pscreen), + rsc-slices[0].offset + + rsc-slices[0].size + + rsc-cube_map_stride * (prsc-array_size - 1), + resource); +} + +static void vc4_resource_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans) { @@ -75,14 +89,19 @@ vc4_resource_transfer_map(struct pipe_context *pctx, char *buf; if (usage PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) { -uint32_t size = rsc-bo-size; -vc4_bo_unreference(rsc-bo); -rsc-bo = vc4_bo_alloc(vc4-screen, size, resource); -} - -if (!(usage PIPE_TRANSFER_UNSYNCHRONIZED)) { -if (vc4_cl_references_bo(pctx, rsc-bo)) -vc4_flush(pctx); +vc4_resource_bo_alloc(rsc); +} else if (!(usage PIPE_TRANSFER_UNSYNCHRONIZED)) { +if (vc4_cl_references_bo(pctx, rsc-bo)) { +if ((usage PIPE_TRANSFER_DISCARD_RANGE) +prsc-last_level == 0 +prsc-width0 == box-width +prsc-height0 == box-height +prsc-depth0 == box-depth) { 
+vc4_resource_bo_alloc(rsc); +} else { +vc4_flush(pctx); +} +} } if (usage PIPE_TRANSFER_WRITE) @@ -324,12 +343,7 @@ vc4_resource_create(struct pipe_screen *pscreen, } vc4_setup_slices(rsc); - -rsc-bo = vc4_bo_alloc(vc4_screen(pscreen), - rsc-slices[0].offset + - rsc-slices[0].size + - rsc-cube_map_stride * (prsc-array_size - 1), - resource); +vc4_resource_bo_alloc(rsc); if (!rsc-bo) goto fail; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add debug output to match shaderdb info to program dumps.
Module: Mesa Branch: master Commit: 5d32e263357e562779bfc0d2af712d4c7538a32b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d32e263357e562779bfc0d2af712d4c7538a32b Author: Eric Anholt e...@anholt.net Date: Wed Oct 22 18:02:18 2014 +0100 vc4: Add debug output to match shaderdb info to program dumps. I'm going to be using VC4_DEBUG=shaderdb,norast to do shaderdb stats, but when debugging regressions, I want to match shaderdb output to shader disassembly. --- src/gallium/drivers/vc4/vc4_context.h |5 + src/gallium/drivers/vc4/vc4_program.c | 24 ++-- src/gallium/drivers/vc4/vc4_qir.h |3 +++ src/gallium/drivers/vc4/vc4_qpu_emit.c |4 +++- 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 45dfa02..d0b280a 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -79,6 +79,10 @@ struct vc4_shader_uniform_info { }; struct vc4_uncompiled_shader { +/** A name for this program, so you can track it in shader-db output. */ +uint32_t program_id; +/** How many variants of this program were compiled, for shader-db. 
*/ +uint32_t compiled_variant_count; struct pipe_shader_state base; const struct tgsi_token *twoside_tokens; }; @@ -183,6 +187,7 @@ struct vc4_context { struct primconvert_context *primconvert; struct util_hash_table *fs_cache, *vs_cache; +uint32_t next_uncompiled_program_id; uint64_t next_compiled_program_id; struct ra_regs *regs; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index c6d9fb3..01941f8 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1827,6 +1827,8 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage, c-stage = stage; c-shader_state = key-shader_state-base; +c-program_id = key-shader_state-program_id; +c-variant_id = key-shader_state-compiled_variant_count++; c-key = key; switch (stage) { @@ -1874,7 +1876,9 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage, assert(ret == TGSI_PARSE_OK); if (vc4_debug VC4_DEBUG_TGSI) { -fprintf(stderr, TGSI:\n); +fprintf(stderr, %s prog %d/%d TGSI:\n, +qir_get_stage_name(c-stage), +c-program_id, c-variant_id); tgsi_dump(tokens, 0); } @@ -1918,17 +1922,23 @@ vc4_shader_tgsi_to_qir(struct vc4_context *vc4, enum qstage stage, qir_optimize(c); if (vc4_debug VC4_DEBUG_QIR) { -fprintf(stderr, QIR:\n); +fprintf(stderr, %s prog %d/%d QIR:\n, +qir_get_stage_name(c-stage), +c-program_id, c-variant_id); qir_dump(c); } qir_reorder_uniforms(c); vc4_generate_code(vc4, c); if (vc4_debug VC4_DEBUG_SHADERDB) { -fprintf(stderr, SHADER-DB: %s: %d instructions\n, -qir_get_stage_name(c-stage), c-qpu_inst_count); -fprintf(stderr, SHADER-DB: %s: %d uniforms\n, -qir_get_stage_name(c-stage), c-num_uniforms); +fprintf(stderr, SHADER-DB: %s prog %d/%d: %d instructions\n, +qir_get_stage_name(c-stage), +c-program_id, c-variant_id, +c-qpu_inst_count); +fprintf(stderr, SHADER-DB: %s prog %d/%d: %d uniforms\n, +qir_get_stage_name(c-stage), +c-program_id, c-variant_id, +c-num_uniforms); } return c; @@ -1938,6 +1948,7 @@ 
static void * vc4_shader_state_create(struct pipe_context *pctx, const struct pipe_shader_state *cso) { +struct vc4_context *vc4 = vc4_context(pctx); struct vc4_uncompiled_shader *so = CALLOC_STRUCT(vc4_uncompiled_shader); if (!so) return NULL; @@ -1961,6 +1972,7 @@ vc4_shader_state_create(struct pipe_context *pctx, so-base.tokens = tgsi_transform_lowering(lowering_config, cso-tokens, info); if (!so-base.tokens) so-base.tokens = tgsi_dup_tokens(cso-tokens); +so-program_id = vc4-next_uncompiled_program_id++; return so; } diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index b95dbc3..c2f83a7 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -289,6 +289,9 @@ struct vc4_compile { uint32_t qpu_inst_count; uint32_t qpu_inst_size; uint32_t num_inputs; + +uint32_t program_id; +uint32_t variant_id
Mesa (master): vc4: Refactor flushing before mapping a BO.
Module: Mesa Branch: master Commit: a71c3b885a532016aa426b5bb753291cffe39a44 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a71c3b885a532016aa426b5bb753291cffe39a44 Author: Eric Anholt e...@anholt.net Date: Fri Oct 24 16:45:04 2014 +0100 vc4: Refactor flushing before mapping a BO. I'm going to want to make some other decisions here before flushing. --- src/gallium/drivers/vc4/vc4_context.c | 17 - src/gallium/drivers/vc4/vc4_context.h |2 +- src/gallium/drivers/vc4/vc4_resource.c |6 -- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 87f0251..b1f0f35 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -350,13 +350,13 @@ vc4_pipe_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence, * * This helps avoid flushing the command buffers when unnecessary. */ -void -vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo) +bool +vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo) { struct vc4_context *vc4 = vc4_context(pctx); if (!vc4-needs_flush) -return; +return false; /* Walk all the referenced BOs in the drawing command list to see if * they match. 
@@ -365,8 +365,7 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo) for (int i = 0; i (vc4-bo_handles.next - vc4-bo_handles.base) / 4; i++) { if (referenced_bos[i] == bo) { -vc4_flush(pctx); -return; +return true; } } @@ -377,8 +376,7 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo) if (csurf) { struct vc4_resource *ctex = vc4_resource(csurf-base.texture); if (ctex-bo == bo) { -vc4_flush(pctx); -return; +return true; } } @@ -387,10 +385,11 @@ vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo) struct vc4_resource *ztex = vc4_resource(zsurf-base.texture); if (ztex-bo == bo) { -vc4_flush(pctx); -return; +return true; } } + +return false; } static void diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index d0b280a..9eaff8f 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -278,7 +278,7 @@ void vc4_write_uniforms(struct vc4_context *vc4, struct vc4_texture_stateobj *texstate); void vc4_flush(struct pipe_context *pctx); -void vc4_flush_for_bo(struct pipe_context *pctx, struct vc4_bo *bo); +bool vc4_cl_references_bo(struct pipe_context *pctx, struct vc4_bo *bo); void vc4_emit_state(struct pipe_context *pctx); void vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c); struct qpu_reg *vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c); diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index c198ab9..62667bf 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -80,8 +80,10 @@ vc4_resource_transfer_map(struct pipe_context *pctx, rsc-bo = vc4_bo_alloc(vc4-screen, size, resource); } -if (!(usage PIPE_TRANSFER_UNSYNCHRONIZED)) -vc4_flush_for_bo(pctx, rsc-bo); +if (!(usage PIPE_TRANSFER_UNSYNCHRONIZED)) { +if (vc4_cl_references_bo(pctx, rsc-bo)) +vc4_flush(pctx); +} if (usage PIPE_TRANSFER_WRITE) rsc-writes++; ___ mesa-commit 
mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Reuse uniform_data/contents indices when making uniforms.
Module: Mesa Branch: master Commit: 8c7ac377b7a859705479a0b421d1dacc53ca240a URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=8c7ac377b7a859705479a0b421d1dacc53ca240a Author: Eric Anholt e...@anholt.net Date: Fri Oct 24 17:16:59 2014 +0100 vc4: Reuse uniform_data/contents indices when making uniforms. This allows vc4_opt_cse.c to CSE-away operations involving the same uniform values. total instructions in shared programs: 37341 - 36906 (-1.16%) instructions in affected programs: 10233 - 9798 (-4.25%) total uniforms in shared programs: 10523 - 10320 (-1.93%) uniforms in affected programs: 2467 - 2264 (-8.23%) --- src/gallium/drivers/vc4/vc4_program.c |7 +++ 1 file changed, 7 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 0674e4f..c6b7edb 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -114,6 +114,13 @@ add_uniform(struct vc4_compile *c, enum quniform_contents contents, uint32_t data) { +for (int i = 0; i c-num_uniforms; i++) { +if (c-uniform_contents[i] == contents +c-uniform_data[i] == data) { +return (struct qreg) { QFILE_UNIF, i }; +} +} + uint32_t uniform = c-num_uniforms++; struct qreg u = { QFILE_UNIF, uniform }; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Translate 4-byte index buffers to 2 bytes.
Module: Mesa Branch: master Commit: 6212d2402df4ad0658cbb98ce889e35ef5f32fa3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6212d2402df4ad0658cbb98ce889e35ef5f32fa3 Author: Eric Anholt e...@anholt.net Date: Sat Oct 18 12:50:05 2014 +0100 vc4: Translate 4-byte index buffers to 2 bytes. Fixes assertion failures in 14 piglit tests (half of which now pass). --- src/gallium/drivers/vc4/vc4_draw.c | 14 ++ src/gallium/drivers/vc4/vc4_resource.c | 46 src/gallium/drivers/vc4/vc4_resource.h | 17 +++- src/gallium/drivers/vc4/vc4_state.c| 25 ++--- 4 files changed, 92 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index f53caf7..e70506b 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -209,19 +209,23 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) */ if (info-indexed) { struct vc4_resource *rsc = vc4_resource(vc4-indexbuf.buffer); - -assert(vc4-indexbuf.index_size == 1 || - vc4-indexbuf.index_size == 2); +uint32_t offset = vc4-indexbuf.offset; +uint32_t index_size = vc4-indexbuf.index_size; +if (rsc-shadow_parent) { +vc4_update_shadow_index_buffer(pctx, vc4-indexbuf); +offset = 0; +index_size = 2; +} cl_start_reloc(vc4-bcl, 1); cl_u8(vc4-bcl, VC4_PACKET_GL_INDEXED_PRIMITIVE); cl_u8(vc4-bcl, info-mode | - (vc4-indexbuf.index_size == 2 ? + (index_size == 2 ? 
VC4_INDEX_BUFFER_U16: VC4_INDEX_BUFFER_U8)); cl_u32(vc4-bcl, info-count); -cl_reloc(vc4, vc4-bcl, rsc-bo, vc4-indexbuf.offset); +cl_reloc(vc4, vc4-bcl, rsc-bo, offset); cl_u32(vc4-bcl, max_index); } else { cl_u8(vc4-bcl, VC4_PACKET_GL_ARRAY_PRIMITIVE); diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 803d357..c198ab9 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c +++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -512,6 +512,52 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, shadow-writes = orig-writes; } +/** + * Converts a 4-byte index buffer to 2 bytes. + * + * Since GLES2 only has support for 1 and 2-byte indices, the hardware doesn't + * include 4-byte index support, and we have to shrink it down. + * + * There's no fallback support for when indices end up being larger than 2^16, + * though it will at least assertion fail. Also, if the original index data + * was in user memory, it would be nice to not have uploaded it to a VBO + * before translating. 
+ */ +void +vc4_update_shadow_index_buffer(struct pipe_context *pctx, + const struct pipe_index_buffer *ib) +{ +struct vc4_resource *shadow = vc4_resource(ib-buffer); +struct vc4_resource *orig = vc4_resource(shadow-shadow_parent); +uint32_t count = shadow-base.b.width0 / 2; + +if (shadow-writes == orig-writes) +return; + +struct pipe_transfer *src_transfer; +uint32_t *src = pipe_buffer_map_range(pctx, orig-base.b, + ib-offset, + count * 4, + PIPE_TRANSFER_READ, src_transfer); + +struct pipe_transfer *dst_transfer; +uint16_t *dst = pipe_buffer_map_range(pctx, shadow-base.b, + 0, + count * 2, + PIPE_TRANSFER_WRITE, dst_transfer); + +for (int i = 0; i count; i++) { +uint32_t src_index = src[i]; +assert(src_index = 0x); +dst[i] = src_index; +} + +pctx-transfer_unmap(pctx, dst_transfer); +pctx-transfer_unmap(pctx, src_transfer); + +shadow-writes = orig-writes; +} + void vc4_resource_screen_init(struct pipe_screen *pscreen) { diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h index 7cc16a4..90b58e1 100644 --- a/src/gallium/drivers/vc4/vc4_resource.h +++ b/src/gallium/drivers/vc4/vc4_resource.h @@ -67,9 +67,22 @@ struct vc4_resource { * * This is used to track when we need to update this shadow resource * from its parent in the case of GL_TEXTURE_BASE_LEVEL (which we - * can't support in hardware). + * can't support in hardware) or GL_UNSIGNED_INTEGER index buffers
Mesa (master): vc4: Apply a Newton-Raphson step to improve RSQ
Module: Mesa Branch: master Commit: 15eb4c59f6504473989e6a064fda11d6c009ed8f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=15eb4c59f6504473989e6a064fda11d6c009ed8f Author: Eric Anholt e...@anholt.net Date: Fri Oct 17 15:28:02 2014 +0100 vc4: Apply a Newton-Raphson step to improve RSQ Fixes all the piglit built-in-functions/*sqrt tests, among others. --- src/gallium/drivers/vc4/vc4_program.c | 22 -- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 0046b22..66dff97 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -313,6 +313,25 @@ tgsi_to_qir_rcp(struct vc4_compile *c, } static struct qreg +tgsi_to_qir_rsq(struct vc4_compile *c, +struct tgsi_full_instruction *tgsi_inst, +enum qop op, struct qreg *src, int i) +{ +struct qreg x = src[0 * 4 + 0]; +struct qreg r = qir_RSQ(c, x); + +/* Apply a Newton-Raphson step to improve the accuracy. */ +r = qir_FMUL(c, r, qir_FSUB(c, +qir_uniform_f(c, 1.5), +qir_FMUL(c, + qir_uniform_f(c, 0.5), + qir_FMUL(c, x, + qir_FMUL(c, r, r); + +return r; +} + +static struct qreg qir_srgb_decode(struct vc4_compile *c, struct qreg srgb) { struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92)); @@ -1165,7 +1184,6 @@ emit_tgsi_instruction(struct vc4_compile *c, [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv }, [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg }, -[TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu }, [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq }, [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne }, [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge }, @@ -1182,7 +1200,7 @@ emit_tgsi_instruction(struct vc4_compile *c, [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp }, [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad }, [TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp }, -[TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_scalar }, +[TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_rsq }, [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar 
}, [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar }, [TGSI_OPCODE_LRP] = { 0, tgsi_to_qir_lrp }, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: correctly include the source files
Module: Mesa Branch: master Commit: 79d09a4b125e745cc89f9bca336619cbd44d9f95 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=79d09a4b125e745cc89f9bca336619cbd44d9f95 Author: Emil Velikov emil.l.veli...@gmail.com Date: Tue Oct 14 16:10:50 2014 +0100 vc4: correctly include the source files The kernel files are built into a separate static library and all the functions that require it are already wrapped in ifdef USE_VC4_SIMULATOR. Don't forget the header file :) Signed-off-by: Emil Velikov emil.l.veli...@gmail.com Reviewed-by: Eric Anholt e...@anholt.net --- src/gallium/drivers/vc4/Makefile.sources|3 --- src/gallium/drivers/vc4/kernel/Makefile.sources |1 + 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources index 1befb9f..2336565 100644 --- a/src/gallium/drivers/vc4/Makefile.sources +++ b/src/gallium/drivers/vc4/Makefile.sources @@ -36,7 +36,4 @@ C_SOURCES := \ vc4_state.c \ vc4_tiling.c \ vc4_tiling.h \ - kernel/vc4_gem.c \ - kernel/vc4_validate.c \ - kernel/vc4_validate_shaders.c \ $() diff --git a/src/gallium/drivers/vc4/kernel/Makefile.sources b/src/gallium/drivers/vc4/kernel/Makefile.sources index d1d70dd..7d17a89 100644 --- a/src/gallium/drivers/vc4/kernel/Makefile.sources +++ b/src/gallium/drivers/vc4/kernel/Makefile.sources @@ -1,4 +1,5 @@ C_SOURCES := \ + vc4_drv.h \ vc4_gem.c \ vc4_validate.c \ vc4_validate_shaders.c \ ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Don't look at back stencil state unless two-sided stencil is enabled.
Module: Mesa Branch: master Commit: 48f6351940ff62c29fff618cec722e845acc86d5 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=48f6351940ff62c29fff618cec722e845acc86d5 Author: Eric Anholt e...@anholt.net Date: Mon Oct 20 22:53:07 2014 +0100 vc4: Don't look at back stencil state unless two-sided stencil is enabled. Fixes regressions in the next bugfix, because gallium util stuff leaves the back stencil state as 0 if !back-enabled. --- src/gallium/drivers/vc4/vc4_state.c |8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 31f2424..99b5b3c 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -203,12 +203,16 @@ vc4_create_depth_stencil_alpha_state(struct pipe_context *pctx, uint8_t front_writemask_bits = tlb_stencil_setup_writemask(front-writemask); -uint8_t back_writemask_bits = -tlb_stencil_setup_writemask(back-writemask); +uint8_t back_writemask = front-writemask; +uint8_t back_writemask_bits = front_writemask_bits; so-stencil_uniforms[0] = tlb_stencil_setup_bits(front, front_writemask_bits); if (back-enabled) { +back_writemask = back-writemask; +back_writemask_bits = +tlb_stencil_setup_writemask(back-writemask); + so-stencil_uniforms[0] |= (1 30); so-stencil_uniforms[1] = tlb_stencil_setup_bits(back, back_writemask_bits); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix stencil writemask handling.
Module: Mesa Branch: master Commit: cc298023c9b302a7a24ee01fe2da1c93f5b44666 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=cc298023c9b302a7a24ee01fe2da1c93f5b44666 Author: Eric Anholt e...@anholt.net Date: Mon Oct 20 21:14:57 2014 +0100 vc4: Fix stencil writemask handling. If the writemask doesn't compress, then we want to put in the uncompressed writemask, not the compressed writemask failure value (all-on). Fixes glean's stencil2 and fbo-clear-formats on stencil. --- src/gallium/drivers/vc4/vc4_state.c |4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 99b5b3c..73d 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -223,8 +223,8 @@ vc4_create_depth_stencil_alpha_state(struct pipe_context *pctx, if (front_writemask_bits == 0xff || back_writemask_bits == 0xff) { -so-stencil_uniforms[2] = (front_writemask_bits | - (back_writemask_bits 8)); +so-stencil_uniforms[2] = (front-writemask | + (back_writemask 8)); } } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix SRC_ALPHA_SATURATE blending.
Module: Mesa Branch: master Commit: ef280c95f2623357452f5ca8e65367b7ac30699f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=ef280c95f2623357452f5ca8e65367b7ac30699f Author: Eric Anholt e...@anholt.net Date: Tue Oct 21 15:46:48 2014 +0100 vc4: Fix SRC_ALPHA_SATURATE blending. Fixes glean blendFunc. --- src/gallium/drivers/vc4/vc4_program.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 7a2a975..c6d9fb3 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1308,9 +1308,17 @@ vc4_blend_channel(struct vc4_compile *c, case PIPE_BLENDFACTOR_DST_COLOR: return qir_FMUL(c, val, dst[channel]); case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: -return qir_FMIN(c, src[3], qir_FSUB(c, -qir_uniform_f(c, 1.0), -dst[3])); +if (channel != 3) { +return qir_FMUL(c, +val, +qir_FMIN(c, + src[3], + qir_FSUB(c, + qir_uniform_f(c, 1.0), + dst[3]))); +} else { +return val; +} case PIPE_BLENDFACTOR_CONST_COLOR: return qir_FMUL(c, val, get_temp_for_uniform(c, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add support for rebasing texture levels so firstlevel == 0.
Module: Mesa Branch: master Commit: 572fba95e4cd85aebdde9bd757c17f719af2af04 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=572fba95e4cd85aebdde9bd757c17f719af2af04 Author: Eric Anholt e...@anholt.net Date: Thu Oct 2 22:14:03 2014 -0700 vc4: Add support for rebasing texture levels so firstlevel == 0. GLES2 doesn't have GL_TEXTURE_BASE_LEVEL, so the hardware doesn't. Fixes piglit levelclamp, tex-miplevel-selection, and texture-storage/2D mipmap rendering. --- src/gallium/drivers/vc4/vc4_context.c |6 ++ src/gallium/drivers/vc4/vc4_program.c |3 ++- src/gallium/drivers/vc4/vc4_resource.c | 37 +++- src/gallium/drivers/vc4/vc4_resource.h | 14 src/gallium/drivers/vc4/vc4_state.c| 26 +- 5 files changed, 83 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index cc57486..87f0251 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -253,6 +253,12 @@ vc4_setup_rcl(struct vc4_context *vc4) assert(!coords_emitted); } } + +if (vc4-resolve PIPE_CLEAR_COLOR0) +ctex-writes++; + +if (vc4-resolve (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) +ztex-writes++; } void diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 66dff97..7a2a975 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2265,7 +2265,8 @@ write_texture_p0(struct vc4_context *vc4, cl_reloc(vc4, vc4-uniforms, rsc-bo, VC4_SET_FIELD(rsc-slices[0].offset 12, VC4_TEX_P0_OFFSET) | - VC4_SET_FIELD(texture-u.tex.last_level, VC4_TEX_P0_MIPLVLS) | + VC4_SET_FIELD(texture-u.tex.last_level - + texture-u.tex.first_level, VC4_TEX_P0_MIPLVLS) | VC4_SET_FIELD(texture-target == PIPE_TEXTURE_CUBE, VC4_TEX_P0_CMMODE) | VC4_SET_FIELD(rsc-vc4_format 7, VC4_TEX_P0_TYPE)); diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c index 7006af3..803d357 100644 --- a/src/gallium/drivers/vc4/vc4_resource.c 
+++ b/src/gallium/drivers/vc4/vc4_resource.c @@ -83,6 +83,9 @@ vc4_resource_transfer_map(struct pipe_context *pctx, if (!(usage PIPE_TRANSFER_UNSYNCHRONIZED)) vc4_flush_for_bo(pctx, rsc-bo); +if (usage PIPE_TRANSFER_WRITE) +rsc-writes++; + trans = util_slab_alloc(vc4-transfer_pool); if (!trans) return NULL; @@ -168,6 +171,7 @@ vc4_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc) { struct vc4_resource *rsc = vc4_resource(prsc); +pipe_resource_reference(rsc-shadow_parent, NULL); vc4_bo_unreference(rsc-bo); free(rsc); } @@ -297,7 +301,7 @@ get_resource_texture_format(struct pipe_resource *prsc) return format; } -static struct pipe_resource * +struct pipe_resource * vc4_resource_create(struct pipe_screen *pscreen, const struct pipe_resource *tmpl) { @@ -478,6 +482,37 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) } void +vc4_update_shadow_baselevel_texture(struct pipe_context *pctx, +struct pipe_sampler_view *view) +{ +struct vc4_resource *shadow = vc4_resource(view-texture); +struct vc4_resource *orig = vc4_resource(shadow-shadow_parent); +assert(orig); + +if (shadow-writes == orig-writes) +return; + +for (int i = 0; i = shadow-base.b.last_level; i++) { +struct pipe_box box = { +.x = 0, +.y = 0, +.z = 0, +.width = u_minify(shadow-base.b.width0, i), +.height = u_minify(shadow-base.b.height0, i), +.depth = 1, +}; + +util_resource_copy_region(pctx, + shadow-base.b, i, 0, 0, 0, + orig-base.b, + view-u.tex.first_level + i, + box); +} + +shadow-writes = orig-writes; +} + +void vc4_resource_screen_init(struct pipe_screen *pscreen) { pscreen-resource_create = vc4_resource_create; diff --git a/src/gallium/drivers/vc4/vc4_resource.h b/src/gallium/drivers/vc4/vc4_resource.h index 01f481d..7cc16a4 100644 --- a/src/gallium/drivers/vc4/vc4_resource.h +++ b/src/gallium/drivers/vc4/vc4_resource.h @@ -61,6 +61,16 @@ struct vc4_resource { bool tiled; /** One
Mesa (master): vc4: Apply a Newton-Raphson step to improve RCP.
Module: Mesa Branch: master Commit: 1fc124b80f228319ded06f80a51681c75dc0a4f3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1fc124b80f228319ded06f80a51681c75dc0a4f3 Author: Eric Anholt e...@anholt.net Date: Fri Oct 17 14:01:15 2014 +0100 vc4: Apply a Newton-Raphson step to improve RCP. Fixes all the piglit floating-point *-op-div tests, among others. --- src/gallium/drivers/vc4/vc4_program.c | 18 +- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 70a2b86..0046b22 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -297,6 +297,22 @@ tgsi_to_qir_scalar(struct vc4_compile *c, } static struct qreg +tgsi_to_qir_rcp(struct vc4_compile *c, +struct tgsi_full_instruction *tgsi_inst, +enum qop op, struct qreg *src, int i) +{ +struct qreg x = src[0 * 4 + 0]; +struct qreg r = qir_RCP(c, x); + +/* Apply a Newton-Raphson step to improve the accuracy. */ +r = qir_FMUL(c, r, qir_FSUB(c, +qir_uniform_f(c, 2.0), +qir_FMUL(c, x, r))); + +return r; +} + +static struct qreg qir_srgb_decode(struct vc4_compile *c, struct qreg srgb) { struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92)); @@ -1165,7 +1181,7 @@ emit_tgsi_instruction(struct vc4_compile *c, [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp }, [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad }, -[TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_scalar }, +[TGSI_OPCODE_RCP] = { QOP_RCP, tgsi_to_qir_rcp }, [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_scalar }, [TGSI_OPCODE_EX2] = { QOP_EXP2, tgsi_to_qir_scalar }, [TGSI_OPCODE_LG2] = { QOP_LOG2, tgsi_to_qir_scalar }, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add a little bit more packet parsing to make dump reading easier.
Module: Mesa Branch: master Commit: 0fdc5111b4e659de8258ae8f3eb8e33ef466beb3 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=0fdc5111b4e659de8258ae8f3eb8e33ef466beb3 Author: Eric Anholt e...@anholt.net Date: Fri Oct 17 15:04:27 2014 +0100 vc4: Add a little bit more packet parsing to make dump reading easier. Probably should have done this *before* staring at all those render lists today. --- src/gallium/drivers/vc4/vc4_cl_dump.c | 133 - 1 file changed, 114 insertions(+), 19 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c index a55c04f..6c38021 100644 --- a/src/gallium/drivers/vc4/vc4_cl_dump.c +++ b/src/gallium/drivers/vc4/vc4_cl_dump.c @@ -25,11 +25,101 @@ #include util/macros.h #include vc4_context.h -#define PACKET(name, size) [name] = { #name, size } +#define dump_VC4_PACKET_LINE_WIDTH dump_float +#define dump_VC4_PACKET_POINT_SIZE dump_float + +static void +dump_float(void *cl, uint32_t offset, uint32_t hw_offset) +{ +void *f = cl + offset; + +fprintf(stderr, 0x%08x 0x%08x: %f (0x%08x)\n, +offset, hw_offset, *(float *)f, *(uint32_t *)f); +} + +static void +dump_VC4_PACKET_BRANCH_TO_SUB_LIST(void *cl, uint32_t offset, uint32_t hw_offset) +{ +uint32_t *addr = cl + offset; + +fprintf(stderr, 0x%08x 0x%08x: addr 0x%08x\n, +offset, hw_offset, *addr); +} + +static void +dump_VC4_PACKET_FLAT_SHADE_FLAGS(void *cl, uint32_t offset, uint32_t hw_offset) +{ +uint32_t *bits = cl + offset; + +fprintf(stderr, 0x%08x 0x%08x: bits 0x%08x\n, +offset, hw_offset, *bits); +} + +static void +dump_VC4_PACKET_VIEWPORT_OFFSET(void *cl, uint32_t offset, uint32_t hw_offset) +{ +uint16_t *o = cl + offset; + +fprintf(stderr, 0x%08x 0x%08x: %f, %f (0x%04x, 0x%04x)\n, +offset, hw_offset, +o[0] / 16.0, o[1] / 16.0, +o[0], o[1]); +} + +static void +dump_VC4_PACKET_CLIPPER_XY_SCALING(void *cl, uint32_t offset, uint32_t hw_offset) +{ +uint32_t *scale = cl + offset; + +fprintf(stderr, 0x%08x 0x%08x: %f, %f (%f, %f, 0x%08x, 0x%08x)\n, 
+offset, hw_offset, +uif(scale[0]) / 16.0, uif(scale[1]) / 16.0, +uif(scale[0]), uif(scale[1]), +scale[0], scale[1]); +} + +static void +dump_VC4_PACKET_CLIPPER_Z_SCALING(void *cl, uint32_t offset, uint32_t hw_offset) +{ +uint32_t *translate = cl + offset; +uint32_t *scale = cl + offset + 8; + +fprintf(stderr, 0x%08x 0x%08x: %f, %f (0x%08x, 0x%08x)\n, +offset, hw_offset, +uif(translate[0]), uif(translate[1]), +translate[0], translate[1]); + +fprintf(stderr, 0x%08x 0x%08x: %f, %f (0x%08x, 0x%08x)\n, +offset + 8, hw_offset + 8, +uif(scale[0]), uif(scale[1]), +scale[0], scale[1]); +} + +static void +dump_VC4_PACKET_TILE_COORDINATES(void *cl, uint32_t offset, uint32_t hw_offset) +{ +uint8_t *tilecoords = cl + offset; + +fprintf(stderr, 0x%08x 0x%08x: %d, %d\n, +offset, hw_offset, tilecoords[0], tilecoords[1]); +} + +static void +dump_VC4_PACKET_GEM_HANDLES(void *cl, uint32_t offset, uint32_t hw_offset) +{ +uint32_t *handles = cl + offset; + +fprintf(stderr, 0x%08x 0x%08x: handle 0: %d, handle 1: %d\n, +offset, hw_offset, handles[0], handles[1]); +} + +#define PACKET_DUMP(name, size) [name] = { #name, size, dump_##name } +#define PACKET(name, size) [name] = { #name, size, NULL } static const struct packet_info { const char *name; uint8_t size; +void (*dump_func)(void *cl, uint32_t offset, uint32_t hw_offset); } packet_info[] = { PACKET(VC4_PACKET_HALT, 1), PACKET(VC4_PACKET_NOP, 1), @@ -41,7 +131,7 @@ static const struct packet_info { PACKET(VC4_PACKET_WAIT_ON_SEMAPHORE, 1), PACKET(VC4_PACKET_BRANCH, 5), -PACKET(VC4_PACKET_BRANCH_TO_SUB_LIST, 5), +PACKET_DUMP(VC4_PACKET_BRANCH_TO_SUB_LIST, 5), PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER, 1), PACKET(VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF, 1), @@ -63,23 +153,23 @@ static const struct packet_info { PACKET(VC4_PACKET_VG_SHADER_STATE, 5), PACKET(VC4_PACKET_CONFIGURATION_BITS, 4), -PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, 5), -PACKET(VC4_PACKET_POINT_SIZE, 5), -PACKET(VC4_PACKET_LINE_WIDTH, 5), 
+PACKET_DUMP(VC4_PACKET_FLAT_SHADE_FLAGS, 5), +PACKET_DUMP(VC4_PACKET_POINT_SIZE, 5), +PACKET_DUMP(VC4_PACKET_LINE_WIDTH, 5), PACKET(VC4_PACKET_RHT_X_BOUNDARY, 3), PACKET(VC4_PACKET_DEPTH_OFFSET, 5), PACKET(VC4_PACKET_CLIP_WINDOW, 9), -PACKET(VC4_PACKET_VIEWPORT_OFFSET, 5), +PACKET_DUMP
Mesa (master): vc4: Add some comments about state management.
Module: Mesa Branch: master Commit: 000976ed9926266b52827108c666d4c05e4f4a35 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=000976ed9926266b52827108c666d4c05e4f4a35 Author: Eric Anholt e...@anholt.net Date: Fri Oct 17 09:40:12 2014 +0100 vc4: Add some comments about state management. --- src/gallium/drivers/vc4/vc4_context.c |6 ++ src/gallium/drivers/vc4/vc4_draw.c|5 + 2 files changed, 11 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index bcbb004..205f81d 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -319,6 +319,12 @@ vc4_flush(struct pipe_context *pctx) vc4-needs_flush = false; vc4-draw_call_queued = false; + +/* We have no hardware context saved between our draw calls, so we + * need to flag the next draw as needing all state emitted. Emitting + * all state at the start of our draws is also what ensures that we + * return to the state we need after a previous tile has finished. + */ vc4-dirty = ~0; vc4-resolve = 0; vc4-cleared = 0; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 0938a76..f3c225d 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -78,6 +78,11 @@ vc4_start_draw(struct vc4_context *vc4) VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_32 | VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32); +/* START_TILE_BINNING resets the statechange counters in the hardware, + * which are what is used when a primitive is binned to a tile to + * figure out what new state packets need to be written to that tile's + * command list. + */ cl_u8(vc4-bcl, VC4_PACKET_START_TILE_BINNING); vc4-needs_flush = true; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Set the primitive list format at the start of rendering.
Module: Mesa Branch: master Commit: afc3aa373d45775d08babffa49b566f952689efc URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=afc3aa373d45775d08babffa49b566f952689efc Author: Eric Anholt e...@anholt.net Date: Thu Oct 16 10:17:57 2014 +0100 vc4: Set the primitive list format at the start of rendering. The other driver does this manually before calling into each tile, but we can just let it get binned into the tiles (saving repeated kernel validation on the packet). Fixes simulator assertion failures on polygon-mode and non-auto texwrap. --- src/gallium/drivers/vc4/vc4_draw.c |9 + src/gallium/drivers/vc4/vc4_packet.h |6 ++ 2 files changed, 15 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index f3c225d..f53caf7 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -85,6 +85,15 @@ vc4_start_draw(struct vc4_context *vc4) */ cl_u8(vc4-bcl, VC4_PACKET_START_TILE_BINNING); +/* Reset the current compressed primitives format. This gets modified + * by VC4_PACKET_GL_INDEXED_PRIMITIVE and + * VC4_PACKET_GL_ARRAY_PRIMITIVE, so it needs to be reset at the start + * of every tile. 
+ */ +cl_u8(vc4-bcl, VC4_PACKET_PRIMITIVE_LIST_FORMAT); +cl_u8(vc4-bcl, (VC4_PRIMITIVE_LIST_FORMAT_16_INDEX | + VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES)); + vc4-needs_flush = true; vc4-draw_call_queued = true; } diff --git a/src/gallium/drivers/vc4/vc4_packet.h b/src/gallium/drivers/vc4/vc4_packet.h index e9abfd1..181f2e0 100644 --- a/src/gallium/drivers/vc4/vc4_packet.h +++ b/src/gallium/drivers/vc4/vc4_packet.h @@ -234,6 +234,12 @@ enum vc4_packet { #define VC4_RENDER_CONFIG_TILE_BUFFER_64BIT(1 1) #define VC4_RENDER_CONFIG_MS_MODE_4X (1 0) +#define VC4_PRIMITIVE_LIST_FORMAT_16_INDEX (1 4) +#define VC4_PRIMITIVE_LIST_FORMAT_32_XY(3 4) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_POINTS (0 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_LINES (1 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_TRIANGLES (2 0) +#define VC4_PRIMITIVE_LIST_FORMAT_TYPE_RHT (3 0) enum vc4_texture_data_type { VC4_TEXTURE_TYPE_RGBA = 0, ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Make sure there's exactly 1 tile store per tile coords packet.
Module: Mesa Branch: master Commit: 135287db175de9496b76f8edce04871ca6444d72 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=135287db175de9496b76f8edce04871ca6444d72 Author: Eric Anholt e...@anholt.net Date: Thu Oct 16 10:42:04 2014 +0100 vc4: Make sure there's exactly 1 tile store per tile coords packet. It's not documented that I can see, but the other driver does it (check vg_hw_4.c), and one of the HW guys confirmed that you really do need to do it. --- src/gallium/drivers/vc4/vc4_context.c | 79 ++--- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 72ac344..bcbb004 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -36,6 +36,49 @@ #include vc4_context.h #include vc4_resource.h +/** + * Emits a no-op STORE_TILE_BUFFER_GENERAL. + * + * If we emit a PACKET_TILE_COORDINATES, it must be followed by a store of + * some sort before another load is triggered. + */ +static void +vc4_store_before_load(struct vc4_context *vc4, bool *coords_emitted) +{ +if (!*coords_emitted) +return; + +cl_u8(vc4-rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL); +cl_u8(vc4-rcl, VC4_LOADSTORE_TILE_BUFFER_NONE); +cl_u8(vc4-rcl, (VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_ZS_CLEAR | + VC4_STORE_TILE_BUFFER_DISABLE_VG_MASK_CLEAR)); +cl_u32(vc4-rcl, 0); /* no address, since we're in None mode */ + +*coords_emitted = false; +} + +/** + * Emits a PACKET_TILE_COORDINATES if one isn't already pending. + * + * The tile coordinates packet triggers a pending load if there is one, are + * used for clipping during rendering, and determine where loads/stores happen + * relative to their base address. 
+ */ +static void +vc4_tile_coordinates(struct vc4_context *vc4, uint32_t x, uint32_t y, + bool *coords_emitted) +{ +if (*coords_emitted) +return; + +cl_u8(vc4-rcl, VC4_PACKET_TILE_COORDINATES); +cl_u8(vc4-rcl, x); +cl_u8(vc4-rcl, y); + +*coords_emitted = true; +} + static void vc4_setup_rcl(struct vc4_context *vc4) { @@ -116,9 +159,12 @@ vc4_setup_rcl(struct vc4_context *vc4) bool coords_emitted = false; /* Note that the load doesn't actually occur until the - * tile coords packet is processed. + * tile coords packet is processed, and only one load + * may be outstanding at a time. */ if (resolve_uncleared PIPE_CLEAR_COLOR) { +vc4_store_before_load(vc4, coords_emitted); + cl_start_reloc(vc4-rcl, 1); cl_u8(vc4-rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); cl_u8(vc4-rcl, @@ -132,13 +178,12 @@ vc4_setup_rcl(struct vc4_context *vc4) cl_reloc(vc4, vc4-rcl, ctex-bo, csurf-offset); -cl_u8(vc4-rcl, VC4_PACKET_TILE_COORDINATES); -cl_u8(vc4-rcl, x); -cl_u8(vc4-rcl, y); -coords_emitted = true; +vc4_tile_coordinates(vc4, x, y, coords_emitted); } if (resolve_uncleared (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { +vc4_store_before_load(vc4, coords_emitted); + cl_start_reloc(vc4-rcl, 1); cl_u8(vc4-rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL); cl_u8(vc4-rcl, @@ -149,21 +194,14 @@ vc4_setup_rcl(struct vc4_context *vc4) cl_reloc(vc4, vc4-rcl, ztex-bo, zsurf-offset); -cl_u8(vc4-rcl, VC4_PACKET_TILE_COORDINATES); -cl_u8(vc4-rcl, x); -cl_u8(vc4-rcl, y); -coords_emitted = true; +vc4_tile_coordinates(vc4, x, y, coords_emitted); } /* Clipping depends on tile coordinates having been * emitted, so make sure it's happened even if * everything was cleared to start. */ -if (!coords_emitted) { -cl_u8(vc4-rcl, VC4_PACKET_TILE_COORDINATES); -cl_u8(vc4-rcl, x); -cl_u8(vc4-rcl
Mesa (master): vc4: Replace the FLUSH_ALL with FLUSH.
Module: Mesa Branch: master Commit: 895c90410314103814ca4d2684f94463bd8f243f URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=895c90410314103814ca4d2684f94463bd8f243f Author: Eric Anholt e...@anholt.net Date: Fri Oct 17 09:42:35 2014 +0100 vc4: Replace the FLUSH_ALL with FLUSH. We don't need to emit all of our current state at the end of each bin list. We're going to be smashing it all at the start of the next tile's bin list, anyway. --- src/gallium/drivers/vc4/vc4_context.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 205f81d..7779c46 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -263,7 +263,9 @@ vc4_flush(struct pipe_context *pctx) if (!vc4-needs_flush) return; -cl_u8(vc4-bcl, VC4_PACKET_FLUSH_ALL); +/* The FLUSH caps all of our bin lists with a VC4_PACKET_RETURN. */ +cl_u8(vc4-bcl, VC4_PACKET_FLUSH); + cl_u8(vc4-bcl, VC4_PACKET_NOP); cl_u8(vc4-bcl, VC4_PACKET_HALT); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Make some assertions about how many flushes/EOFs the simulator sees.
Module: Mesa Branch: master Commit: 9ebfb3014ea6d2bde9d4322f2537f21be97b280b URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=9ebfb3014ea6d2bde9d4322f2537f21be97b280b Author: Eric Anholt e...@anholt.net Date: Fri Oct 17 09:43:54 2014 +0100 vc4: Make some assertions about how many flushes/EOFs the simulator sees. This caught the previous commit's bug in the kernel validator. --- src/gallium/drivers/vc4/vc4_cl.h|2 +- src/gallium/drivers/vc4/vc4_cl_dump.c |8 src/gallium/drivers/vc4/vc4_context.c |4 ++-- src/gallium/drivers/vc4/vc4_simulator.c | 21 +++-- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h index 2cdd77d..634a4b0 100644 --- a/src/gallium/drivers/vc4/vc4_cl.h +++ b/src/gallium/drivers/vc4/vc4_cl.h @@ -43,7 +43,7 @@ struct vc4_cl { void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl); void vc4_grow_cl(struct vc4_cl *cl); void vc4_reset_cl(struct vc4_cl *cl); -void vc4_dump_cl(struct vc4_cl *cl, bool is_render); +void vc4_dump_cl(void *cl, uint32_t size, bool is_render); uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo); static inline void diff --git a/src/gallium/drivers/vc4/vc4_cl_dump.c b/src/gallium/drivers/vc4/vc4_cl_dump.c index 40bcf01..a55c04f 100644 --- a/src/gallium/drivers/vc4/vc4_cl_dump.c +++ b/src/gallium/drivers/vc4/vc4_cl_dump.c @@ -83,12 +83,12 @@ static const struct packet_info { }; void -vc4_dump_cl(struct vc4_cl *cl, bool is_render) +vc4_dump_cl(void *cl, uint32_t size, bool is_render) { uint32_t offset = 0, hw_offset = 0; -uint8_t *cmds = cl-base; +uint8_t *cmds = cl; -while (offset cl-end - cl-base) { +while (offset size) { uint8_t header = cmds[offset]; if (header ARRAY_SIZE(packet_info) || @@ -105,7 +105,7 @@ vc4_dump_cl(struct vc4_cl *cl, bool is_render) header, p-name); for (uint32_t i = 1; i p-size; i++) { -if (offset + i = cl-end - cl-base) { +if (offset + i = size) { fprintf(stderr, 0x%08x 0x%08x: CL overflow!\n, offset 
+ i, hw_offset + i); return; diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c index 7779c46..cc57486 100644 --- a/src/gallium/drivers/vc4/vc4_context.c +++ b/src/gallium/drivers/vc4/vc4_context.c @@ -273,9 +273,9 @@ vc4_flush(struct pipe_context *pctx) if (vc4_debug VC4_DEBUG_CL) { fprintf(stderr, BCL:\n); -vc4_dump_cl(vc4-bcl, false); +vc4_dump_cl(vc4-bcl.base, vc4-bcl.end - vc4-bcl.base, false); fprintf(stderr, RCL:\n); -vc4_dump_cl(vc4-rcl, true); +vc4_dump_cl(vc4-rcl.base, vc4-rcl.end - vc4-rcl.base, true); } struct drm_vc4_submit_cl submit; diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c index 34262f5..1040ae8 100644 --- a/src/gallium/drivers/vc4/vc4_simulator.c +++ b/src/gallium/drivers/vc4/vc4_simulator.c @@ -108,6 +108,7 @@ vc4_simulator_unpin_bos(struct exec_info *exec) int vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) { +struct vc4_screen *screen = vc4-screen; struct vc4_surface *csurf = vc4_surface(vc4-framebuffer.cbufs[0]); struct vc4_resource *ctex = csurf ? vc4_resource(csurf-base.texture) : NULL; uint32_t winsys_stride = ctex ? 
ctex-bo-simulator_winsys_stride : 0; @@ -149,8 +150,24 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args) if (ret) return ret; -simpenrose_do_binning(exec.ct0ca, exec.ct0ea); -simpenrose_do_rendering(exec.ct1ca, exec.ct1ea); +int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea); +if (bfc != 1) { +fprintf(stderr, Binning returned %d flushes, should be 1.\n, +bfc); +fprintf(stderr, Relocated binning command list:\n); +vc4_dump_cl(screen-simulator_mem_base + exec.ct0ca, +exec.ct0ea - exec.ct0ca, false); +abort(); +} +int rfc = simpenrose_do_rendering(exec.ct1ca, exec.ct1ea); +if (rfc != 1) { +fprintf(stderr, Rendering returned %d frames, should be 1.\n, +rfc); +fprintf(stderr, Relocated render command list:\n); +vc4_dump_cl(screen-simulator_mem_base + exec.ct1ca, +exec.ct1ea - exec.ct1ca, true); +abort(); +} ret = vc4_simulator_unpin_bos(exec); if (ret
Mesa (master): vc4: Fix accidental dropping of the low bits of the store tilebuffer packet.
Module: Mesa Branch: master Commit: 1f7048419ed6ad4d25e89efa885fdc58d36c4213 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1f7048419ed6ad4d25e89efa885fdc58d36c4213 Author: Eric Anholt e...@anholt.net Date: Fri Oct 17 12:14:11 2014 +0100 vc4: Fix accidental dropping of the low bits of the store tilebuffer packet. Notably this included the EOF flag (the other bits are the full buffer dump selection, but we don't do full dumps), which caused the kernel checking for frame completion to trigger. --- src/gallium/drivers/vc4/kernel/vc4_validate.c |8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/kernel/vc4_validate.c b/src/gallium/drivers/vc4/kernel/vc4_validate.c index fe2cd44..86b8fa5 100644 --- a/src/gallium/drivers/vc4/kernel/vc4_validate.c +++ b/src/gallium/drivers/vc4/kernel/vc4_validate.c @@ -268,7 +268,7 @@ validate_loadstore_tile_buffer_general(VALIDATE_ARGS) uint32_t packet_b1 = *(uint8_t *)(untrusted + 1); struct drm_gem_cma_object *fbo; uint32_t buffer_type = packet_b0 0xf; - uint32_t offset, cpp; + uint32_t untrusted_address, offset, cpp; switch (buffer_type) { case VC4_LOADSTORE_TILE_BUFFER_NONE: @@ -295,7 +295,8 @@ validate_loadstore_tile_buffer_general(VALIDATE_ARGS) if (!vc4_use_handle(exec, 0, VC4_MODE_RENDER, fbo)) return -EINVAL; - offset = *(uint32_t *)(untrusted + 2) ~0xf; + untrusted_address = *(uint32_t *)(untrusted + 2); + offset = untrusted_address ~0xf; if (!check_tex_size(exec, fbo, offset, ((packet_b0 @@ -305,7 +306,8 @@ validate_loadstore_tile_buffer_general(VALIDATE_ARGS) return -EINVAL; } - *(uint32_t *)(validated + 2) = offset + fbo-paddr; + *(uint32_t *)(validated + 2) = (offset + fbo-paddr + + (untrusted_address 0xf)); return 0; } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Fix the uniform debug output.
Module: Mesa Branch: master Commit: 57de9bbb632ed7639d07d37965dcee5a1fe3bf30 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=57de9bbb632ed7639d07d37965dcee5a1fe3bf30 Author: Eric Anholt e...@anholt.net Date: Wed Oct 15 16:16:09 2014 +0100 vc4: Fix the uniform debug output. I dropped the shader index when moving to the compiled shader struct, but didn't update the format string here. --- src/gallium/drivers/vc4/vc4_program.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index f4b723a..70a2b86 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -2466,7 +2466,7 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader, } #if 0 uint32_t written_val = *(uint32_t *)(vc4-uniforms.next - 4); -fprintf(stderr, %p/%d: %d: 0x%08x (%f)\n, +fprintf(stderr, %p: %d / 0x%08x (%f)\n, shader, i, written_val, uif(written_val)); #endif } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add support for user clip plane and gl_ClipVertex.
Module: Mesa Branch: master Commit: 201d4c0b2a6f7f0c1d59c4fd5cce4916fc48a2d2 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=201d4c0b2a6f7f0c1d59c4fd5cce4916fc48a2d2 Author: Eric Anholt e...@anholt.net Date: Wed Oct 15 15:25:57 2014 +0100 vc4: Add support for user clip plane and gl_ClipVertex. Fixes about 15 piglit tests about interpolation and clipping. --- src/gallium/drivers/vc4/vc4_context.h |2 + src/gallium/drivers/vc4/vc4_program.c | 84 +++-- src/gallium/drivers/vc4/vc4_qir.c |2 + src/gallium/drivers/vc4/vc4_qir.h |3 ++ src/gallium/drivers/vc4/vc4_state.c |4 +- 5 files changed, 91 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 56cfc7b..45dfa02 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -58,6 +58,7 @@ #define VC4_DIRTY_SCISSOR (1 17) #define VC4_DIRTY_FLAT_SHADE_FLAGS (1 18) #define VC4_DIRTY_PRIM_MODE (1 19) +#define VC4_DIRTY_CLIP (1 20) #define VC4_SHADER_DIRTY_VP (1 0) #define VC4_SHADER_DIRTY_FP (1 1) @@ -207,6 +208,7 @@ struct vc4_context { unsigned sample_mask; struct pipe_framebuffer_state framebuffer; struct pipe_poly_stipple stipple; +struct pipe_clip_state clip; struct pipe_viewport_state viewport; struct vc4_constbuf_stateobj constbuf[PIPE_SHADER_TYPES]; struct vc4_vertexbuf_stateobj vertexbuf; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index a79e354..f4b723a 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -53,6 +53,7 @@ struct vc4_key { unsigned wrap_t:3; uint8_t swizzle[4]; } tex[VC4_MAX_TEXTURE_SAMPLERS]; +uint8_t ucp_enables; }; struct vc4_fs_key { @@ -1097,6 +1098,9 @@ emit_tgsi_declaration(struct vc4_compile *c, case TGSI_SEMANTIC_POSITION: c-output_position_index = decl-Range.First * 4; break; +case TGSI_SEMANTIC_CLIPVERTEX: +c-output_clipvertex_index = decl-Range.First * 4; +break; case 
TGSI_SEMANTIC_COLOR: c-output_color_index = decl-Range.First * 4; break; @@ -1398,6 +1402,28 @@ vc4_blend(struct vc4_compile *c, struct qreg *result, } static void +clip_distance_discard(struct vc4_compile *c) +{ +for (int i = 0; i PIPE_MAX_CLIP_PLANES; i++) { +if (!(c-key-ucp_enables (1 i))) +continue; + +struct qreg dist = emit_fragment_varying(c, + TGSI_SEMANTIC_CLIPDIST, + i, + TGSI_SWIZZLE_X); + +qir_SF(c, dist); + +if (c-discard.file == QFILE_NULL) +c-discard = qir_uniform_f(c, 0.0); + +c-discard = qir_SEL_X_Y_NS(c, qir_uniform_f(c, 1.0), +c-discard); +} +} + +static void alpha_test_discard(struct vc4_compile *c) { struct qreg src_alpha; @@ -1456,6 +1482,7 @@ alpha_test_discard(struct vc4_compile *c) static void emit_frag_end(struct vc4_compile *c) { +clip_distance_discard(c); alpha_test_discard(c); enum pipe_format color_format = c-fs_key-color_format; @@ -1655,6 +1682,45 @@ emit_stub_vpm_read(struct vc4_compile *c) } static void +emit_ucp_clipdistance(struct vc4_compile *c) +{ +struct qreg *clipvertex; + +if (c-output_clipvertex_index != -1) +clipvertex = c-outputs[c-output_clipvertex_index]; +else if (c-output_position_index != -1) +clipvertex = c-outputs[c-output_position_index]; +else +return; + +for (int plane = 0; plane PIPE_MAX_CLIP_PLANES; plane++) { +if (!(c-key-ucp_enables (1 plane))) +continue; + +/* Pick the next outputs[] that hasn't been written to, since + * there are no other program writes left to be processed at + * this point. If something had been declared but not written + * (like a w component), we'll just smash over the top of it. + */ +uint32_t output_index = c-num_outputs++; +add_output(c, output_index, + TGSI_SEMANTIC_CLIPDIST, + plane, + TGSI_SWIZZLE_X); + +struct qreg dist = qir_uniform_f(c, 0.0); +for (int i = 0; i 4; i++) { +struct
Mesa (master): vc4: Move the output semantics setup to a helper.
Module: Mesa Branch: master Commit: 6a0bf67048d508f907db6bb05e5e367308c21511 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6a0bf67048d508f907db6bb05e5e367308c21511 Author: Eric Anholt e...@anholt.net Date: Wed Oct 15 16:39:54 2014 +0100 vc4: Move the output semantics setup to a helper. I want to reuse it elsewhere to set up outputs that aren't in the TGSI. --- src/gallium/drivers/vc4/vc4_program.c | 44 + 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index c603425..a79e354 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1021,6 +1021,29 @@ emit_face_input(struct vc4_compile *c, int attr) } static void +add_output(struct vc4_compile *c, + uint32_t decl_offset, + uint8_t semantic_name, + uint8_t semantic_index, + uint8_t semantic_swizzle) +{ +uint32_t old_array_size = c-outputs_array_size; +resize_qreg_array(c, c-outputs, c-outputs_array_size, + decl_offset + 1); + +if (old_array_size != c-outputs_array_size) { +c-output_semantics = reralloc(c, + c-output_semantics, + struct vc4_varying_semantic, + c-outputs_array_size); +} + +c-output_semantics[decl_offset].semantic = semantic_name; +c-output_semantics[decl_offset].index = semantic_index; +c-output_semantics[decl_offset].swizzle = semantic_swizzle; +} + +static void emit_tgsi_declaration(struct vc4_compile *c, struct tgsi_full_declaration *decl) { @@ -1062,23 +1085,12 @@ emit_tgsi_declaration(struct vc4_compile *c, break; case TGSI_FILE_OUTPUT: { -uint32_t old_array_size = c-outputs_array_size; -resize_qreg_array(c, c-outputs, c-outputs_array_size, - (decl-Range.Last + 1) * 4); - -if (old_array_size != c-outputs_array_size) { -c-output_semantics = reralloc(c, - c-output_semantics, - struct vc4_varying_semantic, - c-outputs_array_size); -} - -struct vc4_varying_semantic *sem = -c-output_semantics[decl-Range.First * 4]; for (int i = 0; i 4; i++) { -sem[i].semantic = 
decl-Semantic.Name; -sem[i].index = decl-Semantic.Index; -sem[i].swizzle = i; +add_output(c, + decl-Range.First * 4 + i, + decl-Semantic.Name, + decl-Semantic.Index, + i); } switch (decl-Semantic.Name) { ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Add support for having 0 vertex elements used.
Module: Mesa Branch: master Commit: b5fc9d5664d08d2e47ae89bf580e43732346a694 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=b5fc9d5664d08d2e47ae89bf580e43732346a694 Author: Eric Anholt e...@anholt.net Date: Thu Sep 25 14:57:01 2014 -0700 vc4: Add support for having 0 vertex elements used. You have to load at least 1, according to the simulator. Fixes 4 piglit tests and even more ES2 conformance tests. --- src/gallium/drivers/vc4/vc4_draw.c| 27 +-- src/gallium/drivers/vc4/vc4_program.c | 26 ++ 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 1a0c0dc..0938a76 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -120,8 +120,12 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) vc4-constbuf[PIPE_SHADER_VERTEX], vc4-verttex); +/* The simulator throws a fit if VS or CS don't read an attribute, so + * we emit a dummy read. + */ +uint32_t num_elements_emit = MAX2(vtx-num_elements, 1); /* Emit the shader record. 
*/ -cl_start_shader_reloc(vc4-shader_rec, 3 + vtx-num_elements); +cl_start_shader_reloc(vc4-shader_rec, 3 + num_elements_emit); cl_u16(vc4-shader_rec, VC4_SHADER_FLAG_ENABLE_CLIPPING | ((info-mode == PIPE_PRIM_POINTS @@ -133,14 +137,14 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) cl_u32(vc4-shader_rec, 0); /* UBO offset written by kernel */ cl_u16(vc4-shader_rec, 0); /* vs num uniforms */ -cl_u8(vc4-shader_rec, (1 vtx-num_elements) - 1); /* vs attribute array bitfield */ -cl_u8(vc4-shader_rec, 16 * vtx-num_elements); /* vs total attribute size */ +cl_u8(vc4-shader_rec, (1 num_elements_emit) - 1); /* vs attribute array bitfield */ +cl_u8(vc4-shader_rec, 16 * num_elements_emit); /* vs total attribute size */ cl_reloc(vc4, vc4-shader_rec, vc4-prog.vs-bo, 0); cl_u32(vc4-shader_rec, 0); /* UBO offset written by kernel */ cl_u16(vc4-shader_rec, 0); /* cs num uniforms */ -cl_u8(vc4-shader_rec, (1 vtx-num_elements) - 1); /* cs attribute array bitfield */ -cl_u8(vc4-shader_rec, 16 * vtx-num_elements); /* cs total attribute size */ +cl_u8(vc4-shader_rec, (1 num_elements_emit) - 1); /* cs attribute array bitfield */ +cl_u8(vc4-shader_rec, 16 * num_elements_emit); /* cs total attribute size */ cl_reloc(vc4, vc4-shader_rec, vc4-prog.cs-bo, 0); cl_u32(vc4-shader_rec, 0); /* UBO offset written by kernel */ @@ -167,13 +171,24 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) } } +if (vtx-num_elements == 0) { +assert(num_elements_emit == 1); +struct vc4_bo *bo = vc4_bo_alloc(vc4-screen, 4096, scratch VBO); +cl_reloc(vc4, vc4-shader_rec, bo, 0); +cl_u8(vc4-shader_rec, 16 - 1); /* element size */ +cl_u8(vc4-shader_rec, 0); /* stride */ +cl_u8(vc4-shader_rec, 0); /* VS VPM offset */ +cl_u8(vc4-shader_rec, 0); /* CS VPM offset */ +vc4_bo_unreference(bo); +} + /* the actual draw call. 
*/ cl_u8(vc4-bcl, VC4_PACKET_GL_SHADER_STATE); assert(vtx-num_elements = 8); /* Note that number of attributes == 0 in the packet means 8 * attributes. This field also contains the offset into shader_rec. */ -cl_u32(vc4-bcl, vtx-num_elements 0x7); +cl_u32(vc4-bcl, num_elements_emit 0x7); /* Note that the primitive type fields match with OpenGL/gallium * definitions, up to but not including QUADS. diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 3056c67..c603425 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -1619,6 +1619,29 @@ emit_point_size_write(struct vc4_compile *c) qir_VPM_WRITE(c, point_size); } +/** + * Emits a VPM read of the stub vertex attribute set up by vc4_draw.c. + * + * The simulator insists that there be at least one vertex attribute, so + * vc4_draw.c will emit one if it wouldn't have otherwise. The simulator also + * insists that all vertex attributes loaded get read by the VS/CS, so we have + * to consume it here. + */ +static void +emit_stub_vpm_read(struct vc4_compile *c) +{ +if (c-num_inputs) +return; + +for (int i = 0; i 4; i++) { +qir_emit(c, qir_inst(QOP_VPM_READ, + qir_get_temp(c), + c-undef, + c
Mesa (master): vc4: Fix render target NPOT alignment at small miplevels.
Module: Mesa Branch: master Commit: a2d8b6dbd5359e5dc930e22ac21a92bf20587401 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=a2d8b6dbd5359e5dc930e22ac21a92bf20587401 Author: Eric Anholt e...@anholt.net Date: Tue Oct 14 14:28:14 2014 +0100 vc4: Fix render target NPOT alignment at small miplevels. The texturing hardware takes the POT level 0 width/height and minifies those. This is different from what we were doing, for example, for 273-wide's level 5: POT(273 >> 5) == 8, while POT(273) >> 5 == 16. Fixes piglit-depthstencil-render-miplevels 273. --- src/gallium/drivers/vc4/vc4_state.c | 15 --- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c index 2a123eb..7ccffeb 100644 --- a/src/gallium/drivers/vc4/vc4_state.c +++ b/src/gallium/drivers/vc4/vc4_state.c @@ -400,9 +400,18 @@ vc4_set_framebuffer_state(struct pipe_context *pctx, * framebuffer. Note that if the z/color buffers were mismatched * sizes, we wouldn't be able to do this. */ -if ((cso-cbufs[0] cso-cbufs[0]-u.tex.level) || - (cso-zsbuf cso-zsbuf-u.tex.level)) { -cso-width = util_next_power_of_two(cso-width); +if (cso-cbufs[0] cso-cbufs[0]-u.tex.level) { +struct vc4_resource *rsc = +vc4_resource(cso-cbufs[0]-texture); +cso-width = +(rsc-slices[cso-cbufs[0]-u.tex.level].stride / + rsc-cpp); +} else if (cso-zsbuf cso-zsbuf-u.tex.level){ +struct vc4_resource *rsc = +vc4_resource(cso-zsbuf-texture); +cso-width = +(rsc-slices[cso-zsbuf-u.tex.level].stride / + rsc-cpp); } vc4-dirty |= VC4_DIRTY_FRAMEBUFFER; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Match VS outputs to FS inputs.
Module: Mesa Branch: master Commit: 5d72a1c95662109b1338605da83329dd25e00859 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5d72a1c95662109b1338605da83329dd25e00859 Author: Eric Anholt e...@anholt.net Date: Mon Oct 13 08:24:57 2014 +0100 vc4: Match VS outputs to FS inputs. If the VS doesn't output a value that the FS needs, we still need to read the right contents for the remaining FS inputs, by emitting padding. And if the VS outputs something the FS doesn't need, we shouldn't put it in the VPM at all (so the code producing it can get DCEed). Fixes 77 piglit tests. --- src/gallium/drivers/vc4/vc4_context.h | 10 +++ src/gallium/drivers/vc4/vc4_program.c | 113 - src/gallium/drivers/vc4/vc4_qir.h | 30 - 3 files changed, 135 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index 31dec04..56cfc7b 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -83,6 +83,7 @@ struct vc4_uncompiled_shader { }; struct vc4_compiled_shader { +uint64_t program_id; struct vc4_bo *bo; struct vc4_shader_uniform_info uniforms; @@ -91,6 +92,14 @@ struct vc4_compiled_shader { uint32_t color_inputs; uint8_t num_inputs; + +/** + * Array of the meanings of the VPM inputs this shader needs. + * + * It doesn't include those that aren't part of the VPM, like + * point/line coordinates. 
+ */ +struct vc4_varying_semantic *input_semantics; }; struct vc4_program_stateobj { @@ -173,6 +182,7 @@ struct vc4_context { struct primconvert_context *primconvert; struct util_hash_table *fs_cache, *vs_cache; +uint64_t next_compiled_program_id; struct ra_regs *regs; unsigned int reg_class_any; diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index b7c51a7..455b5d7 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -75,6 +75,13 @@ struct vc4_fs_key { struct vc4_vs_key { struct vc4_key base; + +/** + * This is a proxy for the array of FS input semantics, which is + * larger than we would want to put in the key. + */ +uint64_t compiled_fs_id; + enum pipe_format attr_formats[8]; bool is_coord; bool per_vertex_point_size; @@ -948,13 +955,28 @@ emit_point_coord_input(struct vc4_compile *c, int attr) } static struct qreg -emit_fragment_varying(struct vc4_compile *c, int index) +emit_fragment_varying(struct vc4_compile *c, uint8_t semantic, + uint8_t index, uint8_t swizzle) { +uint32_t i = c-num_input_semantics++; struct qreg vary = { QFILE_VARY, -index +i }; +if (c-num_input_semantics = c-input_semantics_array_size) { +c-input_semantics_array_size = +MAX2(4, c-input_semantics_array_size * 2); + +c-input_semantics = reralloc(c, c-input_semantics, + struct vc4_varying_semantic, + c-input_semantics_array_size); +} + +c-input_semantics[i].semantic = semantic; +c-input_semantics[i].index = index; +c-input_semantics[i].swizzle = swizzle; + return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c))); } @@ -964,12 +986,11 @@ emit_fragment_input(struct vc4_compile *c, int attr, { for (int i = 0; i 4; i++) { c-inputs[attr * 4 + i] = -emit_fragment_varying(c, attr * 4 + i); +emit_fragment_varying(c, + decl-Semantic.Name, + decl-Semantic.Index, + i); c-num_inputs++; - -if (decl-Semantic.Name == TGSI_SEMANTIC_COLOR || -decl-Semantic.Name == TGSI_SEMANTIC_BCOLOR) -c-color_inputs |= 1 i; } } @@ 
-1027,10 +1048,26 @@ emit_tgsi_declaration(struct vc4_compile *c, } break; -case TGSI_FILE_OUTPUT: +case TGSI_FILE_OUTPUT: { +uint32_t old_array_size = c-outputs_array_size; resize_qreg_array(c, c-outputs, c-outputs_array_size, (decl-Range.Last + 1) * 4); +if (old_array_size != c-outputs_array_size) { +c-output_semantics = reralloc(c, + c-output_semantics, + struct vc4_varying_semantic
Mesa (master): vc4: Add support for the TXL opcode.
Module: Mesa Branch: master Commit: e1d1c396265ce3b363e99422b46275275723ee21 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e1d1c396265ce3b363e99422b46275275723ee21 Author: Eric Anholt e...@anholt.net Date: Mon Oct 13 14:38:10 2014 +0100 vc4: Add support for the TXL opcode. There's a bit at the bottom of cube map stride (which has some formatting bugs in the docs) which flips the bias coordinate to being an absolute LOD. --- src/gallium/drivers/vc4/vc4_program.c | 20 +++- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 1bbdba5..3056c67 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -528,6 +528,7 @@ tgsi_to_qir_tex(struct vc4_compile *c, struct qreg t = src[0 * 4 + 1]; struct qreg r = src[0 * 4 + 2]; uint32_t unit = tgsi_inst-Src[1].Register.Index; +bool is_txl = tgsi_inst-Instruction.Opcode == TGSI_OPCODE_TXL; struct qreg proj = c-undef; if (tgsi_inst-Instruction.Opcode == TGSI_OPCODE_TXP) { @@ -561,6 +562,13 @@ tgsi_to_qir_tex(struct vc4_compile *c, } if (tgsi_inst-Texture.Texture == TGSI_TEXTURE_CUBE || +tgsi_inst-Texture.Texture == TGSI_TEXTURE_SHADOWCUBE || +is_txl) { +texture_u[2] = add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P2, + unit | (is_txl 16)); +} + +if (tgsi_inst-Texture.Texture == TGSI_TEXTURE_CUBE || tgsi_inst-Texture.Texture == TGSI_TEXTURE_SHADOWCUBE) { struct qreg ma = qir_FMAXABS(c, qir_FMAXABS(c, s, t), r); struct qreg rcp_ma = qir_RCP(c, ma); @@ -568,8 +576,6 @@ tgsi_to_qir_tex(struct vc4_compile *c, t = qir_FMUL(c, t, rcp_ma); r = qir_FMUL(c, r, rcp_ma); -texture_u[2] = add_uniform(c, QUNIFORM_TEXTURE_CONFIG_P2, unit); - qir_TEX_R(c, r, texture_u[next_texture_u++]); } else if (c-key-tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP_TO_BORDER || c-key-tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP || @@ -591,7 +597,8 @@ tgsi_to_qir_tex(struct vc4_compile *c, qir_TEX_T(c, t, texture_u[next_texture_u++]); -if 
(tgsi_inst-Instruction.Opcode == TGSI_OPCODE_TXB) +if (tgsi_inst-Instruction.Opcode == TGSI_OPCODE_TXB || +tgsi_inst-Instruction.Opcode == TGSI_OPCODE_TXL) qir_TEX_B(c, src[0 * 4 + 3], texture_u[next_texture_u++]); qir_TEX_S(c, s, texture_u[next_texture_u++]); @@ -1175,6 +1182,7 @@ emit_tgsi_instruction(struct vc4_compile *c, case TGSI_OPCODE_TEX: case TGSI_OPCODE_TXP: case TGSI_OPCODE_TXB: +case TGSI_OPCODE_TXL: tgsi_to_qir_tex(c, tgsi_inst, op_trans[tgsi_op].op, src_regs); return; @@ -2163,15 +2171,17 @@ write_texture_p1(struct vc4_context *vc4, static void write_texture_p2(struct vc4_context *vc4, struct vc4_texture_stateobj *texstate, - uint32_t unit) + uint32_t data) { +uint32_t unit = data 0x; struct pipe_sampler_view *texture = texstate-textures[unit]; struct vc4_resource *rsc = vc4_resource(texture-texture); cl_u32(vc4-uniforms, VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE, VC4_TEX_P2_PTYPE) | - VC4_SET_FIELD(rsc-cube_map_stride 12, VC4_TEX_P2_CMST)); + VC4_SET_FIELD(rsc-cube_map_stride 12, VC4_TEX_P2_CMST) | + VC4_SET_FIELD((data 16) 1, VC4_TEX_P2_BSLOD)); } ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Improve the accuracy of SIN and COS.
Module: Mesa Branch: master Commit: 5bc91b6e322354d0964c07375c9a3a28b0083a38 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=5bc91b6e322354d0964c07375c9a3a28b0083a38 Author: Eric Anholt e...@anholt.net Date: Mon Oct 13 14:11:28 2014 +0100 vc4: Improve the accuracy of SIN and COS. This gets them to pass glsl-sin/cos. There was an obvious problem that I was using the FRC code on the scaled input value, which means that we had a range in [0, 1], while our taylor is most accurate across [-0.5, 0.5]. We can just slide things over, but that means flipping the sign of the coefficients. After that, it was just a matter of stuffing more coefficients in. --- src/gallium/drivers/vc4/vc4_program.c | 28 +--- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 455b5d7..1bbdba5 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -766,10 +766,11 @@ tgsi_to_qir_sin(struct vc4_compile *c, enum qop op, struct qreg *src, int i) { float coeff[] = { -2.0 * M_PI, --pow(2.0 * M_PI, 3) / (3 * 2 * 1), -pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1), --pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1), +-2.0 * M_PI, +pow(2.0 * M_PI, 3) / (3 * 2 * 1), +-pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1), +pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1), +-pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), }; struct qreg scaled_x = @@ -777,8 +778,9 @@ tgsi_to_qir_sin(struct vc4_compile *c, src[0 * 4 + 0], qir_uniform_f(c, 1.0f / (M_PI * 2.0f))); - -struct qreg x = tgsi_to_qir_frc(c, NULL, 0, scaled_x, 0); +struct qreg x = qir_FADD(c, + tgsi_to_qir_frc(c, NULL, 0, scaled_x, 0), + qir_uniform_f(c, -0.5)); struct qreg x2 = qir_FMUL(c, x, x); struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0])); for (int i = 1; i ARRAY_SIZE(coeff); i++) { @@ -799,16 +801,20 @@ tgsi_to_qir_cos(struct vc4_compile *c, enum qop op, struct qreg *src, int i) { float coeff[] = 
{ -1.0f, --pow(2.0 * M_PI, 2) / (2 * 1), -pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1), --pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1), +-1.0f, +pow(2.0 * M_PI, 2) / (2 * 1), +-pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1), +pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1), +-pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), +pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), }; struct qreg scaled_x = qir_FMUL(c, src[0 * 4 + 0], qir_uniform_f(c, 1.0f / (M_PI * 2.0f))); -struct qreg x_frac = tgsi_to_qir_frc(c, NULL, 0, scaled_x, 0); +struct qreg x_frac = qir_FADD(c, + tgsi_to_qir_frc(c, NULL, 0, scaled_x, 0), + qir_uniform_f(c, -0.5)); struct qreg sum = qir_uniform_f(c, coeff[0]); struct qreg x2 = qir_FMUL(c, x_frac, x_frac); ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Write the VPM read setup multiple times to queue all the inputs.
Module: Mesa Branch: master Commit: 615bbf0ca641d356d975f12a5491f2fd56549ed8 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=615bbf0ca641d356d975f12a5491f2fd56549ed8 Author: Eric Anholt e...@anholt.net Date: Mon Oct 13 16:20:01 2014 +0100 vc4: Write the VPM read setup multiple times to queue all the inputs. There's a 4-element fifo, and the size (number of dwords per vertex) field is just 4 bits. Fixes glsl-routing on sim. --- src/gallium/drivers/vc4/vc4_qpu_emit.c | 21 ++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c index 397e6f2..99e634e 100644 --- a/src/gallium/drivers/vc4/vc4_qpu_emit.c +++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c @@ -238,15 +238,30 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c) { struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c); bool discard = false; +uint32_t inputs_remaining = c-num_inputs; +uint32_t vpm_read_fifo_count = 0; +uint32_t vpm_read_offset = 0; make_empty_list(c-qpu_inst_list); switch (c-stage) { case QSTAGE_VERT: case QSTAGE_COORD: -queue(c, qpu_load_imm_ui(qpu_vrsetup(), - (0x1a00 + - 0x0010 * c-num_inputs))); +/* There's a 4-entry FIFO for VPMVCD reads, each of which can + * load up to 16 dwords (4 vec4s) per vertex. + */ +while (inputs_remaining) { +uint32_t num_entries = MIN2(inputs_remaining, 16); +queue(c, qpu_load_imm_ui(qpu_vrsetup(), + vpm_read_offset | + 0x1a00 | + ((num_entries 0xf) 20))); +inputs_remaining -= num_entries; +vpm_read_offset += num_entries; +vpm_read_fifo_count++; +} +assert(vpm_read_fifo_count = 4); + queue(c, qpu_load_imm_ui(qpu_vwsetup(), 0x1a00)); break; case QSTAGE_FRAG: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Don't look up the compiled shaders unless state has changed.
Module: Mesa Branch: master Commit: d09509da2aa5ff709c48bdc4163a2c57811c70ab URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=d09509da2aa5ff709c48bdc4163a2c57811c70ab Author: Eric Anholt e...@anholt.net Date: Fri Oct 10 14:17:15 2014 +0200 vc4: Don't look up the compiled shaders unless state has changed. Improves simulated norast performance on a little benchmark by 38.0965% +/- 3.27534% (n=11). --- src/gallium/drivers/vc4/vc4_context.h |3 +++ src/gallium/drivers/vc4/vc4_draw.c|5 + src/gallium/drivers/vc4/vc4_program.c | 20 3 files changed, 28 insertions(+) diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h index da07cfa..31dec04 100644 --- a/src/gallium/drivers/vc4/vc4_context.h +++ b/src/gallium/drivers/vc4/vc4_context.h @@ -57,6 +57,7 @@ #define VC4_DIRTY_INDEXBUF (1 16) #define VC4_DIRTY_SCISSOR (1 17) #define VC4_DIRTY_FLAT_SHADE_FLAGS (1 18) +#define VC4_DIRTY_PRIM_MODE (1 19) #define VC4_SHADER_DIRTY_VP (1 0) #define VC4_SHADER_DIRTY_FP (1 1) @@ -177,6 +178,8 @@ struct vc4_context { unsigned int reg_class_any; unsigned int reg_class_a; +uint8_t prim_mode; + /** @{ Current pipeline state objects */ struct pipe_scissor_state scissor; struct pipe_blend_state *blend; diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index bc08da3..1a0c0dc 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -99,6 +99,11 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) struct vc4_vertex_stateobj *vtx = vc4-vtx; struct vc4_vertexbuf_stateobj *vertexbuf = vc4-vertexbuf; +if (vc4-prim_mode != info-mode) { +vc4-prim_mode = info-mode; +vc4-dirty |= VC4_DIRTY_PRIM_MODE; +} + vc4_start_draw(vc4); vc4_update_compiled_shaders(vc4, info-mode); diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index bf76acf..b22426c 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ 
b/src/gallium/drivers/vc4/vc4_program.c @@ -1792,6 +1792,17 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode) struct vc4_fs_key local_key; struct vc4_fs_key *key = local_key; +if (!(vc4-dirty (VC4_DIRTY_PRIM_MODE | +VC4_DIRTY_BLEND | +VC4_DIRTY_FRAMEBUFFER | +VC4_DIRTY_ZSA | +VC4_DIRTY_RASTERIZER | +VC4_DIRTY_FRAGTEX | +VC4_DIRTY_TEXSTATE | +VC4_DIRTY_PROG))) { +return; +} + memset(key, 0, sizeof(*key)); vc4_setup_shared_key(key-base, vc4-fragtex); key-base.shader_state = vc4-prog.bind_fs; @@ -1840,6 +1851,15 @@ vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode) struct vc4_vs_key local_key; struct vc4_vs_key *key = local_key; +if (!(vc4-dirty (VC4_DIRTY_PRIM_MODE | +VC4_DIRTY_RASTERIZER | +VC4_DIRTY_VERTTEX | +VC4_DIRTY_TEXSTATE | +VC4_DIRTY_VTXSTATE | +VC4_DIRTY_PROG))) { +return; +} + memset(key, 0, sizeof(*key)); vc4_setup_shared_key(key-base, vc4-verttex); key-base.shader_state = vc4-prog.bind_vs; ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Actually clear the context's dirty flags.
Module: Mesa Branch: master Commit: c6f50c4086a96c3a8d84522ed7ee5c75a8b0868c URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c6f50c4086a96c3a8d84522ed7ee5c75a8b0868c Author: Eric Anholt e...@anholt.net Date: Fri Oct 10 14:24:06 2014 +0200 vc4: Actually clear the context's dirty flags. I was trying to skip state updates when !dirty, and suspiciously everything was always dirty. --- src/gallium/drivers/vc4/vc4_draw.c |1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/drivers/vc4/vc4_draw.c b/src/gallium/drivers/vc4/vc4_draw.c index 2cac7ef..bc08da3 100644 --- a/src/gallium/drivers/vc4/vc4_draw.c +++ b/src/gallium/drivers/vc4/vc4_draw.c @@ -103,6 +103,7 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info) vc4_update_compiled_shaders(vc4, info-mode); vc4_emit_state(pctx); +vc4-dirty = 0; vc4_write_uniforms(vc4, vc4-prog.fs, vc4-constbuf[PIPE_SHADER_FRAGMENT], ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit
Mesa (master): vc4: Optimize the other case of SEL_X_Y with a 0 -> SEL_X_0(a).
Module: Mesa Branch: master Commit: 7c474f9f2e5e3161ad27129844139ee14d916726 URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7c474f9f2e5e3161ad27129844139ee14d916726 Author: Eric Anholt e...@anholt.net Date: Thu Oct 9 09:40:51 2014 +0200 vc4: Optimize the other case of SEL_X_Y wih a 0 - SEL_X_0(a). Cleans up some output to be more obvious in a piglit test I'm looking at. --- src/gallium/drivers/vc4/vc4_opt_algebraic.c | 24 +++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/gallium/drivers/vc4/vc4_opt_algebraic.c b/src/gallium/drivers/vc4/vc4_opt_algebraic.c index c114458..ec526fb 100644 --- a/src/gallium/drivers/vc4/vc4_opt_algebraic.c +++ b/src/gallium/drivers/vc4/vc4_opt_algebraic.c @@ -174,7 +174,10 @@ qir_opt_algebraic(struct vc4_compile *c) */ replace_with_mov(c, inst, inst-src[1]); progress = true; -} else if (is_zero(c, defs, inst-src[1])) { +break; +} + +if (is_zero(c, defs, inst-src[1])) { /* Replace references to a 0 uniform value * with the SEL_X_0 equivalent. */ @@ -183,7 +186,26 @@ qir_opt_algebraic(struct vc4_compile *c) inst-src[1] = c-undef; progress = true; dump_to(c, inst); +break; } + +if (is_zero(c, defs, inst-src[0])) { +/* Replace references to a 0 uniform value + * with the SEL_X_0 equivalent, flipping the + * condition being evaluated since the operand + * order is flipped. + */ +dump_from(c, inst); +inst-op -= QOP_SEL_X_Y_ZS; +inst-op ^= 1; +inst-op += QOP_SEL_X_0_ZS; +inst-src[0] = inst-src[1]; +inst-src[1] = c-undef; +progress = true; +dump_to(c, inst); +break; +} + break; case QOP_FSUB: ___ mesa-commit mailing list mesa-commit@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-commit