Unfortunately, Adreno A4xx hardware returns incorrect results for the GATHER4 opcodes. As a result, we have to lower each tg4 to 4 individual texture calls (txl, since we have to force the lod to 0). We achieve this using offsets, including on cube maps, which normally never have offsets.
Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu> --- This pass relies on the hw doing the "right thing", working with nonconst offsets, and not having the usual limits (since the gather offset will in effect get offset by another 1). It fails two tests out of all the gather ones: bin/zero-tex-coord textureGather tests/spec/arb_gpu_shader5/execution/built-in-functions/fs-textureGatherOffset-uniform-array-offset.shader_test We haven't fully investigated why yet, but this is a good start. Note that the blob does this differently - they modify the source coordinate. However this seems unnecessary given that the hw can be made to use the offsets. Also please note that my knowledge of nir is minimal. Please carefully check that I used the right helpers/etc. This was largely a result of seeing what doesn't result in assertions. docs/features.txt | 4 +- src/gallium/drivers/freedreno/Makefile.sources | 1 + src/gallium/drivers/freedreno/freedreno_screen.c | 2 +- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 7 +- src/gallium/drivers/freedreno/ir3/ir3_nir.c | 2 + src/gallium/drivers/freedreno/ir3/ir3_nir.h | 1 + .../freedreno/ir3/ir3_nir_lower_tg4_to_tex.c | 139 +++++++++++++++++++++ src/gallium/drivers/freedreno/meson.build | 1 + 8 files changed, 152 insertions(+), 5 deletions(-) create mode 100644 src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c diff --git a/docs/features.txt b/docs/features.txt index 633d2593738..99fb1715e0b 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -130,7 +130,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, radeonsi GL_ARB_tessellation_shader DONE (i965/gen7+) GL_ARB_texture_buffer_object_rgb32 DONE (freedreno, i965/gen6+, llvmpipe, softpipe, swr) GL_ARB_texture_cube_map_array DONE (i965/gen6+, nv50, llvmpipe, softpipe) - GL_ARB_texture_gather DONE (freedreno/a5xx, i965/gen6+, nv50, llvmpipe, softpipe, swr) + GL_ARB_texture_gather DONE (freedreno, i965/gen6+, nv50, llvmpipe, softpipe, swr) GL_ARB_texture_query_lod 
DONE (freedreno, i965, nv50, llvmpipe, softpipe) GL_ARB_transform_feedback2 DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr) GL_ARB_transform_feedback3 DONE (i965/gen7+, llvmpipe, softpipe, swr) @@ -256,7 +256,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi GL_ARB_texture_multisample (Multisample textures) DONE (i965/gen7+, nv50, r600, llvmpipe, softpipe) GL_ARB_texture_storage_multisample DONE (all drivers that support GL_ARB_texture_multisample) GL_ARB_vertex_attrib_binding DONE (all drivers) - GS5 Enhanced textureGather DONE (i965/gen7+, r600) + GS5 Enhanced textureGather DONE (freedreno, i965/gen7+, r600) GS5 Packing/bitfield/conversion functions DONE (i965/gen6+, r600) GL_EXT_shader_integer_mix DONE (all drivers that support GLSL) diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index b109a5a7a21..40c2eff0455 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -168,6 +168,7 @@ ir3_SOURCES := \ ir3/ir3_nir.c \ ir3/ir3_nir.h \ ir3/ir3_nir_lower_if_else.c \ + ir3/ir3_nir_lower_tg4_to_tex.c \ ir3/ir3_print.c \ ir3/ir3_ra.c \ ir3/ir3_sched.c \ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index e61344fd104..62e4a574b90 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -264,7 +264,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 0; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: - if (is_a5xx(screen)) + if (is_a4xx(screen) || is_a5xx(screen)) return 4; return 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index da4aeaa7acb..c97df4f1d63 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -2399,9 +2399,12 @@ 
emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) */ if (has_off | has_lod | has_bias) { if (has_off) { - for (i = 0; i < coords; i++) + unsigned off_coords = coords; + if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) + off_coords--; + for (i = 0; i < off_coords; i++) src1[nsrc1++] = off[i]; - if (coords < 2) + if (off_coords < 2) src1[nsrc1++] = create_immed(b, fui(0.0)); flags |= IR3_INSTR_O; } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_nir.c index 7dd24e5f4ee..39d3bf15fde 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.c @@ -188,6 +188,8 @@ ir3_optimize_nir(struct ir3_shader *shader, nir_shader *s, OPT_V(s, nir_lower_tex, &tex_options); OPT_V(s, nir_lower_load_const_to_scalar); + if (shader->compiler->gpu_id < 500) + OPT_V(s, ir3_nir_lower_tg4_to_tex); ir3_optimize_loop(s); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.h b/src/gallium/drivers/freedreno/ir3/ir3_nir.h index e0e3108e328..ca873407fae 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.h @@ -38,6 +38,7 @@ void ir3_nir_scan_driver_consts(nir_shader *shader, struct ir3_driver_const_layo bool ir3_nir_lower_if_else(nir_shader *shader); bool ir3_nir_apply_trig_workarounds(nir_shader *shader); +bool ir3_nir_lower_tg4_to_tex(nir_shader *shader); struct nir_shader * ir3_tgsi_to_nir(const struct tgsi_token *tokens); const nir_shader_compiler_options * ir3_get_compiler_options(struct ir3_compiler *compiler); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c new file mode 100644 index 00000000000..087688cd234 --- /dev/null +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c @@ -0,0 +1,139 @@ +/* + * Copyright © 2017 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and 
associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "ir3_nir.h" +#include "compiler/nir/nir_builder.h" + +/* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the + * gather results, rather than before. As a result, it must be emulated with + * direct texture calls. 
+ */ + +static bool +lower_tg4(nir_block *block, nir_builder *b, void *mem_ctx) +{ + bool progress = false; + + static const int offsets[3][2] = { {0, 1}, {1, 1}, {1, 0} }; + + nir_foreach_instr_safe(instr, block) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tg4 = (nir_tex_instr *)instr; + + if (tg4->op != nir_texop_tg4) + continue; + + b->cursor = nir_before_instr(&tg4->instr); + + nir_ssa_def *results[4]; + int offset_index = nir_tex_instr_src_index(tg4, nir_tex_src_offset); + for (int i = 0; i < 4; i++) { + int num_srcs = tg4->num_srcs + 1 /* lod */; + if (offset_index < 0 && i < 3) + num_srcs++; + + nir_tex_instr *tex = nir_tex_instr_create(b->shader, num_srcs); + tex->op = nir_texop_txl; + tex->sampler_dim = tg4->sampler_dim; + tex->coord_components = tg4->coord_components; + tex->is_array = tg4->is_array; + tex->is_shadow = tg4->is_shadow; + tex->is_new_style_shadow = tg4->is_new_style_shadow; + tex->texture_index = tg4->texture_index; + tex->texture = nir_deref_var_clone(tg4->texture, tex); + tex->sampler_index = tg4->sampler_index; + tex->sampler = nir_deref_var_clone(tg4->sampler, tex); + tex->dest_type = tg4->dest_type; + + for (int j = 0; j < tg4->num_srcs; j++) { + nir_src_copy(&tex->src[j].src, &tg4->src[j].src, tex); + tex->src[j].src_type = tg4->src[j].src_type; + } + if (i != 3) { + nir_ssa_def *offset = + nir_vec2(b, nir_imm_int(b, offsets[i][0]), + nir_imm_int(b, offsets[i][1])); + if (offset_index < 0) { + tex->src[tg4->num_srcs].src = nir_src_for_ssa(offset); + tex->src[tg4->num_srcs].src_type = nir_tex_src_offset; + } else { + assert(tex->src[offset_index].src.is_ssa); + nir_ssa_def *orig = tex->src[offset_index].src.ssa; + tex->src[offset_index].src = + nir_src_for_ssa(nir_iadd(b, orig, offset)); + } + } + tex->src[num_srcs - 1].src = nir_src_for_ssa(nir_imm_float(b, 0)); + tex->src[num_srcs - 1].src_type = nir_tex_src_lod; + + nir_ssa_dest_init(&tex->instr, &tex->dest, + nir_tex_instr_dest_size(tex), 32, NULL); + 
nir_builder_instr_insert(b, &tex->instr); + + results[i] = nir_channel(b, &tex->dest.ssa, tg4->component); + } + + nir_ssa_def *result = nir_vec4(b, results[0], results[1], results[2], results[3]); + nir_ssa_def_rewrite_uses(&tg4->dest.ssa, nir_src_for_ssa(result)); + + nir_instr_remove(&tg4->instr); + + progress = true; + } + + return progress; +} + +static bool +lower_tg4_func(nir_function_impl *impl) +{ + void *mem_ctx = ralloc_parent(impl); + nir_builder b; + nir_builder_init(&b, impl); + + bool progress = false; + nir_foreach_block_safe(block, impl) { + progress |= lower_tg4(block, &b, mem_ctx); + } + + if (progress) + nir_metadata_preserve(impl, nir_metadata_none); + + return progress; +} + +bool +ir3_nir_lower_tg4_to_tex(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(function, shader) { + if (function->impl) + progress |= lower_tg4_func(function->impl); + } + + return progress; +} diff --git a/src/gallium/drivers/freedreno/meson.build b/src/gallium/drivers/freedreno/meson.build index d2b901334d0..01b5836be29 100644 --- a/src/gallium/drivers/freedreno/meson.build +++ b/src/gallium/drivers/freedreno/meson.build @@ -188,6 +188,7 @@ files_libfreedreno = files( 'ir3/ir3_nir.c', 'ir3/ir3_nir.h', 'ir3/ir3_nir_lower_if_else.c', + 'ir3/ir3_nir_lower_tg4_to_tex.c', 'ir3/ir3_print.c', 'ir3/ir3_ra.c', 'ir3/ir3_sched.c', -- 2.13.6 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev