[Freedreno] [PATCH] freedreno/a4xx: add stencil texturing support
Copied from a5xx, should be identical. Signed-off-by: Ilia Mirkin --- docs/features.txt| 6 ++--- src/gallium/drivers/freedreno/a4xx/fd4_emit.c| 2 ++ src/gallium/drivers/freedreno/a4xx/fd4_format.c | 11 +--- src/gallium/drivers/freedreno/a4xx/fd4_texture.c | 34 ++-- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 99fb1715e0b..2d6e0b20fb5 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -180,7 +180,7 @@ GL 4.3, GLSL 4.30 -- all DONE: i965/gen8+, nvc0, radeonsi GL_ARB_robust_buffer_access_behavior DONE (i965) GL_ARB_shader_image_size DONE (freedreno/a5xx, i965, r600, softpipe) GL_ARB_shader_storage_buffer_object DONE (freedreno/a5xx, i965, softpipe) - GL_ARB_stencil_texturing DONE (freedreno/a5xx, i965/hsw+, nv50, r600, llvmpipe, softpipe, swr) + GL_ARB_stencil_texturing DONE (freedreno, i965/hsw+, nv50, r600, llvmpipe, softpipe, swr) GL_ARB_texture_buffer_range DONE (freedreno, nv50, i965, r600, llvmpipe) GL_ARB_texture_query_levels DONE (all drivers that support GLSL 1.30) GL_ARB_texture_storage_multisampleDONE (all drivers that support GL_ARB_texture_multisample) @@ -203,7 +203,7 @@ GL 4.4, GLSL 4.40 -- all DONE: i965/gen8+, nvc0, radeonsi GL_ARB_multi_bind DONE (all drivers) GL_ARB_query_buffer_objectDONE (i965/hsw+) GL_ARB_texture_mirror_clamp_to_edge DONE (i965, nv50, r600, llvmpipe, softpipe, swr) - GL_ARB_texture_stencil8 DONE (freedreno/a5xx, i965/hsw+, nv50, r600, llvmpipe, softpipe, swr) + GL_ARB_texture_stencil8 DONE (freedreno, i965/hsw+, nv50, r600, llvmpipe, softpipe, swr) GL_ARB_vertex_type_10f_11f_11f_revDONE (i965, nv50, r600, llvmpipe, softpipe, swr) GL 4.5, GLSL 4.50 -- all DONE: nvc0, radeonsi @@ -252,7 +252,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi GL_ARB_shader_storage_buffer_object DONE (freedreno/a5xx, i965/gen7+, softpipe) GL_ARB_shading_language_packing DONE (all drivers) GL_ARB_separate_shader_objectsDONE (all drivers) - GL_ARB_stencil_texturing DONE (freedreno/a5xx, nv50, r600, llvmpipe, softpipe, swr) + GL_ARB_stencil_texturing DONE (freedreno, nv50, r600, llvmpipe, softpipe, swr) GL_ARB_texture_multisample (Multisample textures) DONE (i965/gen7+, nv50, r600, llvmpipe, softpipe) GL_ARB_texture_storage_multisampleDONE (all drivers that support GL_ARB_texture_multisample) GL_ARB_vertex_attrib_binding DONE (all drivers) diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 0f7c6470330..8262b45daad 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -190,6 +190,8 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, view->texconst3); if (view->base.texture) { struct fd_resource *rsc = fd_resource(view->base.texture); + if (view->base.format == PIPE_FORMAT_X32_S8X24_UINT) + rsc = rsc->stencil; OUT_RELOC(ring, rsc->bo, view->offset, view->texconst4, 0); } else { OUT_RING(ring, 0x); diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_format.c b/src/gallium/drivers/freedreno/a4xx/fd4_format.c index 3e1dc277850..75d24126149 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c @@ -211,10 +211,13 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = { VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE,WZYX), - _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), - _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), - _T(Z32_FLOAT, 32_FLOAT, R8G8B8A8_UNORM, WZYX), - _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT,R8G8B8A8_UNORM, WZYX), + _T(Z16_UNORM,16_UNORM, R16_UNORM, WZYX), + _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), + _T(X24S8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, XYZW), + _T(Z24_UNORM_S8_UINT,X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), + _T(Z32_FLOAT,32_FLOAT, R8G8B8A8_UNORM, WZYX)
[Freedreno] [PATCH 1/2] nir: allow texture offsets with cube maps
GL doesn't have this, but some hardware supports it. This is convenient for lowering tg4 to plain texture calls, which is necessary on Adreno A4xx hardware. Signed-off-by: Ilia Mirkin --- src/compiler/nir/nir.h | 15 +-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index f46f6147110..64965ae16d6 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -1364,8 +1364,7 @@ nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src) if (instr->src[src].src_type == nir_tex_src_ms_mcs) return 4; - if (instr->src[src].src_type == nir_tex_src_offset || - instr->src[src].src_type == nir_tex_src_ddx || + if (instr->src[src].src_type == nir_tex_src_ddx || instr->src[src].src_type == nir_tex_src_ddy) { if (instr->is_array) return instr->coord_components - 1; @@ -1373,6 +1372,18 @@ nir_tex_instr_src_size(const nir_tex_instr *instr, unsigned src) return instr->coord_components; } + /* Usual APIs don't allow cube + offset, but we allow it, with 2 coords for +* the offset, since a cube maps to a single face. +*/ + if (instr->src[src].src_type == nir_tex_src_offset) { + unsigned ret = instr->coord_components; + if (instr->is_array) + ret--; + if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) + ret--; + return ret; + } + return 1; } -- 2.13.6 ___ Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
[Freedreno] [PATCH 2/2] freedreno/ir3: add a pass to lower tg4 to txl, enable gather on a4xx
Unfortunately Adreno A4xx hardware returns incorrect results with the GATHER4 opcodes. As a result, we have to lower to 4 individual texture calls (txl since we have to force lod to 0). We achieve this using offsets, including on cube maps which normally never have offsets. Signed-off-by: Ilia Mirkin --- This pass relies on the hw doing the "right thing", working with nonconst offsets, and not having the usual limits (since the gather offset will in effect get offset by another 1). It fails two tests out of all the gather ones: bin/zero-tex-coord textureGather tests/spec/arb_gpu_shader5/execution/built-in-functions/fs-textureGatherOffset-uniform-array-offset.shader_test We haven't fully investigated why yet, but this is a good start. Note that the blob does this differently - they modify the source coordinate. However this seems unnecessary given that the hw can be made to use the offsets. Also please note that my knowledge of nir is minimal. Please carefully check that I used the right helpers/etc. This was largely a result of seeing what doesn't result in assertions. docs/features.txt | 4 +- src/gallium/drivers/freedreno/Makefile.sources | 1 + src/gallium/drivers/freedreno/freedreno_screen.c | 2 +- .../drivers/freedreno/ir3/ir3_compiler_nir.c | 7 +- src/gallium/drivers/freedreno/ir3/ir3_nir.c| 2 + src/gallium/drivers/freedreno/ir3/ir3_nir.h| 1 + .../freedreno/ir3/ir3_nir_lower_tg4_to_tex.c | 139 + src/gallium/drivers/freedreno/meson.build | 1 + 8 files changed, 152 insertions(+), 5 deletions(-) create mode 100644 src/gallium/drivers/freedreno/ir3/ir3_nir_lower_tg4_to_tex.c diff --git a/docs/features.txt b/docs/features.txt index 633d2593738..99fb1715e0b 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -130,7 +130,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen7+, nvc0, r600, radeonsi GL_ARB_tessellation_shaderDONE (i965/gen7+) GL_ARB_texture_buffer_object_rgb32DONE (freedreno, i965/gen6+, llvmpipe, softpipe, swr) GL_ARB_texture_cube_map_array DONE (i965/gen6+, nv50, llvmpipe, softpipe) - GL_ARB_texture_gather DONE (freedreno/a5xx, i965/gen6+, nv50, llvmpipe, softpipe, swr) + GL_ARB_texture_gather DONE (freedreno, i965/gen6+, nv50, llvmpipe, softpipe, swr) GL_ARB_texture_query_lod DONE (freedreno, i965, nv50, llvmpipe, softpipe) GL_ARB_transform_feedback2DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr) GL_ARB_transform_feedback3DONE (i965/gen7+, llvmpipe, softpipe, swr) @@ -256,7 +256,7 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi GL_ARB_texture_multisample (Multisample textures) DONE (i965/gen7+, nv50, r600, llvmpipe, softpipe) GL_ARB_texture_storage_multisampleDONE (all drivers that support GL_ARB_texture_multisample) GL_ARB_vertex_attrib_binding DONE (all drivers) - GS5 Enhanced textureGatherDONE (i965/gen7+, r600) + GS5 Enhanced textureGatherDONE (freedreno, i965/gen7+, r600) GS5 Packing/bitfield/conversion functions DONE (i965/gen6+, r600) GL_EXT_shader_integer_mix DONE (all drivers that support GLSL) diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index b109a5a7a21..40c2eff0455 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -168,6 +168,7 @@ ir3_SOURCES := \ ir3/ir3_nir.c \ ir3/ir3_nir.h \ ir3/ir3_nir_lower_if_else.c \ + ir3/ir3_nir_lower_tg4_to_tex.c \ ir3/ir3_print.c \ ir3/ir3_ra.c \ ir3/ir3_sched.c \ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index e61344fd104..62e4a574b90 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -264,7 +264,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param) return 0; case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS: - if (is_a5xx(screen)) + if (is_a4xx(screen) || is_a5xx(screen)) return 4; return 0; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c index da4aeaa7acb..c97df4f1d63 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c @@ -2399,9 +2399,12 @@ emit_tex(struct ir3_context *ctx, nir_tex_instr *tex) */ if (has_o