[Freedreno] [PATCH 1/3] a3xx: make sure to actually clamp depth as requested
We were previously ... not clamping. I guess this meant that everything got clamped to 1/0, which was enough to pass the existing tests. Or perhaps the clamping would only happen to the rasterized depth value and not the frag shader's output depth value. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97231 Signed-off-by: Ilia Mirkin Cc: mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/freedreno/a3xx/a3xx.xml.h | 2 +- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 30 ++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h index dcb6dfb..bf787d1 100644 --- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h +++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h @@ -1472,7 +1472,7 @@ static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val) { return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK; } -#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080 +#define A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE 0x00000080 #define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000 #define REG_A3XX_RB_DEPTH_CLEAR 0x00002101 diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 3466a0d..e66836b 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -31,6 +31,7 @@ #include "util/u_memory.h" #include "util/u_helpers.h" #include "util/u_format.h" +#include "util/u_viewport.h" #include "freedreno_resource.h" #include "freedreno_query_hw.h" @@ -535,7 +536,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1])); } - if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) { + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control; if (fp->writes_pos) { val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z; @@ 
-544,6 +545,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (fp->has_kill) { val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE; } + if (!ctx->rasterizer->depth_clip) { + val |= A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE; + } OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1); OUT_RING(ring, val); } @@ -647,6 +651,30 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2])); } + if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) { + float zmin, zmax; + int depth = 24; + if (ctx->batch->framebuffer.zsbuf) { + depth = util_format_get_component_bits( + pipe_surface_format(ctx->batch->framebuffer.zsbuf), + UTIL_FORMAT_COLORSPACE_ZS, 0); + } + util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz, + &zmin, &zmax); + + OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2); + if (depth == 32) { + OUT_RING(ring, (uint32_t)(zmin * 0xffffffff)); + OUT_RING(ring, (uint32_t)(zmax * 0xffffffff)); + } else if (depth == 16) { + OUT_RING(ring, (uint32_t)(zmin * 0xffff)); + OUT_RING(ring, (uint32_t)(zmax * 0xffff)); + } else { + OUT_RING(ring, (uint32_t)(zmin * 0xffffff)); + OUT_RING(ring, (uint32_t)(zmax * 0xffffff)); + } + } + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) { struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; int nr_cbufs = pfb->nr_cbufs; -- 2.7.3 ___ Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
[Freedreno] [PATCH 2/3] a3xx: make use of software clipping when hw can't handle it
The hw clipper only handles up to 6 UCPs. If there are more than 6 UCPs, or a clip vertex, or clip distances are in use, then we must use the fallback discard-based clipping from the frag shader. Signed-off-by: Ilia Mirkin Cc: mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/freedreno/a3xx/fd3_draw.c| 3 +++ src/gallium/drivers/freedreno/a3xx/fd3_emit.c| 12 src/gallium/drivers/freedreno/a3xx/fd3_program.c | 15 +++ src/gallium/drivers/freedreno/a3xx/fd3_program.h | 3 +++ src/gallium/drivers/freedreno/ir3/ir3_shader.c | 6 ++ src/gallium/drivers/freedreno/ir3/ir3_shader.h | 1 + 6 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c index a1594b6..d26786f 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c @@ -156,6 +156,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, }; + if (fd3_needs_manual_clipping(ctx->prog.vp, ctx->rasterizer)) + emit.key.ucp_enables = ctx->rasterizer->clip_plane_enable; + fixup_shader_state(ctx, &emit.key); unsigned dirty = ctx->dirty; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index e66836b..7945184 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -571,20 +571,24 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer) ->gras_cl_clip_cntl; + uint8_t planes = ctx->rasterizer->clip_plane_enable; val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE); val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD | A3XX_GRAS_CL_CLIP_CNTL_WCOORD); - /* TODO only use if prog doesn't use clipvertex/clipdist */ - val |= 
A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES( - MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6)); + if (!emit->key.ucp_enables) + val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES( + MIN2(util_bitcount(planes), 6)); OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1); OUT_RING(ring, val); } - if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) { + if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) { uint32_t planes = ctx->rasterizer->clip_plane_enable; int count = 0; + if (emit->key.ucp_enables) + planes = 0; + while (planes && count < 6) { int i = ffs(planes) - 1; diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 485a4da..3146dc5 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -28,6 +28,7 @@ #include "pipe/p_state.h" #include "util/u_string.h" +#include "util/u_math.h" #include "util/u_memory.h" #include "util/u_inlines.h" #include "util/u_format.h" @@ -85,6 +86,20 @@ fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso) delete_shader_stateobj(so); } +bool +fd3_needs_manual_clipping(const struct fd3_shader_stateobj *so, + const struct pipe_rasterizer_state *rast) +{ + uint64_t outputs = ir3_shader_outputs(so->shader); + + return (!rast->depth_clip || + util_bitcount(rast->clip_plane_enable) > 6 || + outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) | + (1ULL << VARYING_SLOT_CLIP_DIST0) | + (1ULL << VARYING_SLOT_CLIP_DIST1))); +} + + static void emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) { diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h index b3fcc0c..b95df4c 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h @@ -44,4 +44,7 @@ void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, void fd3_prog_init(struct pipe_context *pctx); 
+bool fd3_needs_manual_clipping(const struct fd3_shader_stateobj *, + const struct pipe_rasterizer_state *); + #endif /* FD3_PROGRAM_H_ */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
[Freedreno] [PATCH 3/3] a3xx: use window scissor to simulate viewport xy clip
Unfortunately a3xx does not have a separate disable for depth clipping, so when depth clamp is enabled, we disable the whole 3d clipper logic. This in turn also gets rid of the xy clip that it would normally do. When we detect this would happen, instead we integrate the viewport into the window scissor. This may have slightly different behavior around wide points, but it's unlikely that anything depends on this. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97231 Signed-off-by: Ilia Mirkin Cc: mesa-sta...@lists.freedesktop.org --- src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 36 +++ 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 7945184..6d223c0 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -629,19 +629,35 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_RING(ring, val); } - if (dirty & FD_DIRTY_SCISSOR) { + if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER | FD_DIRTY_VIEWPORT)) { struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); + int minx = scissor->minx; + int miny = scissor->miny; + int maxx = scissor->maxx; + int maxy = scissor->maxy; + + /* Unfortunately there is no separate depth clip disable, only an all +* or nothing deal. So when we disable clipping, we must handle the +* viewport clip via scissors. 
+*/ + if (!ctx->rasterizer->depth_clip) { + struct pipe_viewport_state *vp = &ctx->viewport; + minx = MAX2(minx, (int)floorf(vp->translate[0] - fabsf(vp->scale[0]))); + miny = MAX2(miny, (int)floorf(vp->translate[1] - fabsf(vp->scale[1]))); + maxx = MIN2(maxx, (int)ceilf(vp->translate[0] + fabsf(vp->scale[0]))); + maxy = MIN2(maxy, (int)ceilf(vp->translate[1] + fabsf(vp->scale[1]))); + } OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); - OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) | - A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny)); - OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) | - A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1)); - - ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); - ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); - ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); - ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(minx) | + A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(miny)); + OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(maxx - 1) | + A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(maxy - 1)); + + ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, minx); + ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, miny); + ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, maxx); + ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, maxy); } if (dirty & FD_DIRTY_VIEWPORT) { -- 2.7.3 ___ Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno