[Freedreno] [PATCH 1/3] a3xx: make sure to actually clamp depth as requested

2016-08-30 Thread Ilia Mirkin
We were previously ... not clamping. I guess this meant that everything
got clamped to 1/0, which was enough to pass the existing tests. Or
perhaps the clamping would only happen to the rasterized depth value and
not the frag shader's output depth value.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97231
Signed-off-by: Ilia Mirkin 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a3xx/a3xx.xml.h |  2 +-
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 30 ++++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h 
b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index dcb6dfb..bf787d1 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -1472,7 +1472,7 @@ static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum 
adreno_compare_func val)
 {
return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & 
A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
 }
-#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE				0x00000080
+#define A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE			0x00000080
 #define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE			0x80000000
 
 #define REG_A3XX_RB_DEPTH_CLEAR					0x00002101
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c 
b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 3466a0d..e66836b 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -31,6 +31,7 @@
 #include "util/u_memory.h"
 #include "util/u_helpers.h"
 #include "util/u_format.h"
+#include "util/u_viewport.h"
 
 #include "freedreno_resource.h"
 #include "freedreno_query_hw.h"
@@ -535,7 +536,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer 
*ring,

A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
}
 
-   if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+   if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
if (fp->writes_pos) {
val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
@@ -544,6 +545,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer 
*ring,
if (fp->has_kill) {
val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
}
+   if (!ctx->rasterizer->depth_clip) {
+   val |= A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE;
+   }
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, val);
}
@@ -647,6 +651,30 @@ fd3_emit_state(struct fd_context *ctx, struct 
fd_ringbuffer *ring,
OUT_RING(ring, 
A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
}
 
+   if (dirty & (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | 
FD_DIRTY_FRAMEBUFFER)) {
+   float zmin, zmax;
+   int depth = 24;
+   if (ctx->batch->framebuffer.zsbuf) {
+   depth = util_format_get_component_bits(
+   
pipe_surface_format(ctx->batch->framebuffer.zsbuf),
+   UTIL_FORMAT_COLORSPACE_ZS, 0);
+   }
+   util_viewport_zmin_zmax(&ctx->viewport, 
ctx->rasterizer->clip_halfz,
+   &zmin, &zmax);
+
+   OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2);
+   if (depth == 32) {
+   OUT_RING(ring, (uint32_t)(zmin * 0xffffffff));
+   OUT_RING(ring, (uint32_t)(zmax * 0xffffffff));
+   } else if (depth == 16) {
+   OUT_RING(ring, (uint32_t)(zmin * 0xffff));
+   OUT_RING(ring, (uint32_t)(zmax * 0xffff));
+   } else {
+   OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
+   OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
+   }
+   }
+
if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | 
FD_DIRTY_BLEND_DUAL)) {
struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
int nr_cbufs = pfb->nr_cbufs;
-- 
2.7.3

___
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno


[Freedreno] [PATCH 2/3] a3xx: make use of software clipping when hw can't handle it

2016-08-30 Thread Ilia Mirkin
The hw clipper only handles up to 6 UCPs. If there are more than 6 UCPs,
or a clip vertex, or clip distances are in use, then we must use the
fallback discard-based clipping from the frag shader.

Signed-off-by: Ilia Mirkin 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c    |  3 +++
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c    | 12 ++++++++----
 src/gallium/drivers/freedreno/a3xx/fd3_program.c | 15 +++++++++++++++
 src/gallium/drivers/freedreno/a3xx/fd3_program.h |  3 +++
 src/gallium/drivers/freedreno/ir3/ir3_shader.c   |  6 ++++++
 src/gallium/drivers/freedreno/ir3/ir3_shader.h   |  1 +
 6 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c 
b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index a1594b6..d26786f 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -156,6 +156,9 @@ fd3_draw_vbo(struct fd_context *ctx, const struct 
pipe_draw_info *info)
.sprite_coord_mode = ctx->rasterizer->sprite_coord_mode,
};
 
+   if (fd3_needs_manual_clipping(ctx->prog.vp, ctx->rasterizer))
+   emit.key.ucp_enables = ctx->rasterizer->clip_plane_enable;
+
fixup_shader_state(ctx, &emit.key);
 
unsigned dirty = ctx->dirty;
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c 
b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index e66836b..7945184 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -571,20 +571,24 @@ fd3_emit_state(struct fd_context *ctx, struct 
fd_ringbuffer *ring,
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
->gras_cl_clip_cntl;
+   uint8_t planes = ctx->rasterizer->clip_plane_enable;
val |= COND(fp->writes_pos, 
A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
-   /* TODO only use if prog doesn't use clipvertex/clipdist */
-   val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
-   
MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6));
+   if (!emit->key.ucp_enables)
+   val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
+   MIN2(util_bitcount(planes), 6));
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
OUT_RING(ring, val);
}
 
-   if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
+   if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) {
uint32_t planes = ctx->rasterizer->clip_plane_enable;
int count = 0;
 
+   if (emit->key.ucp_enables)
+   planes = 0;
+
while (planes && count < 6) {
int i = ffs(planes) - 1;
 
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c 
b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 485a4da..3146dc5 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -28,6 +28,7 @@
 
 #include "pipe/p_state.h"
 #include "util/u_string.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
 #include "util/u_format.h"
@@ -85,6 +86,20 @@ fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
delete_shader_stateobj(so);
 }
 
+bool
+fd3_needs_manual_clipping(const struct fd3_shader_stateobj *so,
+ const struct 
pipe_rasterizer_state *rast)
+{
+   uint64_t outputs = ir3_shader_outputs(so->shader);
+
+   return (!rast->depth_clip ||
+   util_bitcount(rast->clip_plane_enable) > 6 ||
+   outputs & ((1ULL << VARYING_SLOT_CLIP_VERTEX) |
+  (1ULL << VARYING_SLOT_CLIP_DIST0) |
+  (1ULL << VARYING_SLOT_CLIP_DIST1)));
+}
+
+
 static void
 emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
 {
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h 
b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
index b3fcc0c..b95df4c 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
@@ -44,4 +44,7 @@ void fd3_program_emit(struct fd_ringbuffer *ring, struct 
fd3_emit *emit,
 
 void fd3_prog_init(struct pipe_context *pctx);
 
+bool fd3_needs_manual_clipping(const struct fd3_shader_stateobj *,
+  const struct 
pipe_rasterizer_state *);
+
 #endif /* FD3_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c

[Freedreno] [PATCH 3/3] a3xx: use window scissor to simulate viewport xy clip

2016-08-30 Thread Ilia Mirkin
Unfortunately a3xx does not have a separate disable for depth clipping,
so when depth clamp is enabled, we disable the whole 3d clipper logic.
This in turn also gets rid of the xy clip that it would normally do.
When we detect this would happen, instead we integrate the viewport into
the window scissor. This may have slightly different behavior around
wide points, but it's unlikely that anything depends on this.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97231
Signed-off-by: Ilia Mirkin 
Cc: mesa-sta...@lists.freedesktop.org
---
 src/gallium/drivers/freedreno/a3xx/fd3_emit.c | 36 +++
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c 
b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
index 7945184..6d223c0 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -629,19 +629,35 @@ fd3_emit_state(struct fd_context *ctx, struct 
fd_ringbuffer *ring,
OUT_RING(ring, val);
}
 
-   if (dirty & FD_DIRTY_SCISSOR) {
+   if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER | 
FD_DIRTY_VIEWPORT)) {
struct pipe_scissor_state *scissor = 
fd_context_get_scissor(ctx);
+   int minx = scissor->minx;
+   int miny = scissor->miny;
+   int maxx = scissor->maxx;
+   int maxy = scissor->maxy;
+
+   /* Unfortunately there is no separate depth clip disable, only 
an all
+* or nothing deal. So when we disable clipping, we must handle 
the
+* viewport clip via scissors.
+*/
+   if (!ctx->rasterizer->depth_clip) {
+   struct pipe_viewport_state *vp = &ctx->viewport;
+   minx = MAX2(minx, (int)floorf(vp->translate[0] - 
fabsf(vp->scale[0])));
+   miny = MAX2(miny, (int)floorf(vp->translate[1] - 
fabsf(vp->scale[1])));
+   maxx = MIN2(maxx, (int)ceilf(vp->translate[0] + 
fabsf(vp->scale[0])));
+   maxy = MIN2(maxy, (int)ceilf(vp->translate[1] + 
fabsf(vp->scale[1])));
+   }
 
OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
-   OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
-   
A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
-   OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 
1) |
-   A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy 
- 1));
-
-   ctx->batch->max_scissor.minx = 
MIN2(ctx->batch->max_scissor.minx, scissor->minx);
-   ctx->batch->max_scissor.miny = 
MIN2(ctx->batch->max_scissor.miny, scissor->miny);
-   ctx->batch->max_scissor.maxx = 
MAX2(ctx->batch->max_scissor.maxx, scissor->maxx);
-   ctx->batch->max_scissor.maxy = 
MAX2(ctx->batch->max_scissor.maxy, scissor->maxy);
+   OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(minx) |
+   A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(miny));
+   OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(maxx - 1) |
+   A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(maxy - 1));
+
+   ctx->batch->max_scissor.minx = 
MIN2(ctx->batch->max_scissor.minx, minx);
+   ctx->batch->max_scissor.miny = 
MIN2(ctx->batch->max_scissor.miny, miny);
+   ctx->batch->max_scissor.maxx = 
MAX2(ctx->batch->max_scissor.maxx, maxx);
+   ctx->batch->max_scissor.maxy = 
MAX2(ctx->batch->max_scissor.maxy, maxy);
}
 
if (dirty & FD_DIRTY_VIEWPORT) {
-- 
2.7.3

___
Freedreno mailing list
Freedreno@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/freedreno