Just wanted to say I love this set of patches...  I always hoped that
someone would be able to find a CPU-friendly path through triangle
setup; it looks like you're definitely on the right track...

Keith

On Sun, 2009-01-04 at 19:52 -0800, Jonathan Adamczewski wrote:
> Put setup.v{min,mid,max,provoke} into a union with qword vertex_headers.
> 
> Rewrite vertex sorting to more efficiently handle the packed data items.
> 
> Reduces spu_tri.o by ~128 bytes.
> ---
>  src/gallium/drivers/cell/spu/spu_tri.c |   97 ++++++++++++++------------------
>  1 files changed, 42 insertions(+), 55 deletions(-)
> 
> diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
> index 30531d3..c7ff75c 100644
> --- a/src/gallium/drivers/cell/spu/spu_tri.c
> +++ b/src/gallium/drivers/cell/spu/spu_tri.c
> @@ -103,10 +103,15 @@ struct setup_stage {
>      * turn.  Currently fixed at 4 floats, but should change in time.
>      * Codegen will help cope with this.
>      */
> -   const struct vertex_header *vmax;
> -   const struct vertex_header *vmid;
> -   const struct vertex_header *vmin;
> -   const struct vertex_header *vprovoke;
> +   union {
> +      struct {
> +         const struct vertex_header *vmin;
> +         const struct vertex_header *vmid;
> +         const struct vertex_header *vmax;
> +         const struct vertex_header *vprovoke;
> +      };
> +      qword vertex_headers;
> +   };
>  
>     struct edge ebot;
>     struct edge etop;
> @@ -452,55 +457,39 @@ setup_sort_vertices(const struct vertex_header *v0,
>  
>     /* determine bottom to top order of vertices */
>     {
> -      float y0 = spu_extract(v0->data[0], 1);
> -      float y1 = spu_extract(v1->data[0], 1);
> -      float y2 = spu_extract(v2->data[0], 1);
> -      if (y0 <= y1) {
> -      if (y1 <= y2) {
> -         /* y0<=y1<=y2 */
> -         setup.vmin = v0;   
> -         setup.vmid = v1;   
> -         setup.vmax = v2;
> -            sign = -1.0f;
> -      }
> -      else if (y2 <= y0) {
> -         /* y2<=y0<=y1 */
> -         setup.vmin = v2;   
> -         setup.vmid = v0;   
> -         setup.vmax = v1;   
> -            sign = -1.0f;
> -      }
> -      else {
> -         /* y0<=y2<=y1 */
> -         setup.vmin = v0;   
> -         setup.vmid = v2;   
> -         setup.vmax = v1;  
> -            sign = 1.0f;
> -      }
> -      }
> -      else {
> -      if (y0 <= y2) {
> -         /* y1<=y0<=y2 */
> -         setup.vmin = v1;   
> -         setup.vmid = v0;   
> -         setup.vmax = v2;  
> -            sign = 1.0f;
> -      }
> -      else if (y2 <= y1) {
> -         /* y2<=y1<=y0 */
> -         setup.vmin = v2;   
> -         setup.vmid = v1;   
> -         setup.vmax = v0;  
> -            sign = 1.0f;
> -      }
> -      else {
> -         /* y1<=y2<=y0 */
> -         setup.vmin = v1;   
> -         setup.vmid = v2;   
> -         setup.vmax = v0;
> -            sign = -1.0f;
> -      }
> -      }
> +      /* A table of shuffle patterns for putting vertex_header pointers into
> +         correct order.  Quite magical. */
> +      const vec_uchar16 sort_order_patterns[] = {
> +         SHUFFLE4(A,B,C,C),
> +         SHUFFLE4(C,A,B,C),
> +         SHUFFLE4(A,C,B,C),
> +         SHUFFLE4(B,C,A,C),
> +         SHUFFLE4(B,A,C,C),
> +         SHUFFLE4(C,B,A,C) };
> +
> +      /* The vertex_header pointers, packed for easy shuffling later */
> +      const vec_uint4 vs = {(unsigned)v0, (unsigned)v1, (unsigned)v2};
> +
> +      /* Collate y values into two vectors for comparison.
> +         Using only one shuffle constant! ;) */
> +      const vec_float4 y_02_ = spu_shuffle(v0->data[0], v2->data[0], SHUFFLE4(0,B,b,C));
> +      const vec_float4 y_10_ = spu_shuffle(v1->data[0], v0->data[0], SHUFFLE4(0,B,b,C));
> +      const vec_float4 y_012 = spu_shuffle(y_02_, v1->data[0], SHUFFLE4(0,B,b,C));
> +      const vec_float4 y_120 = spu_shuffle(y_10_, v2->data[0], SHUFFLE4(0,B,b,C));
> +
> +      /* Perform comparison: {y0,y1,y2} > {y1,y2,y0} */
> +      const vec_uint4 compare = spu_cmpgt(y_012, y_120);
> +      /* Compress the result of the comparison into 4 bits */
> +      const vec_uint4 gather = spu_gather(compare);
> +      /* Subtract one to attain the index into the LUT.  Magical. */
> +      const unsigned int index = spu_extract(gather, 0) - 1;
> +
> +      /* Load the appropriate pattern and construct the desired vector. */
> +      setup.vertex_headers = (qword)spu_shuffle(vs, vs, sort_order_patterns[index]);
> +
> +      /* Using the result of the comparison, set sign.
> +         Very magical. */
> +      sign = ((si_to_uint(si_cntb((qword)gather)) == 2) ? 1.0f : -1.0f);
>     }
>  
>     /* Check if triangle is completely outside the tile bounds */
> @@ -543,8 +532,6 @@ setup_sort_vertices(const struct vertex_header *v0,
>     setup.facing = (area * sign > 0.0f)
>        ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW);
>  
> -   setup.vprovoke = v2;
> -
>     return TRUE;
>  }
>  


------------------------------------------------------------------------------
_______________________________________________
Mesa3d-dev mailing list
Mesa3d-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev

Reply via email to