Re: Post-processing hook for vertex setup code

Felix Kühling Mon, 20 Dec 2004 08:10:07 -0800

Hi Keith,

I'm attaching my current solution for the Savage driver. I'm going to
commit this later today. It doesn't need any modifications of the common
TNL code. It is probably not the most efficient solution though, since
it requires an indirect function call for each emitted vertex. That
said, I haven't noticed any performance regressions, which may be because
the Savage hardware is quite slow in relation to my CPU (mobile Athlon
XP 2000+).

Also see my comments below ...

Am Sa, den 18.12.2004 schrieb Keith Whitwell um 0:37:
> Felix Kühling wrote:
> > Am Fr, den 17.12.2004 schrieb Keith Whitwell um 22:59:
[snip]
> >>Secondly, is the obvious counter-concern -- what happens with clipping? 
> >>  The 'post processing' probably needs to be undone so that clipping can 
> >>proceed, then be re-done on the clipped vertices, right?
> > 
> > 
> > Right. But that would have been broken with t_dd_vbtmp.h too. ;-)
> 
> No, t_dd_vbtmp.h *does* undo the projection, look around line 534.

Ok, sorry. I missed that detail. Though I do have a question about this
code:

               rqdst = 1.0 / qdst;
               dst->v.u0 *= rqdst;
               dst->v.v0 *= rqdst;
               dst->v.w *= rqdst;

Shouldn't the last line say:

               dst->v.w *= qdst;

I don't claim to understand the math behind this completely, but that
would be analogous to the code around line 277.

[ ... your other reply ... ]

> I can think of the i810 and mga which both have this projective texture 
> issue *and* have the fast path (in i810render.c and mga_render.c 
> respectively).  It (used to be?) a worthwhile optimization.

I didn't know about the i810 driver. But in the MGA driver the render
stage is disabled. AFAICT it has been since the transition to Mesa 4.
Anyway, my solution is very driver-specific. Whoever is going to port
this to i810 will have to deal with the fallback case to the
_tnl_render_stage.

I'd like to implement a render stage for the Savage driver at some
point. This way we could reduce the number of vertices emitted to the
hardware by using triangle strips and fans where appropriate. It would
also minimize the impact of indirect function calls per vertex.

> 
> Keith

Regards,
  Felix

-- 
| Felix Kühling <[EMAIL PROTECTED]>                     http://fxk.de.vu |
| PGP Fingerprint: 6A3C 9566 5B30 DDED 73C3  B152 151C 5CC1 D888 E595 |

--- ./savagedma.c.~1.4.~	2004-12-15 16:37:19.000000000 +0100
+++ ./savagedma.c	2004-12-17 21:35:56.000000000 +0100
@@ -312,8 +312,8 @@
 };
 
 void savageFakeVertices (savageContextPtr imesa, drmBufPtr buffer) {
-    GLuint vertexStride = imesa->vertex_size; /* stride in dwords */
-    GLuint vertexSize = imesa->vertex_size; /* the real vertex size in dwords */
+    GLuint vertexStride = imesa->HwVertexSize; /* stride in dwords */
+    GLuint vertexSize = imesa->HwVertexSize; /* the real vertex size in dwords */
     GLuint nVertices = buffer->used / (vertexStride*4);
     u_int32_t *data = (u_int32_t*)buffer->address;
     u_int32_t vertexFormat = imesa->DrawPrimitiveCmd & SAVAGE_HW_SKIPFLAGS;
--- ./savagecontext.h.~1.11.~	2004-12-17 16:06:50.000000000 +0100
+++ ./savagecontext.h	2004-12-18 01:28:44.000000000 +0100
@@ -84,6 +84,8 @@
 typedef void (*savage_line_func)( savageContextPtr,
 				  savageVertex *, savageVertex * );
 typedef void (*savage_point_func)( savageContextPtr, savageVertex * );
+typedef void (*savage_emit_vert_func)( u_int32_t *vb, GLuint vertex_size,
+				       GLuint start, savageVertexPtr v );
 
 
 /**************************************************************
@@ -179,12 +181,14 @@
    GLenum render_primitive;
 
    GLuint DrawPrimitiveCmd;
+   GLuint HwVertexSize;
 
    /* Fallback rasterization functions 
     */
    savage_point_func draw_point;
    savage_line_func draw_line;
    savage_tri_func draw_tri;
+   savage_emit_vert_func emit_vert;
 
     /* Funny mesa mirrors
      */
--- ./savagetris.c.~1.16.~	2004-12-17 16:34:52.000000000 +0100
+++ ./savagetris.c	2004-12-18 16:09:09.000000000 +0100
@@ -76,36 +76,82 @@
  *                    Emit primitives                                  *
  ***********************************************************************/
 
+#if 0
+
 #if defined (USE_X86_ASM)
-#define EMIT_VERT( j, vb, vertex_size, start, v )	\
+#define EMIT_VERT( vb, vertex_size, start, v )	\
 do {	int __tmp;					\
         vb += start;					\
 	__asm__ __volatile__( "rep ; movsl"		\
-			 : "=%c" (j), "=D" (vb), "=S" (__tmp)		\
-			 : "0" (vertex_size-start),	\
+			 : "=D" (vb), "=S" (__tmp)		\
+			 : "c" (vertex_size-start),	\
 			   "D" ((long)vb), 		\
 			   "S" ((long)&v->ui[start]));	\
 } while (0)
 #else
-#define EMIT_VERT( j, vb, vertex_size, start, v )	\
+#define EMIT_VERT( vb, vertex_size, start, v )	\
 do {						\
+   GLuint j;					\
    for ( j = start ; j < vertex_size ; j++ )	\
       vb[j] = (v)->ui[j];			\
    vb += vertex_size;				\
 } while (0)
 #endif
 
+#else
+
+#define EMIT_VERT( vb, vertex_size, start, v )		\
+do {							\
+   imesa->emit_vert( vb, vertex_size, start, v );	\
+   vb += vertex_size;					\
+} while (0)
+
+#endif
+
+#if defined (USE_X86_ASM)
+static __inline__ void savage_emit_vert (u_int32_t *vb, GLuint vertex_size,
+					 GLuint start, savageVertexPtr v) {
+   int __tmp;
+   vb += start;
+   __asm__ __volatile__( "rep ; movsl"
+			 : "=D" (vb), "=S" (__tmp)
+			 : "c" (vertex_size-start),
+			 "D" ((long)vb),
+			 "S" ((long)&v->ui[start]));
+}
+#else
+static __inline__ void savage_emit_vert (u_int32_t *vb, GLuint vertex_size,
+					 GLuint start, savageVertexPtr v) {
+   GLuint j;
+   for ( j = start ; j < vertex_size ; j++ )
+      vb[j] = v->ui[j];
+}
+#endif
+
+/* Emit vertices fixing up vertices with homogenous texture coordinates. */
+static void savage_emit_vert_ptex (u_int32_t *vb, GLuint vertex_size,
+				   GLuint start, savageVertexPtr v) {
+   GLfloat rhw = 1.0 / v->f[vertex_size];
+   savageVertex tmp;
+   GLuint j;
+   for ( j = start ; j < vertex_size ; j++ )
+      tmp.f[j] = v->f[j];
+   tmp.f[3] *= v->f[vertex_size];
+   tmp.f[vertex_size-2] *= rhw;
+   tmp.f[vertex_size-1] *= rhw;
+   savage_emit_vert (vb, vertex_size, start, &tmp);
+}
+
 static void __inline__ savage_draw_triangle (savageContextPtr imesa,
 					     savageVertexPtr v0,
 					     savageVertexPtr v1,
 					     savageVertexPtr v2) {
-   GLuint vertsize = imesa->vertex_size;
+   GLuint vertsize = imesa->HwVertexSize;
    u_int32_t *vb = savageAllocDmaLow (imesa, 3*4*vertsize);
-   GLuint j;
 
-   EMIT_VERT (j, vb, vertsize, 0, v0);
-   EMIT_VERT (j, vb, vertsize, 0, v1);
-   EMIT_VERT (j, vb, vertsize, 0, v2);
+   EMIT_VERT (vb, vertsize, 0, v0);
+   EMIT_VERT (vb, vertsize, 0, v1);
+   EMIT_VERT (vb, vertsize, 0, v2);
 }
 
 static void __inline__ savage_draw_quad (savageContextPtr imesa,
@@ -113,60 +159,57 @@
 					 savageVertexPtr v1,
 					 savageVertexPtr v2,
 					 savageVertexPtr v3) {
-   GLuint vertsize = imesa->vertex_size;
+   GLuint vertsize = imesa->HwVertexSize;
    u_int32_t *vb = savageAllocDmaLow (imesa, 6*4*vertsize);
-   GLuint j;
 
-   EMIT_VERT (j, vb, vertsize, 0, v0);
-   EMIT_VERT (j, vb, vertsize, 0, v1);
-   EMIT_VERT (j, vb, vertsize, 0, v3);
-   EMIT_VERT (j, vb, vertsize, 0, v1);
-   EMIT_VERT (j, vb, vertsize, 0, v2);
-   EMIT_VERT (j, vb, vertsize, 0, v3);
+   EMIT_VERT (vb, vertsize, 0, v0);
+   EMIT_VERT (vb, vertsize, 0, v1);
+   EMIT_VERT (vb, vertsize, 0, v3);
+   EMIT_VERT (vb, vertsize, 0, v1);
+   EMIT_VERT (vb, vertsize, 0, v2);
+   EMIT_VERT (vb, vertsize, 0, v3);
 }
 
 static __inline__ void savage_draw_point (savageContextPtr imesa,
 					  savageVertexPtr tmp) {
-   GLuint vertsize = imesa->vertex_size;
+   GLuint vertsize = imesa->HwVertexSize;
    u_int32_t *vb = savageAllocDmaLow (imesa, 6*4*vertsize);
    const GLfloat x = tmp->v.x;
    const GLfloat y = tmp->v.y;
    const GLfloat sz = imesa->glCtx->Point._Size * .5;
-   GLuint j;
 
    *(float *)&vb[0] = x - sz;
    *(float *)&vb[1] = y - sz;
-   EMIT_VERT (j, vb, vertsize, 2, tmp);
+   EMIT_VERT (vb, vertsize, 2, tmp);
 
    *(float *)&vb[0] = x + sz;
    *(float *)&vb[1] = y - sz;
-   EMIT_VERT (j, vb, vertsize, 2, tmp);
+   EMIT_VERT (vb, vertsize, 2, tmp);
 
    *(float *)&vb[0] = x + sz;
    *(float *)&vb[1] = y + sz;
-   EMIT_VERT (j, vb, vertsize, 2, tmp);
+   EMIT_VERT (vb, vertsize, 2, tmp);
 
    *(float *)&vb[0] = x + sz;
    *(float *)&vb[1] = y + sz;
-   EMIT_VERT (j, vb, vertsize, 2, tmp);
+   EMIT_VERT (vb, vertsize, 2, tmp);
 
    *(float *)&vb[0] = x - sz;
    *(float *)&vb[1] = y + sz;
-   EMIT_VERT (j, vb, vertsize, 2, tmp);
+   EMIT_VERT (vb, vertsize, 2, tmp);
 
    *(float *)&vb[0] = x - sz;
    *(float *)&vb[1] = y - sz;
-   EMIT_VERT (j, vb, vertsize, 2, tmp);
+   EMIT_VERT (vb, vertsize, 2, tmp);
 }
 
 static __inline__ void savage_draw_line (savageContextPtr imesa,
 					 savageVertexPtr v0,
 					 savageVertexPtr v1 ) {
-   GLuint vertsize = imesa->vertex_size;
+   GLuint vertsize = imesa->HwVertexSize;
    u_int32_t *vb = savageAllocDmaLow (imesa, 6*4*vertsize);
    GLfloat width = imesa->glCtx->Line._Width;
    GLfloat dx, dy, ix, iy;
-   GLuint j;
 
    dx = v0->v.x - v1->v.x;
    dy = v0->v.y - v1->v.y;
@@ -178,27 +221,27 @@
 
    *(float *)&vb[0] = v0->v.x - ix;
    *(float *)&vb[1] = v0->v.y - iy;
-   EMIT_VERT (j, vb, vertsize, 2, v0);
+   EMIT_VERT (vb, vertsize, 2, v0);
 
    *(float *)&vb[0] = v1->v.x + ix;
    *(float *)&vb[1] = v1->v.y + iy;
-   EMIT_VERT (j, vb, vertsize, 2, v1);
+   EMIT_VERT (vb, vertsize, 2, v1);
 
    *(float *)&vb[0] = v0->v.x + ix;
    *(float *)&vb[1] = v0->v.y + iy;
-   EMIT_VERT (j, vb, vertsize, 2, v0);
+   EMIT_VERT (vb, vertsize, 2, v0);
 
    *(float *)&vb[0] = v0->v.x - ix;
    *(float *)&vb[1] = v0->v.y - iy;
-   EMIT_VERT (j, vb, vertsize, 2, v0);
+   EMIT_VERT (vb, vertsize, 2, v0);
 
    *(float *)&vb[0] = v1->v.x - ix;
    *(float *)&vb[1] = v1->v.y - iy;
-   EMIT_VERT (j, vb, vertsize, 2, v1);
+   EMIT_VERT (vb, vertsize, 2, v1);
 
    *(float *)&vb[0] = v1->v.x + ix;
    *(float *)&vb[1] = v1->v.y + iy;
-   EMIT_VERT (j, vb, vertsize, 2, v1);
+   EMIT_VERT (vb, vertsize, 2, v1);
 } 
  
 /***********************************************************************
@@ -724,17 +767,19 @@
    imesa->vertex_attr_count++;						\
 } while (0)
 
-#define SAVAGE_EMIT_XYZ 0x0001
-#define SAVAGE_EMIT_W   0x0002
-#define SAVAGE_EMIT_C0  0x0004
-#define SAVAGE_EMIT_C1  0x0008
-#define SAVAGE_EMIT_FOG 0x0010
-#define SAVAGE_EMIT_S0  0x0020
-#define SAVAGE_EMIT_T0  0x0040
-#define SAVAGE_EMIT_ST0 0x0060
-#define SAVAGE_EMIT_S1  0x0080
-#define SAVAGE_EMIT_T1  0x0100
-#define SAVAGE_EMIT_ST1 0x0180
+#define SAVAGE_EMIT_XYZ  0x0001
+#define SAVAGE_EMIT_W    0x0002
+#define SAVAGE_EMIT_C0   0x0004
+#define SAVAGE_EMIT_C1   0x0008
+#define SAVAGE_EMIT_FOG  0x0010
+#define SAVAGE_EMIT_S0   0x0020
+#define SAVAGE_EMIT_T0   0x0040
+#define SAVAGE_EMIT_Q0   0x0080
+#define SAVAGE_EMIT_ST0  0x0060
+#define SAVAGE_EMIT_STQ0 0x00e0
+#define SAVAGE_EMIT_S1   0x0100
+#define SAVAGE_EMIT_T1   0x0200
+#define SAVAGE_EMIT_ST1  0x0300
 
 
 static void savageRenderStart( GLcontext *ctx )
@@ -779,21 +824,23 @@
    }
 
    if (index & _TNL_BIT_TEX(0)) {
-      if (VB->TexCoordPtr[0]->size > 2) {
-	 /* projective textures are not supported by the hardware */
+      if (VB->TexCoordPtr[0]->size == 4 &&
+	  (index & _TNL_BITS_TEX_ANY) == _TNL_BIT_TEX(0))
+	 /* Projective textures are not supported by the hardware, but
+	  * we can fake them if only one tex unit is used. */
+	 EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_3F_XYW, SAVAGE_EMIT_STQ0, SAVAGE_HW_NO_UV0);
+      else if (VB->TexCoordPtr[0]->size > 2)
 	 FALLBACK(ctx, SAVAGE_FALLBACK_PROJ_TEXTURE, GL_TRUE);
-      }
-      if (VB->TexCoordPtr[0]->size == 2)
+      else if (VB->TexCoordPtr[0]->size == 2)
 	 EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_2F, SAVAGE_EMIT_ST0, SAVAGE_HW_NO_UV0 );
       else
 	 EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_1F, SAVAGE_EMIT_S0, SAVAGE_HW_NO_U0 );
    }
    if (index & _TNL_BIT_TEX(1)) {
-      if (VB->TexCoordPtr[1]->size > 2) {
-	 /* projective textures are not supported by the hardware */
+      if (VB->TexCoordPtr[1]->size > 2)
+	 /* Projective textures are not supported by the hardware */
 	 FALLBACK(ctx, SAVAGE_FALLBACK_PROJ_TEXTURE, GL_TRUE);
-      }
-      if (VB->TexCoordPtr[1]->size == 2)
+      else if (VB->TexCoordPtr[1]->size == 2)
 	 EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_2F, SAVAGE_EMIT_ST1, SAVAGE_HW_NO_UV1 );
       else
 	 EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_1F, SAVAGE_EMIT_S1, SAVAGE_HW_NO_U1 );
@@ -811,6 +858,18 @@
       imesa->SetupIndex = setupIndex;
 
       imesa->DrawPrimitiveCmd = drawCmd;
+      imesa->HwVertexSize = imesa->vertex_size;
+
+      if (setupIndex & SAVAGE_EMIT_Q0) {
+	 /* HACK: we have to deal with two different vertex sizes
+	  * now. The vertex setup code emits homogenous texture
+	  * coordinates. They are converted to normal 2D coords by
+	  * savage_emit_vert_ptex, since the hardware doesn't know
+	  * about homogenous tex coords. */
+	 imesa->emit_vert = savage_emit_vert_ptex;
+	 imesa->HwVertexSize--;
+      } else
+	 imesa->emit_vert = savage_emit_vert;
    }
 
    if (!SAVAGE_CONTEXT(ctx)->Fallback) {

Re: Post-processing hook for vertex setup code

Reply via email to