Hi Keith, I'm attaching my current solution for the Savage driver. I'm going to commit this later today. It doesn't need any modifications of the common TNL code. It is probably not the most efficient solution though, since it requires an indirect function call for each emitted vertex. That said, I haven't noticed any performance regressions, which may be because the Savage hardware is quite slow in relation to my CPU (mobile Athlon XP 2000+).
Also see my comments below ... Am Sa, den 18.12.2004 schrieb Keith Whitwell um 0:37: > Felix Kühling wrote: > > Am Fr, den 17.12.2004 schrieb Keith Whitwell um 22:59: [snip] > >>Secondly, is the obvious counter-concern -- what happens with clipping? > >> The 'post processing' probably needs to be undone so that clipping can > >>proceed, then be re-done on the clipped vertices, right? > > > > > > Right. But that would have been broken with t_dd_vbtmp.h too. ;-) > > No, t_dd_vbtmp.h *does* undo the projection, look around line 534. OK, sorry. I missed that detail. Though I do have a question about this code: rqdst = 1.0 / qdst; dst->v.u0 *= rqdst; dst->v.v0 *= rqdst; dst->v.w *= rqdst; Shouldn't the last line say: dst->v.w *= qdst; I don't claim to understand the math behind this completely, but that would be analogous to the code around line 277. [ ... your other reply ... ] > I can think of the i810 and mga which both have this projective texture > issue *and* have the fast path (in i810render.c and mga_render.c > respectively). It (used to be?) a worthwhile optimization. I didn't know about the i810 driver. But in the MGA driver the render stage is disabled. AFAICT it has been since the transition to Mesa 4. Anyway, my solution is very driver-specific. Whoever is going to port this to i810 will have to deal with the fallback case to the _tnl_render_stage. I'd like to implement a render stage for the Savage driver at some point. This way we could reduce the number of vertices emitted to the hardware by using triangle strips and fans where appropriate. It would also minimize the impact of indirect function calls per vertex. > > Keith Regards, Felix -- | Felix Kühling <[EMAIL PROTECTED]> http://fxk.de.vu | | PGP Fingerprint: 6A3C 9566 5B30 DDED 73C3 B152 151C 5CC1 D888 E595 |
--- ./savagedma.c.~1.4.~ 2004-12-15 16:37:19.000000000 +0100 +++ ./savagedma.c 2004-12-17 21:35:56.000000000 +0100 @@ -312,8 +312,8 @@ }; void savageFakeVertices (savageContextPtr imesa, drmBufPtr buffer) { - GLuint vertexStride = imesa->vertex_size; /* stride in dwords */ - GLuint vertexSize = imesa->vertex_size; /* the real vertex size in dwords */ + GLuint vertexStride = imesa->HwVertexSize; /* stride in dwords */ + GLuint vertexSize = imesa->HwVertexSize; /* the real vertex size in dwords */ GLuint nVertices = buffer->used / (vertexStride*4); u_int32_t *data = (u_int32_t*)buffer->address; u_int32_t vertexFormat = imesa->DrawPrimitiveCmd & SAVAGE_HW_SKIPFLAGS; --- ./savagecontext.h.~1.11.~ 2004-12-17 16:06:50.000000000 +0100 +++ ./savagecontext.h 2004-12-18 01:28:44.000000000 +0100 @@ -84,6 +84,8 @@ typedef void (*savage_line_func)( savageContextPtr, savageVertex *, savageVertex * ); typedef void (*savage_point_func)( savageContextPtr, savageVertex * ); +typedef void (*savage_emit_vert_func)( u_int32_t *vb, GLuint vertex_size, + GLuint start, savageVertexPtr v ); /************************************************************** @@ -179,12 +181,14 @@ GLenum render_primitive; GLuint DrawPrimitiveCmd; + GLuint HwVertexSize; /* Fallback rasterization functions */ savage_point_func draw_point; savage_line_func draw_line; savage_tri_func draw_tri; + savage_emit_vert_func emit_vert; /* Funny mesa mirrors */ --- ./savagetris.c.~1.16.~ 2004-12-17 16:34:52.000000000 +0100 +++ ./savagetris.c 2004-12-18 16:09:09.000000000 +0100 @@ -76,36 +76,82 @@ * Emit primitives * ***********************************************************************/ +#if 0 + #if defined (USE_X86_ASM) -#define EMIT_VERT( j, vb, vertex_size, start, v ) \ +#define EMIT_VERT( vb, vertex_size, start, v ) \ do { int __tmp; \ vb += start; \ __asm__ __volatile__( "rep ; movsl" \ - : "=%c" (j), "=D" (vb), "=S" (__tmp) \ - : "0" (vertex_size-start), \ + : "=D" (vb), "=S" (__tmp) \ + : "c" (vertex_size-start), \ 
"D" ((long)vb), \ "S" ((long)&v->ui[start])); \ } while (0) #else -#define EMIT_VERT( j, vb, vertex_size, start, v ) \ +#define EMIT_VERT( vb, vertex_size, start, v ) \ do { \ + GLuint j; \ for ( j = start ; j < vertex_size ; j++ ) \ vb[j] = (v)->ui[j]; \ vb += vertex_size; \ } while (0) #endif +#else + +#define EMIT_VERT( vb, vertex_size, start, v ) \ +do { \ + imesa->emit_vert( vb, vertex_size, start, v ); \ + vb += vertex_size; \ +} while (0) + +#endif + +#if defined (USE_X86_ASM) +static __inline__ void savage_emit_vert (u_int32_t *vb, GLuint vertex_size, + GLuint start, savageVertexPtr v) { + int __tmp; + vb += start; + __asm__ __volatile__( "rep ; movsl" + : "=D" (vb), "=S" (__tmp) + : "c" (vertex_size-start), + "D" ((long)vb), + "S" ((long)&v->ui[start])); +} +#else +static __inline__ void savage_emit_vert (u_int32_t *vb, GLuint vertex_size, + GLuint start, savageVertexPtr v) { + GLuint j; + for ( j = start ; j < vertex_size ; j++ ) + vb[j] = v->ui[j]; +} +#endif + +/* Emit vertices fixing up vertices with homogenous texture coordinates. 
*/ +static void savage_emit_vert_ptex (u_int32_t *vb, GLuint vertex_size, + GLuint start, savageVertexPtr v) { + GLfloat rhw = 1.0 / v->f[vertex_size]; + savageVertex tmp; + GLuint j; + for ( j = start ; j < vertex_size ; j++ ) + tmp.f[j] = v->f[j]; + tmp.f[3] *= v->f[vertex_size]; + tmp.f[vertex_size-2] *= rhw; + tmp.f[vertex_size-1] *= rhw; + savage_emit_vert (vb, vertex_size, start, &tmp); +} + static void __inline__ savage_draw_triangle (savageContextPtr imesa, savageVertexPtr v0, savageVertexPtr v1, savageVertexPtr v2) { - GLuint vertsize = imesa->vertex_size; + GLuint vertsize = imesa->HwVertexSize; u_int32_t *vb = savageAllocDmaLow (imesa, 3*4*vertsize); - GLuint j; - EMIT_VERT (j, vb, vertsize, 0, v0); - EMIT_VERT (j, vb, vertsize, 0, v1); - EMIT_VERT (j, vb, vertsize, 0, v2); + EMIT_VERT (vb, vertsize, 0, v0); + EMIT_VERT (vb, vertsize, 0, v1); + EMIT_VERT (vb, vertsize, 0, v2); } static void __inline__ savage_draw_quad (savageContextPtr imesa, @@ -113,60 +159,57 @@ savageVertexPtr v1, savageVertexPtr v2, savageVertexPtr v3) { - GLuint vertsize = imesa->vertex_size; + GLuint vertsize = imesa->HwVertexSize; u_int32_t *vb = savageAllocDmaLow (imesa, 6*4*vertsize); - GLuint j; - EMIT_VERT (j, vb, vertsize, 0, v0); - EMIT_VERT (j, vb, vertsize, 0, v1); - EMIT_VERT (j, vb, vertsize, 0, v3); - EMIT_VERT (j, vb, vertsize, 0, v1); - EMIT_VERT (j, vb, vertsize, 0, v2); - EMIT_VERT (j, vb, vertsize, 0, v3); + EMIT_VERT (vb, vertsize, 0, v0); + EMIT_VERT (vb, vertsize, 0, v1); + EMIT_VERT (vb, vertsize, 0, v3); + EMIT_VERT (vb, vertsize, 0, v1); + EMIT_VERT (vb, vertsize, 0, v2); + EMIT_VERT (vb, vertsize, 0, v3); } static __inline__ void savage_draw_point (savageContextPtr imesa, savageVertexPtr tmp) { - GLuint vertsize = imesa->vertex_size; + GLuint vertsize = imesa->HwVertexSize; u_int32_t *vb = savageAllocDmaLow (imesa, 6*4*vertsize); const GLfloat x = tmp->v.x; const GLfloat y = tmp->v.y; const GLfloat sz = imesa->glCtx->Point._Size * .5; - GLuint j; *(float 
*)&vb[0] = x - sz; *(float *)&vb[1] = y - sz; - EMIT_VERT (j, vb, vertsize, 2, tmp); + EMIT_VERT (vb, vertsize, 2, tmp); *(float *)&vb[0] = x + sz; *(float *)&vb[1] = y - sz; - EMIT_VERT (j, vb, vertsize, 2, tmp); + EMIT_VERT (vb, vertsize, 2, tmp); *(float *)&vb[0] = x + sz; *(float *)&vb[1] = y + sz; - EMIT_VERT (j, vb, vertsize, 2, tmp); + EMIT_VERT (vb, vertsize, 2, tmp); *(float *)&vb[0] = x + sz; *(float *)&vb[1] = y + sz; - EMIT_VERT (j, vb, vertsize, 2, tmp); + EMIT_VERT (vb, vertsize, 2, tmp); *(float *)&vb[0] = x - sz; *(float *)&vb[1] = y + sz; - EMIT_VERT (j, vb, vertsize, 2, tmp); + EMIT_VERT (vb, vertsize, 2, tmp); *(float *)&vb[0] = x - sz; *(float *)&vb[1] = y - sz; - EMIT_VERT (j, vb, vertsize, 2, tmp); + EMIT_VERT (vb, vertsize, 2, tmp); } static __inline__ void savage_draw_line (savageContextPtr imesa, savageVertexPtr v0, savageVertexPtr v1 ) { - GLuint vertsize = imesa->vertex_size; + GLuint vertsize = imesa->HwVertexSize; u_int32_t *vb = savageAllocDmaLow (imesa, 6*4*vertsize); GLfloat width = imesa->glCtx->Line._Width; GLfloat dx, dy, ix, iy; - GLuint j; dx = v0->v.x - v1->v.x; dy = v0->v.y - v1->v.y; @@ -178,27 +221,27 @@ *(float *)&vb[0] = v0->v.x - ix; *(float *)&vb[1] = v0->v.y - iy; - EMIT_VERT (j, vb, vertsize, 2, v0); + EMIT_VERT (vb, vertsize, 2, v0); *(float *)&vb[0] = v1->v.x + ix; *(float *)&vb[1] = v1->v.y + iy; - EMIT_VERT (j, vb, vertsize, 2, v1); + EMIT_VERT (vb, vertsize, 2, v1); *(float *)&vb[0] = v0->v.x + ix; *(float *)&vb[1] = v0->v.y + iy; - EMIT_VERT (j, vb, vertsize, 2, v0); + EMIT_VERT (vb, vertsize, 2, v0); *(float *)&vb[0] = v0->v.x - ix; *(float *)&vb[1] = v0->v.y - iy; - EMIT_VERT (j, vb, vertsize, 2, v0); + EMIT_VERT (vb, vertsize, 2, v0); *(float *)&vb[0] = v1->v.x - ix; *(float *)&vb[1] = v1->v.y - iy; - EMIT_VERT (j, vb, vertsize, 2, v1); + EMIT_VERT (vb, vertsize, 2, v1); *(float *)&vb[0] = v1->v.x + ix; *(float *)&vb[1] = v1->v.y + iy; - EMIT_VERT (j, vb, vertsize, 2, v1); + EMIT_VERT (vb, vertsize, 2, v1); } 
/*********************************************************************** @@ -724,17 +767,19 @@ imesa->vertex_attr_count++; \ } while (0) -#define SAVAGE_EMIT_XYZ 0x0001 -#define SAVAGE_EMIT_W 0x0002 -#define SAVAGE_EMIT_C0 0x0004 -#define SAVAGE_EMIT_C1 0x0008 -#define SAVAGE_EMIT_FOG 0x0010 -#define SAVAGE_EMIT_S0 0x0020 -#define SAVAGE_EMIT_T0 0x0040 -#define SAVAGE_EMIT_ST0 0x0060 -#define SAVAGE_EMIT_S1 0x0080 -#define SAVAGE_EMIT_T1 0x0100 -#define SAVAGE_EMIT_ST1 0x0180 +#define SAVAGE_EMIT_XYZ 0x0001 +#define SAVAGE_EMIT_W 0x0002 +#define SAVAGE_EMIT_C0 0x0004 +#define SAVAGE_EMIT_C1 0x0008 +#define SAVAGE_EMIT_FOG 0x0010 +#define SAVAGE_EMIT_S0 0x0020 +#define SAVAGE_EMIT_T0 0x0040 +#define SAVAGE_EMIT_Q0 0x0080 +#define SAVAGE_EMIT_ST0 0x0060 +#define SAVAGE_EMIT_STQ0 0x00e0 +#define SAVAGE_EMIT_S1 0x0100 +#define SAVAGE_EMIT_T1 0x0200 +#define SAVAGE_EMIT_ST1 0x0300 static void savageRenderStart( GLcontext *ctx ) @@ -779,21 +824,23 @@ } if (index & _TNL_BIT_TEX(0)) { - if (VB->TexCoordPtr[0]->size > 2) { - /* projective textures are not supported by the hardware */ + if (VB->TexCoordPtr[0]->size == 4 && + (index & _TNL_BITS_TEX_ANY) == _TNL_BIT_TEX(0)) + /* Projective textures are not supported by the hardware, but + * we can fake them if only one tex unit is used. 
*/ + EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_3F_XYW, SAVAGE_EMIT_STQ0, SAVAGE_HW_NO_UV0); + else if (VB->TexCoordPtr[0]->size > 2) FALLBACK(ctx, SAVAGE_FALLBACK_PROJ_TEXTURE, GL_TRUE); - } - if (VB->TexCoordPtr[0]->size == 2) + else if (VB->TexCoordPtr[0]->size == 2) EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_2F, SAVAGE_EMIT_ST0, SAVAGE_HW_NO_UV0 ); else EMIT_ATTR( _TNL_ATTRIB_TEX0, EMIT_1F, SAVAGE_EMIT_S0, SAVAGE_HW_NO_U0 ); } if (index & _TNL_BIT_TEX(1)) { - if (VB->TexCoordPtr[1]->size > 2) { - /* projective textures are not supported by the hardware */ + if (VB->TexCoordPtr[1]->size > 2) + /* Projective textures are not supported by the hardware */ FALLBACK(ctx, SAVAGE_FALLBACK_PROJ_TEXTURE, GL_TRUE); - } - if (VB->TexCoordPtr[1]->size == 2) + else if (VB->TexCoordPtr[1]->size == 2) EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_2F, SAVAGE_EMIT_ST1, SAVAGE_HW_NO_UV1 ); else EMIT_ATTR( _TNL_ATTRIB_TEX1, EMIT_1F, SAVAGE_EMIT_S1, SAVAGE_HW_NO_U1 ); @@ -811,6 +858,18 @@ imesa->SetupIndex = setupIndex; imesa->DrawPrimitiveCmd = drawCmd; + imesa->HwVertexSize = imesa->vertex_size; + + if (setupIndex & SAVAGE_EMIT_Q0) { + /* HACK: we have to deal with two different vertex sizes + * now. The vertex setup code emits homogenous texture + * coordinates. They are converted to normal 2D coords by + * savage_emit_vert_ptex, since the hardware doesn't know + * about homogenous tex coords. */ + imesa->emit_vert = savage_emit_vert_ptex; + imesa->HwVertexSize--; + } else + imesa->emit_vert = savage_emit_vert; } if (!SAVAGE_CONTEXT(ctx)->Fallback) {