Here is a patch that works, at least for multiarb.c.
It is against HEAD from 19 June 2003.
(I cleaned it up a bit, but it's not ready for merge:
there are still some open questions...)

1) could someone with an 8MB or 16MB Radeon check whether the
resulting max_texturesize is big enough?
(just use Mesa's glxinfo: glxinfo -l)

2) could someone try it out with a game/demo that
makes use of the 3rd TMU?

3) could someone with knowledge of the vfmt and codegen
stuff have a look at it? especially whether we need those
dummies, and what should be done in the fast-path and
with vertex3f.

best regards,
Andreas
diff -ru trunk_20030619/xc/xc/extras/Mesa/src/tnl_dd/t_dd_vbtmp.h tex3_20030619/xc/xc/extras/Mesa/src/tnl_dd/t_dd_vbtmp.h
--- trunk_20030619/xc/xc/extras/Mesa/src/tnl_dd/t_dd_vbtmp.h	Fri Apr  4 19:31:02 2003
+++ tex3_20030619/xc/xc/extras/Mesa/src/tnl_dd/t_dd_vbtmp.h	Tue Jun 24 23:39:04 2003
@@ -381,6 +381,7 @@
 		     v->pv.q0 = 1.0;
 
 		  v->pv.q1 = 0;	/* radeon */
+		  v->pv.q2 = 0; /* should we do it this way or the other below?  */
 	       } 
 	       else if (tc0_size == 4) {
 		  float rhw = 1.0 / tc0[i][3];
@@ -390,6 +391,9 @@
 	       } 
 	    } 
 	 }
+	 else if (DO_PTEX && HAVE_PTEX_VERTICES) {
+	    v->pv.q0 = 0; /* do we need this, too, for radeon ? */
+	 }
 	 if (DO_TEX1) {
 	    if (DO_PTEX) {
 	       v->pv.u1 = tc1[i][0];
@@ -403,6 +407,43 @@
 	       v->v.u1 = tc1[i][0];
 	       v->v.v1 = tc1[i][1];
 	    }
+	 }
+	 else if (DO_PTEX && HAVE_PTEX_VERTICES) {
+	    v->pv.q1 = 0; /* do we need this, too, for the radeon ? */
+	 }
+	 if (DO_TEX2) {
+	    if (DO_PTEX) {
+	       v->pv.u2 = tc2[i][0];
+	       v->pv.v2 = tc2[i][1];
+	       if (tc2_size == 4) 
+		  v->pv.q2 = tc2[i][3];
+	       else
+		  v->pv.q2 = 1.0;
+	    } 
+	    else {
+	       v->v.u2 = tc2[i][0];
+	       v->v.v2 = tc2[i][1];
+	    }
+	 }
+	 else if (DO_PTEX && HAVE_PTEX_VERTICES) {
+	    v->pv.q2 = 0; /* do we need this, too, for the radeon ? */
+	 }
+	 if (DO_TEX3) {
+	    if (DO_PTEX) {
+	       v->pv.u3 = tc3[i][0];
+	       v->pv.v3 = tc3[i][1];
+	       if (tc3_size == 4) 
+		  v->pv.q3 = tc3[i][3];
+	       else
+		  v->pv.q3 = 1.0;
+	    } 
+	    else {
+	       v->v.u3 = tc3[i][0];
+	       v->v.v3 = tc3[i][1];
+	    }
+	 }
+	 else if (DO_PTEX && HAVE_PTEX_VERTICES) {
+	    v->pv.q3 = 0; /* do we need this, too, for the radeon ? */
 	 }
       }
    }
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c	Mon Nov 25 21:20:09 2002
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_compat.c	Tue Jun 24 23:56:46 2003
@@ -77,6 +77,7 @@
    radeon_context_regs_t *ctx = &sarea->ContextState;
    radeon_texture_regs_t *tex0 = &sarea->TexState[0];
    radeon_texture_regs_t *tex1 = &sarea->TexState[1];
+   radeon_texture_regs_t *tex2 = &sarea->TexState[2];
    int i;
    int *buf = state->cmd;
 
@@ -180,14 +181,25 @@
 	 tex1->pp_border_color = buf[i++];
 	 sarea->dirty |= RADEON_UPLOAD_TEX1;
 	 break;
+      case RADEON_EMIT_PP_TXFILTER_2:
+	 tex2->pp_txfilter = buf[i++];
+	 tex2->pp_txformat = buf[i++];
+	 tex2->pp_txoffset = buf[i++];
+	 tex2->pp_txcblend = buf[i++];
+	 tex2->pp_txablend = buf[i++];
+	 tex2->pp_tfactor = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX2;
+	 break;
+      case RADEON_EMIT_PP_BORDER_COLOR_2:
+	 tex2->pp_border_color = buf[i++];
+	 sarea->dirty |= RADEON_UPLOAD_TEX2;
+	 break;
 
       case RADEON_EMIT_SE_ZBIAS_FACTOR:
 	 i++;
 	 i++;
 	 break;
 
-      case RADEON_EMIT_PP_TXFILTER_2:
-      case RADEON_EMIT_PP_BORDER_COLOR_2:
       case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
       case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
       default:
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c	Wed Jun 11 00:06:16 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_context.c	Thu Jun 26 17:15:36 2003
@@ -301,7 +301,10 @@
     */
 
    ctx = rmesa->glCtx;
-   ctx->Const.MaxTextureUnits = 2;
+   if( getenv( "RADEON_NO_3RD_TMU"))
+      ctx->Const.MaxTextureUnits = 2;
+   else
+      ctx->Const.MaxTextureUnits = RADEON_MAX_TEXTURE_UNITS; /* 3 */
 
    driCalculateMaxTextureLevels( rmesa->texture_heaps,
 				 rmesa->nr_heaps,
@@ -314,6 +317,8 @@
 				 12,
 				 GL_FALSE );
 
+   /* FIXME: we should verify that we don't get limits below the minimum requirements of OpenGL */
+
    ctx->Const.MaxTextureMaxAnisotropy = 16.0;
 
    /* No wide points.
@@ -374,13 +379,15 @@
 
    _math_matrix_ctr( &rmesa->TexGenMatrix[0] );
    _math_matrix_ctr( &rmesa->TexGenMatrix[1] );
+   _math_matrix_ctr( &rmesa->TexGenMatrix[2] );
    _math_matrix_ctr( &rmesa->tmpmat );
    _math_matrix_set_identity( &rmesa->TexGenMatrix[0] );
    _math_matrix_set_identity( &rmesa->TexGenMatrix[1] );
+   _math_matrix_set_identity( &rmesa->TexGenMatrix[2] );
    _math_matrix_set_identity( &rmesa->tmpmat );
 
    driInitExtensions( ctx, card_extensions, GL_TRUE );
-   if( rmesa->dri.drmMinor >= 9 || getenv( "RADEON_RECTANGLE_FORCE_ENABLE")) /* FIXME! a.s. */
+   if( rmesa->dri.drmMinor >= 9)
       _mesa_enable_extension( ctx, "GL_NV_texture_rectangle");
    radeonInitDriverFuncs( ctx );
    radeonInitIoctlFuncs( ctx );
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h	Wed Jun 11 00:06:16 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_context.h	Thu Jun 26 17:22:54 2003
@@ -129,7 +129,8 @@
 
 #define TEX_0   0x1
 #define TEX_1   0x2
-#define TEX_ALL 0x3
+#define TEX_2   0x4
+#define TEX_ALL 0x7
 
 typedef struct radeon_tex_obj radeonTexObj, *radeonTexObjPtr;
 
@@ -260,8 +261,8 @@
 #define TEX_STATE_SIZE              9
 
 #define TXR_CMD_0                   0 /* rectangle textures */
-#define TXR_PP_TEX_SIZE             1 /* 0x1d04, 0x1d0c for NPOT! */
-#define TXR_PP_TEX_PITCH            2 /* 0x1d08, 0x1d10 for NPOT! */
+#define TXR_PP_TEX_SIZE             1 /* for NPOT */
+#define TXR_PP_TEX_PITCH            2 /* for NPOT */
 #define TXR_STATE_SIZE              3
 
 #define ZBS_CMD_0              0
@@ -417,17 +418,17 @@
    struct radeon_state_atom vpt;
    struct radeon_state_atom tcl;
    struct radeon_state_atom msc;
-   struct radeon_state_atom tex[2];
+   struct radeon_state_atom tex[3];  /* RADEON_MAX_TEXTURE_UNITS */
+   struct radeon_state_atom txr[3];  /* RADEON_MAX_TEXTURE_UNITS, for NPOT */
    struct radeon_state_atom zbs;
    struct radeon_state_atom mtl; 
-   struct radeon_state_atom mat[5]; 
+   struct radeon_state_atom mat[6]; /* 3 + RADEON_MAX_TEXTURE_UNITS */
    struct radeon_state_atom lit[8]; /* includes vec, scl commands */
    struct radeon_state_atom ucp[6];
    struct radeon_state_atom eye; /* eye pos */
    struct radeon_state_atom grd; /* guard band clipping */
    struct radeon_state_atom fog; 
    struct radeon_state_atom glt; 
-   struct radeon_state_atom txr[2]; /* for NPOT */
 };
 
 struct radeon_state {
@@ -635,30 +636,37 @@
    GLuint prim;
 };
 
+/* FIXME: do we really need the additional 2 entries to prevent a segfault if */
+/* someone specifies GL_TEXTURE3 (esp. for the codegen-path) ? */
+#define RADEON_MAX_VERTEX_SIZE 19    /* 17 + 2 */
+
 struct radeon_vbinfo {
    GLint counter, initial_counter;
    GLint *dmaptr;
    void (*notify)( void );
    GLint vertex_size;
 
-   /* A maximum total of 15 elements per vertex:  3 floats for position, 3
+   /* A maximum total of 17 elements per vertex:  3 floats for position, 3
     * floats for normal, 4 floats for color, 4 bytes for secondary color,
-    * 2 floats for each texture unit (4 floats total).
+    * 2 floats for each texture unit (6 floats total).
     * 
-    * As soon as the 3rd TMU is supported or cube maps (or 3D textures) are
+    * As soon as cube maps (or 3D textures) are
     * supported, this value will grow.
     * 
     * The position data is never actually stored here, so 3 elements could be
     * trimmed out of the buffer.
     */
-   union { float f; int i; radeon_color_t color; } vertex[15];
+   union { float f; int i; radeon_color_t color; } vertex[RADEON_MAX_VERTEX_SIZE];
 
    GLfloat *normalptr;
    GLfloat *floatcolorptr;
    radeon_color_t *colorptr;
    GLfloat *floatspecptr;
    radeon_color_t *specptr;
-   GLfloat *texcoordptr[2];
+   GLfloat *texcoordptr[4]; /* this should be RADEON_MAX_TEXTURE_UNITS but */
+   			    /* the extra one is needed in radeon_vtxfmt_c.c and */
+   			    /* in the codegen-path if someone specifies GL_TEXTURE3 */
+   			    /* maybe we should just use mesas MAX_TEXTURE_UNITS here */
 
    GLenum *prim;		/* &ctx->Driver.CurrentExecPrimitive */
    GLuint primflags;
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c	Wed Jun 11 00:06:17 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_ioctl.c	Tue Jun 24 23:14:40 2003
@@ -422,7 +422,7 @@
 
    cmd[0].i = 0;
    cmd[0].header.cmd_type = RADEON_CMD_PACKET3;
-   cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16); /* FIXME: is this the right package? */
+   cmd[1].i = RADEON_CP_PACKET3_CNTL_BITBLT_MULTI | (5 << 16);
    cmd[2].i = (RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
 	       RADEON_GMC_DST_PITCH_OFFSET_CNTL |
 	       RADEON_GMC_BRUSH_NONE |
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c	Fri May  2 13:01:53 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_arrays.c	Thu Jun 19 14:02:42 2003
@@ -514,7 +514,7 @@
    }
 
    vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
-	  ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
+	  ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2));
       
    if (inputs & VERT_BIT_TEX0) {
       if (!rmesa->tcl.tex[0].buf)
@@ -554,6 +554,25 @@
       component[nr++] = &rmesa->tcl.tex[1];
    }
 
+   if (inputs & VERT_BIT_TEX2) {
+      if (!rmesa->tcl.tex[2].buf)
+	 emit_tex_vector( ctx, 
+			  &(rmesa->tcl.tex[2]), 
+			  (char *)VB->TexCoordPtr[2]->data,
+			  VB->TexCoordPtr[2]->size,
+			  VB->TexCoordPtr[2]->stride,
+			  count );
+	 
+      switch( VB->TexCoordPtr[2]->size ) {
+      case 4: 
+	 vtx |= RADEON_TCL_VTX_Q2;
+	 vfmt |= RADEON_CP_VC_FRMT_Q2;
+      default: 
+	 vfmt |= RADEON_CP_VC_FRMT_ST2;
+      }
+      component[nr++] = &rmesa->tcl.tex[2];
+   }
+
    if (vtx != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT]) {
       RADEON_STATECHANGE( rmesa, tcl );
       rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] = vtx;
@@ -588,4 +607,7 @@
 
    if (newinputs & VERT_BIT_TEX1)
       radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[1], __FUNCTION__ );
+
+   if (newinputs & VERT_BIT_TEX2)
+      radeonReleaseDmaRegion( rmesa, &rmesa->tcl.tex[2], __FUNCTION__ );
 }
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_vbtmp.h tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_vbtmp.h
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_vbtmp.h	Mon Nov 25 21:20:09 2002
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_vbtmp.h	Thu Jun 26 17:28:09 2003
@@ -78,14 +78,19 @@
    coord_stride = VB->ObjPtr->stride;
 
    if (DO_TEX2) {
-      const GLuint t2 = GET_TEXSOURCE(2);
-      tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data;
-      tc2_stride = VB->TexCoordPtr[t2]->stride;
-      if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
-	 if (VB->TexCoordPtr[t2]->flags & VEC_NOT_WRITEABLE) {
-	    VB->import_data( ctx, VERT_BIT_TEX2, VEC_NOT_WRITEABLE );
+      if (VB->TexCoordPtr[2]) {
+	 const GLuint t2 = GET_TEXSOURCE(2);
+	 tc2 = (GLuint (*)[4])VB->TexCoordPtr[t2]->data;
+	 tc2_stride = VB->TexCoordPtr[t2]->stride;
+	 if (DO_PTEX && VB->TexCoordPtr[t2]->size < 4) {
+	    if (VB->TexCoordPtr[t2]->flags & VEC_NOT_WRITEABLE) {
+	       VB->import_data( ctx, VERT_BIT_TEX2, VEC_NOT_WRITEABLE );
+	    }
+	    _mesa_vector4f_clean_elem( VB->TexCoordPtr[t2], VB->Count, 3 );
 	 }
-	 _mesa_vector4f_clean_elem( VB->TexCoordPtr[t2], VB->Count, 3 );
+      } else {
+	 tc2 = (GLuint (*)[4])&ctx->Current.Attrib[VERT_ATTRIB_TEX2]; /* could be anything, really */
+	 tc2_stride = 0;
       }
    }
 
@@ -263,8 +268,10 @@
 	 if (DO_TEX2) {
 	    v[0].ui = tc2[0][0];
 	    v[1].ui = tc2[0][1];
+	    if (TCL_DEBUG) fprintf(stderr, "t2: %.2f %.2f ", v[0].f, v[1].f);
 	    if (DO_PTEX) {
 	       v[2].ui = tc2[0][3];
+	       if (TCL_DEBUG) fprintf(stderr, "%.2f ", v[2].f);
 	       v += 3;
 	    } 
 	    else
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c	Fri May  2 13:01:53 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_maos_verts.c	Wed Jun 25 00:17:17 2003
@@ -51,7 +51,7 @@
 #include "radeon_maos.h"
 
 
-#define RADEON_TCL_MAX_SETUP 13
+#define RADEON_TCL_MAX_SETUP 19
 
 union emit_union { float f; GLuint ui; radeon_color_t specular; };
 
@@ -67,10 +67,10 @@
 #define DO_FOG  (IND & RADEON_CP_VC_FRMT_PKSPEC)
 #define DO_TEX0 (IND & RADEON_CP_VC_FRMT_ST0)
 #define DO_TEX1 (IND & RADEON_CP_VC_FRMT_ST1)
+#define DO_TEX2 (IND & RADEON_CP_VC_FRMT_ST2)
 #define DO_PTEX (IND & RADEON_CP_VC_FRMT_Q0)
 #define DO_NORM (IND & RADEON_CP_VC_FRMT_N0)
 
-#define DO_TEX2 0
 #define DO_TEX3 0
 
 #define GET_TEXSOURCE(n)  n
@@ -170,7 +170,7 @@
 	     RADEON_CP_VC_FRMT_ST0|		\
 	     RADEON_CP_VC_FRMT_ST1|		\
 	     RADEON_CP_VC_FRMT_N0)
-#define TAG(x) x##_rgpa_spec_st_st_n
+#define TAG(x) x##_rgba_spec_st_st_n
 #include "radeon_maos_vbtmp.h"
 
 #define IDX 10
@@ -204,7 +204,82 @@
 	     RADEON_CP_VC_FRMT_ST1|		\
 	     RADEON_CP_VC_FRMT_Q1|		\
 	     RADEON_CP_VC_FRMT_N0)
-#define TAG(x) x##_w_rgpa_spec_stq_stq_n
+#define TAG(x) x##_w_rgba_spec_stq_stq_n
+#include "radeon_maos_vbtmp.h"
+
+/* FIXME: someone should check if the ordering is right or */
+/* if we can eliminate some of them or */
+/* if it could help some programs to have some more */
+#define IDX 13
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_ST2)
+#define TAG(x) x##_rgba_st_st_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 14
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_ST2)
+#define TAG(x) x##_rgba_spec_st_st_st
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 15
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_ST2|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_st_st_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 16
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_ST2|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_rgba_spec_st_st_st_n
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 17
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_ST2|		\
+	     RADEON_CP_VC_FRMT_Q2)
+#define TAG(x) x##_rgba_stq_stq_stq
+#include "radeon_maos_vbtmp.h"
+
+#define IDX 18
+#define IND (RADEON_CP_VC_FRMT_XY|		\
+	     RADEON_CP_VC_FRMT_Z|		\
+	     RADEON_CP_VC_FRMT_W0|		\
+	     RADEON_CP_VC_FRMT_PKCOLOR|		\
+	     RADEON_CP_VC_FRMT_PKSPEC|		\
+	     RADEON_CP_VC_FRMT_ST0|		\
+	     RADEON_CP_VC_FRMT_Q0|		\
+	     RADEON_CP_VC_FRMT_ST1|		\
+	     RADEON_CP_VC_FRMT_Q1|		\
+	     RADEON_CP_VC_FRMT_ST2|		\
+	     RADEON_CP_VC_FRMT_Q2|		\
+	     RADEON_CP_VC_FRMT_N0)
+#define TAG(x) x##_w_rgba_spec_stq_stq_stq_n
 #include "radeon_maos_vbtmp.h"
 
 
@@ -227,10 +302,16 @@
    init_rgba_st_n();
    init_rgba_spec_st_st();
    init_st_st_n();
-   init_rgpa_spec_st_st_n();
+   init_rgba_spec_st_st_n();
    init_rgba_stq();
    init_rgba_stq_stq();
-   init_w_rgpa_spec_stq_stq_n();
+   init_w_rgba_spec_stq_stq_n();
+   init_rgba_st_st_st();
+   init_rgba_spec_st_st_st();
+   init_st_st_st_n();
+   init_rgba_spec_st_st_st_n();
+   init_rgba_stq_stq_stq();
+   init_w_rgba_spec_stq_stq_stq_n();
 }
 
 
@@ -240,7 +321,7 @@
    struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
    GLuint req = 0;
    GLuint vtx = (rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &
-		 ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1));
+		 ~(RADEON_TCL_VTX_Q0|RADEON_TCL_VTX_Q1|RADEON_TCL_VTX_Q2));
    int i;
    static int firsttime = 1;
 
@@ -283,6 +364,15 @@
       if (VB->TexCoordPtr[1]->size == 4) {
 	 req |= RADEON_CP_VC_FRMT_Q1;
 	 vtx |= RADEON_TCL_VTX_Q1;
+      }
+   }
+
+   if (inputs & VERT_BIT_TEX2) {
+      req |= RADEON_CP_VC_FRMT_ST2;
+
+      if (VB->TexCoordPtr[2]->size == 4) {
+	 req |= RADEON_CP_VC_FRMT_Q2;
+	 vtx |= RADEON_TCL_VTX_Q2;
       }
    }
 
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c	Wed Jun 11 00:06:17 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_sanity.c	Thu Jun 26 18:31:49 2003
@@ -136,7 +136,7 @@
 	{ 0, 5, "R200_PP_CUBIC_OFFSET_F1_5" },
    { RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0" },
    { RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },
-   { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_1" },
+   { RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },
 };
 
 struct reg_names {
@@ -177,22 +177,22 @@
    { RADEON_PP_TXFILTER_2, "RADEON_PP_TXFILTER_2" },
    { RADEON_PP_TXFORMAT_0, "RADEON_PP_TXFORMAT_0" },
    { RADEON_PP_TXFORMAT_1, "RADEON_PP_TXFORMAT_1" },
-   { RADEON_PP_TXFORMAT_2, "RADEON_PP_TXFORMAT_3" },
+   { RADEON_PP_TXFORMAT_2, "RADEON_PP_TXFORMAT_2" },
    { RADEON_PP_TXOFFSET_0, "RADEON_PP_TXOFFSET_0" },
    { RADEON_PP_TXOFFSET_1, "RADEON_PP_TXOFFSET_1" },
-   { RADEON_PP_TXOFFSET_2, "RADEON_PP_TXOFFSET_3" },
+   { RADEON_PP_TXOFFSET_2, "RADEON_PP_TXOFFSET_2" },
    { RADEON_PP_TXCBLEND_0, "RADEON_PP_TXCBLEND_0" },
    { RADEON_PP_TXCBLEND_1, "RADEON_PP_TXCBLEND_1" },
-   { RADEON_PP_TXCBLEND_2, "RADEON_PP_TXCBLEND_3" },
+   { RADEON_PP_TXCBLEND_2, "RADEON_PP_TXCBLEND_2" },
    { RADEON_PP_TXABLEND_0, "RADEON_PP_TXABLEND_0" },
    { RADEON_PP_TXABLEND_1, "RADEON_PP_TXABLEND_1" },
-   { RADEON_PP_TXABLEND_2, "RADEON_PP_TXABLEND_3" },
+   { RADEON_PP_TXABLEND_2, "RADEON_PP_TXABLEND_2" },
    { RADEON_PP_TFACTOR_0, "RADEON_PP_TFACTOR_0" },
    { RADEON_PP_TFACTOR_1, "RADEON_PP_TFACTOR_1" },
-   { RADEON_PP_TFACTOR_2, "RADEON_PP_TFACTOR_3" },
+   { RADEON_PP_TFACTOR_2, "RADEON_PP_TFACTOR_2" },
    { RADEON_PP_BORDER_COLOR_0, "RADEON_PP_BORDER_COLOR_0" },
    { RADEON_PP_BORDER_COLOR_1, "RADEON_PP_BORDER_COLOR_1" },
-   { RADEON_PP_BORDER_COLOR_2, "RADEON_PP_BORDER_COLOR_3" },
+   { RADEON_PP_BORDER_COLOR_2, "RADEON_PP_BORDER_COLOR_2" },
    { RADEON_SE_ZBIAS_FACTOR, "RADEON_SE_ZBIAS_FACTOR" },
    { RADEON_SE_ZBIAS_CONSTANT, "RADEON_SE_ZBIAS_CONSTANT" },
    { RADEON_SE_TCL_OUTPUT_VTX_FMT, "RADEON_SE_TCL_OUTPUT_VTXFMT" },
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c	Tue Jun 10 11:58:47 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_state.c	Thu Jun 26 17:45:34 2003
@@ -1959,7 +1959,7 @@
 
    rmesa->TexMatEnabled = 0;
 
-   for (unit = 0 ; unit < 2; unit++) {
+   for (unit = 0 ; unit < ctx->Const.MaxTextureUnits; unit++) {
       if (!ctx->Texture.Unit[unit]._ReallyEnabled) {
       }
       else if (ctx->TextureMatrixStack[unit].Top->type != MATRIX_IDENTITY) {
@@ -1994,7 +1994,8 @@
    tpc = (rmesa->TexMatEnabled | rmesa->TexGenEnabled);
 
    vs &= ~((0xf << RADEON_TCL_TEX_0_OUTPUT_SHIFT) |
-	   (0xf << RADEON_TCL_TEX_1_OUTPUT_SHIFT));
+	   (0xf << RADEON_TCL_TEX_1_OUTPUT_SHIFT) |
+	   (0xf << RADEON_TCL_TEX_2_OUTPUT_SHIFT));
 
    if (tpc & RADEON_TEXGEN_TEXMAT_0_ENABLE)
       vs |= RADEON_TCL_TEX_COMPUTED_TEX_0 << RADEON_TCL_TEX_0_OUTPUT_SHIFT;
@@ -2005,6 +2006,11 @@
       vs |= RADEON_TCL_TEX_COMPUTED_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT;
    else
       vs |= RADEON_TCL_TEX_INPUT_TEX_1 << RADEON_TCL_TEX_1_OUTPUT_SHIFT;
+
+   if (tpc & RADEON_TEXGEN_TEXMAT_2_ENABLE)
+      vs |= RADEON_TCL_TEX_COMPUTED_TEX_2 << RADEON_TCL_TEX_2_OUTPUT_SHIFT;
+   else
+      vs |= RADEON_TCL_TEX_INPUT_TEX_2 << RADEON_TCL_TEX_2_OUTPUT_SHIFT;
 
    if (tpc != rmesa->hw.tcl.cmd[TCL_TEXTURE_PROC_CTL] ||
        vs != rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXSEL]) {
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c	Wed Jun 11 00:06:18 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_state_init.c	Thu Jun 26 18:37:36 2003
@@ -112,10 +112,15 @@
 CHECK( always, GL_TRUE )
 CHECK( tex0, ctx->Texture.Unit[0]._ReallyEnabled )
 CHECK( tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+CHECK( tex2, ctx->Texture.Unit[2]._ReallyEnabled )
+CHECK( txr0, ctx->Texture.Unit[0]._ReallyEnabled & TEXTURE_RECT_BIT)
+CHECK( txr1, ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT)
+CHECK( txr2, ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT)
 CHECK( fog, ctx->Fog.Enabled )
 TCL_CHECK( tcl, GL_TRUE )
 TCL_CHECK( tcl_tex0, ctx->Texture.Unit[0]._ReallyEnabled )
 TCL_CHECK( tcl_tex1, ctx->Texture.Unit[1]._ReallyEnabled )
+TCL_CHECK( tcl_tex2, ctx->Texture.Unit[2]._ReallyEnabled )
 TCL_CHECK( tcl_lighting, ctx->Light.Enabled )
 TCL_CHECK( tcl_eyespace_or_lighting, ctx->_NeedEyeCoords || ctx->Light.Enabled )
 TCL_CHECK( tcl_lit0, ctx->Light.Enabled && ctx->Light.Light[0].Enabled )
@@ -134,9 +139,6 @@
 TCL_CHECK( tcl_ucp5, (ctx->Transform.ClipPlanesEnabled & 0x20) )
 TCL_CHECK( tcl_eyespace_or_fog, ctx->_NeedEyeCoords || ctx->Fog.Enabled ) 
 
-CHECK( txr0, ctx->Texture.Unit[0]._ReallyEnabled )
-CHECK( txr1, ctx->Texture.Unit[1]._ReallyEnabled )
-
 
 
 /* Initialize the context's hardware state.
@@ -230,11 +232,16 @@
    ALLOC_STATE( eye, tcl_lighting, EYE_STATE_SIZE, "EYE/eye-vector", 1 );
    ALLOC_STATE( tex[0], tex0, TEX_STATE_SIZE, "TEX/tex-0", 0 );
    ALLOC_STATE( tex[1], tex1, TEX_STATE_SIZE, "TEX/tex-1", 0 );
+   ALLOC_STATE( tex[2], tex2, TEX_STATE_SIZE, "TEX/tex-2", 0 );
+   ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
+   ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
+   ALLOC_STATE( txr[2], txr2, TXR_STATE_SIZE, "TXR/txr-2", 0 );
    ALLOC_STATE( mat[0], tcl, MAT_STATE_SIZE, "MAT/modelproject", 1 );
    ALLOC_STATE( mat[1], tcl_eyespace_or_fog, MAT_STATE_SIZE, "MAT/modelview", 1 );
    ALLOC_STATE( mat[2], tcl_eyespace_or_lighting, MAT_STATE_SIZE, "MAT/it-modelview", 1 );
    ALLOC_STATE( mat[3], tcl_tex0, MAT_STATE_SIZE, "MAT/texmat0", 1 );
    ALLOC_STATE( mat[4], tcl_tex1, MAT_STATE_SIZE, "MAT/texmat1", 1 );
+   ALLOC_STATE( mat[5], tcl_tex2, MAT_STATE_SIZE, "MAT/texmat2", 1 );
    ALLOC_STATE( ucp[0], tcl_ucp0, UCP_STATE_SIZE, "UCP/userclip-0", 1 );
    ALLOC_STATE( ucp[1], tcl_ucp1, UCP_STATE_SIZE, "UCP/userclip-1", 1 );
    ALLOC_STATE( ucp[2], tcl_ucp2, UCP_STATE_SIZE, "UCP/userclip-2", 1 );
@@ -249,8 +256,6 @@
    ALLOC_STATE( lit[5], tcl_lit5, LIT_STATE_SIZE, "LIT/light-5", 1 );
    ALLOC_STATE( lit[6], tcl_lit6, LIT_STATE_SIZE, "LIT/light-6", 1 );
    ALLOC_STATE( lit[7], tcl_lit7, LIT_STATE_SIZE, "LIT/light-7", 1 );
-   ALLOC_STATE( txr[0], txr0, TXR_STATE_SIZE, "TXR/txr-0", 0 );
-   ALLOC_STATE( txr[1], txr1, TXR_STATE_SIZE, "TXR/txr-1", 0 );
 
 
    /* Fill in the packet headers:
@@ -269,12 +274,15 @@
    rmesa->hw.tex[0].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_0);
    rmesa->hw.tex[1].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_1);
    rmesa->hw.tex[1].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_1);
+   rmesa->hw.tex[2].cmd[TEX_CMD_0] = cmdpkt(RADEON_EMIT_PP_TXFILTER_2);
+   rmesa->hw.tex[2].cmd[TEX_CMD_1] = cmdpkt(RADEON_EMIT_PP_BORDER_COLOR_2);
+   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0);
+   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1);
+   rmesa->hw.txr[2].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_2);
    rmesa->hw.zbs.cmd[ZBS_CMD_0] = cmdpkt(RADEON_EMIT_SE_ZBIAS_FACTOR);
    rmesa->hw.tcl.cmd[TCL_CMD_0] = cmdpkt(RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT);
    rmesa->hw.mtl.cmd[MTL_CMD_0] = 
       cmdpkt(RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED);
-   rmesa->hw.txr[0].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_0);
-   rmesa->hw.txr[1].cmd[TXR_CMD_0] = cmdpkt(RADEON_EMIT_PP_TEX_SIZE_1);
    rmesa->hw.grd.cmd[GRD_CMD_0] = 
       cmdscl( RADEON_SS_VERT_GUARD_CLIP_ADJ_ADDR, 1, 4 );
    rmesa->hw.fog.cmd[FOG_CMD_0] = 
@@ -284,7 +292,7 @@
    rmesa->hw.eye.cmd[EYE_CMD_0] = 
       cmdvec( RADEON_VS_EYE_VECTOR_ADDR, 1, 4 );
 
-   for (i = 0 ; i < 5; i++) {
+   for (i = 0 ; i < 6; i++) {
       rmesa->hw.mat[i].cmd[MAT_CMD_0] = 
 	 cmdvec( RADEON_VS_MATRIX_0_ADDR + i*4, 1, 16);
    }
@@ -466,7 +474,8 @@
    rmesa->hw.tcl.cmd[TCL_MATRIX_SELECT_1] = 
       ((MODEL_PROJ << RADEON_MODELPROJECT_0_SHIFT) |
        (TEXMAT_0 << RADEON_TEXMAT_0_SHIFT) |
-       (TEXMAT_1 << RADEON_TEXMAT_1_SHIFT));
+       (TEXMAT_1 << RADEON_TEXMAT_1_SHIFT) |
+       (TEXMAT_2 << RADEON_TEXMAT_2_SHIFT));
 
    rmesa->hw.tcl.cmd[TCL_UCP_VERT_BLEND_CTL] = 
       (RADEON_UCP_IN_CLIP_SPACE |
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c	Wed Jun 11 00:06:19 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_swtcl.c	Thu Jun 26 17:52:00 2003
@@ -63,8 +63,9 @@
 #define RADEON_SPEC_BIT		0x04
 #define RADEON_TEX0_BIT		0x08
 #define RADEON_TEX1_BIT		0x10
-#define RADEON_PTEX_BIT		0x20
-#define RADEON_MAX_SETUP	0x40
+#define RADEON_TEX2_BIT		0x20
+#define RADEON_PTEX_BIT		0x40
+#define RADEON_MAX_SETUP	0x80
 
 static void flush_last_swtcl_prim( radeonContextPtr rmesa  );
 static void flush_last_swtcl_prim_compat( radeonContextPtr rmesa );
@@ -115,9 +116,30 @@
 					 RADEON_CP_VC_FRMT_ST1 |	\
 					 RADEON_CP_VC_FRMT_Q1)
 
-#define TEX2_VERTEX_FORMAT 0
+#define TEX2_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0 |	\
+					 RADEON_CP_VC_FRMT_ST1 |	\
+					 RADEON_CP_VC_FRMT_ST2)
+
+/* question: there isn't a PROJ_TEX2_VERTEX_FORMAT in the template; */
+/*           is it OK to use the PROJ_TEX3_VERTEX_FORMAT instead ? */
+#define PROJ_TEX3_VERTEX_FORMAT	        (RADEON_CP_VC_FRMT_XY |		\
+					 RADEON_CP_VC_FRMT_Z |		\
+					 RADEON_CP_VC_FRMT_W0 |		\
+					 RADEON_CP_VC_FRMT_PKCOLOR |	\
+					 RADEON_CP_VC_FRMT_PKSPEC |	\
+					 RADEON_CP_VC_FRMT_ST0 |	\
+					 RADEON_CP_VC_FRMT_Q0 |         \
+					 RADEON_CP_VC_FRMT_ST1 |	\
+					 RADEON_CP_VC_FRMT_Q1 |         \
+					 RADEON_CP_VC_FRMT_ST2 |	\
+					 RADEON_CP_VC_FRMT_Q2)
+
 #define TEX3_VERTEX_FORMAT 0
-#define PROJ_TEX3_VERTEX_FORMAT 0
 
 #define DO_XYZW (IND & RADEON_XYZW_BIT)
 #define DO_RGBA (IND & RADEON_RGBA_BIT)
@@ -125,7 +147,7 @@
 #define DO_FOG  (IND & RADEON_SPEC_BIT)
 #define DO_TEX0 (IND & RADEON_TEX0_BIT)
 #define DO_TEX1 (IND & RADEON_TEX1_BIT)
-#define DO_TEX2 0
+#define DO_TEX2 (IND & RADEON_TEX2_BIT)
 #define DO_TEX3 0
 #define DO_PTEX (IND & RADEON_PTEX_BIT)
 
@@ -148,7 +170,7 @@
 #define HAVE_NOTEX_VERTICES 1
 #define HAVE_TEX0_VERTICES  1
 #define HAVE_TEX1_VERTICES  1
-#define HAVE_TEX2_VERTICES  0
+#define HAVE_TEX2_VERTICES  1
 #define HAVE_TEX3_VERTICES  0
 #define HAVE_PTEX_VERTICES  1
 
@@ -194,10 +216,20 @@
 #include "tnl_dd/t_dd_vbtmp.h"
 
 #define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT|\
+             RADEON_TEX2_BIT)
+#define TAG(x) x##_wgt0t1t2
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT|\
              RADEON_PTEX_BIT)
 #define TAG(x) x##_wgpt0t1
 #include "tnl_dd/t_dd_vbtmp.h"
 
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_TEX0_BIT|RADEON_TEX1_BIT|\
+             RADEON_TEX2_BIT|RADEON_PTEX_BIT)
+#define TAG(x) x##_wgpt0t1t2
+#include "tnl_dd/t_dd_vbtmp.h"
+
 #define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT)
 #define TAG(x) x##_wgfs
 #include "tnl_dd/t_dd_vbtmp.h"
@@ -218,10 +250,21 @@
 #include "tnl_dd/t_dd_vbtmp.h"
 
 #define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_TEX1_BIT|RADEON_TEX2_BIT)
+#define TAG(x) x##_wgfst0t1t2
+#include "tnl_dd/t_dd_vbtmp.h"
+
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
 	     RADEON_TEX0_BIT|RADEON_TEX1_BIT|RADEON_PTEX_BIT)
 #define TAG(x) x##_wgfspt0t1
 #include "tnl_dd/t_dd_vbtmp.h"
 
+#define IND (RADEON_XYZW_BIT|RADEON_RGBA_BIT|RADEON_SPEC_BIT|\
+	     RADEON_TEX0_BIT|RADEON_TEX1_BIT|RADEON_TEX2_BIT|\
+	     RADEON_PTEX_BIT)
+#define TAG(x) x##_wgfspt0t1t2
+#include "tnl_dd/t_dd_vbtmp.h"
+
 
 /***********************************************************************
  *                         Initialization 
@@ -233,19 +276,23 @@
    init_wgt0();
    init_wgpt0();
    init_wgt0t1();
+   init_wgt0t1t2();
    init_wgpt0t1();
+   init_wgpt0t1t2();
    init_wgfs();
    init_wgfst0();
    init_wgfspt0();
    init_wgfst0t1();
+   init_wgfst0t1t2();
    init_wgfspt0t1();
+   init_wgfspt0t1t2();
 }
 
 
 
 void radeonPrintSetupFlags(char *msg, GLuint flags )
 {
-   fprintf(stderr, "%s(%x): %s%s%s%s%s%s\n",
+   fprintf(stderr, "%s(%x): %s%s%s%s%s%s%s\n",
 	   msg,
 	   (int)flags,
 	   (flags & RADEON_XYZW_BIT)      ? " xyzw," : "",
@@ -253,6 +300,7 @@
 	   (flags & RADEON_SPEC_BIT)     ? " spec/fog," : "",
 	   (flags & RADEON_TEX0_BIT)     ? " tex-0," : "",
 	   (flags & RADEON_TEX1_BIT)     ? " tex-1," : "",
+	   (flags & RADEON_TEX2_BIT)     ? " tex-2," : "",
 	   (flags & RADEON_PTEX_BIT)     ? " proj-tex," : "");
 }
 
@@ -317,7 +365,10 @@
    if (ctx->Fog.Enabled || (ctx->_TriangleCaps & DD_SEPARATE_SPECULAR))
       ind |= RADEON_SPEC_BIT;
 
-   if (ctx->Texture._EnabledUnits & 0x2)
+   if (ctx->Texture._EnabledUnits & 0x4)
+      /* unit 2 enabled */
+      ind |= RADEON_TEX0_BIT|RADEON_TEX1_BIT|RADEON_TEX2_BIT;
+   else if (ctx->Texture._EnabledUnits & 0x2)
       /* unit 1 enabled */
       ind |= RADEON_TEX0_BIT|RADEON_TEX1_BIT;
    else if (ctx->Texture._EnabledUnits & 0x1)
@@ -737,6 +788,9 @@
       if (ctx->Texture.Unit[1]._ReallyEnabled)
 	 inputs |= VERT_BIT_TEX1;
 
+      if (ctx->Texture.Unit[2]._ReallyEnabled)
+	 inputs |= VERT_BIT_TEX2;
+
       if (ctx->Fog.Enabled)
 	 inputs |= VERT_BIT_FOG;
    }
@@ -857,6 +911,9 @@
 
    if (ctx->Texture.Unit[1]._ReallyEnabled & TEXTURE_RECT_BIT)
       flags |= VERT_BIT_TEX1;
+
+   if (ctx->Texture.Unit[2]._ReallyEnabled & TEXTURE_RECT_BIT)
+      flags |= VERT_BIT_TEX2;
 
    stage->inputs = flags;
    stage->outputs = flags;
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c	Fri May  2 13:01:54 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.c	Thu Jun 19 14:02:42 2003
@@ -385,6 +385,16 @@
 	 }
       }
 
+      if (ctx->Texture.Unit[2]._ReallyEnabled) {
+	 if (ctx->Texture.Unit[2].TexGenEnabled) {
+	    if (rmesa->TexGenNeedNormals[2]) {
+	       inputs |= VERT_BIT_NORMAL;
+	    }
+	 } else {
+	    inputs |= VERT_BIT_TEX2;
+	 }
+      }
+
       stage->inputs = inputs;
       stage->active = 1;
    }
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h	Wed Jun 11 00:06:19 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_tcl.h	Sun Jun 22 22:05:04 2003
@@ -61,7 +61,7 @@
 #define RADEON_TCL_FALLBACK_TEXRECT_1         0x200 /* texture rectangle */
 #define RADEON_TCL_FALLBACK_TEXRECT_2         0x400 /* texture rectangle */
 
-#define RADEON_MAX_TCL_VERTSIZE (15*4)
+#define RADEON_MAX_TCL_VERTSIZE (RADEON_MAX_VERTEX_SIZE*4) /* FIXME: is this assumption true? */
 
 #define TCL_FALLBACK( ctx, bit, mode )	radeonTclFallback( ctx, bit, mode )
 
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c	Wed Jun 11 00:06:20 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_texstate.c	Sun Jun 22 01:10:05 2003
@@ -1419,11 +1419,15 @@
       case 0:
 	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_TCL_VTX_ST0 |
 						   RADEON_TCL_VTX_Q0);
-	    break;
+	 break;
       case 1:
 	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_TCL_VTX_ST1 |
 						   RADEON_TCL_VTX_Q1);
 	 break;
+      case 2:
+	 rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] &= ~(RADEON_TCL_VTX_ST2 |
+						   RADEON_TCL_VTX_Q2);
+	 break;
       default:
 	 break;
       }
@@ -1553,10 +1557,19 @@
 
       RADEON_STATECHANGE( rmesa, tcl );
 
-      if (unit == 0)
+      switch( unit) {
+      case 0:
 	  rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_ST0;
-      else 
+	  break;
+      case 1:
 	  rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_ST1;
+	  break;
+      case 2:
+	  rmesa->hw.tcl.cmd[TCL_OUTPUT_VTXFMT] |= RADEON_TCL_VTX_ST2;
+	  break;
+      default:
+	  break;
+      }
 
       rmesa->recheck_texgen[unit] = GL_TRUE;
    }
@@ -1619,7 +1632,8 @@
    GLboolean ok;
 
    ok = (radeonUpdateTextureUnit( ctx, 0 ) &&
-	 radeonUpdateTextureUnit( ctx, 1 ));
+	 radeonUpdateTextureUnit( ctx, 1 ) &&
+	 radeonUpdateTextureUnit( ctx, 2 ));
 
    FALLBACK( rmesa, RADEON_FALLBACK_TEXTURE, !ok );
 
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c	Fri May  2 13:01:56 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt.c	Thu Jun 26 17:55:47 2003
@@ -146,6 +146,13 @@
       ctx->Current.Attrib[VERT_ATTRIB_TEX1][3] = 1.0F;
    }
 
+   if (rmesa->vb.vertex_format & RADEON_CP_VC_FRMT_ST2) {
+      ctx->Current.Attrib[VERT_ATTRIB_TEX2][0] = rmesa->vb.texcoordptr[2][0];
+      ctx->Current.Attrib[VERT_ATTRIB_TEX2][1] = rmesa->vb.texcoordptr[2][1];
+      ctx->Current.Attrib[VERT_ATTRIB_TEX2][2] = 0.0F;
+      ctx->Current.Attrib[VERT_ATTRIB_TEX2][3] = 1.0F;
+   }
+
    ctx->Driver.NeedFlush &= ~FLUSH_UPDATE_CURRENT;
 }
 
@@ -262,7 +269,7 @@
  * memory.  Could also use the counter/notify mechanism to populate
  * tmp on the fly as vertices are generated.  
  */
-static GLuint copy_dma_verts( radeonContextPtr rmesa, GLfloat (*tmp)[15] )
+static GLuint copy_dma_verts( radeonContextPtr rmesa, GLfloat (*tmp)[RADEON_MAX_VERTEX_SIZE] )
 {
    GLuint ovf, i;
    GLuint nr = (rmesa->vb.initial_counter - rmesa->vb.counter) - rmesa->vb.primlist[rmesa->vb.nrprims].start;
@@ -354,7 +361,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat tmp[3][15];
+   GLfloat tmp[3][RADEON_MAX_VERTEX_SIZE];
    GLuint i, prim;
    GLuint ind = rmesa->vb.vertex_format;
    GLuint nrverts;
@@ -430,6 +437,11 @@
 	 glMultiTexCoord2fvARB( GL_TEXTURE1_ARB, &tmp[i][offset] );
 	 offset += 2;
       }
+
+      if (ind & RADEON_CP_VC_FRMT_ST2) {
+	 glMultiTexCoord2fvARB( GL_TEXTURE2_ARB, &tmp[i][offset] );
+	 offset += 2;
+      }
       glVertex3fv( &tmp[i][0] );
    }
 
@@ -460,6 +472,9 @@
 
    if (ind & RADEON_CP_VC_FRMT_ST1) 
       glMultiTexCoord2fvARB( GL_TEXTURE1_ARB, rmesa->vb.texcoordptr[1] );
+
+   if (ind & RADEON_CP_VC_FRMT_ST2) 
+      glMultiTexCoord2fvARB( GL_TEXTURE2_ARB, rmesa->vb.texcoordptr[2] );
 }
 
 
@@ -468,7 +483,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat tmp[3][15];
+   GLfloat tmp[3][RADEON_MAX_VERTEX_SIZE];
    GLuint i, nrverts;
 
    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_PRIMS))
@@ -611,6 +626,22 @@
       }
    }
 
+   if (ctx->Texture.Unit[2]._ReallyEnabled) {
+      if (ctx->Texture.Unit[2].TexGenEnabled) {
+	 if (rmesa->TexGenNeedNormals[2]) {
+	    ind |= RADEON_CP_VC_FRMT_N0;
+	 }
+      } else {
+	 if (ctx->Current.Attrib[VERT_ATTRIB_TEX2][2] != 0.0F ||
+	     ctx->Current.Attrib[VERT_ATTRIB_TEX2][3] != 1.0) {
+	    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_FALLBACKS))
+	       fprintf(stderr, "%s: rq2\n", __FUNCTION__);
+	    return GL_FALSE;
+	 }
+	 ind |= RADEON_CP_VC_FRMT_ST2;
+      }
+   }
+
    if (RADEON_DEBUG & (DEBUG_VFMT|DEBUG_STATE))
       fprintf(stderr, "%s: format: 0x%x\n", __FUNCTION__, ind );
 
@@ -626,6 +657,11 @@
    rmesa->vb.floatspecptr = ctx->Current.Attrib[VERT_ATTRIB_COLOR1];
    rmesa->vb.texcoordptr[0] = ctx->Current.Attrib[VERT_ATTRIB_TEX0];
    rmesa->vb.texcoordptr[1] = ctx->Current.Attrib[VERT_ATTRIB_TEX1];
+   rmesa->vb.texcoordptr[2] = ctx->Current.Attrib[VERT_ATTRIB_TEX2];
+   rmesa->vb.texcoordptr[3] = ctx->Current.Attrib[VERT_ATTRIB_TEX0]; /* dummy to prevent segfault when someone */
+   								     /* specifies GL_TEXTURE3 */
+   								     /* question: could we use VERT_ATTRIB_TEX3 here instead, */
+   								     /* without risking a "broken" vtxfmt and codegen path? */
 
    /* Run through and initialize the vertex components in the order
     * the hardware understands:
@@ -681,6 +717,13 @@
       rmesa->vb.vertex_size += 2;
       rmesa->vb.texcoordptr[1][0] = ctx->Current.Attrib[VERT_ATTRIB_TEX1][0];
       rmesa->vb.texcoordptr[1][1] = ctx->Current.Attrib[VERT_ATTRIB_TEX1][1];
+   } 
+
+   if (ind & RADEON_CP_VC_FRMT_ST2) {
+      rmesa->vb.texcoordptr[2] = &rmesa->vb.vertex[rmesa->vb.vertex_size].f;
+      rmesa->vb.vertex_size += 2;
+      rmesa->vb.texcoordptr[2][0] = ctx->Current.Attrib[VERT_ATTRIB_TEX2][0];
+      rmesa->vb.texcoordptr[2][1] = ctx->Current.Attrib[VERT_ATTRIB_TEX2][1];
    } 
 
    if (rmesa->vb.installed_vertex_format != rmesa->vb.vertex_format) {
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c	Fri May  2 13:01:56 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_c.c	Thu Jun 26 17:57:22 2003
@@ -548,12 +548,16 @@
  * with 0x1F.  Masking with 0x1F and then masking with 0x01 is redundant, so
  * the subtraction has been omitted.
  */
+/* question: should we keep masking like this and give texcoordptr 4 elements,
+ *           or should we mask and verify that the index never gets bigger than 2?
+ *           The codegen stuff has the same issue.
+ */
 
 static void radeon_MultiTexCoord1fARB( GLenum target, GLfloat s  )
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[target & 1];
+   GLfloat *dest = rmesa->vb.texcoordptr[target & 3];
    dest[0] = s;
    dest[1] = 0;
 }
@@ -562,7 +566,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[target & 1];
+   GLfloat *dest = rmesa->vb.texcoordptr[target & 3];
    dest[0] = v[0];
    dest[1] = 0;
 }
@@ -571,7 +575,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[target & 1];
+   GLfloat *dest = rmesa->vb.texcoordptr[target & 3];
    dest[0] = s;
    dest[1] = t;
 }
@@ -580,7 +584,7 @@
 {
    GET_CURRENT_CONTEXT(ctx);
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
-   GLfloat *dest = rmesa->vb.texcoordptr[target & 1];
+   GLfloat *dest = rmesa->vb.texcoordptr[target & 3];
    dest[0] = v[0];
    dest[1] = v[1];
 }
@@ -736,17 +740,20 @@
 
 #define ACTIVE_ST0 RADEON_CP_VC_FRMT_ST0
 #define ACTIVE_ST1 RADEON_CP_VC_FRMT_ST1
-#define ACTIVE_ST_ALL (RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST0)
+#define ACTIVE_ST2 RADEON_CP_VC_FRMT_ST2
+#define ACTIVE_ST_ALL (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST2)
 
 /* Each codegen function should be able to be fully specified by a
  * subsetted version of rmesa->vb.vertex_format.
  */
+ /* question: this is strange... could someone explain ? */
 #define MASK_NORM    (ACTIVE_XYZW)
 #define MASK_COLOR   (MASK_NORM|ACTIVE_NORM)
 #define MASK_SPEC    (MASK_COLOR|ACTIVE_COLOR)
 #define MASK_ST0     (MASK_SPEC|ACTIVE_SPEC)
 #define MASK_ST1     (MASK_ST0|ACTIVE_ST0)
-#define MASK_ST_ALL  (MASK_ST1|ACTIVE_ST1)
+#define MASK_ST2     (MASK_ST1|ACTIVE_ST1)
+#define MASK_ST_ALL  (MASK_ST2|ACTIVE_ST2)
 #define MASK_VERTEX  (MASK_ST_ALL|ACTIVE_FPALPHA) 
 
 
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c	Fri May  2 13:01:56 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_sse.c	Thu Jun 26 18:58:10 2003
@@ -178,13 +178,21 @@
    if (RADEON_DEBUG & DEBUG_CODEGEN)
       fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
 
-   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
-      (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
-      DFN ( _sse_MultiTexCoord2fv, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
-      FIXUP(dfn->code, 18, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);	
-   } else {
-      DFN ( _sse_MultiTexCoord2fv_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
-      FIXUP(dfn->code, 14, 0x0, (int)rmesa->vb.texcoordptr);
+/* question: should we leave the RADEON_CP_VC_FRMT_ST0-only case to the */
+/* default path? Some programs may just use glMultiTexCoord2f(v)ARB for */
+/* every TMU, since GL_TEXTURE0_ARB is also an allowed target. */
+   switch (key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST2))
+   {
+      case RADEON_CP_VC_FRMT_ST0:
+      case (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1):
+      case (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST2):
+         DFN ( _sse_MultiTexCoord2fv, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+         FIXUP(dfn->code, 18, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
+         break;
+      default:
+         DFN ( _sse_MultiTexCoord2fv_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+         FIXUP(dfn->code, 14, 0x0, (int)rmesa->vb.texcoordptr);
+         break;
    }
    return dfn;
 }
@@ -197,13 +205,18 @@
    if (RADEON_DEBUG & DEBUG_CODEGEN)
       fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
 
-   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
-      (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
-      DFN ( _sse_MultiTexCoord2f, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
-      FIXUP(dfn->code, 16, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);	
-   } else {
-      DFN ( _sse_MultiTexCoord2f_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
-      FIXUP(dfn->code, 15, 0x0, (int)rmesa->vb.texcoordptr);
+   switch (key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST2))
+   {
+      case RADEON_CP_VC_FRMT_ST0:
+      case (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1):
+      case (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST2):
+         DFN ( _sse_MultiTexCoord2f, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+         FIXUP(dfn->code, 16, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);	
+         break;
+      default:
+         DFN ( _sse_MultiTexCoord2f_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+         FIXUP(dfn->code, 15, 0x0, (int)rmesa->vb.texcoordptr);
+         break;
    }
    return dfn;
 }
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c	Fri May  2 13:01:56 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxfmt_x86.c	Thu Jun 26 18:11:45 2003
@@ -75,7 +75,7 @@
    if (RADEON_DEBUG & DEBUG_CODEGEN)
       fprintf(stderr, "%s 0x%08x %d\n", __FUNCTION__, key, rmesa->vb.vertex_size );
 
-   switch (rmesa->vb.vertex_size) {
+   switch (rmesa->vb.vertex_size) {		/* FIXME: do we need something here for 3rd TMU? */
    case 4: {
 
       DFN ( _x86_Vertex3f_4, rmesa->vb.dfn_cache.Vertex3f );
@@ -127,7 +127,7 @@
    if (RADEON_DEBUG & DEBUG_CODEGEN)
       fprintf(stderr, "%s 0x%08x %d\n", __FUNCTION__, key, rmesa->vb.vertex_size );
 
-   switch (rmesa->vb.vertex_size) {
+   switch (rmesa->vb.vertex_size) {	/* FIXME: do we need something here for 3rd TMU? */
    case 6: {
 
       DFN ( _x86_Vertex3fv_6, rmesa->vb.dfn_cache.Vertex3fv );
@@ -359,14 +359,19 @@
    if (RADEON_DEBUG & DEBUG_CODEGEN)
       fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
 
-   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
-      (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
-      DFN ( _x86_MultiTexCoord2fv, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
-      FIXUP(dfn->code, 21, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
-      FIXUP(dfn->code, 27, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]+4);
-   } else {
-      DFN ( _x86_MultiTexCoord2fv_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
-      FIXUP(dfn->code, 14, 0x0, (int)rmesa->vb.texcoordptr);
+   switch (key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST2))
+   {
+      case RADEON_CP_VC_FRMT_ST0:
+      case (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1):
+      case (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST2):
+         DFN ( _x86_MultiTexCoord2fv, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+         FIXUP(dfn->code, 21, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
+         FIXUP(dfn->code, 27, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]+4);
+         break;
+      default:
+         DFN ( _x86_MultiTexCoord2fv_2, rmesa->vb.dfn_cache.MultiTexCoord2fvARB );
+         FIXUP(dfn->code, 14, 0x0, (int)rmesa->vb.texcoordptr);
+         break;
    }
    return dfn;
 }
@@ -380,18 +385,22 @@
    if (RADEON_DEBUG & DEBUG_CODEGEN)
       fprintf(stderr, "%s 0x%08x\n", __FUNCTION__, key );
 
-   if ((key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) ==
-       (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1)) {
-      DFN ( _x86_MultiTexCoord2f, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
-      FIXUP(dfn->code, 20, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
-      FIXUP(dfn->code, 26, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]+4); 
-   }
-   else {
+   switch (key & (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST2))
+   {
+      case RADEON_CP_VC_FRMT_ST0:
+      case (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1):
+      case (RADEON_CP_VC_FRMT_ST0|RADEON_CP_VC_FRMT_ST1|RADEON_CP_VC_FRMT_ST2):
+         DFN ( _x86_MultiTexCoord2f, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+         FIXUP(dfn->code, 20, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]);
+         FIXUP(dfn->code, 26, 0xdeadbeef, (int)rmesa->vb.texcoordptr[0]+4); 
+         break;
+      default:
       /* Note: this might get generated multiple times, even though the
        * actual emitted code is the same.
        */
-      DFN ( _x86_MultiTexCoord2f_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
-      FIXUP(dfn->code, 18, 0x0, (int)rmesa->vb.texcoordptr); 
+         DFN ( _x86_MultiTexCoord2f_2, rmesa->vb.dfn_cache.MultiTexCoord2fARB );
+         FIXUP(dfn->code, 18, 0x0, (int)rmesa->vb.texcoordptr); 
+         break;
    }      
    return dfn;
 }
diff -ru trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S
--- trunk_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S	Fri May  2 13:01:57 2003
+++ tex3_20030619/xc/xc/lib/GL/mesa/src/drv/radeon/radeon_vtxtmp_x86.S	Thu Jun 26 18:15:35 2003
@@ -333,12 +333,12 @@
 
 
 /*
-	MultiTexCoord2fv st0/st1
+	MultiTexCoord2fv st0 / st0,1 / st0,1,2
 */
 GLOBL( _x86_MultiTexCoord2fv )
 	movl	4(%esp), %eax
 	movl	8(%esp), %ecx
-	and	$1, %eax
+	and	$3, %eax
 	movl	(%ecx), %edx
 	shl	$3, %eax
 	movl	4(%ecx), %ecx
@@ -354,7 +354,7 @@
 GLOBL( _x86_MultiTexCoord2fv_2 )
 	movl	4(%esp,1), %eax
 	movl	8(%esp,1), %ecx
-	and	$0x1, %eax
+	and	$0x3, %eax
 	movl	0(,%eax,4), %edx
 	movl	(%ecx), %eax
 	movl	%eax, (%edx)
@@ -364,13 +364,13 @@
 GLOBL( _x86_MultiTexCoord2fv_2_end )
 
 /*
-	MultiTexCoord2f st0/st1
+	MultiTexCoord2f  st0 / st0,1 / st0,1,2
 */
 GLOBL( _x86_MultiTexCoord2f )
 	movl	4(%esp), %eax
 	movl	8(%esp), %edx
 	movl	12(%esp), %ecx
-	and	$1, %eax
+	and	$3, %eax
 	shl	$3, %eax
 	movl	%edx, 0xdeadbeef(%eax)
 	movl	%ecx, 0xdeadbeef(%eax)
@@ -384,7 +384,7 @@
 	movl	4(%esp), %eax
 	movl	8(%esp), %edx
 	movl	12(%esp,1), %ecx
-	and	$1,%eax
+	and	$3,%eax
 	movl	0(,%eax,4), %eax
 	movl	%edx, (%eax)
 	movl	%ecx, 4(%eax)
@@ -444,12 +444,12 @@
 GLOBL( _sse_Attribute2f_end )
 
 /*
-	MultiTexCoord2fv st0/st1
+	MultiTexCoord2fv st0 / st0,1 / st0,1,2
 */
 GLOBL( _sse_MultiTexCoord2fv )
 	movl	4(%esp), %eax
 	movl	8(%esp), %ecx
-	and	$1, %eax
+	and	$3, %eax
 	movlps	(%ecx), %xmm0
 	movlps	%xmm0, 0xdeadbeef(,%eax,8)
 	ret
@@ -461,7 +461,7 @@
 GLOBL( _sse_MultiTexCoord2fv_2 )
 	movl	4(%esp), %eax
 	movl	8(%esp), %ecx
-	and	$0x1, %eax
+	and	$0x3, %eax
 	movl	0(,%eax,4), %edx
 	movlps	(%ecx), %xmm0
 	movlps	%xmm0, (%edx)
@@ -469,11 +469,11 @@
 GLOBL( _sse_MultiTexCoord2fv_2_end )
 
 /*
-	MultiTexCoord2f st0/st1
+	MultiTexCoord2f st0 / st0,1 / st0,1,2
 */
 GLOBL( _sse_MultiTexCoord2f )
 	movl	4(%esp), %eax
-	and	$1, %eax
+	and	$3, %eax
 	movlps	8(%esp), %xmm0
 	movlps	%xmm0, 0xdeadbeef(,%eax,8)
 	ret
@@ -485,7 +485,7 @@
 GLOBL( _sse_MultiTexCoord2f_2 )
 	movl	4(%esp), %eax
 	movlps	8(%esp), %xmm0
-	and	$1,%eax
+	and	$3,%eax
 	movl	0(,%eax,4), %eax
 	movlps	%xmm0, (%eax)
 	ret

Reply via email to