I've rewritten the patches and split them into the following series:

0001-r300-Print-reg-address-when-debugging-is-enabled.patch
0002-r300-silence-valgrind.patch
0003-r300-add-few-macros-for-RS-setup.patch
0004-r300-rewrite-and-hopefully-simplify-RS-setup.patch
0005-r300-route-fog-coord-and-W-pos-correctly.patch
0006-r300-enable-EXT_fog_coord-extension.patch

One thing still doesn't work for me on rs690: fragment depth write support.
It is not a regression from this series; it didn't work before these patches either.

Any test reports and comments are appreciated.

Maciej
From aae627860f6543c4af48819b9fb0a45b494a65b3 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Wed, 4 Feb 2009 13:37:57 +0100
Subject: [PATCH] r300: Print reg address when debugging is enabled

---
 src/mesa/drivers/dri/r300/r300_cmdbuf.c |   10 ++++++----
 1 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_cmdbuf.c b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
index c9e1dfe..12b34fd 100644
--- a/src/mesa/drivers/dri/r300/r300_cmdbuf.c
+++ b/src/mesa/drivers/dri/r300/r300_cmdbuf.c
@@ -130,16 +130,18 @@ int r300FlushCmdBuf(r300ContextPtr r300, const char *caller)
 
 static void r300PrintStateAtom(r300ContextPtr r300, struct r300_state_atom *state)
 {
-	int i;
+	int i, reg;
 	int dwords = (*state->check) (r300, state);
 
 	fprintf(stderr, "  emit %s %d/%d\n", state->name, dwords,
 		state->cmd_size);
 
 	if (RADEON_DEBUG & DEBUG_VERBOSE) {
-		for (i = 0; i < dwords; i++) {
-			fprintf(stderr, "      %s[%d]: %08x\n",
-				state->name, i, state->cmd[i]);
+		reg = (state->cmd[0] & 0xffff0000) >> 16;
+		for (i = 1; i < dwords; i++) {
+			fprintf(stderr, "      %s[%d]: 0x%04x = %08x\n",
+				state->name, i, reg, state->cmd[i]);
+			reg += 4;
 		}
 	}
 }
-- 
1.5.6.3

From f85743bcd293fdfbcee31f6defa7177897e778ab Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Sun, 8 Feb 2009 02:28:27 +0100
Subject: [PATCH] r300: silence valgrind

---
 src/mesa/drivers/dri/r300/radeon_ioctl.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/radeon_ioctl.c b/src/mesa/drivers/dri/r300/radeon_ioctl.c
index 36502eb..724016d 100644
--- a/src/mesa/drivers/dri/r300/radeon_ioctl.c
+++ b/src/mesa/drivers/dri/r300/radeon_ioctl.c
@@ -78,7 +78,7 @@ uint32_t radeonGetAge(radeonContextPtr radeon)
 {
 	drm_radeon_getparam_t gp;
 	int ret;
-	uint32_t age;
+	uint32_t age = 0;
 
 	gp.param = RADEON_PARAM_LAST_CLEAR;
 	gp.value = (int *)&age;
-- 
1.5.6.3

From 5624334047a446d053d83f6ebc3d0d8803acf233 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Wed, 4 Feb 2009 13:40:48 +0100
Subject: [PATCH] r300: add few macros for RS setup

---
 src/mesa/drivers/dri/r300/r300_reg.h |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_reg.h b/src/mesa/drivers/dri/r300/r300_reg.h
index 872a33e..8f1a663 100644
--- a/src/mesa/drivers/dri/r300/r300_reg.h
+++ b/src/mesa/drivers/dri/r300/r300_reg.h
@@ -1224,6 +1224,10 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define R500_RS_INST_COL_ADDR_SHIFT			18
 #define R500_RS_INST_TEX_ADJ				(1 << 25)
 #define R500_RS_INST_W_CN				(1 << 26)
+#define R500_RS_INST_TEX_ID(x)				((x) << R500_RS_INST_TEX_ID_SHIFT)
+#define R500_RS_INST_TEX_ADDR(x)			((x) << R500_RS_INST_TEX_ADDR_SHIFT)
+#define R500_RS_INST_COL_ID(x)				((x) << R500_RS_INST_COL_ID_SHIFT)
+#define R500_RS_INST_COL_ADDR(x)			((x) << R500_RS_INST_COL_ADDR_SHIFT)
 
 /* These DWORDs control how vertex data is routed into fragment program
  * registers, after interpolators.
@@ -1239,9 +1243,11 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #	define R300_RS_INST_TEX_ID(x)  		((x) << 0)
 #	define R300_RS_INST_TEX_CN_WRITE 	(1 << 3)
 #	define R300_RS_INST_TEX_ADDR_SHIFT 	6
+#	define R300_RS_INST_TEX_ADDR(x)		((x) << R300_RS_INST_TEX_ADDR_SHIFT)
 #	define R300_RS_INST_COL_ID(x)		((x) << 11)
 #	define R300_RS_INST_COL_CN_WRITE	(1 << 14)
 #	define R300_RS_INST_COL_ADDR_SHIFT	17
+#	define R300_RS_INST_COL_ADDR(x)		((x) << R300_RS_INST_COL_ADDR_SHIFT)
 #	define R300_RS_INST_TEX_ADJ		(1 << 22)
 #	define R300_RS_COL_BIAS_UNUSED_SHIFT    23
 
-- 
1.5.6.3

From e12ee8d0ad9a90d8b7183c31dc77e0cb9c86fb61 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Sun, 15 Feb 2009 20:57:32 +0100
Subject: [PATCH] r300: route fog coord and W pos correctly

---
 src/mesa/drivers/dri/r300/r300_emit.c  |   15 +++++--
 src/mesa/drivers/dri/r300/r300_swtcl.c |   67 ++++++++++++++++++++++++++++++--
 2 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_emit.c b/src/mesa/drivers/dri/r300/r300_emit.c
index 80bd338..28c3157 100644
--- a/src/mesa/drivers/dri/r300/r300_emit.c
+++ b/src/mesa/drivers/dri/r300/r300_emit.c
@@ -314,10 +314,6 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
 		    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_2_PRESENT |
 		    R300_VAP_OUTPUT_VTX_FMT_0__COLOR_3_PRESENT;
 
-#if 0
-	if (OutputsWritten & (1 << VERT_RESULT_FOGC)) ;
-#endif
-
 	if (OutputsWritten & (1 << VERT_RESULT_PSIZ))
 		ret |= R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
 
@@ -326,12 +322,21 @@ GLuint r300VAPOutputCntl0(GLcontext * ctx, GLuint OutputsWritten)
 
 GLuint r300VAPOutputCntl1(GLcontext * ctx, GLuint OutputsWritten)
 {
-	GLuint i, ret = 0;
+	GLuint i, ret = 0, first_free_texcoord = 0;
 
 	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
 		if (OutputsWritten & (1 << (VERT_RESULT_TEX0 + i))) {
 			ret |= (4 << (3 * i));
+			++first_free_texcoord;
+		}
+	}
+
+	if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+		if (first_free_texcoord > 8) {
+			fprintf(stderr, "\tout of free texcoords to write fog coord\n");
+			_mesa_exit(-1);
 		}
+		ret |= 4 << (3 * first_free_texcoord);
 	}
 
 	return ret;
diff --git a/src/mesa/drivers/dri/r300/r300_swtcl.c b/src/mesa/drivers/dri/r300/r300_swtcl.c
index b6e7ce1..f77f07f 100644
--- a/src/mesa/drivers/dri/r300/r300_swtcl.c
+++ b/src/mesa/drivers/dri/r300/r300_swtcl.c
@@ -85,15 +85,15 @@ static void r300SetVertexFormat( GLcontext *ctx )
 	struct vertex_buffer *VB = &tnl->vb;
 	DECLARE_RENDERINPUTS(index_bitset);
 	GLuint InputsRead = 0, OutputsWritten = 0;
-	int vap_fmt_0 = 0;
-	int vap_vte_cntl = 0;
+	int vap_fmt_1 = 0;
 	int offset = 0;
 	int vte = 0;
+	int fog_id;
 	GLint inputs[VERT_ATTRIB_MAX];
 	GLint tab[VERT_ATTRIB_MAX];
 	int swizzle[VERT_ATTRIB_MAX][4];
 	GLuint i, nr;
-	GLuint sz, vap_fmt_1 = 0;
+	GLuint sz;
 
 	DECLARE_RENDERINPUTS(render_inputs_bitset);
 	RENDERINPUTS_COPY(render_inputs_bitset, tnl->render_inputs_bitset);
@@ -132,7 +132,6 @@ static void r300SetVertexFormat( GLcontext *ctx )
 
 	if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POINTSIZE )) {
 		EMIT_ATTR( _TNL_ATTRIB_POINTSIZE, EMIT_1F );
-		vap_fmt_0 |=  R300_VAP_OUTPUT_VTX_FMT_0__PT_SIZE_PRESENT;
 		offset += 1;
 	}
 
@@ -154,6 +153,33 @@ static void r300SetVertexFormat( GLcontext *ctx )
 		OutputsWritten |= 1 << VERT_RESULT_COL1;
 	}
 
+	fog_id = -1;
+	if (RENDERINPUTS_TEST(index_bitset, _TNL_ATTRIB_FOG)) {
+		/* find first free tex coord slot */
+		if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+			int i;
+			for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+				if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
+					fog_id = i;
+					break;
+				}
+			}
+		} else {
+			fog_id = 0;
+		}
+		
+		if (fog_id == -1) {
+			fprintf(stderr, "\tout of free texcoords to do fog\n");
+			_mesa_exit(-1);
+		}
+
+		sz = VB->AttribPtr[VERT_ATTRIB_FOG]->size;
+		EMIT_ATTR( _TNL_ATTRIB_FOG, EMIT_1F + sz - 1);
+		InputsRead |= 1 << VERT_ATTRIB_FOG;
+		OutputsWritten |= 1 << VERT_RESULT_FOGC;
+		vap_fmt_1 |= sz << (3 * fog_id);
+	}
+
 	if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
 		int i;
 
@@ -168,6 +194,37 @@ static void r300SetVertexFormat( GLcontext *ctx )
 		}
 	}
 
+	/* RS can't put fragment position on the pixel stack, so stuff it in texcoord if needed */
+	if (RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_POS) && (ctx->FragmentProgram._Current->Base.InputsRead & FRAG_BIT_WPOS)) {
+		int first_free_tex = -1;
+		if (fog_id >= 0) {
+			first_free_tex = fog_id+1;
+		} else {
+			if (RENDERINPUTS_TEST_RANGE( index_bitset, _TNL_FIRST_TEX, _TNL_LAST_TEX )) {
+				int i;
+				for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+					if (!RENDERINPUTS_TEST( index_bitset, _TNL_ATTRIB_TEX(i) )) {
+						first_free_tex = i;
+						break;
+					}
+				}
+			} else {
+				first_free_tex = 0;
+			}
+		}
+		
+		if (first_free_tex == -1) {
+			fprintf(stderr, "\tout of free texcoords to write w pos\n");
+			_mesa_exit(-1);
+		}
+
+		sz = VB->AttribPtr[VERT_ATTRIB_POS]->size;
+		InputsRead |= 1 << (VERT_ATTRIB_TEX0 + first_free_tex);
+		OutputsWritten |= 1 << (VERT_RESULT_TEX0 + first_free_tex);
+		EMIT_ATTR( _TNL_ATTRIB_POS, EMIT_1F + sz - 1 );
+		vap_fmt_1 |= sz << (3 * first_free_tex);
+	}
+
 	for (i = 0, nr = 0; i < VERT_ATTRIB_MAX; i++) {
 		if (InputsRead & (1 << i)) {
 			inputs[i] = nr++;
@@ -183,6 +240,8 @@ static void r300SetVertexFormat( GLcontext *ctx )
 		inputs[VERT_ATTRIB_COLOR0] = 2;
 	if (InputsRead & (1 << VERT_ATTRIB_COLOR1))
 		inputs[VERT_ATTRIB_COLOR1] = 3;
+	if (InputsRead & (1 << VERT_ATTRIB_FOG))
+		inputs[VERT_ATTRIB_FOG] = 6 + fog_id;
 	for (i = VERT_ATTRIB_TEX0; i <= VERT_ATTRIB_TEX7; i++)
 		if (InputsRead & (1 << i))
 			inputs[i] = 6 + (i - VERT_ATTRIB_TEX0);
-- 
1.5.6.3

From bbd219f4e5d305abe0c4d53eba07bc472bdbe901 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Sun, 15 Feb 2009 21:58:18 +0100
Subject: [PATCH] r300: enable EXT_fog_coord extension

Remove fixed function fog setup.
---
 src/mesa/drivers/dri/r300/r300_context.c |    4 +-
 src/mesa/drivers/dri/r300/r300_state.c   |  177 +++---------------------------
 2 files changed, 20 insertions(+), 161 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_context.c b/src/mesa/drivers/dri/r300/r300_context.c
index 4c14c7e..fddd87b 100644
--- a/src/mesa/drivers/dri/r300/r300_context.c
+++ b/src/mesa/drivers/dri/r300/r300_context.c
@@ -83,7 +83,7 @@ int hw_tcl_on = 1;
 #define need_GL_EXT_blend_equation_separate
 #define need_GL_EXT_blend_func_separate
 #define need_GL_EXT_blend_minmax
-//#define need_GL_EXT_fog_coord
+#define need_GL_EXT_fog_coord
 #define need_GL_EXT_gpu_program_parameters
 #define need_GL_EXT_secondary_color
 #define need_GL_EXT_stencil_two_side
@@ -111,7 +111,7 @@ const struct dri_extension card_extensions[] = {
   {"GL_EXT_blend_func_separate",	GL_EXT_blend_func_separate_functions},
   {"GL_EXT_blend_minmax",		GL_EXT_blend_minmax_functions},
   {"GL_EXT_blend_subtract",		NULL},
-//  {"GL_EXT_fog_coord",			GL_EXT_fog_coord_functions },
+  {"GL_EXT_fog_coord",			GL_EXT_fog_coord_functions },
   {"GL_EXT_gpu_program_parameters",     GL_EXT_gpu_program_parameters_functions},
   {"GL_EXT_secondary_color", 		GL_EXT_secondary_color_functions},
   {"GL_EXT_shadow_funcs",		NULL},
diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index d6bc5fd..ef1012b 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -730,134 +730,6 @@ static void r300ColorMask(GLcontext * ctx,
 }
 
 /* =============================================================
- * Fog
- */
-static void r300Fogfv(GLcontext * ctx, GLenum pname, const GLfloat * param)
-{
-	r300ContextPtr r300 = R300_CONTEXT(ctx);
-	union {
-		int i;
-		float f;
-	} fogScale, fogStart;
-
-	(void)param;
-
-	fogScale.i = r300->hw.fogp.cmd[R300_FOGP_SCALE];
-	fogStart.i = r300->hw.fogp.cmd[R300_FOGP_START];
-
-	switch (pname) {
-	case GL_FOG_MODE:
-		switch (ctx->Fog.Mode) {
-		case GL_LINEAR:
-			R300_STATECHANGE(r300, fogs);
-			r300->hw.fogs.cmd[R300_FOGS_STATE] =
-			    (r300->hw.fogs.
-			     cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) |
-			    R300_FG_FOG_BLEND_FN_LINEAR;
-
-			if (ctx->Fog.Start == ctx->Fog.End) {
-				fogScale.f = -1.0;
-				fogStart.f = 1.0;
-			} else {
-				fogScale.f =
-				    1.0 / (ctx->Fog.End - ctx->Fog.Start);
-				fogStart.f =
-				    -ctx->Fog.Start / (ctx->Fog.End -
-						       ctx->Fog.Start);
-			}
-			break;
-		case GL_EXP:
-			R300_STATECHANGE(r300, fogs);
-			r300->hw.fogs.cmd[R300_FOGS_STATE] =
-			    (r300->hw.fogs.
-			     cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) |
-			    R300_FG_FOG_BLEND_FN_EXP;
-			fogScale.f = 0.0933 * ctx->Fog.Density;
-			fogStart.f = 0.0;
-			break;
-		case GL_EXP2:
-			R300_STATECHANGE(r300, fogs);
-			r300->hw.fogs.cmd[R300_FOGS_STATE] =
-			    (r300->hw.fogs.
-			     cmd[R300_FOGS_STATE] & ~R300_FG_FOG_BLEND_FN_MASK) |
-			    R300_FG_FOG_BLEND_FN_EXP2;
-			fogScale.f = 0.3 * ctx->Fog.Density;
-			fogStart.f = 0.0;
-                        break;
-		default:
-			return;
-		}
-		break;
-	case GL_FOG_DENSITY:
-		switch (ctx->Fog.Mode) {
-		case GL_EXP:
-			fogScale.f = 0.0933 * ctx->Fog.Density;
-			fogStart.f = 0.0;
-			break;
-		case GL_EXP2:
-			fogScale.f = 0.3 * ctx->Fog.Density;
-			fogStart.f = 0.0;
-		default:
-			break;
-		}
-		break;
-	case GL_FOG_START:
-	case GL_FOG_END:
-		if (ctx->Fog.Mode == GL_LINEAR) {
-			if (ctx->Fog.Start == ctx->Fog.End) {
-				fogScale.f = -1.0;
-				fogStart.f = 1.0;
-			} else {
-				fogScale.f =
-				    1.0 / (ctx->Fog.End - ctx->Fog.Start);
-				fogStart.f =
-				    -ctx->Fog.Start / (ctx->Fog.End -
-						       ctx->Fog.Start);
-			}
-		}
-		break;
-	case GL_FOG_COLOR:
-		R300_STATECHANGE(r300, fogc);
-		r300->hw.fogc.cmd[R300_FOGC_R] =
-		    (GLuint) (ctx->Fog.Color[0] * 1023.0F) & 0x3FF;
-		r300->hw.fogc.cmd[R300_FOGC_G] =
-		    (GLuint) (ctx->Fog.Color[1] * 1023.0F) & 0x3FF;
-		r300->hw.fogc.cmd[R300_FOGC_B] =
-		    (GLuint) (ctx->Fog.Color[2] * 1023.0F) & 0x3FF;
-		break;
-	case GL_FOG_COORD_SRC:
-		break;
-	default:
-		return;
-	}
-
-	if (fogScale.i != r300->hw.fogp.cmd[R300_FOGP_SCALE] ||
-	    fogStart.i != r300->hw.fogp.cmd[R300_FOGP_START]) {
-		R300_STATECHANGE(r300, fogp);
-		r300->hw.fogp.cmd[R300_FOGP_SCALE] = fogScale.i;
-		r300->hw.fogp.cmd[R300_FOGP_START] = fogStart.i;
-	}
-}
-
-static void r300SetFogState(GLcontext * ctx, GLboolean state)
-{
-	r300ContextPtr r300 = R300_CONTEXT(ctx);
-
-	R300_STATECHANGE(r300, fogs);
-	if (state) {
-		r300->hw.fogs.cmd[R300_FOGS_STATE] |= R300_FG_FOG_BLEND_ENABLE;
-
-		r300Fogfv(ctx, GL_FOG_MODE, NULL);
-		r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
-		r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
-		r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
-		r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
-	} else {
-		r300->hw.fogs.cmd[R300_FOGS_STATE] &= ~R300_FG_FOG_BLEND_ENABLE;
-	}
-}
-
-/* =============================================================
  * Point state
  */
 static void r300PointSize(GLcontext * ctx, GLfloat size)
@@ -2153,7 +2025,7 @@ static void r300Enable(GLcontext * ctx, GLenum cap, GLboolean state)
 		/* empty */
 		break;
 	case GL_FOG:
-		r300SetFogState(ctx, state);
+		/* empty */
 		break;
 	case GL_ALPHA_TEST:
 		r300SetAlphaState(ctx);
@@ -2307,11 +2179,9 @@ static void r300ResetHwState(r300ContextPtr r300)
 		break;
 	}
 
-	/* XXX: set to 0 when fog is disabled? */
-	r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = R300_GB_FOG_SELECT_1_1_W;
-
 	/* XXX: Enable anti-aliasing? */
 	r300->hw.gb_misc.cmd[R300_GB_MISC_AA_CONFIG] = GB_AA_CONFIG_AA_DISABLE;
+	r300->hw.gb_misc.cmd[R300_GB_MISC_SELECT] = 0;
 
 	r300->hw.ga_point_s0.cmd[1] = r300PackFloat32(0.0);
 	r300->hw.ga_point_s0.cmd[2] = r300PackFloat32(0.0);
@@ -2360,17 +2230,11 @@ static void r300ResetHwState(r300ContextPtr r300)
 	  R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
 	r300->hw.us_out_fmt.cmd[4] = R500_OUT_FMT_UNUSED |
 	  R500_C0_SEL_B | R500_C1_SEL_G | R500_C2_SEL_R | R500_C3_SEL_A;
-	r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W24;
-
-	r300Enable(ctx, GL_FOG, ctx->Fog.Enabled);
-	r300Fogfv(ctx, GL_FOG_MODE, NULL);
-	r300Fogfv(ctx, GL_FOG_DENSITY, &ctx->Fog.Density);
-	r300Fogfv(ctx, GL_FOG_START, &ctx->Fog.Start);
-	r300Fogfv(ctx, GL_FOG_END, &ctx->Fog.End);
-	r300Fogfv(ctx, GL_FOG_COLOR, ctx->Fog.Color);
-	r300Fogfv(ctx, GL_FOG_COORDINATE_SOURCE_EXT, NULL);
+	r300->hw.us_out_fmt.cmd[5] = R300_W_FMT_W0 | R300_W_SRC_US;
 
-	r300->hw.fg_depth_src.cmd[1] = 0;
+	/* disable fog unit */
+	r300->hw.fogs.cmd[R300_FOGS_STATE] = 0;
+	r300->hw.fg_depth_src.cmd[1] = R300_FG_DEPTH_SRC_SHADER;
 
 	r300->hw.rb3d_cctl.cmd[1] = 0;
 
@@ -2625,16 +2489,6 @@ static void r500SetupPixelShader(r300ContextPtr rmesa)
 	}
 	code = &fp->code;
 
-	if (fp->mesa_program.FogOption != GL_NONE) {
-		/* Enable HW fog. Try not to squish GL context.
-		 * (Anybody sane remembered to set glFog() opts first!) */
-		r300SetFogState(ctx, GL_TRUE);
-		ctx->Fog.Mode = fp->mesa_program.FogOption;
-		r300Fogfv(ctx, GL_FOG_MODE, NULL);
-	} else
-		/* Make sure HW is matching GL context. */
-		r300SetFogState(ctx, ctx->Fog.Enabled);
-
 	r300SetupTextures(ctx);
 
 	R300_STATECHANGE(rmesa, fp);
@@ -2683,12 +2537,18 @@ void r300UpdateShaderStates(r300ContextPtr rmesa)
 	r300UpdateTextureState(ctx);
 	r300SetEarlyZState(ctx);
 
-	GLuint fgdepthsrc = R300_FG_DEPTH_SRC_SCAN;
-	if (current_fragment_program_writes_depth(ctx))
-		fgdepthsrc = R300_FG_DEPTH_SRC_SHADER;
-	if (fgdepthsrc != rmesa->hw.fg_depth_src.cmd[1]) {
-		R300_STATECHANGE(rmesa, fg_depth_src);
-		rmesa->hw.fg_depth_src.cmd[1] = fgdepthsrc;
+	/* w_fmt value is set to get best performance
+	 * see p.130 R5xx 3D acceleration guide v1.3 */
+	GLuint w_fmt;
+	if (current_fragment_program_writes_depth(ctx)) {
+		w_fmt = R300_W_FMT_W24 | R300_W_SRC_US;
+	} else {
+		w_fmt = R300_W_FMT_W0 | R300_W_SRC_US;
+	}
+	
+	if (w_fmt != rmesa->hw.us_out_fmt.cmd[5]) {
+		R300_STATECHANGE(rmesa, us_out_fmt);
+		rmesa->hw.us_out_fmt.cmd[5] = w_fmt;
 	}
 
 	if (rmesa->radeon.radeonScreen->chip_family >= CHIP_FAMILY_RV515)
@@ -2806,7 +2666,6 @@ void r300InitStateFuncs(struct dd_function_table *functions)
 	functions->DepthFunc = r300DepthFunc;
 	functions->DepthMask = r300DepthMask;
 	functions->CullFace = r300CullFace;
-	functions->Fogfv = r300Fogfv;
 	functions->FrontFace = r300FrontFace;
 	functions->ShadeModel = r300ShadeModel;
 	functions->LogicOpcode = r300LogicOpcode;
-- 
1.5.6.3

From 1408cb08bd641170f40c302ea9c9a8defb4ffc79 Mon Sep 17 00:00:00 2001
From: Maciej Cencora <[email protected]>
Date: Sun, 15 Feb 2009 20:43:22 +0100
Subject: [PATCH] r300: rewrite and hopefully simplify RS setup

---
 src/mesa/drivers/dri/r300/r300_state.c          |  411 ++++++++++++-----------
 src/mesa/drivers/dri/r300/r300_vertprog.c       |    9 +-
 src/mesa/drivers/dri/r300/radeon_program_pair.c |   28 +-
 3 files changed, 230 insertions(+), 218 deletions(-)

diff --git a/src/mesa/drivers/dri/r300/r300_state.c b/src/mesa/drivers/dri/r300/r300_state.c
index c192fec..d6bc5fd 100644
--- a/src/mesa/drivers/dri/r300/r300_state.c
+++ b/src/mesa/drivers/dri/r300/r300_state.c
@@ -1595,18 +1595,14 @@ union r300_outputs_written {
 static void r300SetupRSUnit(GLcontext * ctx)
 {
 	r300ContextPtr r300 = R300_CONTEXT(ctx);
-	/* I'm still unsure if these are needed */
-	GLuint interp_col[8];
         TNLcontext *tnl = TNL_CONTEXT(ctx);
 	struct vertex_buffer *VB = &tnl->vb;
 	union r300_outputs_written OutputsWritten;
 	GLuint InputsRead;
 	int fp_reg, high_rr;
-	int col_interp_nr;
-	int rs_tex_count = 0, rs_col_count = 0;
-	int i, count;
-
-	memset(interp_col, 0, sizeof(interp_col));
+	int col_ip, tex_ip;
+	int rs_tex_count = 0;
+	int i, count, col_fmt;
 
 	if (hw_tcl_on)
 		OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
@@ -1624,51 +1620,66 @@ static void r300SetupRSUnit(GLcontext * ctx)
 	R300_STATECHANGE(r300, rc);
 	R300_STATECHANGE(r300, rr);
 
-	fp_reg = col_interp_nr = high_rr = 0;
-
-	r300->hw.rr.cmd[R300_RR_INST_1] = 0;
+	fp_reg = col_ip = tex_ip = col_fmt = 0;
 
-	if (InputsRead & FRAG_BIT_WPOS) {
-		for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-			if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
-				break;
+	r300->hw.rc.cmd[1] = 0;
+	r300->hw.rc.cmd[2] = 0;
+	for (i=0; i<R300_RR_CMDSIZE-1; ++i)
+		r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0;
 
-		if (i == ctx->Const.MaxTextureUnits) {
-			fprintf(stderr, "\tno free texcoord found...\n");
-			_mesa_exit(-1);
-		}
+	for (i=0; i<R300_RI_CMDSIZE-1; ++i)
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0;
 
-		InputsRead |= (FRAG_BIT_TEX0 << i);
-		InputsRead &= ~FRAG_BIT_WPOS;
-	}
 
 	if (InputsRead & FRAG_BIT_COL0) {
-		count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
-		interp_col[0] |= R300_RS_COL_PTR(rs_col_count);
-		if (count == 3)
-			interp_col[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB1);
-		rs_col_count += count;
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
+			count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
+			if (count == 4)
+			    col_fmt = R300_RS_COL_FMT_RGBA;
+			else if (count == 3)
+			    col_fmt = R300_RS_COL_FMT_RGB1;
+			else
+			    col_fmt = R300_RS_COL_FMT_0001;
+
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_COL0;
+			++col_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
+		}
 	}
-	else
-		interp_col[0] = R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
 
 	if (InputsRead & FRAG_BIT_COL1) {
-		count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
-		if (count == 3)
-			interp_col[1] |= R300_RS_COL_FMT(R300_RS_COL_FMT_RGB0);
-		interp_col[1] |= R300_RS_COL_PTR(1);
-		rs_col_count += count;
-	}
-
-	if (InputsRead & FRAG_BIT_FOGC) {
-		/* XXX FIX THIS
-		 * Just turn off the bit for now.
-		 * Need to do something similar to the color/texcoord inputs.
-		 */
-		InputsRead &= ~FRAG_BIT_FOGC;
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
+			count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
+			if (count == 4)
+			    col_fmt = R300_RS_COL_FMT_RGBA;
+			else if (count == 3)
+			    col_fmt = R300_RS_COL_FMT_RGB1;
+			else
+			    col_fmt = R300_RS_COL_FMT_0001;
+
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R300_RS_COL_PTR(col_ip) | R300_RS_COL_FMT(col_fmt);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R300_RS_INST_COL_ID(col_ip) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_COL1;
+			++col_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+		}
 	}
 
 	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+		if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
+		    continue;
+		
+		if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+		    WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+		    continue;
+		}
+
 		int swiz;
 
 		/* with TCL we always seem to route 4 components */
@@ -1677,7 +1688,6 @@ static void r300SetupRSUnit(GLcontext * ctx)
 		else
 		  count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
 
-		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | rs_tex_count;
 		switch(count) {
 		case 4: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3); break;
 		case 3: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
@@ -1686,63 +1696,48 @@ static void r300SetupRSUnit(GLcontext * ctx)
 		case 2: swiz = R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(R300_RS_SEL_K0) | R300_RS_SEL_Q(R300_RS_SEL_K1); break;
 		};
 
-		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] |= swiz;
-
-		r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0;
-		if (InputsRead & (FRAG_BIT_TEX0 << i)) {
-
-			rs_tex_count += count;
-
-			//assert(r300->state.texture.tc_count != 0);
-			r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R300_RS_INST_TEX_CN_WRITE | i	/* source INTERP */
-			    | (fp_reg << R300_RS_INST_TEX_ADDR_SHIFT);
-			high_rr = fp_reg;
-
-			/* Passing invalid data here can lock the GPU. */
-			if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
-				InputsRead &= ~(FRAG_BIT_TEX0 << i);
-				fp_reg++;
-			} else {
-				WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
-			}
-		}
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz | R300_RS_TEX_PTR(rs_tex_count);
+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
+		InputsRead &= ~(FRAG_BIT_TEX0 << i);
+		rs_tex_count += count;
+		++tex_ip;
+		++fp_reg;
 	}
 
-	if (InputsRead & FRAG_BIT_COL0) {
-		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
-			r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT);
-			InputsRead &= ~FRAG_BIT_COL0;
-			col_interp_nr++;
-		} else {
-			WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
-		}
-	}
-
-	if (InputsRead & FRAG_BIT_COL1) {
-		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
-			r300->hw.rr.cmd[R300_RR_INST_1] |= R300_RS_INST_COL_ID(1) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT);
-			InputsRead &= ~FRAG_BIT_COL1;
-			if (high_rr < 1)
-				high_rr = 1;
-			col_interp_nr++;
+	if (InputsRead & FRAG_BIT_FOGC) {
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |=  R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) |  R300_RS_TEX_PTR(rs_tex_count);
+			r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_FOGC;
+			rs_tex_count += 4;
+			++tex_ip;
+			++fp_reg;
 		} else {
-			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
+			WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
 		}
 	}
 
-	/* Need at least one. This might still lock as the values are undefined... */
-	if (rs_tex_count == 0 && col_interp_nr == 0) {
-		r300->hw.rr.cmd[R300_RR_INST_0] |= R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | (fp_reg++ << R300_RS_INST_COL_ADDR_SHIFT);
-		col_interp_nr++;
-	}
-
-	r300->hw.rc.cmd[1] = 0 | (rs_tex_count << R300_IT_COUNT_SHIFT)
-	  | (col_interp_nr << R300_IC_COUNT_SHIFT)
-	  | R300_HIRES_EN;
-
-	assert(high_rr >= 0);
-	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr + 1);
-	r300->hw.rc.cmd[2] = high_rr;
+	if (InputsRead & FRAG_BIT_WPOS) {
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |=  R300_RS_SEL_S(0) | R300_RS_SEL_T(1) | R300_RS_SEL_R(2) | R300_RS_SEL_Q(3) |  R300_RS_TEX_PTR(rs_tex_count);
+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R300_RS_INST_TEX_ID(tex_ip) | R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_reg);
+		InputsRead &= ~FRAG_BIT_WPOS;
+		rs_tex_count += 4;
+		++tex_ip;
+		++fp_reg;
+	}
+	InputsRead &= ~FRAG_BIT_WPOS;
+	
+	/* Setup default color if no color or tex was set */
+	if (rs_tex_count == 0 && col_ip == 0) {
+		r300->hw.rr.cmd[R300_RR_INST_0] = R300_RS_INST_COL_ID(0) | R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(0) | R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+		++col_ip;
+	}
+	
+	high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
+	r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT)  | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+	r300->hw.rc.cmd[2] |= high_rr - 1;
+	
+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R300_RS_INST_0, high_rr);
 
 	if (InputsRead)
 		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
@@ -1751,18 +1746,15 @@ static void r300SetupRSUnit(GLcontext * ctx)
 static void r500SetupRSUnit(GLcontext * ctx)
 {
 	r300ContextPtr r300 = R300_CONTEXT(ctx);
-	/* I'm still unsure if these are needed */
-	GLuint interp_col[8];
-	union r300_outputs_written OutputsWritten;
         TNLcontext *tnl = TNL_CONTEXT(ctx);
 	struct vertex_buffer *VB = &tnl->vb;
+	union r300_outputs_written OutputsWritten;
 	GLuint InputsRead;
 	int fp_reg, high_rr;
-	int rs_col_count = 0;
-	int in_texcoords, col_interp_nr;
-	int i, count;
+	int col_ip, tex_ip;
+	int rs_tex_count = 0;
+	int i, count, col_fmt;
 
-	memset(interp_col, 0, sizeof(interp_col));
 	if (hw_tcl_on)
 		OutputsWritten.vp_outputs = CURRENT_VERTEX_SHADER(ctx)->key.OutputsWritten;
 	else
@@ -1779,100 +1771,32 @@ static void r500SetupRSUnit(GLcontext * ctx)
 	R300_STATECHANGE(r300, rc);
 	R300_STATECHANGE(r300, rr);
 
-	fp_reg = col_interp_nr = high_rr = in_texcoords = 0;
-
-	r300->hw.rr.cmd[R300_RR_INST_1] = 0;
-
-	if (InputsRead & FRAG_BIT_WPOS) {
-		for (i = 0; i < ctx->Const.MaxTextureUnits; i++)
-			if (!(InputsRead & (FRAG_BIT_TEX0 << i)))
-				break;
+	fp_reg = col_ip = tex_ip = col_fmt = 0;
 
-		if (i == ctx->Const.MaxTextureUnits) {
-			fprintf(stderr, "\tno free texcoord found...\n");
-			_mesa_exit(-1);
-		}
+	r300->hw.rc.cmd[1] = 0;
+	r300->hw.rc.cmd[2] = 0;
+	for (i=0; i<R300_RR_CMDSIZE-1; ++i)
+		r300->hw.rr.cmd[R300_RR_INST_0 + i] = 0;
 
-		InputsRead |= (FRAG_BIT_TEX0 << i);
-		InputsRead &= ~FRAG_BIT_WPOS;
-	}
+	for (i=0; i<R500_RI_CMDSIZE-1; ++i)
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = 0;
 
-	if (InputsRead & FRAG_BIT_COL0) {
-		count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
-		interp_col[0] |= R500_RS_COL_PTR(rs_col_count);
-		if (count == 3)
-			interp_col[0] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB1);
-		rs_col_count += count;
-	}
-	else
-		interp_col[0] = R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
-
-	if (InputsRead & FRAG_BIT_COL1) {
-		count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
-		interp_col[1] |= R500_RS_COL_PTR(1);
-		if (count == 3)
-			interp_col[1] |= R500_RS_COL_FMT(R300_RS_COL_FMT_RGB0);
-		rs_col_count += count;
-	}
-
-	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
-		GLuint swiz = 0;
-
-		/* with TCL we always seem to route 4 components */
-		if (InputsRead & (FRAG_BIT_TEX0 << i)) {
-
-		  if (hw_tcl_on)
-		    count = 4;
-		  else
-		    count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
-
-		  /* always have on texcoord */
-		  swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_S_SHIFT;
-		  if (count >= 2)
-		    swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_T_SHIFT;
-		  else
-		    swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
-
-		  if (count >= 3)
-		    swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_R_SHIFT;
-		  else
-		    swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
-
-		  if (count == 4)
-		    swiz |= in_texcoords++ << R500_RS_IP_TEX_PTR_Q_SHIFT;
-		  else
-		    swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
-
-		} else
-		   swiz = (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT) |
-		          (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT) |
-		          (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
-		          (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT);
-
-		r300->hw.ri.cmd[R300_RI_INTERP_0 + i] = interp_col[i] | swiz;
-
-		r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] = 0;
-		if (InputsRead & (FRAG_BIT_TEX0 << i)) {
-			//assert(r300->state.texture.tc_count != 0);
-			r300->hw.rr.cmd[R300_RR_INST_0 + fp_reg] |= R500_RS_INST_TEX_CN_WRITE | i	/* source INTERP */
-			    | (fp_reg << R500_RS_INST_TEX_ADDR_SHIFT);
-			high_rr = fp_reg;
-
-			/* Passing invalid data here can lock the GPU. */
-			if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
-				InputsRead &= ~(FRAG_BIT_TEX0 << i);
-				fp_reg++;
-			} else {
-				WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
-			}
-		}
-	}
 
 	if (InputsRead & FRAG_BIT_COL0) {
 		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL0, _TNL_ATTRIB_COLOR0)) {
-			r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
+			count = VB->AttribPtr[_TNL_ATTRIB_COLOR0]->size;
+			if (count == 4)
+			    col_fmt = R300_RS_COL_FMT_RGBA;
+			else if (count == 3)
+			    col_fmt = R300_RS_COL_FMT_RGB1;
+			else
+			    col_fmt = R300_RS_COL_FMT_0001;
+
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
 			InputsRead &= ~FRAG_BIT_COL0;
-			col_interp_nr++;
+			++col_ip;
+			++fp_reg;
 		} else {
 			WARN_ONCE("fragprog wants col0, vp doesn't provide it\n");
 		}
@@ -1880,29 +1804,118 @@ static void r500SetupRSUnit(GLcontext * ctx)
 
 	if (InputsRead & FRAG_BIT_COL1) {
 		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_COL1, _TNL_ATTRIB_COLOR1)) {
-			r300->hw.rr.cmd[R300_RR_INST_1] |= (1 << 12) | R500_RS_INST_COL_CN_WRITE |  (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
+			count = VB->AttribPtr[_TNL_ATTRIB_COLOR1]->size;
+			if (count == 4)
+			    col_fmt = R300_RS_COL_FMT_RGBA;
+			else if (count == 3)
+			    col_fmt = R300_RS_COL_FMT_RGB1;
+			else
+			    col_fmt = R300_RS_COL_FMT_0001;
+
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + col_ip] = R500_RS_COL_PTR(col_ip) | R500_RS_COL_FMT(col_fmt);
+			r300->hw.rr.cmd[R300_RR_INST_0 + col_ip] = R500_RS_INST_COL_ID(col_ip) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_reg);
 			InputsRead &= ~FRAG_BIT_COL1;
-			if (high_rr < 1)
-				high_rr = 1;
-			col_interp_nr++;
+			++col_ip;
+			++fp_reg;
 		} else {
 			WARN_ONCE("fragprog wants col1, vp doesn't provide it\n");
 		}
 	}
 
-	/* Need at least one. This might still lock as the values are undefined... */
-	if (in_texcoords == 0 && col_interp_nr == 0) {
-		r300->hw.rr.cmd[R300_RR_INST_0] |= 0 | R500_RS_INST_COL_CN_WRITE | (fp_reg++ << R500_RS_INST_COL_ADDR_SHIFT);
-		col_interp_nr++;
+
+	for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
+		if (! ( InputsRead & FRAG_BIT_TEX(i) ) )
+		    continue;
+		
+		if (!R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_TEX0 + i, _TNL_ATTRIB_TEX(i))) {
+		    WARN_ONCE("fragprog wants coords for tex%d, vp doesn't provide them!\n", i);
+		    continue;
+		}
+
+		int swiz = 0;
+
+		/* with TCL we always seem to route 4 components */
+		if (hw_tcl_on)
+		  count = 4;
+		else
+		  count = VB->AttribPtr[_TNL_ATTRIB_TEX(i)]->size;
+
+		if (count == 4) {
+			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
+			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
+			swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT;
+			swiz |= (rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT;
+		} else if (count == 3) {
+			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
+			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
+			swiz |= (rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT;
+			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
+		} else if (count == 2) {
+			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
+			swiz |= (rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT;
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
+			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
+		} else if (count == 1) {
+			swiz |= (rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT;
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
+			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
+		} else {
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_S_SHIFT;
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_T_SHIFT;
+			swiz |= R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT;
+			swiz |= R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT;
+		}
+
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= swiz;
+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
+		InputsRead &= ~(FRAG_BIT_TEX0 << i);
+		rs_tex_count += count;
+		++tex_ip;
+		++fp_reg;
 	}
 
-	r300->hw.rc.cmd[1] = 0 | (in_texcoords << R300_IT_COUNT_SHIFT)
-	  | (col_interp_nr << R300_IC_COUNT_SHIFT)
-	  | R300_HIRES_EN;
+	if (InputsRead & FRAG_BIT_FOGC) {
+		if (R300_OUTPUTS_WRITTEN_TEST(OutputsWritten, VERT_RESULT_FOGC, _TNL_ATTRIB_FOG)) {
+			r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
+				((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
+				((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
+				((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
+			
+			r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
+			InputsRead &= ~FRAG_BIT_FOGC;
+			rs_tex_count += 4;
+			++tex_ip;
+			++fp_reg;
+		} else {
+			WARN_ONCE("fragprog wants fogc, vp doesn't provide it\n");
+		}
+	}
 
-	assert(high_rr >= 0);
-	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr + 1);
-	r300->hw.rc.cmd[2] = 0xC0 | high_rr;
+	if (InputsRead & FRAG_BIT_WPOS) {
+		r300->hw.ri.cmd[R300_RI_INTERP_0 + tex_ip] |= ((rs_tex_count + 0) << R500_RS_IP_TEX_PTR_S_SHIFT) |
+				((rs_tex_count + 1) << R500_RS_IP_TEX_PTR_T_SHIFT) |
+				((rs_tex_count + 2) << R500_RS_IP_TEX_PTR_R_SHIFT) |
+				((rs_tex_count + 3) << R500_RS_IP_TEX_PTR_Q_SHIFT);
+
+		r300->hw.rr.cmd[R300_RR_INST_0 + tex_ip] |= R500_RS_INST_TEX_ID(tex_ip) | R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_reg);
+		InputsRead &= ~FRAG_BIT_WPOS;
+		rs_tex_count += 4;
+		++tex_ip;
+		++fp_reg;
+	}
+	
+	/* Setup default color if no color or tex was set */
+	if (rs_tex_count == 0 && col_ip == 0) {
+		r300->hw.rr.cmd[R300_RR_INST_0] |= R500_RS_INST_COL_ID(0) | R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(0) | R500_RS_COL_FMT(R300_RS_COL_FMT_0001);
+		++col_ip;
+	}
+	
+	high_rr = (col_ip > tex_ip) ? col_ip : tex_ip;
+	r300->hw.rc.cmd[1] |= (rs_tex_count << R300_IT_COUNT_SHIFT)  | (col_ip << R300_IC_COUNT_SHIFT) | R300_HIRES_EN;
+	r300->hw.rc.cmd[2] |= 0xC0 | (high_rr - 1);
+	
+	r300->hw.rr.cmd[R300_RR_CMD_0] = cmdpacket0(R500_RS_INST_0, high_rr);
 
 	if (InputsRead)
 		WARN_ONCE("Don't know how to satisfy InputsRead=0x%08x\n", InputsRead);
diff --git a/src/mesa/drivers/dri/r300/r300_vertprog.c b/src/mesa/drivers/dri/r300/r300_vertprog.c
index c4e325e..b6a4d30 100644
--- a/src/mesa/drivers/dri/r300/r300_vertprog.c
+++ b/src/mesa/drivers/dri/r300/r300_vertprog.c
@@ -993,17 +993,16 @@ static void t_inputs_outputs(struct r300_vertex_program *vp)
 		    vp->outputs[VERT_RESULT_COL0] + 3;
 		cur_reg = vp->outputs[VERT_RESULT_BFC1] + 1;
 	}
-#if 0
-	if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
-		vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
-	}
-#endif
 
 	for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
 		if (vp->key.OutputsWritten & (1 << i)) {
 			vp->outputs[i] = cur_reg++;
 		}
 	}
+
+	if (vp->key.OutputsWritten & (1 << VERT_RESULT_FOGC)) {
+		vp->outputs[VERT_RESULT_FOGC] = cur_reg++;
+	}
 }
 
 static void r300TranslateVertexShader(struct r300_vertex_program *vp,
diff --git a/src/mesa/drivers/dri/r300/radeon_program_pair.c b/src/mesa/drivers/dri/r300/radeon_program_pair.c
index 58bc0d5..56489d8 100644
--- a/src/mesa/drivers/dri/r300/radeon_program_pair.c
+++ b/src/mesa/drivers/dri/r300/radeon_program_pair.c
@@ -451,19 +451,7 @@ static void allocate_input_registers(struct pair_state *s)
 	int i;
 	GLuint hwindex = 0;
 
-	/* Texcoords come first */
-	for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
-		if (InputsRead & (FRAG_BIT_TEX0 << i))
-			alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
-	}
-	InputsRead &= ~FRAG_BITS_TEX_ANY;
-
-	/* fragment position treated as a texcoord */
-	if (InputsRead & FRAG_BIT_WPOS)
-		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
-	InputsRead &= ~FRAG_BIT_WPOS;
-
-	/* Then primary colour */
+	/* Primary colour */
 	if (InputsRead & FRAG_BIT_COL0)
 		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL0, hwindex++);
 	InputsRead &= ~FRAG_BIT_COL0;
@@ -473,11 +461,23 @@ static void allocate_input_registers(struct pair_state *s)
 		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_COL1, hwindex++);
 	InputsRead &= ~FRAG_BIT_COL1;
 
-	/* Fog coordinate */
+	/* Texcoords */
+	for (i = 0; i < s->Ctx->Const.MaxTextureUnits; i++) {
+		if (InputsRead & (FRAG_BIT_TEX0 << i))
+			alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_TEX0+i, hwindex++);
+	}
+	InputsRead &= ~FRAG_BITS_TEX_ANY;
+
+	/* Fogcoords treated as a texcoord */
 	if (InputsRead & FRAG_BIT_FOGC)
 		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_FOGC, hwindex++);
 	InputsRead &= ~FRAG_BIT_FOGC;
 
+	/* fragment position treated as a texcoord */
+	if (InputsRead & FRAG_BIT_WPOS)
+		alloc_hw_reg(s, PROGRAM_INPUT, FRAG_ATTRIB_WPOS, hwindex++);
+	InputsRead &= ~FRAG_BIT_WPOS;
+
 	/* Anything else */
 	if (InputsRead)
 		error("Don't know how to handle inputs 0x%x\n", InputsRead);
-- 
1.5.6.3

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
Mesa3d-dev mailing list
Mesa3d-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev

Reply via email to