Roland Scheidegger wrote:
Ok, I was FINALLY able to come up with something for texture tiling
which seems to work - this was very very annoying, it _almost_ worked
literally within minutes, but I needed a lot of time until it finally
did really work.
I needed to convert the drivers back to using a multi-byte upload scheme
again (they did this years ago, until it was abandoned for a simpler
method), because if the blitter auto-tiles the textures on upload, the
color format and texture pitch need to match the exact texture properties
(at least I was unable to make it work otherwise).
Also, there are tons of special cases for small textures in the drm,
which I don't like but couldn't avoid.
I could not make it work for texture rectangles. I think microtiling
should work for that case, at least on r200, but it didn't. Well, I
don't think it's important.
It is only tested with "normal" 16-bit and 32-bit RGB/RGBA textures;
in theory all other formats should probably work too, I hope. I lack a
good test: tests/manytex and redbook/mipmap are a bit too simple (and
you can't see if the tiling is actually correct...). 8-bit formats
should work too, if I got the math right...
Also, there seem to be some minor differences between r100 and r200
chips as far as macro tiling is concerned (I didn't have much time to
test on r100 though), and for the life of me I couldn't figure out
what that second micro-tile bit is good for on r100.
Quake3 got about a 15% boost on a 9000pro, and 11% on a 7200 sdr, if
the highest texture setting/trilinear/32bit was used. Well, compressed
textures are still faster :-).
For the drm, I've also included Andreas Stenglein's cube map patch for
the r100 (since this needs a version bump as well).
I've only attached the -core version. I actually plan to do a non-core
version too, but I've noticed the non-core version no longer seems
to get all the fixes the core version does. So am I the only one who still
checks things in for that version? In that case I'll immediately stop
touching it...
Here is a non-core drm patch. It works fine here (rv100).
Stephane
Index: radeon.h
===================================================================
RCS file: /cvs/dri/drm/shared/radeon.h,v
retrieving revision 1.35
diff -u -r1.35 radeon.h
--- radeon.h 26 Jan 2005 17:48:59 -0000 1.35
+++ radeon.h 8 Feb 2005 23:29:09 -0000
@@ -42,10 +42,10 @@
#define DRIVER_NAME "radeon"
#define DRIVER_DESC "ATI Radeon"
-#define DRIVER_DATE "20050125"
+#define DRIVER_DATE "20050208"
#define DRIVER_MAJOR 1
-#define DRIVER_MINOR 14
+#define DRIVER_MINOR 15
#define DRIVER_PATCHLEVEL 0
/* Interface history:
@@ -86,6 +86,8 @@
* - Add hyperz support, add hyperz flags to clear ioctl.
* 1.14- Add support for color tiling
* - Add R100/R200 surface allocation/free support
+ * 1.15- Add support for texture micro tiling
+ * - Add support for r100 cube maps
*/
#define DRIVER_IOCTLS \
[DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { radeon_cp_buffers, 1, 0 }, \
Index: radeon_drm.h
===================================================================
RCS file: /cvs/dri/drm/shared/radeon_drm.h,v
retrieving revision 1.26
diff -u -r1.26 radeon_drm.h
--- radeon_drm.h 26 Jan 2005 17:48:59 -0000 1.26
+++ radeon_drm.h 8 Feb 2005 23:29:09 -0000
@@ -146,7 +146,13 @@
#define RADEON_EMIT_PP_TEX_SIZE_2 75
#define R200_EMIT_RB3D_BLENDCOLOR 76
#define R200_EMIT_TCL_POINT_SPRITE_CNTL 77
-#define RADEON_MAX_STATE_PACKETS 78
+#define RADEON_EMIT_PP_CUBIC_FACES_0 78
+#define RADEON_EMIT_PP_CUBIC_OFFSETS_T0 79
+#define RADEON_EMIT_PP_CUBIC_FACES_1 80
+#define RADEON_EMIT_PP_CUBIC_OFFSETS_T1 81
+#define RADEON_EMIT_PP_CUBIC_FACES_2 82
+#define RADEON_EMIT_PP_CUBIC_OFFSETS_T2 83
+#define RADEON_MAX_STATE_PACKETS 84
/* Commands understood by cmd_buffer ioctl. More can be added but
Index: radeon_drv.h
===================================================================
RCS file: /cvs/dri/drm/shared/radeon_drv.h,v
retrieving revision 1.40
diff -u -r1.40 radeon_drv.h
--- radeon_drv.h 26 Jan 2005 17:48:59 -0000 1.40
+++ radeon_drv.h 8 Feb 2005 23:29:10 -0000
@@ -765,6 +765,13 @@
#define RADEON_PP_TEX_SIZE_2 0x1d14
+#define RADEON_PP_CUBIC_FACES_0 0x1d24
+#define RADEON_PP_CUBIC_FACES_1 0x1d28
+#define RADEON_PP_CUBIC_FACES_2 0x1d2c
+#define RADEON_PP_CUBIC_OFFSET_T0_0 0x1dd0 /* bits [31:5] */
+#define RADEON_PP_CUBIC_OFFSET_T1_0 0x1e00
+#define RADEON_PP_CUBIC_OFFSET_T2_0 0x1e14
+
#define SE_VAP_CNTL__TCL_ENA_MASK 0x00000001
#define SE_VAP_CNTL__FORCE_W_TO_ONE_MASK 0x00010000
#define SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT 0x00000012
Index: radeon_state.c
===================================================================
RCS file: /cvs/dri/drm/shared/radeon_state.c,v
retrieving revision 1.43
diff -u -r1.43 radeon_state.c
--- radeon_state.c 7 Feb 2005 21:11:59 -0000 1.43
+++ radeon_state.c 8 Feb 2005 23:29:12 -0000
@@ -145,6 +145,22 @@
break;
}
+ case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
+ case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
+ case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
+ int i;
+ for (i = 0; i < 5; i++) {
+ if (radeon_check_and_fixup_offset(dev_priv,
+ filp_priv,
+ &data[i])) {
+ DRM_ERROR
+ ("Invalid R100 cubic texture
offset\n");
+ return DRM_ERR(EINVAL);
+ }
+ }
+ }
+ break;
+
case RADEON_EMIT_RB3D_COLORPITCH:
case RADEON_EMIT_RE_LINE_PATTERN:
case RADEON_EMIT_SE_LINE_WIDTH:
@@ -206,6 +222,9 @@
case RADEON_EMIT_PP_TEX_SIZE_2:
case R200_EMIT_RB3D_BLENDCOLOR:
case R200_EMIT_TCL_POINT_SPRITE_CNTL:
+ case RADEON_EMIT_PP_CUBIC_FACES_0:
+ case RADEON_EMIT_PP_CUBIC_FACES_1:
+ case RADEON_EMIT_PP_CUBIC_FACES_2:
/* These packets don't contain memory offsets */
break;
@@ -1513,6 +1532,7 @@
int size, dwords, tex_width, blit_width;
u32 height;
int i;
+ u32 texpitch, microtile;
RING_LOCALS;
DRM_GET_PRIV_WITH_RETURN( filp_priv, filp );
@@ -1575,6 +1595,16 @@
DRM_ERROR( "invalid texture format %d\n", tex->format );
return DRM_ERR(EINVAL);
}
+ texpitch = tex->pitch;
+ if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
+ microtile = 1;
+ if (tex_width < 64) {
+ texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
+ /* we got tiled coordinates, untile them */
+ image->x *= 2;
+ }
+ }
+ else microtile = 0;
DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );
@@ -1627,7 +1657,7 @@
RADEON_GMC_CLR_CMP_CNTL_DIS |
RADEON_GMC_WR_MSK_DIS);
- buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
+ buffer[2] = (texpitch << 22) | (tex->offset >> 10);
buffer[3] = 0xffffffff;
buffer[4] = 0xffffffff;
buffer[5] = (image->y << 16) | image->x;
@@ -1635,30 +1665,109 @@
buffer[7] = dwords;
buffer += 8;
- if ( tex_width >= 32 ) {
- /* Texture image width is larger than the minimum, so we
- * can upload it directly.
- */
- if ( DRM_COPY_FROM_USER( buffer, data,
- dwords * sizeof(u32) ) ) {
- DRM_ERROR( "EFAULT on data, %d dwords\n",
- dwords );
- return DRM_ERR(EFAULT);
+ if (microtile) {
+ /* texture micro tiling in use, minimum texture width
is thus 16 bytes.
+ however, we cannot use blitter directly for texture
width < 64 bytes,
+ since minimum tex pitch is 64 bytes and we need this
to match
+ the texture width, otherwise the blitter will tile
it wrong.
+ Thus, tiling manually in this case. Additionally,
need to special
+ case tex height = 1, since our actual image will
have height 2
+ and we need to ensure we don't read beyond the
texture size
+ from user space. */
+ if (tex->height == 1) {
+ if (tex_width >= 64 || tex_width <= 16) {
+ if (DRM_COPY_FROM_USER(buffer, data,
+ tex_width *
sizeof(u32))) {
+ DRM_ERROR("EFAULT on pad, %d
bytes\n",
+ tex_width);
+ return DRM_ERR(EFAULT);
+ }
+ } else if (tex_width == 32) {
+ if (DRM_COPY_FROM_USER(buffer, data,
16)) {
+ DRM_ERROR("EFAULT on pad, %d
bytes\n",
+ tex_width);
+ return DRM_ERR(EFAULT);
+ }
+ if (DRM_COPY_FROM_USER(buffer + 8, data
+ 16, 16)) {
+ DRM_ERROR("EFAULT on pad, %d
bytes\n",
+ tex_width);
+ return DRM_ERR(EFAULT);
+ }
+ }
+ } else if (tex_width >= 64 || tex_width == 16) {
+ if (DRM_COPY_FROM_USER(buffer, data,
+ dwords * sizeof(u32))) {
+ DRM_ERROR("EFAULT on data, %d dwords\n",
+ dwords);
+ return DRM_ERR(EFAULT);
+ }
+ } else if (tex_width < 16) {
+ for (i = 0; i < tex->height; i++) {
+ if (DRM_COPY_FROM_USER(buffer, data,
tex_width)) {
+ DRM_ERROR("EFAULT on pad, %d
bytes\n",
+ tex_width);
+ return DRM_ERR(EFAULT);
+ }
+ buffer += 4;
+ data += tex_width;
+ }
+ } else if (tex_width == 32) {
+ /* TODO: make sure this works when not fitting in one
buffer
+ (i.e. 32bytes x 2048...) */
+ for (i = 0; i < tex->height; i += 2) {
+ if (DRM_COPY_FROM_USER(buffer, data,
16)) {
+ DRM_ERROR("EFAULT on pad, %d
bytes\n",
+ tex_width);
+ return DRM_ERR(EFAULT);
+ }
+ data += 16;
+ if (DRM_COPY_FROM_USER(buffer + 8,
data, 16)) {
+ DRM_ERROR("EFAULT on pad, %d
bytes\n",
+ tex_width);
+ return DRM_ERR(EFAULT);
+ }
+ data += 16;
+ if (DRM_COPY_FROM_USER(buffer + 4,
data, 16)) {
+ DRM_ERROR("EFAULT on pad, %d
bytes\n",
+ tex_width);
+ return DRM_ERR(EFAULT);
+ }
+ data += 16;
+ if (DRM_COPY_FROM_USER(buffer + 12,
data, 16)) {
+ DRM_ERROR("EFAULT on pad, %d
bytes\n",
+ tex_width);
+ return DRM_ERR(EFAULT);
+ }
+ data += 16;
+ buffer += 16;
+ }
}
- } else {
- /* Texture image width is less than the minimum, so we
- * need to pad out each image scanline to the minimum
- * width.
- */
- for ( i = 0 ; i < tex->height ; i++ ) {
- if ( DRM_COPY_FROM_USER( buffer, data,
- tex_width ) ) {
- DRM_ERROR( "EFAULT on pad, %d bytes\n",
- tex_width );
+ }
+ else {
+ if (tex_width >= 32) {
+ /* Texture image width is larger than the
minimum, so we
+ * can upload it directly.
+ */
+ if (DRM_COPY_FROM_USER(buffer, data,
+ dwords * sizeof(u32))) {
+ DRM_ERROR("EFAULT on data, %d dwords\n",
+ dwords);
return DRM_ERR(EFAULT);
}
- buffer += 8;
- data += tex_width;
+ } else {
+ /* Texture image width is less than the
minimum, so we
+ * need to pad out each image scanline to the
minimum
+ * width.
+ */
+ for (i = 0; i < tex->height; i++) {
+ if (DRM_COPY_FROM_USER(buffer, data,
tex_width)) {
+ DRM_ERROR("EFAULT on pad, %d
bytes\n",
+ tex_width);
+ return DRM_ERR(EFAULT);
+ }
+ buffer += 8;
+ data += tex_width;
+ }
}
}