I started looking into the issue of how we handle various texture
formats on R300 on big-endian machines.  It became evident that
textures were getting byte-swapped on their way to the framebuffer.
Setting RADEON_HOST_DATA_SWAP_32BIT in RADEON_RBBM_GUICNTL doesn't
seem to have any effect on R300.

We can cope with the byte-swap for textures with 4 bytes/texel, but
not for textures with 2 bytes or 1 byte per texel.  So instead of using a
HOSTDATA_BLT in radeon_cp_dispatch_texture, I changed it to use a
BITBLT_MULTI.  I still copy the texture into gart memory, but instead
of using an indirect buffer I just put the blit command into the ring
buffer.  This avoids the byte swap that the CP does and gets the
texture to the framebuffer without being byte-swapped.  It should be
just as fast this way as with the HOSTDATA_BLT.

The patch below implements this.  It also requires a companion patch
to the r300 client driver code, which I will post shortly.

Paul.

diff -urN cvs/r300_driver/drm/shared-core/radeon_state.c r300_driver/drm/shared-core/radeon_state.c
--- cvs/r300_driver/drm/shared-core/radeon_state.c      2005-03-05 09:26:06.000000000 +1100
+++ r300_driver/drm/shared-core/radeon_state.c  2005-03-09 10:32:45.000000000 +1100
@@ -1471,7 +1471,7 @@
 
 }
 
-#define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32))
+#define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
 
 static int radeon_cp_dispatch_texture(DRMFILE filp,
                                      drm_device_t * dev,
@@ -1488,6 +1488,7 @@
        u32 height;
        int i;
        u32 texpitch, microtile;
+       u32 offset;
        RING_LOCALS;
 
        DRM_GET_PRIV_WITH_RETURN(filp_priv, filp);
@@ -1508,16 +1509,6 @@
        RADEON_WAIT_UNTIL_IDLE();
        ADVANCE_RING();
 
-#ifdef __BIG_ENDIAN
-       /* The Mesa texture functions provide the data in little endian as the
-        * chip wants it, but we need to compensate for the fact that the CP
-        * ring gets byte-swapped
-        */
-       BEGIN_RING(2);
-       OUT_RING_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT);
-       ADVANCE_RING();
-#endif
-
        /* The compiler won't optimize away a division by a variable,
         * even if the only legal values are powers of two.  Thus, we'll
         * use a shift instead.
@@ -1601,23 +1592,6 @@
                buffer =
                    (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
                dwords = size / 4;
-               buffer[0] = CP_PACKET3(RADEON_CNTL_HOSTDATA_BLT, dwords + 6);
-               buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-                            RADEON_GMC_BRUSH_NONE |
-                            (format << 8) |
-                            RADEON_GMC_SRC_DATATYPE_COLOR |
-                            RADEON_ROP3_S |
-                            RADEON_DP_SRC_SOURCE_HOST_DATA |
-                            RADEON_GMC_CLR_CMP_CNTL_DIS |
-                            RADEON_GMC_WR_MSK_DIS);
-
-               buffer[2] = (texpitch << 22) | (tex->offset >> 10);
-               buffer[3] = 0xffffffff;
-               buffer[4] = 0xffffffff;
-               buffer[5] = (image->y << 16) | image->x;
-               buffer[6] = (height << 16) | image->width;
-               buffer[7] = dwords;
-               buffer += 8;
 
                if (microtile) {
                        /* texture micro tiling in use, minimum texture width is thus 16 bytes.
@@ -1726,8 +1700,26 @@
                }
 
                buf->filp = filp;
-               buf->used = (dwords + 8) * sizeof(u32);
-               radeon_cp_dispatch_indirect(dev, buf, 0, buf->used);
+               buf->used = size;
+               offset = dev_priv->gart_buffers_offset + buf->offset;
+               BEGIN_RING(7);
+               OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
+               OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
+                        RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+                        RADEON_GMC_BRUSH_NONE |
+                        (format << 8) |
+                        RADEON_GMC_SRC_DATATYPE_COLOR |
+                        RADEON_ROP3_S |
+                        RADEON_DP_SRC_SOURCE_MEMORY |
+                        RADEON_GMC_CLR_CMP_CNTL_DIS |
+                        RADEON_GMC_WR_MSK_DIS );
+               OUT_RING((texpitch << 22) | (offset >> 10));
+               OUT_RING((texpitch << 22) | (tex->offset >> 10));
+               OUT_RING(0);
+               OUT_RING((image->x << 16) | image->y);
+               OUT_RING((image->width << 16) | height);
+               ADVANCE_RING();
+
                radeon_cp_discard_buffer(dev, buf);
 
                /* Update the input parameters for next time */


-------------------------------------------------------
SF email is sponsored by - The IT Product Guide
Read honest & candid reviews on hundreds of IT Products from real users.
Discover which products truly live up to the hype. Start reading now.
http://ads.osdn.com/?ad_id=6595&alloc_id=14396&op=click
--
_______________________________________________
Dri-devel mailing list
Dri-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel

Reply via email to