Hi,

The attached patch adds readpixel acceleration to the r100 based on what's done for the r200.
This speeds-up readpix by a factor of 2 on x86, and much more on non-x86 systems which don't benefit from Ian's patch (like 8 times on itanium 2).


Stephane

diff -Naur radeon.old/Makefile radeon/Makefile
--- radeon.old/Makefile 2004-11-08 21:38:48.000000000 +0100
+++ radeon/Makefile     2004-11-03 17:07:59.000000000 +0100
@@ -22,6 +22,7 @@
        radeon_context.c \
        radeon_ioctl.c \
        radeon_lock.c \
+       radeon_pixel.c \
        radeon_screen.c \
        radeon_state.c \
        radeon_state_init.c \
diff -Naur radeon.old/radeon_context.c radeon/radeon_context.c
--- radeon.old/radeon_context.c 2004-11-08 21:38:48.000000000 +0100
+++ radeon/radeon_context.c     2004-11-08 21:00:47.000000000 +0100
@@ -343,7 +343,14 @@
    ctx->Const.MaxLineWidth = 10.0;
    ctx->Const.MaxLineWidthAA = 10.0;
    ctx->Const.LineWidthGranularity = 0.0625;
-
+   if (screen->cpp == 4)
+   {
+      ctx->Const.ColorReadFormat = GL_BGRA;
+      ctx->Const.ColorReadType = GL_UNSIGNED_INT_8_8_8_8_REV;
+   } else {
+      ctx->Const.ColorReadFormat = GL_BGR;
+      ctx->Const.ColorReadType = GL_UNSIGNED_SHORT_5_6_5_REV;
+   }
    /* Set maxlocksize (and hence vb size) small enough to avoid
     * fallbacks in radeon_tcl.c.  ie. guarentee that all vertices can
     * fit in a single dma buffer for indexed rendering of quad strips,
diff -Naur radeon.old/radeon_context.h radeon/radeon_context.h
--- radeon.old/radeon_context.h 2004-11-08 21:38:48.000000000 +0100
+++ radeon/radeon_context.h     2004-11-08 21:25:50.000000000 +0100
@@ -847,6 +847,7 @@
 #define DEBUG_DRI       0x200
 #define DEBUG_DMA       0x400
 #define DEBUG_SANITY    0x800
+#define DEBUG_PIXEL     0x2000
 
 #endif
 #endif /* __RADEON_CONTEXT_H__ */
diff -Naur radeon.old/radeon_ioctl.c radeon/radeon_ioctl.c
--- radeon.old/radeon_ioctl.c   2004-11-08 21:38:48.000000000 +0100
+++ radeon/radeon_ioctl.c       2004-11-08 21:01:33.000000000 +0100
@@ -59,7 +59,7 @@
 
 
 static void radeonWaitForIdle( radeonContextPtr rmesa );
-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
+int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
                                    const char * caller );
 
 static void radeonSaveHwState( radeonContextPtr rmesa )
@@ -518,7 +518,7 @@
 }
 
 
-static int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
+int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
                                    const char * caller )
 {
    int ret, i;
@@ -752,6 +752,8 @@
    rmesa->dma.current.ptr += bytes; /* bug - if alignment > 7 */
    rmesa->dma.current.start = 
       rmesa->dma.current.ptr = (rmesa->dma.current.ptr + 0x7) & ~0x7;  
+   
+   assert( rmesa->dma.current.ptr <= rmesa->dma.current.end );
 }
 
 void radeonAllocDmaRegionVerts( radeonContextPtr rmesa, 
@@ -1233,6 +1235,30 @@
       radeonWaitForIdle( rmesa );
 }
 
+GLboolean radeonIsGartMemory( radeonContextPtr rmesa, const GLvoid *pointer,
+                          GLint size )
+{
+   ptrdiff_t offset = (char *)pointer - (char 
*)rmesa->radeonScreen->gartTextures.map;
+   int valid = (size >= 0 &&
+               offset >= 0 &&
+               offset + size < rmesa->radeonScreen->gartTextures.size);
+   
+   if (RADEON_DEBUG & DEBUG_IOCTL)
+      fprintf(stderr, "radeonIsGartMemory( %p ) : %d\n", pointer, valid );
+   
+   return valid;
+}
+
+GLuint radeonGartOffsetFromVirtual( radeonContextPtr rmesa, const GLvoid 
*pointer )
+{
+   ptrdiff_t offset = (char *)pointer - (char 
*)rmesa->radeonScreen->gartTextures.map;
+
+   if (offset < 0 || offset > rmesa->radeonScreen->gartTextures.size)
+      return ~0;
+   else
+      return rmesa->radeonScreen->gart_texture_offset + offset;
+}
+
 
 void radeonInitIoctlFuncs( GLcontext *ctx )
 {
diff -Naur radeon.old/radeon_ioctl.h radeon/radeon_ioctl.h
--- radeon.old/radeon_ioctl.h   2004-11-08 21:38:48.000000000 +0100
+++ radeon/radeon_ioctl.h       2004-11-03 17:04:48.000000000 +0100
@@ -77,6 +77,8 @@
 
 extern void radeonEmitWait( radeonContextPtr rmesa, GLuint flags );
 
+extern int radeonFlushCmdBufLocked( radeonContextPtr rmesa, 
+                                   const char * caller );
 extern void radeonFlushCmdBuf( radeonContextPtr rmesa, const char * );
 extern void radeonRefillCurrentDmaRegion( radeonContextPtr rmesa );
 
@@ -99,6 +101,12 @@
 extern void radeonPageFlip( const __DRIdrawablePrivate *drawable );
 extern void radeonFlush( GLcontext *ctx );
 extern void radeonFinish( GLcontext *ctx );
+extern GLboolean radeonIsGartMemory( radeonContextPtr rmesa, 
+                               const GLvoid *pointer,
+                               GLint size );
+extern GLuint radeonGartOffsetFromVirtual( radeonContextPtr rmesa, 
+                               const GLvoid *pointer );
+ 
 extern void radeonWaitForIdleLocked( radeonContextPtr rmesa );
 extern void radeonWaitForVBlank( radeonContextPtr rmesa );
 extern void radeonInitIoctlFuncs( GLcontext *ctx );
diff -Naur radeon.old/radeon_pixel.c radeon/radeon_pixel.c
--- radeon.old/radeon_pixel.c   1970-01-01 01:00:00.000000000 +0100
+++ radeon/radeon_pixel.c       2004-11-08 21:21:42.000000000 +0100
@@ -0,0 +1,313 @@
+#include "glheader.h"
+#include "enums.h"
+#include "mtypes.h"
+#include "macros.h"
+#include "swrast/swrast.h"
+
+#include "radeon_context.h"
+#include "radeon_ioctl.h"
+#include "radeon_pixel.h"
+#include "radeon_swtcl.h"
+
+
+static GLboolean
+check_color( const GLcontext *ctx, GLenum type, GLenum format,
+            const struct gl_pixelstore_attrib *packing,
+            const void *pixels, GLint sz )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint cpp = rmesa->radeonScreen->cpp;
+
+   if (RADEON_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (        ctx->_ImageTransferState ||
+       packing->SwapBytes ||
+       packing->LsbFirst) {
+      if (RADEON_DEBUG & DEBUG_PIXEL)
+        fprintf(stderr, "%s: failed 1\n", __FUNCTION__);
+      return GL_FALSE;
+   }
+
+   if ( type == GL_UNSIGNED_SHORT_5_6_5_REV && 
+       cpp == 2 && 
+       format == GL_BGR ) {
+      if (RADEON_DEBUG & DEBUG_PIXEL)
+        fprintf(stderr, "%s: passed 2\n", __FUNCTION__);
+      return GL_TRUE;
+   }
+
+   if ( type == GL_UNSIGNED_INT_8_8_8_8_REV && 
+       cpp == 4 && 
+       format == GL_BGRA ) {
+      if (RADEON_DEBUG & DEBUG_PIXEL)
+        fprintf(stderr, "%s: passed 2\n", __FUNCTION__);
+      return GL_TRUE;
+   }
+ 
+     fprintf(stderr, "type %d %d    cpp %d %d    format %d 
%d\n",type,GL_UNSIGNED_INT_8_8_8_8_REV,cpp,4,format,GL_BGRA);
+
+   if (RADEON_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s: failed\n", __FUNCTION__);
+
+   return GL_FALSE;
+}
+
+
+static GLboolean
+clip_pixelrect( const GLcontext *ctx,
+               const GLframebuffer *buffer,
+               GLint *x, GLint *y,
+               GLsizei *width, GLsizei *height,
+               GLint *size )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+
+   /* left clipping */
+   if (*x < buffer->_Xmin) {
+      *width -= (buffer->_Xmin - *x);
+      *x = buffer->_Xmin;
+   }
+
+   /* right clipping */
+   if (*x + *width > buffer->_Xmax)
+      *width -= (*x + *width - buffer->_Xmax - 1);
+
+   if (*width <= 0)
+      return GL_FALSE;
+
+   /* bottom clipping */
+   if (*y < buffer->_Ymin) {
+      *height -= (buffer->_Ymin - *y);
+      *y = buffer->_Ymin;
+   }
+
+   /* top clipping */
+   if (*y + *height > buffer->_Ymax)
+      *height -= (*y + *height - buffer->_Ymax - 1);
+
+   if (*height <= 0)
+      return GL_FALSE;
+
+   *size = ((*y + *height - 1) * rmesa->radeonScreen->frontPitch +
+           (*x + *width - 1) * rmesa->radeonScreen->cpp);
+
+   return GL_TRUE;
+}
+
+static void do_read_pix( GLcontext *ctx,
+                        GLint x, GLint y, GLsizei width, GLsizei height,
+                        GLint pitch,
+                        const void *pixels,
+                        int blit_format)
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;
+   int nbox = dPriv->numClipRects;
+   int src_offset = rmesa->state.color.drawOffset
+          + rmesa->radeonScreen->fbLocation;
+   int dst_offset;
+   drm_clip_rect_t *box = dPriv->pClipRects;
+   int i;
+   int src_pitch = rmesa->state.color.drawPitch * rmesa->radeonScreen->cpp;
+   int dst_pitch = pitch;
+   GLint mx1,mx2,my1,my2,mw,mh;
+ 
+   if (!nbox)
+          return;
+
+   y = dPriv->h - y - height;
+   x += dPriv->x;
+   y += dPriv->y;
+
+
+   if (RADEON_DEBUG & DEBUG_PIXEL)
+          fprintf(stderr, "readpixel blit src_pitch %d dst_pitch %d\n",
+                          src_pitch, dst_pitch);
+
+   mx1 = box[0].x1;
+   my1 = box[0].y1;
+   mx2 = box[0].x2;
+   my2 = box[0].y2;
+   /* merge the rects */
+   for (i = 1 ; i < nbox ; i++)
+   {
+          if (box[i].x1 < mx1) mx1 = box[i].x1;
+          if (box[i].y1 < my1) my1 = box[i].y1;
+          if (box[i].x2 > mx2) mx2 = box[i].x2;
+          if (box[i].y2 > my2) my2 = box[i].y2;
+   }
+   mw=mx2-mx1;
+   mh=my2-my1;
+
+   /* intersect with the area we are supposed to read */
+   if (mx1 < x) mw -= x - mx1, mx1 = x;
+   if (my1 < y) mh -= y - my1, my1 = y;
+   if (mx1 + mw > x + width) mw = x + width - mx1;
+   if (my1 + mh > y + height) mh = y + height - my1;
+   if (mw <= 0) return;
+   if (mh <= 0) return;
+   mx2=mx1+mw;
+   my2=my1+mh;
+
+   dst_offset = radeonGartOffsetFromVirtual( rmesa, pixels );
+   radeonEmitWait( rmesa, RADEON_WAIT_3D ); 
+
+   radeonEmitBlit( rmesa,
+          blit_format,
+          src_pitch, src_offset,
+          dst_pitch, dst_offset,
+          mx1, my1,
+          mx1 - x, my1 - y,
+          mx2 - mx1, my2 - my1 );
+
+   radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+}
+
+static GLboolean
+radeonTryReadPixels( GLcontext *ctx,
+                 GLint x, GLint y, GLsizei width, GLsizei height,
+                 GLenum format, GLenum type,
+                 const struct gl_pixelstore_attrib *pack,
+                 GLvoid *pixels )
+{
+   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
+   GLuint cpp = rmesa->radeonScreen->cpp;
+   GLint pitch = (pack->RowLength ? pack->RowLength : width) * cpp;
+   GLint size;
+   GLint blit_format;
+  
+   pitch= (pitch+63)&~63;
+
+   size=pitch * height;
+
+   if (RADEON_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   /* this is the threshold above which a blit is faster than video memory 
acccess by the CPU 
+    * tuning for this is approximate */
+   if (size<64)
+      return GL_FALSE;
+
+   if (!check_color(ctx, type, format, pack, pixels, size))
+      return GL_FALSE;
+
+   switch ( rmesa->radeonScreen->cpp ) {
+   case 2:
+      blit_format = RADEON_GMC_DST_16BPP;
+      break;
+   case 4:
+      blit_format = RADEON_GMC_DST_32BPP;
+      break;
+   default:
+      return GL_FALSE;
+   }
+
+  
+   /* Although the blits go on the command buffer, need to do this and
+    * fire with lock held to guarentee cliprects and drawOffset are
+    * correct.
+    *
+    * This is an unusual situation however, as the code which flushes
+    * a full command buffer expects to be called unlocked.  As a
+    * workaround, immediately flush the buffer on aquiring the lock.
+    */
+   LOCK_HARDWARE( rmesa );
+
+   if (rmesa->store.cmd_used)
+      radeonFlushCmdBufLocked( rmesa, __FUNCTION__ );
+
+   if (!clip_pixelrect(ctx, ctx->ReadBuffer, &x, &y, &width, &height,
+                      &size)) {
+      UNLOCK_HARDWARE( rmesa );
+      if (RADEON_DEBUG & DEBUG_PIXEL)
+        fprintf(stderr, "%s totally clipped -- nothing to do\n",
+                __FUNCTION__);
+      return GL_TRUE;
+   }
+
+   {
+      /* Pixels is in regular memory -- get dma buffers and perform
+       * upload through them.
+       */
+      
+      int ret;
+      int region_offset;
+      drm_radeon_mem_alloc_t alloc;
+      drm_radeon_mem_free_t memfree;
+      char* region;
+      int yl;
+
+      alloc.region = RADEON_MEM_REGION_GART;
+      alloc.alignment = 1024;
+      alloc.size = size;
+      alloc.region_offset = &region_offset;
+
+      ret = drmCommandWriteRead( rmesa->radeonScreen->driScreen->fd,
+                     DRM_RADEON_ALLOC,
+                     &alloc, sizeof(alloc));
+      if (ret) {
+             UNLOCK_HARDWARE( rmesa );
+             return GL_FALSE;
+      }
+      region =(void *)((char *)rmesa->radeonScreen->gartTextures.map + 
region_offset);
+      
+      do_read_pix( ctx, x, y, width, height, pitch, region, blit_format);
+      UNLOCK_HARDWARE( rmesa );
+      radeonFinish( ctx ); /* required for GL */
+
+      /* the thing is upside down on the card, swap it */
+      for (yl=0;yl<height;yl++)
+      {
+         memcpy(((char*)pixels)+yl*(pack->RowLength ? pack->RowLength : 
width)*cpp, 
+                        (void *)((char *)rmesa->radeonScreen->gartTextures.map 
+ region_offset + (height-1-yl)*pitch), 
+                        (pack->RowLength ? pack->RowLength : width)*cpp);
+      }              
+      
+      memfree.region = RADEON_MEM_REGION_GART;
+      memfree.region_offset = region_offset;
+
+      ret = drmCommandWrite( rmesa->radeonScreen->driScreen->fd,
+                     DRM_RADEON_FREE,
+                     &memfree, sizeof(memfree));
+      if (ret)
+             fprintf(stderr, "%s: DRM_RADEON_FREE ret %d\n", __FUNCTION__, 
ret);
+      return GL_TRUE;
+   }
+
+}
+
+static void
+radeonReadPixels( GLcontext *ctx,
+                GLint x, GLint y, GLsizei width, GLsizei height,
+                GLenum format, GLenum type,
+                const struct gl_pixelstore_attrib *pack,
+                GLvoid *pixels )
+{
+   if (RADEON_DEBUG & DEBUG_PIXEL)
+      fprintf(stderr, "%s\n", __FUNCTION__);
+
+   if (!radeonTryReadPixels( ctx, x, y, width, height, format, type, pack, 
+                          pixels))
+      _swrast_ReadPixels( ctx, x, y, width, height, format, type, pack, 
+                         pixels);
+}
+
+
+void radeonInitPixelFuncs( GLcontext *ctx )
+{
+   /* Pixel path fallbacks.
+    */
+   ctx->Driver.Accum = _swrast_Accum;
+   ctx->Driver.Bitmap = _swrast_Bitmap;
+   ctx->Driver.CopyPixels = _swrast_CopyPixels;
+   ctx->Driver.DrawPixels = _swrast_DrawPixels;
+   ctx->Driver.ReadPixels = _swrast_ReadPixels;
+
+   if (!getenv("RADEON_NO_BLITS") && RADEON_CONTEXT(ctx)->dri.drmMinor >= 6) {
+      ctx->Driver.ReadPixels = radeonReadPixels;  
+   }
+}
+
+
+
diff -Naur radeon.old/radeon_pixel.h radeon/radeon_pixel.h
--- radeon.old/radeon_pixel.h   1970-01-01 01:00:00.000000000 +0100
+++ radeon/radeon_pixel.h       2004-11-03 02:00:54.000000000 +0100
@@ -0,0 +1,40 @@
+/* $XFree86: xc/lib/GL/mesa/src/drv/r200/r200_pixel.h,v 1.1 2002/10/30 
12:51:52 alanh Exp $ */
+/*
+Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
+
+The Weather Channel (TM) funded Tungsten Graphics to develop the
+initial release of the Radeon 8500 driver under the XFree86 license.
+This notice must be preserved.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+/*
+ * Authors:
+ *   Keith Whitwell <[EMAIL PROTECTED]>
+ */
+
+#ifndef __RADEON_PIXEL_H__
+#define __RADEON_PIXEL_H__
+
+extern void radeonInitPixelFuncs( GLcontext *ctx );
+
+#endif
diff -Naur radeon.old/radeon_state.c radeon/radeon_state.c
--- radeon.old/radeon_state.c   2004-11-08 21:38:48.000000000 +0100
+++ radeon/radeon_state.c       2004-11-03 17:01:57.000000000 +0100
@@ -55,6 +55,7 @@
 #include "radeon_tex.h"
 #include "radeon_swtcl.h"
 #include "radeon_vtxfmt.h"
+#include "radeon_pixel.h"
 
 /* =============================================================
  * Alpha blending
@@ -2225,11 +2226,7 @@
 
    /* Pixel path fallbacks
     */
-   ctx->Driver.Accum                    = _swrast_Accum;
-   ctx->Driver.Bitmap                   = _swrast_Bitmap;
-   ctx->Driver.CopyPixels               = _swrast_CopyPixels;
-   ctx->Driver.DrawPixels               = _swrast_DrawPixels;
-   ctx->Driver.ReadPixels               = _swrast_ReadPixels;
+   radeonInitPixelFuncs(ctx);
 
    /* Swrast hooks for imaging extensions:
     */

Reply via email to