On Mon, 27 Jun 2005 01:57:56 +0200
Roland Scheidegger <[EMAIL PROTECTED]> wrote:

> Ben Skeggs wrote:
> > S3TC does seem to be the killer for UT2004.  I started porting over the
> > S3TC stuff from the r200 driver a while
> > back, but haven't had a lot of time recently to fix a couple of issues
> > with it.  Overall fps doesn't seem to take a
> > huge gain, but the sudden drops to 1-2fps in certain levels
> > (CTF-Faceclassic) disappear when S3TC's enabled.
> That's true, but to avoid the huge drops you could also just decrease 
> texture detail. Or implement the second texture heap in main memory and 
> use gart texturing (though you'd also need to manually increase the gart 
> size). There are some problems with that for r200, and the strategy for 
> what textures to put where may not be optimal currently, but the drops 
> should be gone.
> That said, the performance in ut2k4 is probably really slow (apart from 
> that problem) due to deficiencies in drawArrays handling, at least that 
> was the case for r200 last time I checked...

The first patch is a "hack" that attempts to improve it.

The latter two patches work around the RADEON_BUFFER_SIZE limit.
While this actually appears to work, there's no speed boost in general.

-- 
Aapo Tahkola
Index: t_array_api.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/tnl/t_array_api.c,v
retrieving revision 1.52
diff -u -b -B -u -r1.52 t_array_api.c
--- t_array_api.c       18 Jul 2005 12:31:30 -0000      1.52
+++ t_array_api.c       27 Jul 2005 20:28:16 -0000
@@ -78,21 +78,20 @@
 }
 
 
-/* Note this function no longer takes a 'start' value, the range is
- * assumed to start at zero.  The old trick of subtracting 'start'
- * from each index won't work if the indices are not in writeable
- * memory.
- */
 static void _tnl_draw_range_elements( GLcontext *ctx, GLenum mode,
+                                     GLuint min_index,
                                      GLuint max_index,
                                      GLsizei index_count, GLuint *indices )
 
 {
    TNLcontext *tnl = TNL_CONTEXT(ctx);
    struct tnl_prim prim;
+   int i;
+   static int size=0;
+   static GLuint *ind=NULL;
    FLUSH_CURRENT( ctx, 0 );
    
-   _tnl_vb_bind_arrays( ctx, 0, max_index );
+   _tnl_vb_bind_arrays( ctx, min_index, max_index );
 
    tnl->vb.Primitive = &prim;
    tnl->vb.Primitive[0].mode = mode | PRIM_BEGIN | PRIM_END;
@@ -100,8 +99,15 @@
    tnl->vb.Primitive[0].count = index_count;
    tnl->vb.PrimitiveCount = 1;
 
-   tnl->vb.Elts = (GLuint *)indices;
+   if(index_count > size){
+          size = index_count;
+          free(ind);
+          ind = malloc(index_count * sizeof(GLuint));
+   }
+   for(i=0; i < index_count; i++)
+          ind[i] = indices[i] - min_index;
 
+   tnl->vb.Elts = ind;
    tnl->Driver.RunPipeline( ctx );
 }
 
@@ -297,20 +301,19 @@
        * at the whole locked range.
        */
 
-      if (start == 0 && ctx->Array.LockFirst == 0 && 
-         end < (ctx->Array.LockFirst + ctx->Array.LockCount))
-        _tnl_draw_range_elements( ctx, mode,
+      if (end-start+1 < (ctx->Array.LockFirst + ctx->Array.LockCount)){
+        _tnl_draw_range_elements( ctx, mode, start,
                                   ctx->Array.LockCount,
                                   count, ui_indices );
-      else {
+        } else {
         fallback_drawelements( ctx, mode, count, ui_indices );
       }
    }
-   else if (start == 0 && end < ctx->Const.MaxArrayLockSize) {
+   else if (end-start+1 < ctx->Const.MaxArrayLockSize) {
       /* The arrays aren't locked but we can still fit them inside a
        * single vertexbuffer.
        */
-      _tnl_draw_range_elements( ctx, mode, end + 1, count, ui_indices );
+      _tnl_draw_range_elements( ctx, mode, start, end + 1, count, ui_indices );
    }
    else {
       /* Range is too big to optimize:
@@ -352,7 +355,7 @@
 
    if (ctx->Array.LockCount) {
       if (ctx->Array.LockFirst == 0)
-        _tnl_draw_range_elements( ctx, mode,
+        _tnl_draw_range_elements( ctx, mode, 0,
                                   ctx->Array.LockCount,
                                   count, ui_indices );
       else
@@ -361,16 +364,18 @@
    else {
       /* Scan the index list and see if we can use the locked path anyway.
        */
-      GLuint max_elt = 0;
+      GLuint max_elt = 0, min_elt = ~0;
       GLint i;
 
-      for (i = 0 ; i < count ; i++)
+      for (i = 0 ; i < count ; i++){
         if (ui_indices[i] > max_elt)
             max_elt = ui_indices[i];
-
-      if (max_elt < ctx->Const.MaxArrayLockSize && /* can we use it? */
-         max_elt < (GLuint) count)                /* do we want to use it? */
-        _tnl_draw_range_elements( ctx, mode, max_elt+1, count, ui_indices );
+        if (ui_indices[i] < min_elt)
+            min_elt = ui_indices[i];
+      } 
+      if (max_elt-min_elt+1 < ctx->Const.MaxArrayLockSize && /* can we use it? */
+         max_elt-min_elt+1 < (GLuint) count)              /* do we want to use it? */
+        _tnl_draw_range_elements( ctx, mode, min_elt, max_elt+1, count, ui_indices );
       else
         fallback_drawelements( ctx, mode, count, ui_indices );
    }
Index: radeon_cp.c
===================================================================
RCS file: /cvsroot/r300/r300_driver/drm/shared-core/radeon_cp.c,v
retrieving revision 1.11
diff -u -b -B -u -r1.11 radeon_cp.c
--- radeon_cp.c 27 Jun 2005 19:46:30 -0000      1.11
+++ radeon_cp.c 27 Jul 2005 21:33:48 -0000
@@ -1977,6 +1977,7 @@
        return DRM_ERR(EBUSY);
 }
 
+#if 0
 static int radeon_cp_get_buffers(DRMFILE filp, drm_device_t * dev,
                                 drm_dma_t * d)
 {
@@ -2001,7 +2002,70 @@
        }
        return 0;
 }
+#else
+static int radeon_cp_get_buffers(DRMFILE filp, drm_device_t * dev,
+                                drm_dma_t * d)
+{
+       int i, start;
+       drm_buf_t *buf;
+       drm_buf_t *buffers[32];
+       drm_radeon_private_t *dev_priv = dev->dev_private;
+       drm_device_dma_t *dma = dev->dma;
+       drm_radeon_buf_priv_t *buf_priv;
+       
+       start = dev_priv->last_buf+1;
+       if (start >= dma->buf_count)
+               start = 0;
 
+       d->granted_count = 0;
+       
+       if(d->request_count > 32)
+               return DRM_ERR(EINVAL);
+
+       again:  
+       for(i=start; i < dma->buf_count; i++){
+               u32 done_age = GET_SCRATCH(1);
+               
+               buf = dma->buflist[i];
+               buf_priv = buf->dev_private;
+               if (buf->filp == 0 || (buf->pending &&
+                                      buf_priv->age <= done_age)) {
+                       buffers[d->granted_count] = buf;
+                       d->granted_count++;
+               }else d->granted_count = 0;
+               
+               if(d->granted_count == d->request_count)
+                       goto done;
+       }
+       if(start != 0){
+               d->granted_count = 0;
+               start = 0;
+               goto again;
+       }
+       done:
+       if(d->granted_count != d->request_count)
+               return DRM_ERR(EBUSY);  /* NOTE: broken client */
+       
+       dev_priv->last_buf += d->granted_count;
+       if (dev_priv->last_buf >= dma->buf_count)
+               dev_priv->last_buf = 0; /* XXX */
+                       
+       for(i=0; i < d->granted_count; i++){
+               buffers[i]->pending = 0;
+               buffers[i]->filp = filp;
+               
+               if (DRM_COPY_TO_USER(&d->request_indices[i], &buffers[i]->idx,
+                                    sizeof(buffers[i]->idx)))
+                       return DRM_ERR(EFAULT);
+               
+               if (DRM_COPY_TO_USER(&d->request_sizes[i], &buffers[i]->total,
+                                    sizeof(buffers[i]->total)))
+                       return DRM_ERR(EFAULT);
+       }
+       
+       return 0;
+}
+#endif
 int radeon_cp_buffers(DRM_IOCTL_ARGS)
 {
        DRM_DEVICE;

Attachment: dma_ext_r300.patch
Description: Binary data

Reply via email to