On Mon, 27 Jun 2005 01:57:56 +0200 Roland Scheidegger <[EMAIL PROTECTED]> wrote:
> Ben Skeggs wrote:
> > S3TC does seem to be the killer for UT2004. I started porting over the
> > S3TC stuff from the r200 driver a while
> > back, but haven't had a lot of time recently to fix a couple of issues
> > with it. Overall fps doesn't seem to take a
> > huge gain, but the sudden drops to 1-2fps in certain levels
> > (CTF-Faceclassic) disappear when S3TC's enabled.
> That's true, but to avoid the huge drops you could also just decrease
> texture detail. Or implement the second texture heap in main memory and
> use gart texturing (though you'd also need to manually increase the gart
> size). There are some problems with that for r200, and the strategy for
> what textures to put where may not be optimal currently, but the drops
> should be gone.
> That said, the performance in ut2k4 is probably really slow (apart from
> that problem) due to deficiencies in drawArrays handling, at least that
> was the case for r200 last time I checked...

The first "hack" attempts to improve it. The latter two patches work around the RADEON_BUFFER_SIZE limit. While this actually appears to work, there's no speed boost in general.

-- Aapo Tahkola
Index: t_array_api.c =================================================================== RCS file: /cvs/mesa/Mesa/src/mesa/tnl/t_array_api.c,v retrieving revision 1.52 diff -u -b -B -u -r1.52 t_array_api.c --- t_array_api.c 18 Jul 2005 12:31:30 -0000 1.52 +++ t_array_api.c 27 Jul 2005 20:28:16 -0000 @@ -78,21 +78,20 @@ } -/* Note this function no longer takes a 'start' value, the range is - * assumed to start at zero. The old trick of subtracting 'start' - * from each index won't work if the indices are not in writeable - * memory. - */ static void _tnl_draw_range_elements( GLcontext *ctx, GLenum mode, + GLuint min_index, GLuint max_index, GLsizei index_count, GLuint *indices ) { TNLcontext *tnl = TNL_CONTEXT(ctx); struct tnl_prim prim; + int i; + static int size=0; + static GLuint *ind=NULL; FLUSH_CURRENT( ctx, 0 ); - _tnl_vb_bind_arrays( ctx, 0, max_index ); + _tnl_vb_bind_arrays( ctx, min_index, max_index ); tnl->vb.Primitive = &prim; tnl->vb.Primitive[0].mode = mode | PRIM_BEGIN | PRIM_END; @@ -100,8 +99,15 @@ tnl->vb.Primitive[0].count = index_count; tnl->vb.PrimitiveCount = 1; - tnl->vb.Elts = (GLuint *)indices; + if(index_count > size){ + size = index_count; + free(ind); + ind = malloc(index_count * sizeof(GLuint)); + } + for(i=0; i < index_count; i++) + ind[i] = indices[i] - min_index; + tnl->vb.Elts = ind; tnl->Driver.RunPipeline( ctx ); } @@ -297,20 +301,19 @@ * at the whole locked range. 
*/ - if (start == 0 && ctx->Array.LockFirst == 0 && - end < (ctx->Array.LockFirst + ctx->Array.LockCount)) - _tnl_draw_range_elements( ctx, mode, + if (end-start+1 < (ctx->Array.LockFirst + ctx->Array.LockCount)){ + _tnl_draw_range_elements( ctx, mode, start, ctx->Array.LockCount, count, ui_indices ); - else { + } else { fallback_drawelements( ctx, mode, count, ui_indices ); } } - else if (start == 0 && end < ctx->Const.MaxArrayLockSize) { + else if (end-start+1 < ctx->Const.MaxArrayLockSize) { /* The arrays aren't locked but we can still fit them inside a * single vertexbuffer. */ - _tnl_draw_range_elements( ctx, mode, end + 1, count, ui_indices ); + _tnl_draw_range_elements( ctx, mode, start, end + 1, count, ui_indices ); } else { /* Range is too big to optimize: @@ -352,7 +355,7 @@ if (ctx->Array.LockCount) { if (ctx->Array.LockFirst == 0) - _tnl_draw_range_elements( ctx, mode, + _tnl_draw_range_elements( ctx, mode, 0, ctx->Array.LockCount, count, ui_indices ); else @@ -361,16 +364,18 @@ else { /* Scan the index list and see if we can use the locked path anyway. */ - GLuint max_elt = 0; + GLuint max_elt = 0, min_elt = ~0; GLint i; - for (i = 0 ; i < count ; i++) + for (i = 0 ; i < count ; i++){ if (ui_indices[i] > max_elt) max_elt = ui_indices[i]; - - if (max_elt < ctx->Const.MaxArrayLockSize && /* can we use it? */ - max_elt < (GLuint) count) /* do we want to use it? */ - _tnl_draw_range_elements( ctx, mode, max_elt+1, count, ui_indices ); + if (ui_indices[i] < min_elt) + min_elt = ui_indices[i]; + } + if (max_elt-min_elt+1 < ctx->Const.MaxArrayLockSize && /* can we use it? */ + max_elt-min_elt+1 < (GLuint) count) /* do we want to use it? */ + _tnl_draw_range_elements( ctx, mode, min_elt, max_elt+1, count, ui_indices ); else fallback_drawelements( ctx, mode, count, ui_indices ); }
Index: radeon_cp.c =================================================================== RCS file: /cvsroot/r300/r300_driver/drm/shared-core/radeon_cp.c,v retrieving revision 1.11 diff -u -b -B -u -r1.11 radeon_cp.c --- radeon_cp.c 27 Jun 2005 19:46:30 -0000 1.11 +++ radeon_cp.c 27 Jul 2005 21:33:48 -0000 @@ -1977,6 +1977,7 @@ return DRM_ERR(EBUSY); } +#if 0 static int radeon_cp_get_buffers(DRMFILE filp, drm_device_t * dev, drm_dma_t * d) { @@ -2001,7 +2002,70 @@ } return 0; } +#else +static int radeon_cp_get_buffers(DRMFILE filp, drm_device_t * dev, + drm_dma_t * d) +{ + int i, start; + drm_buf_t *buf; + drm_buf_t *buffers[32]; + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_device_dma_t *dma = dev->dma; + drm_radeon_buf_priv_t *buf_priv; + + start = dev_priv->last_buf+1; + if (start >= dma->buf_count) + start = 0; + d->granted_count = 0; + + if(d->request_count > 32) + return DRM_ERR(EINVAL); + + again: + for(i=start; i < dma->buf_count; i++){ + u32 done_age = GET_SCRATCH(1); + + buf = dma->buflist[i]; + buf_priv = buf->dev_private; + if (buf->filp == 0 || (buf->pending && + buf_priv->age <= done_age)) { + buffers[d->granted_count] = buf; + d->granted_count++; + }else d->granted_count = 0; + + if(d->granted_count == d->request_count) + goto done; + } + if(start != 0){ + d->granted_count = 0; + start = 0; + goto again; + } + done: + if(d->granted_count != d->request_count) + return DRM_ERR(EBUSY); /* NOTE: broken client */ + + dev_priv->last_buf += d->granted_count; + if (dev_priv->last_buf >= dma->buf_count) + dev_priv->last_buf = 0; /* XXX */ + + for(i=0; i < d->granted_count; i++){ + buffers[i]->pending = 0; + buffers[i]->filp = filp; + + if (DRM_COPY_TO_USER(&d->request_indices[i], &buffers[i]->idx, + sizeof(buffers[i]->idx))) + return DRM_ERR(EFAULT); + + if (DRM_COPY_TO_USER(&d->request_sizes[i], &buffers[i]->total, + sizeof(buffers[i]->total))) + return DRM_ERR(EFAULT); + } + + return 0; +} +#endif int radeon_cp_buffers(DRM_IOCTL_ARGS) { DRM_DEVICE;
dma_ext_r300.patch
Description: Binary data