The attached patch removes the mandatory full-state emit that was
happening after each cmdbuf flush.  Instead, we set a flag after a
cmdbuf flush saying "save the state at the next unlock," which means
memcpying the state atoms into a saved copy.  When we actually see that
the context was lost, we "back up" and restore state -- make a new
cmdbuf, dirty all state, emit it, flush it, then put the old cmdbuf
back.  I also removed the separate dirty/clean state lists and replaced
them with a single list.  The reasoning was that we have to walk the
entire list on emit (and twice when the all-dirty flag is set) anyway,
and I felt that this was cleaner.  It also fixed some bad cmdbufs that
were happening for me (drmCommandWrite: -22) with the CVS code.

This gets about a 5% speedup for me in ipers (which I wish was more
accurate in its reporting), and doesn't change glxgears performance.  I
didn't have any interesting apps besides glxgears handy to benchmark
with.  Any thoughts on this?  If people think it's a good idea, I'll do
the same for the radeon driver as well.

-- 
Eric Anholt                                [EMAIL PROTECTED]          
http://people.freebsd.org/~anholt/         [EMAIL PROTECTED]

Index: r200_cmdbuf.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_cmdbuf.c,v
retrieving revision 1.7
diff -u -r1.7 r200_cmdbuf.c
--- r200_cmdbuf.c	17 Aug 2004 01:41:32 -0000	1.7
+++ r200_cmdbuf.c	19 Sep 2004 08:59:01 -0000
@@ -58,113 +58,105 @@
 
 }
 
-static void r200_emit_state_list( r200ContextPtr rmesa, 
-				    struct r200_state_atom *list )
+/* The state atoms will be emitted in the order they appear in the atom list,
+ * so this step is important.
+ */
+void r200SetUpAtomList( r200ContextPtr rmesa )
 {
-   struct r200_state_atom *state, *tmp;
-   char *dest;
-   int i, size, mtu;
-
-   size = 0;
-   foreach_s( state, tmp, list ) {
-      if (state->check( rmesa->glCtx, state->idx )) {
-/*	 dest = r200AllocCmdBuf( rmesa, state->cmd_size * 4, __FUNCTION__);
-	 memcpy( dest, state->cmd, state->cmd_size * 4);*/
-         size += state->cmd_size;
-         state->dirty = GL_TRUE;
-	 move_to_head( &(rmesa->hw.clean), state );
-	 if (R200_DEBUG & DEBUG_STATE) 
-	    print_state_atom( state );
-      }
-      else if (R200_DEBUG & DEBUG_STATE)
-	 fprintf(stderr, "skip state %s\n", state->name);
-   }
-
-   if (!size)
-      return;
+   int i, mtu;
 
-   dest = r200AllocCmdBuf( rmesa, size * 4, __FUNCTION__);
    mtu = rmesa->glCtx->Const.MaxTextureUnits;
 
-#define EMIT_ATOM(ATOM) \
-do { \
-   if (rmesa->hw.ATOM.dirty) { \
-      rmesa->hw.ATOM.dirty = GL_FALSE; \
-      memcpy( dest, rmesa->hw.ATOM.cmd, rmesa->hw.ATOM.cmd_size * 4); \
-      dest += rmesa->hw.ATOM.cmd_size * 4; \
-   } \
-} while (0)
-
-   EMIT_ATOM (ctx);
-   EMIT_ATOM (set);
-   EMIT_ATOM (lin);
-   EMIT_ATOM (msk);
-   EMIT_ATOM (vpt);
-   EMIT_ATOM (vtx);
-   EMIT_ATOM (vap);
-   EMIT_ATOM (vte);
-   EMIT_ATOM (msc);
-   EMIT_ATOM (cst);
-   EMIT_ATOM (zbs);
-   EMIT_ATOM (tcl);
-   EMIT_ATOM (msl);
-   EMIT_ATOM (tcg);
-   EMIT_ATOM (grd);
-   EMIT_ATOM (fog);
-   EMIT_ATOM (tam);
-   EMIT_ATOM (tf);
-   for (i = 0; i < mtu; ++i) {
-       EMIT_ATOM (tex[i]);
-   }
-   for (i = 0; i < mtu; ++i) {
-       EMIT_ATOM (cube[i]);
-   }
+   make_empty_list(&rmesa->hw.atomlist);
+   rmesa->hw.atomlist.name = "atom-list";
+
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ctx );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.set );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lin );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msk );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vpt );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vtx );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vap );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.vte );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msc );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cst );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.zbs );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcl );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.msl );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tcg );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.grd );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.fog );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tam );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tf );
+   for (i = 0; i < mtu; ++i)
+       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.tex[i] );
+   for (i = 0; i < mtu; ++i)
+       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.cube[i] );
    for (i = 0; i < 3 + mtu; ++i)
-       EMIT_ATOM (mat[i]);
-   EMIT_ATOM (eye);
-   EMIT_ATOM (glt);
-   for (i = 0; i < 2; ++i) {
-      EMIT_ATOM (mtl[i]);
-   }
+       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mat[i] );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.eye );
+   insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.glt );
+   for (i = 0; i < 2; ++i)
+      insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.mtl[i] );
    for (i = 0; i < 8; ++i)
-       EMIT_ATOM (lit[i]);
+       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.lit[i] );
    for (i = 0; i < 6; ++i)
-       EMIT_ATOM (ucp[i]);
+       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.ucp[i] );
    for (i = 0; i < 6; ++i)
-       EMIT_ATOM (pix[i]);
-
-#undef EMIT_ATOM
-
+       insert_at_tail( &rmesa->hw.atomlist, &rmesa->hw.pix[i] );
 }
 
-
 void r200EmitState( r200ContextPtr rmesa )
 {
-   struct r200_state_atom *state, *tmp;
+   char *dest;
+   int i, mtu;
+   struct r200_state_atom *atom;
 
    if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS))
       fprintf(stderr, "%s\n", __FUNCTION__);
 
-   /* Somewhat overkill:
-    */
-   if ( rmesa->lost_context) {
-      if (R200_DEBUG & (DEBUG_STATE|DEBUG_PRIMS|DEBUG_IOCTL))
-	 fprintf(stderr, "%s - lost context\n", __FUNCTION__); 
+   if (!rmesa->hw.is_dirty && !rmesa->hw.all_dirty)
+      return;
+
+   mtu = rmesa->glCtx->Const.MaxTextureUnits;
 
-      foreach_s( state, tmp, &(rmesa->hw.clean) ) 
-	 move_to_tail(&(rmesa->hw.dirty), state );
+   /* To avoid going across the entire set of states multiple times, just check
+    * for enough space for the case of emitting all state, and inline the
+    * r200AllocCmdBuf code here without all the checks.
+    */
+   dest = rmesa->store.cmd_buf + rmesa->store.cmd_used;
+   r200EnsureCmdBufSpace( rmesa, rmesa->hw.max_state_size );
 
-      rmesa->lost_context = 0;
+   if (R200_DEBUG & DEBUG_STATE) {
+      foreach( atom, &rmesa->hw.atomlist ) {
+	 if ( atom->dirty || rmesa->hw.all_dirty ) {
+	    if ( atom->check( rmesa->glCtx, atom->idx ) )
+	       print_state_atom( atom );
+	    else
+	       fprintf(stderr, "skip state %s\n", atom->name);
+	 }
+      }
    }
-/*   else {
-      move_to_tail( &rmesa->hw.dirty, &rmesa->hw.mtl[0] );*/
-      /* odd bug? -- isosurf, cycle between reflect & lit */
-/*   }*/
 
-   r200_emit_state_list( rmesa, &rmesa->hw.dirty );
-}
+   foreach( atom, &rmesa->hw.atomlist ) {
+      if ( rmesa->hw.all_dirty )
+	 atom->dirty = GL_TRUE;
+      if ( atom->dirty ) {
+	 if ( atom->check( rmesa->glCtx, atom->idx ) ) {
+	    int size = atom->cmd_size * 4;
+	    memcpy( dest, atom->cmd, size);
+	    dest += size;
+	    rmesa->store.cmd_used += size;
+	    atom->dirty = GL_FALSE;
+	 }
+      }
+   }
 
+   assert( rmesa->store.cmd_used <= R200_CMD_BUF_SZ );
 
+   rmesa->hw.is_dirty = GL_FALSE;
+   rmesa->hw.all_dirty = GL_FALSE;
+}
 
 /* Fire a section of the retained (indexed_verts) buffer as a regular
  * primtive.  
Index: r200_context.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_context.c,v
retrieving revision 1.28
diff -u -r1.28 r200_context.c
--- r200_context.c	8 Sep 2004 08:51:12 -0000	1.28
+++ r200_context.c	18 Sep 2004 21:55:24 -0000
@@ -323,7 +323,7 @@
 	 DRI_CONF_TEXTURE_DEPTH_32 : DRI_CONF_TEXTURE_DEPTH_16;
 
    rmesa->swtcl.RenderIndex = ~0;
-   rmesa->lost_context = 1;
+   rmesa->hw.all_dirty = 1;
 
    /* Set the maximum texture size small enough that we can guarentee that
     * all texture units can bind a maximal texture and have them both in
Index: r200_context.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_context.h,v
retrieving revision 1.20
diff -u -r1.20 r200_context.h
--- r200_context.h	17 Aug 2004 01:41:32 -0000	1.20
+++ r200_context.h	19 Sep 2004 02:46:27 -0000
@@ -191,6 +191,7 @@
    GLuint idx;
    int *cmd;			         /* one or more cmd's */
    int *lastcmd;			 /* one or more cmd's */
+   int *savedcmd;			 /* one or more cmd's */
    GLboolean dirty;
    GLboolean (*check)( GLcontext *, int );    /* is this state active? */
 };
@@ -491,10 +492,8 @@
 
 
 struct r200_hw_state {
-   /* All state should be on one of these lists:
-    */
-   struct r200_state_atom dirty; /* dirty list head placeholder */
-   struct r200_state_atom clean; /* clean list head placeholder */
+   /* Head of the linked list of state atoms. */
+   struct r200_state_atom atomlist;
 
    /* Hardware state, stored as cmdbuf commands:  
     *   -- Need to doublebuffer for
@@ -530,6 +529,7 @@
    struct r200_state_atom glt; 
 
    int max_state_size;	/* Number of bytes necessary for a full state emit. */
+   GLboolean is_dirty, all_dirty;
 };
 
 struct r200_state {
@@ -876,6 +876,7 @@
    drm_clip_rect_t *pClipRects;
    unsigned int lastStamp;
    GLboolean lost_context;
+   GLboolean save_on_next_unlock;
    r200ScreenPtr r200Screen;	/* Screen private DRI data */
    drm_radeon_sarea_t *sarea;		/* Private SAREA data */
 
Index: r200_ioctl.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_ioctl.c,v
retrieving revision 1.17
diff -u -r1.17 r200_ioctl.c
--- r200_ioctl.c	17 Aug 2004 20:10:29 -0000	1.17
+++ r200_ioctl.c	19 Sep 2004 02:02:04 -0000
@@ -58,12 +58,71 @@
 
 static void r200WaitForIdle( r200ContextPtr rmesa );
 
+void r200SaveHwState( r200ContextPtr rmesa )
+{
+   struct r200_state_atom *atom;
+   /* Copy each atom's current cmd words into its savedcmd buffer. */
+   foreach( atom, &rmesa->hw.atomlist )
+      memcpy(atom->savedcmd, atom->cmd, atom->cmd_size * 4);
+}
+
+static void r200SwapHwState( r200ContextPtr rmesa )
+{
+   int *temp;
+   struct r200_state_atom *atom;
+   /* Exchange cmd and savedcmd on every atom; a second call undoes it. */
+   foreach( atom, &rmesa->hw.atomlist ) {
+      temp = atom->cmd;
+      atom->cmd = atom->savedcmd;
+      atom->savedcmd = temp;
+   }
+}
+
+/* At this point we were in FlushCmdBufLocked but we had lost our context, so
+ * we need to unwire our current cmdbuf and hook a new one in, emit that, then
+ * wire the old cmdbuf back in so that FlushCmdBufLocked can continue and the
+ * buffer can depend on the state not being lost across lock/unlock.
+ */
+static void r200BackUpAndEmitLostStateLocked( r200ContextPtr rmesa )
+{
+   GLuint nr_released_bufs;
+   struct r200_store store;
+
+   /* Clear this first: r200FlushCmdBufLocked below re-enters here if set. */
+   rmesa->lost_context = GL_FALSE;
+
+   /* Set the pending cmdbuf bookkeeping aside and start from an empty one. */
+   nr_released_bufs = rmesa->dma.nr_released_bufs;
+   store = rmesa->store;
+   rmesa->store.statenr = 0;
+   rmesa->store.primnr = 0;
+   rmesa->store.cmd_used = 0;
+   rmesa->store.elts_start = 0;
+   rmesa->hw.all_dirty = GL_TRUE;
+   r200SwapHwState( rmesa );
+   /* In this case it's okay to EmitState while locked because we won't exhaust
+    * our (empty) cmdbuf.
+    */
+   r200EmitState( rmesa );
+   r200FlushCmdBufLocked( rmesa, __FUNCTION__ );
+
+   r200SwapHwState( rmesa );
+   /* We've just cleared out the dirty flags, so we don't remember what
+    * actually needed to be emitted for the next state emit.
+    */
+   rmesa->hw.all_dirty = GL_TRUE;
+   rmesa->dma.nr_released_bufs = nr_released_bufs;
+   rmesa->store = store;
+}
 
 int r200FlushCmdBufLocked( r200ContextPtr rmesa, const char * caller )
 {
    int ret, i;
    drm_radeon_cmd_buffer_t cmd;
 
+   if (rmesa->lost_context)
+	r200BackUpAndEmitLostStateLocked( rmesa );
+
    if (R200_DEBUG & DEBUG_IOCTL) {
       fprintf(stderr, "%s from %s\n", __FUNCTION__, caller); 
 
@@ -132,18 +191,7 @@
    rmesa->store.statenr = 0;
    rmesa->store.cmd_used = 0;
    rmesa->dma.nr_released_bufs = 0;
-   /* Set lost_context so that the first state emit on the new buffer is a full
-    * one.  This is because the context might get lost while preparing the next
-    * buffer, and when we lock and find out, we don't have the information to
-    * recreate the state.  This function should always be called before the new
-    * buffer is begun, so it's sufficient to just set lost_context here.
-    *
-    * The alternative to this would be to copy out the state on unlock
-    * (approximately) and if we did lose the context, dispatch a cmdbuf to reset
-    * the state to that old copy before continuing with the accumulated command
-    * buffer.
-    */
-   rmesa->lost_context = 1;
+   rmesa->save_on_next_unlock = 1;
 
    return ret;
 }
@@ -464,7 +512,7 @@
    }
 
    UNLOCK_HARDWARE( rmesa );
-   rmesa->lost_context = 1;
+   rmesa->hw.all_dirty = GL_TRUE;
 
    rmesa->swap_count++;
    (*rmesa->get_ust)( & ust );
@@ -613,13 +661,6 @@
    cx += dPriv->x;
    cy  = dPriv->y + dPriv->h - cy - ch;
 
-   /* We have to emit state along with the clear, since the kernel relies on
-    * some of it.  The EmitState that was above R200_FIREVERTICES was an
-    * attempt to do that, except that another context may come in and cause us
-    * to lose our context while we're unlocked.
-    */
-   r200EmitState( rmesa );
-
    LOCK_HARDWARE( rmesa );
 
    /* Throttle the number of clear ioctls we do.
@@ -722,7 +763,7 @@
    }
 
    UNLOCK_HARDWARE( rmesa );
-   rmesa->lost_context = 1;
+   rmesa->hw.all_dirty = GL_TRUE;
 }
 
 
@@ -763,8 +804,7 @@
    if (rmesa->dma.flush)
       rmesa->dma.flush( rmesa );
 
-   if (!is_empty_list(&rmesa->hw.dirty)) 
-      r200EmitState( rmesa );
+   r200EmitState( rmesa );
    
    if (rmesa->store.cmd_used)
       r200FlushCmdBuf( rmesa, __FUNCTION__ );
Index: r200_ioctl.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_ioctl.h,v
retrieving revision 1.10
diff -u -r1.10 r200_ioctl.h
--- r200_ioctl.h	17 Aug 2004 01:41:32 -0000	1.10
+++ r200_ioctl.h	18 Sep 2004 21:55:24 -0000
@@ -117,6 +117,9 @@
 extern GLuint r200GartOffsetFromVirtual( r200ContextPtr rmesa, 
 					 const GLvoid *pointer );
 
+void r200SaveHwState( r200ContextPtr rmesa );
+void r200SetUpAtomList( r200ContextPtr rmesa );
+
 /* ================================================================
  * Helper macros:
  */
@@ -135,7 +138,8 @@
 #define R200_STATECHANGE( rmesa, ATOM )			\
 do {								\
    R200_NEWPRIM( rmesa );					\
-   move_to_head( &(rmesa->hw.dirty), &(rmesa->hw.ATOM));	\
+   rmesa->hw.ATOM.dirty = GL_TRUE;				\
+   rmesa->hw.is_dirty = GL_TRUE;				\
 } while (0)
 
 #define R200_DB_STATE( ATOM )			        \
@@ -149,7 +153,8 @@
    if (memcmp(atom->cmd, atom->lastcmd, atom->cmd_size*4)) {
       int *tmp;
       R200_NEWPRIM( rmesa );
-      move_to_head( &(rmesa->hw.dirty), atom );
+      atom->dirty = GL_TRUE;
+      rmesa->hw.is_dirty = GL_TRUE;
       tmp = atom->cmd; 
       atom->cmd = atom->lastcmd;
       atom->lastcmd = tmp;
Index: r200_lock.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_lock.c,v
retrieving revision 1.6
diff -u -r1.6 r200_lock.c
--- r200_lock.c	21 Mar 2004 17:05:03 -0000	1.6
+++ r200_lock.c	19 Sep 2004 02:02:13 -0000
@@ -116,4 +116,6 @@
    for ( i = 0 ; i < rmesa->nr_heaps ; i++ ) {
       DRI_AGE_TEXTURES( rmesa->texture_heaps[ i ] );
    }
+
+   rmesa->lost_context = GL_TRUE;
 }
Index: r200_lock.h
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_lock.h,v
retrieving revision 1.5
diff -u -r1.5 r200_lock.h
--- r200_lock.h	17 Aug 2004 20:10:29 -0000	1.5
+++ r200_lock.h	18 Sep 2004 21:55:24 -0000
@@ -104,6 +104,10 @@
 		  rmesa->dri.hwLock,					\
 		  rmesa->dri.hwContext );				\
       DEBUG_RESET();							\
+      if (rmesa->save_on_next_unlock) {					\
+	 r200SaveHwState( rmesa );					\
+	 rmesa->save_on_next_unlock = GL_FALSE;				\
+      }									\
    } while (0)
 
 #endif
Index: r200_state_init.c
===================================================================
RCS file: /cvs/mesa/Mesa/src/mesa/drivers/dri/r200/r200_state_init.c,v
retrieving revision 1.14
diff -u -r1.14 r200_state_init.c
--- r200_state_init.c	17 Aug 2004 01:41:32 -0000	1.14
+++ r200_state_init.c	19 Sep 2004 00:09:06 -0000
@@ -65,8 +65,9 @@
    fprintf(stderr, msg);
    fprintf(stderr, ": ");
 
-   foreach(l, &(rmesa->hw.dirty)) {
-      fprintf(stderr, "%s, ", l->name);
+   foreach(l, &rmesa->hw.atomlist) {
+      if (l->dirty || rmesa->hw.all_dirty)
+	 fprintf(stderr, "%s, ", l->name);
    }
 
    fprintf(stderr, "\n");
@@ -200,11 +201,6 @@
    rmesa->state.pixel.readOffset = rmesa->state.color.drawOffset;
    rmesa->state.pixel.readPitch  = rmesa->state.color.drawPitch;
 
-   /* Initialize lists:
-    */
-   make_empty_list(&(rmesa->hw.dirty)); rmesa->hw.dirty.name = "DIRTY";
-   make_empty_list(&(rmesa->hw.clean)); rmesa->hw.clean.name = "CLEAN";
-
    rmesa->hw.max_state_size = 0;
 
 #define ALLOC_STATE( ATOM, CHK, SZ, NM, IDX )				\
@@ -212,10 +208,11 @@
       rmesa->hw.ATOM.cmd_size = SZ;				\
       rmesa->hw.ATOM.cmd = (int *)CALLOC(SZ * sizeof(int));	\
       rmesa->hw.ATOM.lastcmd = (int *)CALLOC(SZ * sizeof(int));	\
+      rmesa->hw.ATOM.savedcmd = (int *)CALLOC(SZ * sizeof(int)); \
       rmesa->hw.ATOM.name = NM;					\
       rmesa->hw.ATOM.idx = IDX;					\
-      rmesa->hw.ATOM.check = check_##CHK;				\
-      insert_at_head(&(rmesa->hw.dirty), &(rmesa->hw.ATOM));	\
+      rmesa->hw.ATOM.check = check_##CHK;			\
+      rmesa->hw.ATOM.dirty = GL_FALSE;				\
       rmesa->hw.max_state_size += SZ * sizeof(int);		\
    } while (0)
       
@@ -308,6 +305,7 @@
    ALLOC_STATE( pix[4], tex, PIX_STATE_SIZE, "PIX/pixstage-4", 4 );
    ALLOC_STATE( pix[5], tex, PIX_STATE_SIZE, "PIX/pixstage-5", 5 );
 
+   r200SetUpAtomList( rmesa );
 
    /* Fill in the packet headers:
     */
@@ -772,5 +770,6 @@
 
    r200LightingSpaceChange( ctx );
    
-   rmesa->lost_context = 1;
+   r200SaveHwState( rmesa );
+   rmesa->hw.all_dirty = GL_TRUE;
 }

Reply via email to