This patch adds the capability to place YV12 data in AGP memory while it is being converted to packed YUY2. The single most expensive operation in XVideo rendering on savage (on current implementation) is the upload to the framebuffer. By placing YV12 data in an AGP buffer, the CPU saves itself the work of going through the PCI bus. From benchmarks, this method cuts the upload overhead by up to 25% compared to the framebuffer-upload implementation. An option (AGPforXv) has been added to enable this behavior conditionally (disabled by default). Please review and comment on this.

Changelog:
* Allow YV12 planar data to reside in AGP memory
* Add AGPforXv switch for enabling/disabling implemented behavior
* Update manual to document AGPforXv switch
* Add explanation of limitations of BCI conversion

--
perl -e '$x=2.4;print sprintf("%.0f + %.0f = %.0f\n",$x,$x,$x+$x);'

diff --git a/man/savage.man b/man/savage.man
index 35528b2..9095b72 100644
--- a/man/savage.man
+++ b/man/savage.man
@@ -169,8 +169,26 @@ applies to Savage4 and newer chips.  Default: \*qoff\*q (use COB).
 .BI "Option \*qBCIforXv\*q \*q" boolean \*q  
 Use the BCI to copy and reformat Xv pixel data.  Using the BCI for Xv causes 
 graphics artifacts on some chips.  This option only applies to Savage4 and 
-prosavage/twister chips.  Default: on for prosavage and twister (use BCI for Xv); 
-off for savage4 (do not use the BCI for Xv).
+prosavage/twister chips. On some combinations of chipsets and video players,
+BCI formatting might actually be slower than software formatting (\*qAGPforXv\*q 
+might help in this case). BCI formatting can only be used on video data with
+a width that is a multiple of 16 pixels (which is the vast majority of videos). 
+Other widths are handled through software formatting. Default: on for prosavage 
+and twister (use BCI for Xv); off for savage4 (do not use the BCI for Xv).
+.TP
+.BI "Option \*qAGPforXv\*q \*q" boolean \*q
+Instructs the BCI Xv pixel formatter to use AGP memory as a scratch buffer.
+Ordinarily the BCI formatter uses a an area in framebuffer memory to hold 
+YV12 planar data to be converted for display. This requires a somewhat expensive
+upload of YV12 data to framebuffer memory. The \*qAGPforXv\*q causes the BCI
+formatter to place the YV12 data in AGP memory instead, which can be uploaded
+faster than the framebuffer. Use of this option cuts upload overhead by 25%
+according to benchmarks. This option also smooths out most of the shearing
+present when using BCI for pixel conversion. Currently this option is 
+.B experimental
+and is disabled by default. Video width restrictions that apply to \*qBCIforXv\*q 
+also apply here. Only valid when \*qDRI\*q and \*qBCIforXv\*q are both active, 
+and only on AGP chipsets. Default: \*qoff\*q.
 .TP 
 .BI "Option \*qAGPMode\*q \*q" integer \*q
 Set AGP data transfer rate.
diff --git a/src/savage_dri.c b/src/savage_dri.c
index 0d40222..216c915 100644
--- a/src/savage_dri.c
+++ b/src/savage_dri.c
@@ -520,6 +520,15 @@ static Bool SAVAGEDRIAgpInit(ScreenPtr pScreen)
        }
    }
 
+   if (psav->AGPforXv) {
+       pSAVAGEDRIServer->agpXVideo.offset = offset;
+       pSAVAGEDRIServer->agpXVideo.size = 2 * 1024 * 1024; /* Max XV image is 1024x1024x16bpp */
+       offset += pSAVAGEDRIServer->agpXVideo.size;
+   } else {
+       pSAVAGEDRIServer->agpXVideo.offset = 0;
+       pSAVAGEDRIServer->agpXVideo.size = 0;
+   }
+
    pSAVAGEDRIServer->agpTextures.offset = offset;
    pSAVAGEDRIServer->agpTextures.size = (pSAVAGEDRIServer->agp.size - offset);
 
@@ -581,6 +590,25 @@ static Bool SAVAGEDRIAgpInit(ScreenPtr pScreen)
        }
    }
 
+   /* XVideo buffer
+    */
+   if (pSAVAGEDRIServer->agpXVideo.size != 0) {
+       if ( drmAddMap( psav->drmFD,
+		   pSAVAGEDRIServer->agpXVideo.offset,
+		   pSAVAGEDRIServer->agpXVideo.size,
+		   DRM_AGP, 0,
+		   &pSAVAGEDRIServer->agpXVideo.handle ) < 0 ) {
+	    xf86DrvMsg( pScreen->myNum, X_ERROR,
+		  "[agp] Could not add agpXVideo, will use framebuffer upload (slower) \n" );
+	    pSAVAGEDRIServer->agpXVideo.size = 0;
+	    pSAVAGEDRIServer->agpXVideo.handle = 0;
+       } else {
+	    xf86DrvMsg( pScreen->myNum, X_INFO,
+	       "[agp] agpXVideo handle = 0x%08lx\n",
+	       pSAVAGEDRIServer->agpXVideo.handle );
+       }
+   }
+
    /* AGP textures
     */
    if ( drmAddMap( psav->drmFD,
@@ -1278,6 +1306,12 @@ void SAVAGEDRICloseScreen( ScreenPtr pScreen )
       pSAVAGEDRIServer->aperture.map = NULL;
    }
 
+   if ( pSAVAGEDRIServer->agpXVideo.map ) {
+      drmUnmap( pSAVAGEDRIServer->agpXVideo.map, 
+                pSAVAGEDRIServer->agpXVideo.size );
+      pSAVAGEDRIServer->agpXVideo.map = NULL;
+   }
+
    if ( pSAVAGEDRIServer->agpTextures.map ) {
       drmUnmap( pSAVAGEDRIServer->agpTextures.map, 
                 pSAVAGEDRIServer->agpTextures.size );
@@ -1293,6 +1327,9 @@ void SAVAGEDRICloseScreen( ScreenPtr pScreen )
    if (pSAVAGEDRIServer->aperture.handle)
        drmRmMap(psav->drmFD,pSAVAGEDRIServer->registers.handle);
 
+   if (pSAVAGEDRIServer->agpXVideo.handle)
+       drmRmMap(psav->drmFD,pSAVAGEDRIServer->agpXVideo.handle);
+
    if (pSAVAGEDRIServer->agpTextures.handle)
        drmRmMap(psav->drmFD,pSAVAGEDRIServer->agpTextures.handle);
 
diff --git a/src/savage_driver.c b/src/savage_driver.c
index 919bd1a..da472e9 100644
--- a/src/savage_driver.c
+++ b/src/savage_driver.c
@@ -278,6 +278,7 @@ typedef enum {
     ,OPTION_AGP_SIZE
     ,OPTION_DRI
     ,OPTION_IGNORE_EDID
+    ,OPTION_AGP_FOR_XV
 } SavageOpts;
 
 
@@ -312,6 +313,7 @@ static const OptionInfoRec SavageOptions[] =
     { OPTION_AGP_MODE,	"AGPMode",	OPTV_INTEGER, {0}, FALSE },
     { OPTION_AGP_SIZE,	"AGPSize",	OPTV_INTEGER, {0}, FALSE },
     { OPTION_DRI,       "DRI",          OPTV_BOOLEAN, {0}, TRUE },
+    { OPTION_AGP_FOR_XV,   "AGPforXv",    OPTV_BOOLEAN, {0}, FALSE },
 #endif
     { -1,		NULL,		OPTV_NONE,    {0}, FALSE }
 };
@@ -1870,6 +1872,20 @@ static Bool SavagePreInit(ScrnInfoPtr pScrn, int flags)
                    "%s DVI port support (Savage4 only)\n",(psav->dvi?"Force":"Disable"));
     }
 
+    psav->AGPforXv = FALSE;
+#ifdef XF86DRI
+    if (xf86GetOptValBool(psav->Options, OPTION_AGP_FOR_XV, &psav->AGPforXv)) {
+        if (psav->AGPforXv) {
+            if (psav->agpSize == 0) {
+                psav->AGPforXv = FALSE;
+                xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "AGP not available, cannot use AGP for Xv\n");
+            }
+        }
+        xf86DrvMsg(pScrn->scrnIndex, X_CONFIG,
+                   "Option: %s use of AGP buffer for Xv\n",(psav->AGPforXv?"Enable":"Disable"));
+    }
+#endif
+
     /* Add more options here. */
 
 
@@ -3678,6 +3694,11 @@ static Bool SavageScreenInit(int scrnIndex, ScreenPtr pScreen,
         else
             xf86DrvMsg(pScrn->scrnIndex,X_CONFIG,"XvMC is not enabled\n");
     }
+
+    if (!psav->directRenderingEnabled && psav->AGPforXv) {
+        xf86DrvMsg(pScrn->scrnIndex,X_ERROR,"AGPforXV requires DRI to be enabled.\n");
+	psav->AGPforXv = FALSE;
+    }
 #endif
 
     if (serverGeneration == 1)
diff --git a/src/savage_driver.h b/src/savage_driver.h
index b6d1e7a..c47b472 100644
--- a/src/savage_driver.h
+++ b/src/savage_driver.h
@@ -136,6 +136,9 @@ typedef struct _server{
 
    /* command DMA */
    drmRegion cmdDma;
+
+   /* XVideo through AGP */
+   drmRegion agpXVideo;
 } SAVAGEDRIServerPrivateRec, *SAVAGEDRIServerPrivatePtr;
 
 #endif
@@ -486,6 +489,7 @@ typedef struct _Savage {
 
     Bool bDisableXvMC;
 
+    Bool AGPforXv;
 #endif
 
     Bool disableCOB;
diff --git a/src/savage_video.c b/src/savage_video.c
index c999b09..57483e0 100644
--- a/src/savage_video.c
+++ b/src/savage_video.c
@@ -244,8 +244,13 @@ typedef struct {
    void         *video_memory;			/* opaque memory management information structure */
    CARD32       video_offset;			/* offset in video memory of packed YUV buffer */
 
-   void         *video_planarmem;			/* opaque memory management information structure */
+   void         *video_planarmem;		/* opaque memory management information structure */
    CARD32       video_planarbuf; 		/* offset in video memory of planar YV12 buffer */
+   
+   Bool         tried_agp;			/* TRUE if AGP allocation has been tried */
+   CARD32	agpBase;			/* Physical address of aperture base */
+   CARD32	agpBufferOffset;		/* Offset of buffer in AGP memory, or 0 if unavailable */
+   drmAddress   agpBufferMap;			/* Mapping of AGP buffer in process memory, or NULL */
 
 } SavagePortPrivRec, *SavagePortPrivPtr;
 
@@ -1030,7 +1035,8 @@ static void
 SavageStopVideo(ScrnInfoPtr pScrn, pointer data, Bool shutdown)
 {
     SavagePortPrivPtr pPriv = (SavagePortPrivPtr)data;
-    /*SavagePtr psav = SAVPTR(pScrn); */
+    SavagePtr psav = SAVPTR(pScrn);
+    ScreenPtr pScreen = screenInfo.screens[pScrn->scrnIndex];
 
     xf86ErrorFVerb(XVTRACE,"SavageStopVideo\n");
 
@@ -1039,6 +1045,15 @@ SavageStopVideo(ScrnInfoPtr pScrn, pointer data, Bool shutdown)
     if(shutdown) {
       /*SavageClipVWindow(pScrn);*/
  	SavageStreamsOff( pScrn );
+
+	if (pPriv->agpBufferMap != NULL) {
+	    SAVAGEDRIServerPrivatePtr pSAVAGEDRIServer = psav->DRIServerInfo;
+	    drmUnmap(pPriv->agpBufferMap, pSAVAGEDRIServer->agpXVideo.size);
+	    pSAVAGEDRIServer->agpXVideo.map = NULL;
+	    pPriv->agpBufferMap = NULL;
+	    pPriv->agpBufferOffset = 0;
+	}
+
         if (pPriv->video_memory != NULL) {
 	    SavageFreeMemory(pScrn, pPriv->video_memory);
 	    pPriv->video_memory = NULL;
@@ -1048,6 +1063,7 @@ SavageStopVideo(ScrnInfoPtr pScrn, pointer data, Bool shutdown)
 	    pPriv->video_planarmem = NULL;
         }
 	pPriv->videoStatus = 0;
+	pPriv->tried_agp = FALSE;
     } else {
 	if(pPriv->videoStatus & CLIENT_VIDEO_ON) {
 	    pPriv->videoStatus |= OFF_TIMER;
@@ -1176,10 +1192,12 @@ SavageCopyPlanarDataBCI(
     unsigned char *srcV, /* V */
     unsigned char *srcU, /* U */
     unsigned char *dst,
+    unsigned char * planarPtr,
     unsigned long planarOffset,
     int srcPitch, int srcPitch2,
     int dstPitch,
-    int h,int w)
+    int h,int w,
+    Bool isAGP)
 {
     SavagePtr psav = SAVPTR(pScrn);
 
@@ -1187,22 +1205,24 @@ SavageCopyPlanarDataBCI(
     unsigned long offsetY = planarOffset;
     unsigned long offsetV = offsetY +  srcPitch * h;
     unsigned long offsetU = offsetV +  srcPitch2 * (h>>1);
-    unsigned char *dstCopy = (unsigned char *)psav->FBBase + planarOffset;
     unsigned long dstOffset  = (unsigned long)dst - (unsigned long)psav->FBBase;
     int i;
+    unsigned char memType;
     
     BCI_GET_PTR;
 
     /* copy Y planar */
-    memcpy(dstCopy, srcY, srcPitch * h);
+    memcpy(planarPtr, srcY, srcPitch * h);
 
     /* copy V planar */    
-    dstCopy = dstCopy + srcPitch * h;
-    memcpy(dstCopy, srcV, srcPitch2 * (h>>1));
+    planarPtr = planarPtr + srcPitch * h;
+    memcpy(planarPtr, srcV, srcPitch2 * (h>>1));
 
     /* copy U planar */
-    dstCopy = dstCopy + srcPitch2 * (h>>1);    
-    memcpy(dstCopy, srcU, srcPitch2 * (h>>1));
+    planarPtr = planarPtr + srcPitch2 * (h>>1);    
+    memcpy(planarPtr, srcU, srcPitch2 * (h>>1));
+
+    memType = isAGP ? 3 : 0;
 
     /*
      * Transfer pixel data from one memory location to another location
@@ -1222,23 +1242,18 @@ SavageCopyPlanarDataBCI(
 
     w = (w+0xf)&0xff0;
     psav->WaitQueue(psav,11);
-    BCI_SEND(0x96070051);
-    BCI_SEND(offsetY);
-
+    BCI_SEND(BCI_SET_REGISTER | BCI_SET_REGISTER_COUNT(7) | 0x51);
+    BCI_SEND(offsetY | memType);
     BCI_SEND(dstOffset);
-
     BCI_SEND(((h-1)<<16)|((w-1)>>3));
-
     BCI_SEND(dstPitch >> 3);
-
-
-    BCI_SEND(offsetU);
-    BCI_SEND(offsetV);
-
+    BCI_SEND(offsetU | memType);
+    BCI_SEND(offsetV | memType);
     BCI_SEND((srcPitch2 << 16)| srcPitch2);
 
-    BCI_SEND(0x96010050);
+    BCI_SEND(BCI_SET_REGISTER | BCI_SET_REGISTER_COUNT(1) | 0x50);
     BCI_SEND(0x00200003 | srcPitch);
+
     BCI_SEND(0xC0170000);
 }
 
@@ -1948,6 +1963,45 @@ SavagePutImage(
         planarFrameSize = srcPitch * height + srcPitch2 * height;
     }
 
+    /* Check whether AGP buffers can be allocated. If not, fall back to ordinary
+       upload to framebuffer (slower) */
+    if (!pPriv->tried_agp && !psav->IsPCI && psav->drmFD > 0) {
+        int ret;
+	SAVAGEDRIServerPrivatePtr pSAVAGEDRIServer = psav->DRIServerInfo;
+        
+	pPriv->tried_agp = TRUE;
+	if (pSAVAGEDRIServer->agpXVideo.size >= max(new_size, planarFrameSize)) {
+	    if ( drmMap( psav->drmFD,
+		pSAVAGEDRIServer->agpXVideo.handle,
+		pSAVAGEDRIServer->agpXVideo.size,
+		&pSAVAGEDRIServer->agpXVideo.map ) < 0 ) {
+
+		xf86DrvMsg( pScreen->myNum, X_ERROR, "[agp] XVideo: Could not map agpXVideo \n" );
+		pPriv->agpBufferOffset = 0;
+		pPriv->agpBufferMap = NULL;
+	    } else {
+		pPriv->agpBufferMap = pSAVAGEDRIServer->agpXVideo.map;
+		pPriv->agpBufferOffset = pSAVAGEDRIServer->agpXVideo.offset;
+		pPriv->agpBase = drmAgpBase(psav->drmFD);
+#if 0
+		xf86DrvMsg( pScreen->myNum, X_INFO,
+		       "[agp] agpXVideo mapped at 0x%08lx aperture=0x%08x offset=0x%08lx\n",
+		       (unsigned long)pPriv->agpBufferMap, pPriv->agpBase, pPriv->agpBufferOffset);
+#endif
+	    }
+	} else {
+	    /* This situation is expected if AGPforXv is disabled, otherwise report. */
+	    if (pSAVAGEDRIServer->agpXVideo.size > 0) {
+		xf86DrvMsg( pScreen->myNum, X_ERROR,
+		    "[agp] XVideo: not enough space in buffer (got %ld bytes, required %ld bytes).\n", 
+	    	    pSAVAGEDRIServer->agpXVideo.size, max(new_size, planarFrameSize));
+	    }
+	    pPriv->agpBufferMap = NULL;
+	    pPriv->agpBufferOffset = 0;
+	}
+    }
+
+
     /* Buffer for final packed frame */
     pPriv->video_offset = SavageAllocateMemory(
 	pScrn, &pPriv->video_memory,
@@ -1991,14 +2045,29 @@ SavagePutImage(
 	offsetV += tmp;
 	nlines = ((((y2 + 0xffff) >> 16) + 1) & ~1) - top;
         if (S3_SAVAGE4_SERIES(psav->Chipset) && psav->BCIforXv && (npixels & 0xF) == 0 && pPriv->video_planarbuf != 0) {
-            SavageCopyPlanarDataBCI(
-                pScrn,
-	    	buf + (top * srcPitch) + (left >> 1), 
-	    	buf + offsetV, 
-	    	buf + offsetU, 
-	    	dst_start,
-	    	pPriv->video_planarbuf,
-	    	srcPitch, srcPitch2, dstPitch, nlines, npixels);
+            if (pPriv->agpBufferMap != NULL) {
+		/* Using copy to AGP memory */
+		SavageCopyPlanarDataBCI(
+		    pScrn,
+		    buf + (top * srcPitch) + (left >> 1), 
+		    buf + offsetV, 
+		    buf + offsetU, 
+		    dst_start,
+		    pPriv->agpBufferMap,
+		    pPriv->agpBase + pPriv->agpBufferOffset,
+		    srcPitch, srcPitch2, dstPitch, nlines, npixels, TRUE);
+            } else {            
+		/* Using ordinary copy to framebuffer */
+		SavageCopyPlanarDataBCI(
+		    pScrn,
+		    buf + (top * srcPitch) + (left >> 1), 
+		    buf + offsetV, 
+		    buf + offsetU, 
+		    dst_start,
+		    (unsigned char *)psav->FBBase + pPriv->video_planarbuf,
+		    pPriv->video_planarbuf,
+		    srcPitch, srcPitch2, dstPitch, nlines, npixels, FALSE);
+	    }
         } else {
 	    SavageCopyPlanarData(
 	    	buf + (top * srcPitch) + (left >> 1), 
_______________________________________________
xorg mailing list
xorg@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/xorg

Reply via email to