Hi.
This patch adds support for I420 colorspace acceleration (thanks, Jon).
It was tested on CME hardware (K8M890) without DMA blit, where it works
perfectly.

It still needs testing on the CLE video engine (K8M800, KM400, CLE266, VM800
chipsets) and on CME with DMA blit (it uses the viaDmaBlitImage function).

To use DMA blit, direct rendering and dmaXV must both be enabled.

To test I420, just download and play (with XV) a movie from here:
http://sunum.kt.agh.edu.pl/~miklesz/Video%20Clips/AVI/Uncompressed%20I420/

Best Regards
Bartosz Kosiorek
Index: via_video.c
===================================================================
--- via_video.c	(wersja 843)
+++ via_video.c	(kopia robocza)
@@ -117,10 +117,14 @@
 static int viaPutImage(ScrnInfoPtr, short, short, short, short, short, short,
     short, short, int, unsigned char *, short, short, Bool,
     RegionPtr, pointer, DrawablePtr);
-static void nv12Blit(unsigned char *nv12Chroma,
-    const unsigned char *uBuffer,
-    const unsigned char *vBuffer,
-    unsigned width, unsigned srcPitch, unsigned dstPitch, unsigned lines);
+static void nv12Blit(unsigned char *dst,
+    const unsigned char *src1,
+    const unsigned char *src2,
+    unsigned srcPitch, unsigned dstPitch, unsigned width, unsigned height);
+static void UVBlit(unsigned char *dst,
+    const unsigned char *src1,
+    const unsigned char *src2,
+    unsigned srcPitch, unsigned dstPitch, unsigned width, unsigned height);
 
 static Atom xvBrightness, xvContrast, xvColorKey, xvHue, xvSaturation,
     xvAutoPaint;
@@ -158,11 +162,12 @@
     {XvSettable | XvGettable, 0, 1, "XV_AUTOPAINT_COLORKEY"}
 };
 
-#define NUM_IMAGES_G 6
+#define NUM_IMAGES_G 7
 
 static XF86ImageRec ImagesG[NUM_IMAGES_G] = {
     XVIMAGE_YUY2,
     XVIMAGE_YV12,
+    XVIMAGE_I420,
     {
         /*
          * Below, a dummy picture type that is used in XvPutImage only to do
@@ -288,29 +293,30 @@
         CARD32 bandwidth = (mode->HDisplay >> 4) * (mode->VDisplay >> 5) *
             pScrn->bitsPerPixel * mode->VRefresh;
 
-    switch (pVia->MemClk) {
-        case VIA_MEM_SDR100:           /* No overlay without DDR */
-        case VIA_MEM_SDR133:
-            return FALSE;
-        case VIA_MEM_DDR200:
-            /* Basic limit for DDR200 is about this */
-            if (bandwidth > 1800000)
+        switch (pVia->MemClk) {
+            case VIA_MEM_SDR100:           /* No overlay without DDR */
+            case VIA_MEM_SDR133:
                 return FALSE;
-            /* But we have constraints at higher than 800x600 */
-            if (mode->HDisplay > 800) {
-                if (pScrn->bitsPerPixel != 8)
+            case VIA_MEM_DDR200:
+                /* Basic limit for DDR200 is about this */
+                if (bandwidth > 1800000)
                     return FALSE;
-                if (mode->VDisplay > 768)
+                /* But we have constraints at higher than 800x600 */
+                if (mode->HDisplay > 800) {
+                    if (pScrn->bitsPerPixel != 8)
+                        return FALSE;
+                    if (mode->VDisplay > 768)
+                        return FALSE;
+                    if (mode->VRefresh > 60)
+                        return FALSE;
+                }
+                return TRUE;
+            case 0:               /*      FIXME: Why does my CLE266 report 0? 
+                                                 Because it is VIA_MEM_SDR66 ? */
+            case VIA_MEM_DDR266:
+                if (bandwidth > 7901250)
                     return FALSE;
-                if (mode->VRefresh > 60)
-                    return FALSE;
-            }
-            return TRUE;
-        case 0:               /*      FIXME: Why does my CLE266 report 0? */
-        case VIA_MEM_DDR266:
-            if (bandwidth > 7901250)
-                return FALSE;
-            return TRUE;
+                return TRUE;
         }
         return FALSE;
 
@@ -619,7 +625,7 @@
     pVia->useDmaBlit = FALSE;
 #ifdef XF86DRI
     pVia->useDmaBlit = pVia->directRenderingEnabled &&
-    ((pVia->Chipset == VIA_CLE266) ||
+        ((pVia->Chipset == VIA_CLE266) ||
         (pVia->Chipset == VIA_KM400) ||
         (pVia->Chipset == VIA_K8M800) ||
         (pVia->Chipset == VIA_PM800) ||
@@ -1018,6 +1024,7 @@
                 proReg) & ~HQV_FLIP_ODD) | HQV_SW_FLIP | HQV_FLIP_STATUS);
             break;
         case FOURCC_YV12:
+        case FOURCC_I420:
         default:
             while ((VIDInD(HQV_CONTROL + proReg) & HQV_SW_FLIP));
             VIDOutD(HQV_SRC_STARTADDR_Y + proReg,
@@ -1038,23 +1045,8 @@
     }
 }
 
-/*
- * Slow and dirty. NV12 blit.
- */
 
-static void
-nv12cp(unsigned char *dst,
-    const unsigned char *src, int dstPitch, int w, int h, int yuv422)
-{
-    /* 
-     * Blit luma component as a fake YUY2 assembler blit. 
-     */
 
-    (*viaFastVidCpy) (dst, src, dstPitch, w >> 1, h, TRUE);
-    nv12Blit(dst + dstPitch * h, src + w * h + (w >> 1) * (h >> 1),
-            src + w * h, w >> 1, w >> 1, dstPitch, h >> 1);
-}
-
 #ifdef XF86DRI
 
 static int
@@ -1077,7 +1069,7 @@
 
     bounceBuffer = ((unsigned long)src & 15);
     nv12Conversion = (pVia->VideoEngine == VIDEO_ENGINE_CME && 
-        id == FOURCC_YV12);
+        (id == FOURCC_YV12 || id == FOURCC_I420));
 
     switch (id) {
         case FOURCC_YUY2:
@@ -1092,6 +1084,7 @@
             break;
 
         case FOURCC_YV12:
+        case FOURCC_I420:
         default:
             bounceStride = ALIGN_TO(width, 16);
             bounceLines = height;
@@ -1107,8 +1100,9 @@
                 pPort->dmaBounceBuffer = 0;
             }
             size = bounceStride * bounceLines + 16;
-            if (FOURCC_YV12 == id)
+            if (id == FOURCC_YV12 || id == FOURCC_I420) {
                 size += ALIGN_TO(bounceStride >> 1, 16) * bounceLines;
+            }
             pPort->dmaBounceBuffer = (unsigned char *)malloc(size);
             pPort->dmaBounceLines = bounceLines;
             pPort->dmaBounceStride = bounceStride;
@@ -1147,14 +1141,22 @@
 
     lumaSync = blit.sync;
 
-    if (id == FOURCC_YV12) {
+    if (id == FOURCC_YV12 || id == FOURCC_I420) {
         unsigned tmp = ALIGN_TO(width >> 1, 16);
 
         if (nv12Conversion) {
-            nv12Blit(bounceBase + bounceStride * height,
-                src + bounceStride * height + tmp * (height >> 1),
-                src + bounceStride * height, width >> 1, tmp,
-                bounceStride, height >> 1);
+            if (id == FOURCC_YV12) {
+                nv12Blit(bounceBase + bounceStride * height,
+                    src + bounceStride * height + tmp * (height >> 1),
+                    src + bounceStride * height, 
+                    tmp, bounceStride, width >> 1, height >> 1);
+            } else if (id == FOURCC_I420) {
+                /* TODO Check if this code will work properly with FOURCC_I420 */
+                nv12Blit(bounceBase + bounceStride * height,
+                    src + bounceStride * height, 
+                    src + bounceStride * height + tmp * (height >> 1),
+                    tmp, bounceStride, width >> 1, height >> 1);
+            }
         } else if (bounceBuffer) {
             (*viaFastVidCpy) (base + bounceStride * height,
                     src + bounceStride * height, tmp, tmp >> 1, height, 1);
@@ -1281,13 +1283,44 @@
                     switch (id) {
                         case FOURCC_YV12:
                             if (pVia->VideoEngine == VIDEO_ENGINE_CME) {
-                                nv12cp(pVia->swov.SWDevice.
+                                int srcYSize  = width * height;
+                                /* Copy Y component */
+                                (*viaFastVidCpy) (pVia->swov.SWDevice.
+                                    lpSWOverlaySurface[pVia->dwFrameNum & 1], 
+                                    buf, dstPitch, width >> 1, height, TRUE);
+
+                                /* Copy U and V component */
+                                nv12Blit(pVia->swov.SWDevice.lpSWOverlaySurface[pVia->dwFrameNum & 1] + dstPitch * height, 
+                                    buf + srcYSize + (srcYSize >> 2), /* Size of UV component is 1/4 of Y */
+                                    buf + srcYSize, 
+                                    width >> 1, dstPitch, width >> 1, height >> 1);
+                            } else {
+                                (*viaFastVidCpy)(pVia->swov.SWDevice.
                                     lpSWOverlaySurface[pVia->dwFrameNum & 1],
                                     buf, dstPitch, width, height, 0);
+                            }
+                            break;
+                        case FOURCC_I420:
+                            if (pVia->VideoEngine == VIDEO_ENGINE_CME) {
+                                int srcYSize  = width * height;
+
+                                (*viaFastVidCpy) (pVia->swov.SWDevice.
+                                   lpSWOverlaySurface[pVia->dwFrameNum & 1], 
+                                   buf, dstPitch, width >> 1, height, TRUE);
+
+                                nv12Blit(pVia->swov.SWDevice.lpSWOverlaySurface[pVia->dwFrameNum & 1] + dstPitch*height, 
+                                   buf + srcYSize, 
+                                   buf + srcYSize + (srcYSize >> 2), /* UV component is 1/4 of Y */
+                                   width >> 1, dstPitch, width >> 1, height >> 1);
                             } else {
                                 (*viaFastVidCpy)(pVia->swov.SWDevice.
                                     lpSWOverlaySurface[pVia->dwFrameNum & 1],
                                     buf, dstPitch, width, height, 0);
+
+                                nv12Blit(pVia->swov.SWDevice.lpSWOverlaySurface[pVia->dwFrameNum & 1] + dstPitch * height, 
+                                    buf + srcYSize + (srcYSize >> 2), 
+                                    buf + srcYSize, 
+                                    width >> 1, dstPitch, width, height >> 1);
                             }
                             break;
                         case FOURCC_RV32:
@@ -1529,16 +1562,12 @@
     pVia->swov.panning_y = y;
 }
 
-/*
- * Blit the chroma field from one buffer to another while at the same time converting from
- * YV12 to NV12.
- */
 
 static void
-nv12Blit(unsigned char *nv12Chroma,
-        const unsigned char *uBuffer,
-        const unsigned char *vBuffer,
-        unsigned width, unsigned srcPitch, unsigned dstPitch, unsigned lines)
+nv12Blit(unsigned char *dst,
+        const unsigned char *src1,
+        const unsigned char *src2,
+        unsigned srcPitch, unsigned dstPitch, unsigned width, unsigned height)
 {
     int x;
     int dstAdd;
@@ -1547,34 +1576,40 @@
     dstAdd = dstPitch - (width << 1);
     srcAdd = srcPitch - width;
 
-    while (lines--) {
+    while (height--) {
         x = width;
         while (x > 3) {
-            register CARD32
-            dst32,
-            src32 = *((CARD32 *) vBuffer),
-            src32_2 = *((CARD32 *) uBuffer);
-            dst32 =
+            register CARD32 src32 = *((CARD32 *) src2);
+            register CARD32 src32_2 = *((CARD32 *) src1);
+
+            /* The same as, but faster from:
+
+            *dst= (*src1);
+            *(dst+1)= (*(src2));
+            *(dst+2)= (*(src1+1)); 
+            *(dst+3)= (*(src2+1));
+
+            */
+            *((CARD32 *) dst) = 
                 (src32_2 & 0xff) | ((src32 & 0xff) << 8) |
                 ((src32_2 & 0x0000ff00) << 8) | ((src32 & 0x0000ff00) << 16);
-            *((CARD32 *) nv12Chroma) = dst32;
-            nv12Chroma += 4;
-            dst32 =
+
+            dst += 4;
+            *((CARD32 *) dst) = 
                 ((src32_2 & 0x00ff0000) >> 16) | ((src32 & 0x00ff0000) >> 8) |
                 ((src32_2 & 0xff000000) >> 8) | (src32 & 0xff000000);
-            *((CARD32 *) nv12Chroma) = dst32;
-            nv12Chroma += 4;
+            dst += 4;
             x -= 4;
-            vBuffer += 4;
-            uBuffer += 4;
+            src2 += 4;
+            src1 += 4;
         }
         while (x--) {
-            *nv12Chroma++ = *uBuffer++;
-            *nv12Chroma++ = *vBuffer++;
+            *dst++ = *src1++;
+            *dst++ = *src2++;
         }
-        nv12Chroma += dstAdd;
-        vBuffer += srcAdd;
-        uBuffer += srcAdd;
+        dst += dstAdd;
+        src2 += srcAdd;
+        src1 += srcAdd;
     }
 }
 
Index: via_swov.c
===================================================================
--- via_swov.c	(wersja 843)
+++ via_swov.c	(kopia robocza)
@@ -339,6 +339,7 @@
     if (videoFlag & VIDEO_HQV_INUSE) {
         switch (pVia->swov.SrcFourCC) {
             case FOURCC_YV12:
+            case FOURCC_I420:
             case FOURCC_XVMC:
                 *pHQVCtl |= HQV_YUV420;
                 break;
@@ -368,6 +369,7 @@
     } else {
         switch (pVia->swov.SrcFourCC) {
             case FOURCC_YV12:
+            case FOURCC_I420:
             case FOURCC_XVMC:
                 if (vport == 1) {
                     *pVidCtl |= V1_YCbCr420;
@@ -450,6 +452,7 @@
                 break;
 
             case FOURCC_YV12:
+            case FOURCC_I420:
             case FOURCC_XVMC:
 
                 if (videoFlag & VIDEO_HQV_INUSE)
@@ -665,6 +668,7 @@
 
     switch (pVia->swov.SrcFourCC) {
         case FOURCC_YV12:
+        case FOURCC_I420:
         case FOURCC_XVMC:
             n = 0; /* 2^n = 1 byte per pixel (Y channel in planar YUV) */
             break;
@@ -1048,7 +1052,7 @@
         !(pVia->swov.gdwVideoFlagSW & VIDEO_1_INUSE))
         proReg = PRO_HQV1_OFFSET;
 
-    isplanar = ((fourcc == FOURCC_YV12) || (fourcc == FOURCC_XVMC));
+    isplanar = ((fourcc == FOURCC_YV12) || (fourcc == FOURCC_I420) || (fourcc == FOURCC_XVMC));
 
     width = pVia->swov.SWDevice.gdwSWSrcWidth;
     height = pVia->swov.SWDevice.gdwSWSrcHeight;
@@ -1091,6 +1095,7 @@
     isplanar = FALSE;
     switch (FourCC) {
         case FOURCC_YV12:
+        case FOURCC_I420:
         case FOURCC_XVMC:
             isplanar = TRUE;
             pitch = ALIGN_TO(Width, 32);
@@ -1183,9 +1188,10 @@
             break;
 
         case FOURCC_YV12:
+        case FOURCC_I420:
             retCode = CreateSurface(pScrn, FourCC, Width, Height, TRUE);
             if (retCode == Success)
-                retCode = AddHQVSurface(pScrn, numbuf, FOURCC_YV12);
+                retCode = AddHQVSurface(pScrn, numbuf, FourCC);
             break;
 
         case FOURCC_XVMC:
@@ -1247,6 +1253,7 @@
                 break;
 
             case FOURCC_YV12:
+            case FOURCC_I420:
                 VIAFreeLinear(&pVia->swov.SWfbMem);
             case FOURCC_XVMC:
                 pVia->swov.SrcFourCC = 0;
@@ -1411,6 +1418,7 @@
 {
     if (miniCtl & V1_Y_INTERPOLY) {
         if (pVia->swov.SrcFourCC == FOURCC_YV12
+            || pVia->swov.SrcFourCC == FOURCC_I420
             || pVia->swov.SrcFourCC == FOURCC_XVMC) {
             if (videoFlag & VIDEO_HQV_INUSE) {
                 if (videoFlag & VIDEO_1_INUSE)
@@ -1444,6 +1452,7 @@
         }
     } else {
         if (pVia->swov.SrcFourCC == FOURCC_YV12
+            || pVia->swov.SrcFourCC == FOURCC_I420
             || pVia->swov.SrcFourCC == FOURCC_XVMC) {
             if (videoFlag & VIDEO_HQV_INUSE) {
                 if (videoFlag & VIDEO_1_INUSE)
@@ -1790,6 +1799,7 @@
     pVia->swov.overlayRecordV1.dwOffset = dwOffset;
 
     if (pVia->swov.SrcFourCC == FOURCC_YV12
+        || pVia->swov.SrcFourCC == FOURCC_I420
         || pVia->swov.SrcFourCC == FOURCC_XVMC) {
 
         YCBCRREC YCbCr;
@@ -1882,6 +1892,7 @@
             SetHQVFetch(pVia, hqvSrcFetch, oriSrcHeight);
 
         if (pVia->swov.SrcFourCC == FOURCC_YV12
+            || pVia->swov.SrcFourCC == FOURCC_I420
             || pVia->swov.SrcFourCC == FOURCC_XVMC) {
             if (videoFlag & VIDEO_1_INUSE)
                 SaveVideoRegister(pVia, V1_STRIDE, srcPitch << 1);
@@ -2176,6 +2187,7 @@
         (pVia->swov.SrcFourCC == FOURCC_RV16) ||
         (pVia->swov.SrcFourCC == FOURCC_RV32) ||
         (pVia->swov.SrcFourCC == FOURCC_YV12) ||
+        (pVia->swov.SrcFourCC == FOURCC_I420) ||
         (pVia->swov.SrcFourCC == FOURCC_XVMC)) {
         videoFlag = pVia->swov.gdwVideoFlagSW;
     }
@@ -2250,6 +2262,7 @@
         (pVia->swov.SrcFourCC == FOURCC_RV16) ||
         (pVia->swov.SrcFourCC == FOURCC_RV32) ||
         (pVia->swov.SrcFourCC == FOURCC_YV12) ||
+        (pVia->swov.SrcFourCC == FOURCC_I420) ||
         (pVia->swov.SrcFourCC == FOURCC_XVMC)) {
         pVia->swov.SWDevice.gdwSWDstLeft = pUpdate->DstLeft + panDX;
         pVia->swov.SWDevice.gdwSWDstTop = pUpdate->DstTop + panDY;
@@ -2309,6 +2322,7 @@
         (pVia->swov.SrcFourCC == FOURCC_RV16) ||
         (pVia->swov.SrcFourCC == FOURCC_RV32) ||
         (pVia->swov.SrcFourCC == FOURCC_YV12) ||
+        (pVia->swov.SrcFourCC == FOURCC_I420) ||
         (pVia->swov.SrcFourCC == FOURCC_XVMC))
         videoFlag = pVia->swov.gdwVideoFlagSW;
 
_______________________________________________
Openchrome-devel mailing list
[email protected]
http://wiki.openchrome.org/mailman/listinfo/openchrome-devel

Reply via email to