Module Name:    xsrc
Committed By:   macallan
Date:           Wed Jun 19 13:26:01 UTC 2013

Modified Files:
        xsrc/external/mit/xf86-video-suncg14/dist/src: cg14.h cg14_driver.c
Added Files:
        xsrc/external/mit/xf86-video-suncg14/dist/src: cg14_accel.c

Log Message:
support hardware acceleration via SX
This requires a recent cgfourteen kernel driver which allows userland to map
SX register space.
Basic acceleration is enabled by default ( use Option "Accel" "false" to turn
it off ), XRender support is incomplete but good enough for anti-aliased text
rendering ( enable with Option "xrender" "true" ).


To generate a diff of this commit:
cvs rdiff -u -r1.5 -r1.6 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h
cvs rdiff -u -r0 -r1.1 \
    xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
cvs rdiff -u -r1.9 -r1.10 \
    xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_driver.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.5 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.6
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.5	Tue Jun  4 22:20:41 2013
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h	Wed Jun 19 13:26:01 2013
@@ -30,6 +30,7 @@
 #include <X11/Xmd.h>
 #include "gcstruct.h"
 #include "xf86sbusBus.h"
+#include "exa.h"
 
 /* Various offsets in virtual (ie. mmap()) spaces Linux and Solaris support. */
 #define CG14_REGS_VOFF		0x00000000	/* registers */
@@ -50,6 +51,11 @@
 #define CG14_G32_VOFF		0xb0000000
 #define CG14_R32_VOFF		0xc0000000
 
+/* these two are NetBSD specific */
+#define CG14_SXREG_VOFF		0x00010000	/* SX userspace registers */
+#define CG14_SXIO_VOFF		0xd0000000
+
+
 /* Hardware cursor map */
 #define CG14_CURS_SIZE		32
 struct cg14curs {
@@ -76,6 +82,7 @@ typedef struct {
 	int		width;
 	int		height;
 	int		use_shadow;
+	int		memsize;
 	int		HWCursor;
 	void *		shadow;
 	sbusDevicePtr	psdp;
@@ -83,9 +90,21 @@ typedef struct {
 	CreateScreenResourcesProcPtr CreateScreenResources;
 	OptionInfoPtr	Options;
 	xf86CursorInfoPtr	CursorInfoRec;
+	/* SX accel stuff */
+	void		*sxreg, *sxio;
+	int		use_accel, use_xrender;
+	uint32_t	last_mask;
+	uint32_t	last_rop;
+	uint32_t	srcoff, srcpitch, mskoff, mskpitch;
+	uint32_t	srcformat, dstformat, mskformat;
+	uint32_t	fillcolour;
+	int		op;
+	int		xdir, ydir;	
+	ExaDriverPtr 	pExa;
 } Cg14Rec, *Cg14Ptr;
 
 Bool CG14SetupCursor(ScreenPtr);
+Bool CG14InitAccel(ScreenPtr);
 
 #define GET_CG14_FROM_SCRN(p)    ((Cg14Ptr)((p)->driverPrivate))
 

Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_driver.c
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_driver.c:1.9 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_driver.c:1.10
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_driver.c:1.9	Tue Jun  4 22:20:41 2013
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_driver.c	Wed Jun 19 13:26:01 2013
@@ -99,11 +99,15 @@ _X_EXPORT DriverRec SUNCG14 = {
 typedef enum {
 	OPTION_SHADOW_FB,
 	OPTION_HW_CURSOR,
-	OPTION_SW_CURSOR
+	OPTION_SW_CURSOR,
+	OPTION_ACCEL,
+	OPTION_XRENDER
 } CG14Opts;
 
 static const OptionInfoRec CG14Options[] = {
-    { OPTION_SHADOW_FB, "ShadowFB", OPTV_BOOLEAN, {0}, TRUE},
+    { OPTION_SHADOW_FB,	"ShadowFB", OPTV_BOOLEAN, {0}, TRUE},
+    { OPTION_ACCEL, 	"Accel",    OPTV_BOOLEAN, {0}, TRUE},
+    { OPTION_XRENDER,	"XRender",  OPTV_BOOLEAN, {0}, FALSE},
     { -1,			NULL,		OPTV_NONE,	{0}, FALSE }
 };
 
@@ -342,6 +346,13 @@ CG14PreInit(ScrnInfoPtr pScrn, int flags
     if (psdp == NULL)
 	return FALSE;
 
+    pCg14->memsize = 4 * 1024 * 1024;	/* always safe */
+    if ((psdp->height * psdp->width * 4) > 0x00400000)
+    	 pCg14->memsize = 0x00800000;
+    if (psdp->size > pCg14->memsize)
+    	pCg14->memsize = psdp->size;
+    xf86DrvMsg(pScrn->scrnIndex, X_PROBED, "found %d MB video memory\n",
+      pCg14->memsize >> 20);
     /*********************
     deal with depth
     *********************/
@@ -370,6 +381,10 @@ CG14PreInit(ScrnInfoPtr pScrn, int flags
     xf86ProcessOptions(pScrn->scrnIndex, pScrn->options, pCg14->Options);
     pCg14->use_shadow = xf86ReturnOptValBool(pCg14->Options, OPTION_SHADOW_FB,
         TRUE);
+    pCg14->use_accel = xf86ReturnOptValBool(pCg14->Options, OPTION_ACCEL,
+        TRUE);
+    pCg14->use_xrender = xf86ReturnOptValBool(pCg14->Options, OPTION_XRENDER,
+        FALSE);
 
     /*
      * This must happen after pScrn->display has been set because
@@ -505,7 +520,7 @@ CG14ScreenInit(SCREEN_INIT_ARGS_DECL)
     ScrnInfoPtr pScrn;
     Cg14Ptr pCg14;
     VisualPtr visual;
-    int ret;
+    int ret, have_accel = 0;
 
     /* 
      * First get the ScrnInfoRec
@@ -515,13 +530,28 @@ CG14ScreenInit(SCREEN_INIT_ARGS_DECL)
     pCg14 = GET_CG14_FROM_SCRN(pScrn);
 
     /* Map the CG14 memory */
-    pCg14->fb = xf86MapSbusMem (pCg14->psdp, CG14_BGR_VOFF, 4 *
-				(pCg14->psdp->width * pCg14->psdp->height));
+    pCg14->fb = xf86MapSbusMem (pCg14->psdp, CG14_DIRECT_VOFF, pCg14->memsize);
     pCg14->x32 = xf86MapSbusMem (pCg14->psdp, CG14_X32_VOFF,
 				 (pCg14->psdp->width * pCg14->psdp->height));
     pCg14->xlut = xf86MapSbusMem (pCg14->psdp, CG14_XLUT_VOFF, 4096);
     pCg14->curs = xf86MapSbusMem (pCg14->psdp, CG14_CURSOR_VOFF, 4096);
 
+    pCg14->sxreg = xf86MapSbusMem (pCg14->psdp, CG14_SXREG_VOFF, 4096);
+    pCg14->sxio = xf86MapSbusMem (pCg14->psdp, CG14_SXIO_VOFF, 0x04000000);
+    have_accel = (pCg14->sxreg != NULL) && (pCg14->sxio != NULL);
+
+    if (have_accel) {
+    	xf86DrvMsg(pScrn->scrnIndex, X_PROBED,
+    	  "found kernel support for SX acceleration\n");
+    }
+    have_accel = have_accel & pCg14->use_accel;
+    if (have_accel) {
+    	xf86DrvMsg(pScrn->scrnIndex, X_PROBED, "using acceleration\n");
+    	if (pCg14->use_shadow)
+    	    xf86DrvMsg(pScrn->scrnIndex, X_PROBED, "disabling shadow\n");
+    	pCg14->use_shadow = FALSE;
+    }
+    	
     pCg14->width = pCg14->psdp->width;
     pCg14->height = pCg14->psdp->height;
 
@@ -582,18 +612,16 @@ CG14ScreenInit(SCREEN_INIT_ARGS_DECL)
     if (!ret)
 	return FALSE;
 
-#if 0
     /* must be after RGB ordering fixed */
     fbPictureInit (pScreen, 0, 0);
 
     if (pCg14->use_shadow && !CG14ShadowInit(pScreen)) {
-	xf86DrvMsg(scrnIndex, X_ERROR,
+	xf86DrvMsg(pScrn->scrnIndex, X_ERROR,
 		    "shadow framebuffer initialization failed\n");
 	return FALSE;
     }
 
     miInitializeBackingStore(pScreen);
-#endif
 
     xf86SetBackingStore(pScreen);
     xf86SetSilkenMouse(pScreen);
@@ -615,6 +643,24 @@ CG14ScreenInit(SCREEN_INIT_ARGS_DECL)
 	}
     }
 
+    /* setup acceleration */
+    if (have_accel) {
+	XF86ModReqInfo req;
+	int errmaj, errmin;
+
+	memset(&req, 0, sizeof(XF86ModReqInfo));
+	req.majorversion = 2;
+	req.minorversion = 0;
+	if (!LoadSubModule(pScrn->module, "exa", NULL, NULL, NULL, &req,
+	    &errmaj, &errmin)) {
+		LoaderErrorMsg(NULL, "exa", errmaj, errmin);
+		return FALSE;
+	}
+	if (!CG14InitAccel(pScreen))
+	    have_accel = FALSE;
+    }
+
+
     /* Initialise cursor functions */
     miDCInitialize (pScreen, xf86GetPointerScreenFuncs());
 

Added files:

Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
diff -u /dev/null xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.1
--- /dev/null	Wed Jun 19 13:26:01 2013
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c	Wed Jun 19 13:26:01 2013
@@ -0,0 +1,1115 @@
+/* $NetBSD: cg14_accel.c,v 1.1 2013/06/19 13:26:01 macallan Exp $ */
+/*
+ * Copyright (c) 2013 Michael Lorenz
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *    - Redistributions of source code must retain the above copyright
+ *      notice, this list of conditions and the following disclaimer.
+ *    - Redistributions in binary form must reproduce the above
+ *      copyright notice, this list of conditions and the following
+ *      disclaimer in the documentation and/or other materials provided
+ *      with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+ 
+#include <sys/types.h>
+
+/* all drivers need this */
+#include "xf86.h"
+#include "xf86_OSproc.h"
+#include "compiler.h"
+
+#include "cg14.h"
+#include <sparc/sxreg.h>
+
+/*
+ * Build-time switches:
+ *   SX_SINGLE       - selects the one-pixel-at-a-time loops in
+ *                     CG14Comp_Over32() and CG14Comp_Over8()
+ *   SX_DEBUG        - turns ENTER / DPRINTF into real xf86Msg() calls
+ *   SX_ADD_SOFTWARE - makes CG14Comp_Add8() add with the CPU instead of SX
+ */
+#define SX_SINGLE
+/*#define SX_DEBUG*/
+/*#define SX_ADD_SOFTWARE*/
+
+/*
+ * ENTER logs the current function name, DPRINTF is a printf-style logger.
+ * Without SX_DEBUG, ENTER is empty and DPRINTF's call sits behind while (0),
+ * so its arguments are still compiled but never evaluated.
+ */
+#ifdef SX_DEBUG
+#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
+#define DPRINTF xf86Msg
+#else
+#define ENTER
+#define DPRINTF while (0) xf86Msg
+#endif
+
+/* element count of a real array (must not be used on a pointer) */
+#define arraysize(ary)        (sizeof(ary) / sizeof(ary[0]))
+
+/*
+ * SX ROP codes, indexed by the alu argument handed to CG14PrepareCopy() and
+ * CG14PrepareSolid().
+ * 0xcc is SX's GXcopy equivalent
+ */
+uint32_t sx_rop[] = { 0x00, 0x88, 0x44, 0xcc, 0x22, 0xaa, 0x66, 0xee,
+		      0x11, 0x99, 0x55, 0xdd, 0x33, 0xbb, 0x77, 0xff};
+
+/* source picture formats accepted by CG14CheckComposite() */
+int src_formats[] = {PICT_a8r8g8b8, PICT_x8r8g8b8,
+		     PICT_a8b8g8r8, PICT_x8b8g8r8, PICT_a8};
+/* NOTE(review): not referenced in the part of the file visible here */
+int tex_formats[] = {PICT_a8r8g8b8, PICT_a8b8g8r8, PICT_a8};
+
+/*
+ * ASCII intensity ramp for SX_DEBUG dumps, indexed with the top three bits
+ * of an 8-bit value (c[v >> 5]).  Exactly 8 characters, no terminating NUL.
+ */
+char c[8] = " .,:+*oX";
+
+/*
+ * Write an SX register: store the 32-bit value val at byte offset reg
+ * inside the p->sxreg mapping, using a volatile access.
+ *
+ * The base is cast to char * before the offset is added; arithmetic on a
+ * void * is a GNU extension rather than standard C.
+ */
+static inline void
+write_sx_reg(Cg14Ptr p, int reg, uint32_t val)
+{
+	*(volatile uint32_t *)((char *)p->sxreg + reg) = val;
+}
+
+/*
+ * Read an SX register: return the 32-bit value found at byte offset reg
+ * inside the p->sxreg mapping, using a volatile access.
+ *
+ * The base is cast to char * before the offset is added; arithmetic on a
+ * void * is a GNU extension rather than standard C.
+ */
+static inline uint32_t
+read_sx_reg(Cg14Ptr p, int reg)
+{
+	return *(volatile uint32_t *)((char *)p->sxreg + reg);
+}
+
+/*
+ * Write a memory referencing instruction: store the instruction word val at
+ * byte offset reg inside the p->sxio mapping.  Callers in this file pass the
+ * byte offset of the memory the instruction should operate on as reg.
+ *
+ * The base is cast to char * before the offset is added; arithmetic on a
+ * void * is a GNU extension rather than standard C.
+ */
+static inline void
+write_sx_io(Cg14Ptr p, int reg, uint32_t val)
+{
+	*(volatile uint32_t *)((char *)p->sxio + reg) = val;
+}
+
+/*
+ * Busy-wait on SX: keep re-reading SX_CONTROL_STATUS until the SX_MT bit
+ * reads back as zero.  There is no timeout.
+ * NOTE(review): the intent stated by the author is "wait until the
+ * instruction queue is empty" - confirm the polarity of SX_MT against
+ * <sparc/sxreg.h>.
+ */
+static inline void
+CG14Wait(Cg14Ptr p)
+{
+	while (read_sx_reg(p, SX_CONTROL_STATUS) & SX_MT)
+		continue;
+}
+
+/*
+ * Wait for SX on behalf of the given screen.  The Marker argument is not
+ * looked at; every call simply blocks in CG14Wait().
+ */
+static void
+CG14WaitMarker(ScreenPtr pScreen, int Marker)
+{
+	CG14Wait(GET_CG14_FROM_SCRN(xf86Screens[pScreen->myNum]));
+}
+
+/*
+ * Set up SX state for a series of CG14Copy() calls.
+ *
+ * pSrcPixmap / pDstPixmap - source and destination of the copy
+ * xdir, ydir              - copy direction, stored for CG14Copy()
+ * alu                     - raster op, used as an index into sx_rop[]
+ * planemask               - written to SX_PLANEMASK when it changes
+ *
+ * Returns TRUE when the copy can be accelerated.  Returns FALSE, without
+ * touching the hardware, unless both pixmaps are 32 bits per pixel:
+ * CG14Copy() computes every address as x << 2 and would read and write the
+ * wrong memory for any other pixel size.
+ */
+static Bool
+CG14PrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
+		int xdir, int ydir, int alu, Pixel planemask)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
+	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
+
+	ENTER;
+	DPRINTF(X_ERROR, "bits per pixel: %d\n",
+	    pSrcPixmap->drawable.bitsPerPixel);
+
+	/* CG14Copy() only knows about 4-byte pixels */
+	if ((pSrcPixmap->drawable.bitsPerPixel != 32) ||
+	    (pDstPixmap->drawable.bitsPerPixel != 32))
+		return FALSE;
+
+	/*
+	 * planemask and ROP are cached in the driver record; the hardware
+	 * registers are only rewritten, after waiting for SX, on a change
+	 */
+	if (planemask != p->last_mask) {
+		CG14Wait(p);
+		write_sx_reg(p, SX_PLANEMASK, planemask);
+		p->last_mask = planemask;
+	}
+	alu = sx_rop[alu];
+	if (alu != p->last_rop) {
+		CG14Wait(p);
+		write_sx_reg(p, SX_ROP_CONTROL, alu);
+		p->last_rop = alu;
+	}
+	/* remembered for CG14Copy(), which only receives the destination */
+	p->srcpitch = exaGetPixmapPitch(pSrcPixmap);
+	p->srcoff = exaGetPixmapOffset(pSrcPixmap);
+	p->xdir = xdir;
+	p->ydir = ydir;
+	return TRUE;
+}
+
+/*
+ * Copy a w x h pixel rectangle from (srcX, srcY) in the source pixmap that
+ * CG14PrepareCopy() recorded (p->srcoff / p->srcpitch) to (dstX, dstY) in
+ * pDstPixmap.
+ *
+ * All addresses are computed with 4 bytes per pixel.  Each row is moved in
+ * chunks of up to 32 pixels by issuing SX instructions through
+ * write_sx_io().  Two variants exist per direction:
+ *   - p->last_rop == 0xcc: load into registers from 10, store them back
+ *   - otherwise: load source to registers from 10 and destination to
+ *     registers from 42, combine with SX_ROP into registers from 74 and
+ *     store those.  SX_ROP is issued in two halves of at most 16 registers.
+ * Rows are walked bottom-up when p->ydir < 0; chunks are walked right to
+ * left only when p->xdir < 0 and w > 32.
+ * Finishes by calling exaMarkSync().
+ */
+static void
+CG14Copy(PixmapPtr pDstPixmap,
+         int srcX, int srcY, int dstX, int dstY, int w, int h)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
+	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
+	int dstpitch, dstoff, srcpitch, srcoff;
+	int srcstart, dststart, xinc, srcinc, dstinc;
+	int line, count, s, d, num;
+
+	ENTER;
+	dstpitch = exaGetPixmapPitch(pDstPixmap);
+	dstoff = exaGetPixmapOffset(pDstPixmap);
+	srcpitch = p->srcpitch;
+	srcoff = p->srcoff;
+	/*
+	 * should clear the WO bit in SX_CONTROL_STATUS, then check if SX
+	 * actually wrote anything and only sync if it did
+	 */
+	/* byte offsets of the first pixel; << 2 is the 4 bytes per pixel */
+	srcstart = (srcX << 2) + (srcpitch * srcY) + srcoff;
+	dststart = (dstX << 2) + (dstpitch * dstY) + dstoff;
+
+	/*
+	 * we always copy up to 32 pixels at a time so direction doesn't
+	 * matter if w<=32
+	 */
+	if (w > 32) {
+		if (p->xdir < 0) {
+			/* start at the rightmost full 32 pixel chunk */
+			srcstart += (w - 32) << 2;
+			dststart += (w - 32) << 2;
+			xinc = -128;
+		} else
+			xinc = 128;
+	} else
+		xinc = 128;
+	if (p->ydir < 0) {
+		/* start on the last row and step backwards */
+		srcstart += (h - 1) * srcpitch;
+		dststart += (h - 1) * dstpitch;
+		srcinc = -srcpitch;
+		dstinc = -dstpitch;
+	} else {
+		srcinc = srcpitch;
+		dstinc = dstpitch;
+	}
+	if (p->last_rop == 0xcc) {
+		/* plain old copy */
+		if ( xinc > 0) {
+			/* going left to right */
+			for (line = 0; line < h; line++) {
+				count = 0;
+				s = srcstart;
+				d = dststart;
+				while ( count < w) {
+					/* last chunk of a row may be short */
+					num = min(32, w - count);
+					write_sx_io(p, s,
+					    SX_LD(10, num - 1, s & 7));
+					write_sx_io(p, d,
+					    SX_STM(10, num - 1, d & 7));
+					s += xinc;
+					d += xinc;
+					count += 32;
+				}
+				srcstart += srcinc;
+				dststart += dstinc;
+			}
+		} else {
+			/* going right to left */
+			int i, chunks = (w >> 5);
+			for (line = 0; line < h; line++) {
+				s = srcstart;
+				d = dststart;
+				count = w;
+				for (i = 0; i < chunks; i++) {
+					write_sx_io(p, s,
+					    SX_LD(10, 31, s & 7));
+					write_sx_io(p, d,
+					    SX_STM(10, 31, d & 7));
+					s -= 128;
+					d -= 128;
+					count -= 32;
+				}
+				/* leftovers, if any */
+				if (count > 0) {
+					/*
+					 * s and d are one full chunk left of
+					 * the last one copied; move forward
+					 * to the start of the short chunk
+					 */
+					s += (32 - count) << 2;
+					d += (32 - count) << 2;
+					write_sx_io(p, s,
+					    SX_LD(10, count - 1, s & 7));
+					write_sx_io(p, d,
+					    SX_STM(10, count - 1, d & 7));
+				}
+				srcstart += srcinc;
+				dststart += dstinc;
+			}
+		}
+	} else {
+		/* ROPs needed */
+		if ( xinc > 0) {
+			/* going left to right */
+			for (line = 0; line < h; line++) {
+				count = 0;
+				s = srcstart;
+				d = dststart;
+				while ( count < w) {
+					num = min(32, w - count);
+					/* source -> 10.., destination -> 42.. */
+					write_sx_io(p, s,
+					    SX_LD(10, num - 1, s & 7));
+					write_sx_io(p, d,
+					    SX_LD(42, num - 1, d & 7));
+					/* ROP in halves of up to 16 registers */
+					if (num > 16) {
+						write_sx_reg(p, SX_INSTRUCTIONS,
+					    	 SX_ROP(10, 42, 74, 15));
+						write_sx_reg(p, SX_INSTRUCTIONS,
+					    	 SX_ROP(26, 58, 90, num - 17));
+					} else {
+						write_sx_reg(p, SX_INSTRUCTIONS,
+					    	 SX_ROP(10, 42, 74, num - 1));
+					}
+					/* result in 74.. goes back to memory */
+					write_sx_io(p, d,
+					    SX_STM(74, num - 1, d & 7));
+					s += xinc;
+					d += xinc;
+					count += 32;
+				}
+				srcstart += srcinc;
+				dststart += dstinc;
+			}
+		} else {
+			/* going right to left */
+			int i, chunks = (w >> 5);
+			for (line = 0; line < h; line++) {
+				s = srcstart;
+				d = dststart;
+				count = w;
+				for (i = 0; i < chunks; i++) {
+					write_sx_io(p, s, SX_LD(10, 31, s & 7));
+					write_sx_io(p, d, SX_LD(42, 31, d & 7));
+					write_sx_reg(p, SX_INSTRUCTIONS,
+				    	    SX_ROP(10, 42, 74, 15));
+					write_sx_reg(p, SX_INSTRUCTIONS,
+				    	    SX_ROP(26, 58, 90, 15));
+					write_sx_io(p, d,
+					    SX_STM(74, 31, d & 7));
+					s -= 128;
+					d -= 128;
+					count -= 32;
+				}
+				/* leftovers, if any */
+				if (count > 0) {
+					s += (32 - count) << 2;
+					d += (32 - count) << 2;
+					write_sx_io(p, s,
+					    SX_LD(10, count - 1, s & 7));
+					write_sx_io(p, d,
+					    SX_LD(42, count - 1, d & 7));
+					if (count > 16) {
+						write_sx_reg(p, SX_INSTRUCTIONS,
+					    	    SX_ROP(10, 42, 74, 15));
+						write_sx_reg(p, SX_INSTRUCTIONS,
+					    	 SX_ROP(26, 58, 90, count - 17));
+					} else {
+						write_sx_reg(p, SX_INSTRUCTIONS,
+					    	 SX_ROP(10, 42, 74, count - 1));
+					}
+					
+					write_sx_io(p, d,
+					    SX_STM(74, count - 1, d & 7));
+				}
+				srcstart += srcinc;
+				dststart += dstinc;
+			}
+		}
+	}			
+	exaMarkSync(pDstPixmap->drawable.pScreen);
+}
+
+/*
+ * Counterpart to CG14PrepareCopy(); intentionally empty, there is nothing
+ * to tear down.  CG14Copy() already calls exaMarkSync() itself.
+ */
+static void
+CG14DoneCopy(PixmapPtr pDstPixmap)
+{
+}
+
+/*
+ * Set up SX state for a series of CG14Solid() calls.
+ *
+ * pPixmap   - pixmap that will be filled
+ * alu       - raster op, used as an index into sx_rop[]
+ * planemask - written to SX_PLANEMASK when it changes
+ * fg        - fill colour, loaded into SX registers 8 and 9
+ *
+ * Returns TRUE when the fill can be accelerated.  Returns FALSE, before any
+ * register is written, for pixmaps that are neither 8 nor 32 bits per
+ * pixel: CG14Solid() only has fill routines for those two sizes and would
+ * otherwise silently draw nothing.
+ */
+static Bool
+CG14PrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
+	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
+
+	ENTER;
+	DPRINTF(X_ERROR, "bits per pixel: %d\n", pPixmap->drawable.bitsPerPixel);
+
+	/* only the pixel sizes CG14Solid() can dispatch */
+	if ((pPixmap->drawable.bitsPerPixel != 32) &&
+	    (pPixmap->drawable.bitsPerPixel != 8))
+		return FALSE;
+
+	write_sx_reg(p, SX_QUEUED(8), fg);
+	write_sx_reg(p, SX_QUEUED(9), fg);
+	/*
+	 * planemask and ROP are cached in the driver record; the hardware
+	 * registers are only rewritten, after waiting for SX, on a change
+	 */
+	if (planemask != p->last_mask) {
+		CG14Wait(p);
+		write_sx_reg(p, SX_PLANEMASK, planemask);
+		p->last_mask = planemask;
+	}
+	alu = sx_rop[alu];
+	if (alu != p->last_rop) {
+		CG14Wait(p);
+		write_sx_reg(p, SX_ROP_CONTROL, alu);
+		p->last_rop = alu;
+	}
+	DPRINTF(X_ERROR, "%s: %x\n", __func__, alu);
+	return TRUE;
+}
+
+/*
+ * Fill a w x h rectangle of 4-byte pixels.
+ *
+ * start - byte offset of the top left pixel (used as the write_sx_io offset)
+ * pitch - bytes from one row to the next
+ *
+ * Behaviour depends on the ROP cached in p->last_rop:
+ *   0xcc  - store register 8 (loaded by CG14PrepareSolid()) to memory, up to
+ *           32 pixels per instruction
+ *   0xaa  - return without touching memory
+ *   other - read the destination, combine it with the fill colour using
+ *           SX_ROP and write the result back
+ */
+static void
+CG14Solid32(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
+{
+	int line, x, num;
+	uint32_t ptr;
+
+	ENTER;
+	if (p->last_rop == 0xcc) {
+		/* simple fill */
+		for (line = 0; line < h; line++) {
+			x = 0;
+			while (x < w) {
+				ptr = start + (x << 2);
+				/* last chunk of a row may be short */
+				num = min(32, w - x);
+				write_sx_io(p, ptr,
+				    SX_STS(8, num - 1, ptr & 7));
+				x += 32;
+			}
+			start += pitch;
+		}
+	} else if (p->last_rop == 0xaa) {
+		/* nothing to do here */
+		return;
+	} else {
+		/* alright, let's do actual ROP stuff */
+
+		/* first repeat the fill colour into 16 registers */
+		write_sx_reg(p, SX_INSTRUCTIONS,
+		    SX_SELECT_S(8, 8, 10, 15));
+
+		for (line = 0; line < h; line++) {
+			x = 0;
+			while (x < w) {
+				ptr = start + (x << 2);
+				num = min(32, w - x);
+				/* now suck fb data into registers */
+				write_sx_io(p, ptr,
+				    SX_LD(42, num - 1, ptr & 7));
+				/*
+				 * ROP them with the fill data we left in 10
+				 * non-memory ops can only have counts up to 16
+				 */
+				if (num <= 16) {
+					write_sx_reg(p, SX_INSTRUCTIONS,
+					    SX_ROP(10, 42, 74, num - 1));
+				} else {
+					write_sx_reg(p, SX_INSTRUCTIONS,
+					    SX_ROP(10, 42, 74, 15));
+					/*
+					 * second half reuses the same 16 fill
+					 * registers starting at 10
+					 */
+					write_sx_reg(p, SX_INSTRUCTIONS,
+					    SX_ROP(10, 58, 90, num - 17));
+				}
+				/* and write the result back into memory */
+				write_sx_io(p, ptr,
+				    SX_ST(74, num - 1, ptr & 7));
+				x += 32;
+			}
+			start += pitch;
+		}
+	}
+}
+
+/*
+ * Fill a w x h rectangle of 1-byte pixels; the 8 bit counterpart of
+ * CG14Solid32().
+ *
+ * start - byte offset of the top left pixel
+ * pitch - bytes from one row to the next
+ *
+ * start is rounded down to a multiple of 8 and the remainder is passed to
+ * the byte load/store instructions as their last argument instead.
+ * NOTE(review): that remainder is taken once, from the first row, and used
+ * for every row - confirm pitch is always a multiple of 8.
+ *
+ * p->last_rop selects the same three cases as in CG14Solid32(): 0xcc plain
+ * store, 0xaa nothing, anything else read / SX_ROP / write back.
+ */
+static void
+CG14Solid8(Cg14Ptr p, uint32_t start, uint32_t pitch, int w, int h)
+{
+	int line, x, num, off;
+	uint32_t ptr;
+
+	ENTER;
+	/* split the address into an 8-byte aligned base and a byte offset */
+	off = start & 7;
+	start &= ~7;
+
+	if (p->last_rop == 0xcc) {
+		/* simple fill */
+		for (line = 0; line < h; line++) {
+			x = 0;
+			while (x < w) {
+				ptr = start + x;
+				/* last chunk of a row may be short */
+				num = min(32, w - x);
+				write_sx_io(p, ptr,
+				    SX_STBS(8, num - 1, off));
+				x += 32;
+			}
+			start += pitch;
+		}
+	} else if (p->last_rop == 0xaa) {
+		/* nothing to do here */
+		return;
+	} else {
+		/* alright, let's do actual ROP stuff */
+
+		/* first repeat the fill colour into 16 registers */
+		write_sx_reg(p, SX_INSTRUCTIONS,
+		    SX_SELECT_S(8, 8, 10, 15));
+
+		for (line = 0; line < h; line++) {
+			x = 0;
+			while (x < w) {
+				ptr = start + x;
+				num = min(32, w - x);
+				/* now suck fb data into registers */
+				write_sx_io(p, ptr,
+				    SX_LDB(42, num - 1, off));
+				/*
+				 * ROP them with the fill data we left in 10
+				 * non-memory ops can only have counts up to 16
+				 */
+				if (num <= 16) {
+					write_sx_reg(p, SX_INSTRUCTIONS,
+					    SX_ROP(10, 42, 74, num - 1));
+				} else {
+					write_sx_reg(p, SX_INSTRUCTIONS,
+					    SX_ROP(10, 42, 74, 15));
+					/*
+					 * second half reuses the same 16 fill
+					 * registers starting at 10
+					 */
+					write_sx_reg(p, SX_INSTRUCTIONS,
+					    SX_ROP(10, 58, 90, num - 17));
+				}
+				/* and write the result back into memory */
+				write_sx_io(p, ptr,
+				    SX_STB(74, num - 1, off));
+				x += 32;
+			}
+			start += pitch;
+		}
+	}
+}
+
+/*
+ * Fill the rectangle spanning x1..x2 horizontally and y1..y2 vertically
+ * (width x2 - x1, height y2 - y1) in pPixmap, using the colour, planemask
+ * and ROP set up by CG14PrepareSolid().
+ *
+ * Dispatches on the pixmap's bits per pixel: 32 goes to CG14Solid32(), 8 to
+ * CG14Solid8().  Any other size draws nothing.  exaMarkSync() is called in
+ * every case.
+ */
+static void
+CG14Solid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
+	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
+	int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
+	/*
+	 * start is initialised so the debug output below never reads an
+	 * indeterminate value when no case in the switch matches
+	 */
+	int start = 0, depth;
+
+	ENTER;
+	dstpitch = exaGetPixmapPitch(pPixmap);
+	dstoff = exaGetPixmapOffset(pPixmap);
+
+	depth = pPixmap->drawable.bitsPerPixel;
+	switch (depth) {
+		case 32:
+			start = dstoff + (y1 * dstpitch) + (x1 << 2);
+			CG14Solid32(p, start, dstpitch, w, h);
+			break;
+		case 8:
+			start = dstoff + (y1 * dstpitch) + x1;
+			CG14Solid8(p, start, dstpitch, w, h);
+			break;
+		default:
+			/* no fill routine for this pixel size */
+			break;
+	}
+
+	DPRINTF(X_ERROR, "Solid %d %d %d %d, %d %d -> %d\n", x1, y1, x2, y2,
+	    dstpitch, dstoff, start);
+	DPRINTF(X_ERROR, "%x %x %x\n", p->last_rop,
+	    read_sx_reg(p, SX_QUEUED(8)), read_sx_reg(p, SX_QUEUED(9)));
+	exaMarkSync(pPixmap->drawable.pScreen);
+}
+
+/*
+ * Memcpy-based UTS.
+ *
+ * Copies a w x h pixel rectangle from the buffer at src (src_pitch bytes per
+ * row) into pDst at (x, y), going through the CPU mapping p->fb.  Waits for
+ * SX before touching memory, issues a SPARC "stbar" afterwards and always
+ * returns TRUE.
+ */
+static Bool
+CG14UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
+    char *src, int src_pitch)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
+	char *out = p->fb + exaGetPixmapOffset(pDst);
+	int out_stride = exaGetPixmapPitch(pDst);
+	/* bytes per pixel, rounded up, and bytes per copied row */
+	int bytes_pp = (pDst->drawable.bitsPerPixel + 7) >> 3;
+	int row_bytes = w * bytes_pp;
+
+	ENTER;
+	out += (x * bytes_pp) + (y * out_stride);
+
+	/* let SX finish before the CPU writes to the same memory */
+	CG14Wait(p);
+
+	for (; h != 0; h--) {
+		memcpy(out, src, row_bytes);
+		src += src_pitch;
+		out += out_stride;
+	}
+	__asm("stbar;");
+	return TRUE;
+}
+
+/*
+ * Memcpy-based DFS.
+ *
+ * Copies a w x h pixel rectangle at (x, y) of pSrc, read through the CPU
+ * mapping p->fb, into the buffer at dst (dst_pitch bytes per row).  Waits
+ * for SX before reading and always returns TRUE.
+ */
+static Bool
+CG14DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
+    char *dst, int dst_pitch)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
+	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
+	char *in = p->fb + exaGetPixmapOffset(pSrc);
+	int in_stride = exaGetPixmapPitch(pSrc);
+
+	ENTER;
+	/* bytes per pixel, rounded up, and bytes per copied row */
+	int bytes_pp = (pSrc->drawable.bitsPerPixel + 7) >> 3;
+	int row_bytes = w * bytes_pp;
+
+	in += (x * bytes_pp) + (y * in_stride);
+
+	/* let SX finish before the CPU reads what it may still be writing */
+	CG14Wait(p);
+
+	for (; h != 0; h--) {
+		memcpy(dst, in, row_bytes);
+		in += in_stride;
+		dst += dst_pitch;
+	}
+
+	return TRUE;
+}
+
+/*
+ * Decide whether a composite request is one this driver will take on.
+ *
+ * Returns FALSE, after logging the reason, unless op is PictOpOver or
+ * PictOpAdd and the source picture's format appears in src_formats[].
+ * The mask and destination pictures are not checked; the mask is only
+ * looked at for debug output.
+ *
+ * SX is in theory capable of accelerating pretty much all Xrender ops,
+ * even coordinate transformation and gradients. Support will be added
+ * over time and likely have to spill over into its own source file.
+ */
+Bool
+CG14CheckComposite(int op, PicturePtr pSrcPicture,
+                           PicturePtr pMaskPicture,
+                           PicturePtr pDstPicture)
+{
+	int idx, supported = FALSE;
+
+	ENTER;
+
+	switch (op) {
+	case PictOpOver:
+	case PictOpAdd:
+		break;
+	default:
+		xf86Msg(X_ERROR, "%s: rejecting %d\n", __func__, op);
+		return FALSE;
+	}
+
+	/* linear search for the source format */
+	for (idx = 0; idx < arraysize(src_formats); idx++) {
+		if (pSrcPicture->format == src_formats[idx]) {
+			supported = TRUE;
+			break;
+		}
+	}
+	if (!supported) {
+		xf86Msg(X_ERROR, "%s: unsupported src format %x\n",
+		    __func__, pSrcPicture->format);
+		return FALSE;
+	}
+
+	DPRINTF(X_ERROR, "src is %x %d %d\n", pSrcPicture->format,
+	    pSrcPicture->pDrawable->width, pSrcPicture->pDrawable->height);
+
+	if (pMaskPicture != NULL)
+		DPRINTF(X_ERROR, "mask is %x %d %d\n", pMaskPicture->format,
+		    pMaskPicture->pDrawable->width,
+		    pMaskPicture->pDrawable->height);
+
+	return TRUE;
+}
+
+/*
+ * Record everything the composite routines need later in the driver
+ * record: operation, picture formats, and offset/pitch of the source and,
+ * when present, mask pixmaps.  If the source or the mask picture is a solid
+ * fill its colour is stored in p->fillcolour (the mask's colour wins when
+ * both are solid).  No hardware register is touched; always returns TRUE.
+ */
+Bool
+CG14PrepareComposite(int op, PicturePtr pSrcPicture,
+                             PicturePtr pMaskPicture,
+                             PicturePtr pDstPicture,
+                             PixmapPtr  pSrc,
+                             PixmapPtr  pMask,
+                             PixmapPtr  pDst)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
+	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
+
+	ENTER;
+
+	/* solid source? */
+	if ((pSrcPicture->pSourcePict != NULL) &&
+	    (pSrcPicture->pSourcePict->type == SourcePictTypeSolidFill)) {
+		p->fillcolour = pSrcPicture->pSourcePict->solidFill.color;
+		DPRINTF(X_ERROR, "%s: solid src %08x\n",
+		    __func__, p->fillcolour);
+	}
+
+	if (pMaskPicture != NULL) {
+		/* solid mask? */
+		if ((pMaskPicture->pSourcePict != NULL) &&
+		    (pMaskPicture->pSourcePict->type ==
+		     SourcePictTypeSolidFill)) {
+			p->fillcolour =
+			    pMaskPicture->pSourcePict->solidFill.color;
+			DPRINTF(X_ERROR, "%s: solid mask %08x\n",
+			    __func__, p->fillcolour);
+		}
+		p->mskoff = exaGetPixmapOffset(pMask);
+		p->mskpitch = exaGetPixmapPitch(pMask);
+		p->mskformat = pMaskPicture->format;
+	}
+
+	p->srcoff = exaGetPixmapOffset(pSrc);
+	p->srcpitch = exaGetPixmapPitch(pSrc);
+	p->srcformat = pSrcPicture->format;
+	p->dstformat = pDstPicture->format;
+	p->op = op;
+#ifdef SX_DEBUG
+	DPRINTF(X_ERROR, "%x %x -> %x\n", p->srcoff, p->mskoff,
+	    *(uint32_t *)(p->fb + p->srcoff));
+#endif
+	return TRUE;
+}
+
+/*
+ * Blend one solid colour onto the destination through a 32 bit mask.
+ *
+ * src, srcpitch - byte offset and pitch of the MASK (the src argument is
+ *                 copied into msk and only used that way)
+ * dst, dstpitch - byte offset and pitch of the destination, 4 bytes/pixel
+ * width, height - size of the area in pixels
+ *
+ * The colour itself is loaded from the pixel at p->srcoff into the SX
+ * registers starting at 8, after which register 8 is overwritten with 0xff.
+ *
+ * With SX_SINGLE defined (as it is at the top of this file) each mask pixel
+ * is read by the CPU through p->fb and its top byte is used as alpha:
+ *   0    - destination left alone
+ *   0xff - the colour registers are stored to the destination
+ *   else - SX computes colour * alpha + destination * (alpha ^ 0xff)
+ * The #else branch, four pixels per iteration, is not compiled while
+ * SX_SINGLE is defined.
+ *
+ * NOTE(review): the local variable i is never used.
+ */
+void CG14Comp_Over32(Cg14Ptr p,
+                   uint32_t src, uint32_t srcpitch,
+                   uint32_t dst, uint32_t dstpitch,
+                   int width, int height)
+{
+	uint32_t msk = src, mskx, dstx, m;
+	int line, x, i;
+
+	ENTER;
+	/* first get the source colour */
+	write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7));
+	write_sx_reg(p, SX_QUEUED(8), 0xff);
+	for (line = 0; line < height; line++) {
+		mskx = msk;
+		dstx = dst;
+#ifdef SX_SINGLE
+
+		for (x = 0; x < width; x++) {
+			/* CPU read of the mask pixel; alpha is its top byte */
+			m = *(volatile uint32_t *)(p->fb + mskx);
+			m = m >> 24;
+			if (m == 0) {
+				/* nothing to do - all transparent */
+			} else if (m == 0xff) {
+				/* all opaque */
+				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
+			} else {
+				/* fetch alpha value, stick it into scam */
+				/* mask is in R[12:15] */
+				/*write_sx_io(p, mskx,
+				    SX_LDUQ0(12, 0, mskx & 7));*/
+				write_sx_reg(p, SX_QUEUED(12), m);
+				/* fetch dst pixel */
+				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_ORV(12, 0, R_SCAM, 0));
+				/*
+				 * src * alpha + R0
+				 * R[9:11] * SCAM + R0 -> R[17:19]
+				 */
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_SAXP16X16SR8(9, 0, 17, 2));
+			
+				/* invert SCAM */
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_XORV(12, 8, R_SCAM, 0));
+#ifdef SX_DEBUG
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_XORV(12, 8, 13, 0));
+#endif
+				/* dst * (1 - alpha) + R[13:15] */
+				/*
+				 * NOTE(review): the instruction below is
+				 * given 17, not 13, as its second operand
+				 */
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_SAXP16X16SR8(21, 17, 25, 2));
+				write_sx_io(p, dstx,
+				    SX_STUQ0C(24, 0, dstx & 7));
+			}
+			/* both mask and destination are 4 bytes per pixel */
+			dstx += 4;
+			mskx += 4;
+		}
+#else
+		for (x = 0; x < width; x += 4) {
+			/* fetch 4 mask values */
+			write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
+			/* fetch destination pixels */
+			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
+			/* duplicate them for all channels */
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));			
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));			
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));			
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
+			/* generate inverted alpha */
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_XORS(12, 8, 28, 15));
+			/* multiply source */
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_MUL16X16SR8(8, 12, 44, 3));
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_MUL16X16SR8(8, 16, 48, 3));
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_MUL16X16SR8(8, 20, 52, 3));
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_MUL16X16SR8(8, 24, 56, 3));
+			/* multiply dest */
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_MUL16X16SR8(28, 60, 76, 15));
+			/* add up */
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_ADDV(44, 76, 92, 15));
+			/* write back */
+			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
+			dstx += 16;
+			mskx += 16;
+		}
+#endif
+		dst += dstpitch;
+		msk += srcpitch;
+	}
+}
+
+/*
+ * Blend one solid colour onto the destination through an 8 bit mask; the
+ * per-pixel logic mirrors CG14Comp_Over32() but the mask is one byte per
+ * pixel.
+ *
+ * src, srcpitch - byte offset and pitch of the MASK (the src argument is
+ *                 copied into msk and only used that way)
+ * dst, dstpitch - byte offset and pitch of the destination, 4 bytes/pixel
+ * width, height - size of the area in pixels
+ *
+ * The colour is loaded from the pixel at p->srcoff into the SX registers
+ * starting at 8, after which register 8 is overwritten with 0xff.
+ *
+ * With SX_SINGLE defined each mask byte is read by the CPU through p->fb
+ * and used as alpha: 0 skips the pixel, 0xff stores the colour, anything
+ * else lets SX blend.  The #else branch is not compiled while SX_SINGLE is
+ * defined.
+ *
+ * Under SX_DEBUG every mask row is additionally logged as ASCII art using
+ * the table c[]; the 256 byte buffer is filled without a bounds check, so
+ * rows of 256 or more pixels would overrun it.
+ *
+ * NOTE(review): the local variable i is never used.
+ */
+void CG14Comp_Over8(Cg14Ptr p,
+                   uint32_t src, uint32_t srcpitch,
+                   uint32_t dst, uint32_t dstpitch,
+                   int width, int height)
+{
+	uint32_t msk = src, mskx, dstx, m;
+	int line, x, i;
+#ifdef SX_DEBUG
+	char buffer[256];
+#endif
+	ENTER;
+
+	/* first get the source colour */
+	write_sx_io(p, p->srcoff, SX_LDUQ0(8, 0, p->srcoff & 7));
+	write_sx_reg(p, SX_QUEUED(8), 0xff);
+	DPRINTF(X_ERROR, "src: %d %d %d, %08x\n", read_sx_reg(p, SX_QUEUED(9)),
+	    read_sx_reg(p, SX_QUEUED(10)), read_sx_reg(p, SX_QUEUED(11)),
+	    *(uint32_t *)(p->fb + p->srcoff));
+	for (line = 0; line < height; line++) {
+		mskx = msk;
+		dstx = dst;
+#ifdef SX_SINGLE
+
+		for (x = 0; x < width; x++) {
+			/* CPU read of the mask byte, used directly as alpha */
+			m = *(volatile uint8_t *)(p->fb + mskx);
+#ifdef SX_DEBUG
+			buffer[x] = c[m >> 5];
+#endif
+			if (m == 0) {
+				/* nothing to do - all transparent */
+			} else if (m == 0xff) {
+				/* all opaque */
+				write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
+			} else {
+				/* fetch alpha value, stick it into scam */
+				/* mask is in R[12:15] */
+				/*write_sx_io(p, mskx & ~7, 
+				    SX_LDB(12, 0, mskx & 7));*/
+				write_sx_reg(p, SX_QUEUED(12), m);
+				/* fetch dst pixel */
+				write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_ORV(12, 0, R_SCAM, 0));
+				/*
+				 * src * alpha + R0
+				 * R[9:11] * SCAM + R0 -> R[17:19]
+				 */
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_SAXP16X16SR8(9, 0, 17, 2));
+			
+				/* invert SCAM */
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_XORV(12, 8, R_SCAM, 0));
+#ifdef SX_DEBUG
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_XORV(12, 8, 13, 0));
+#endif
+				/* dst * (1 - alpha) + R[13:15] */
+				/*
+				 * NOTE(review): the instruction below is
+				 * given 17, not 13, as its second operand
+				 */
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_SAXP16X16SR8(21, 17, 25, 2));
+				write_sx_io(p, dstx,
+				    SX_STUQ0C(24, 0, dstx & 7));
+			}
+			/* destination is 4 bytes per pixel, mask is 1 */
+			dstx += 4;
+			mskx += 1;
+		}
+#ifdef SX_DEBUG
+		buffer[x] = 0;
+		xf86Msg(X_ERROR, "%s\n", buffer);
+#endif
+#else
+		for (x = 0; x < width; x += 4) {
+			/* fetch 4 mask values */
+			write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
+			/* fetch destination pixels */
+			write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
+			/* duplicate them for all channels */
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));			
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));			
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));			
+			write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
+			/* generate inverted alpha */
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_XORS(12, 8, 28, 15));
+			/* multiply source */
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_MUL16X16SR8(8, 12, 44, 3));
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_MUL16X16SR8(8, 16, 48, 3));
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_MUL16X16SR8(8, 20, 52, 3));
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_MUL16X16SR8(8, 24, 56, 3));
+			/* multiply dest */
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_MUL16X16SR8(28, 60, 76, 15));
+			/* add up */
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_ADDV(44, 76, 92, 15));
+			/* write back */
+			write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
+			dstx += 16;
+			mskx += 4;
+		}
+#endif
+		dst += dstpitch;
+		msk += srcpitch;
+	}
+}
+
+/*
+ * Add a 32 bit source area onto a 32 bit destination area using SX.
+ *
+ * src, srcpitch - byte offset and pitch of the source
+ * dst, dstpitch - byte offset and pitch of the destination
+ * width, height - size of the area in pixels
+ *
+ * Each row is split into width >> 3 full chunks plus width & 7 leftover
+ * pixels.  A chunk is loaded from source and destination, added with two
+ * 16-register SX_ADDV instructions and stored back to the destination.
+ *
+ * The leftover instructions are only issued when there are leftovers.
+ * Issuing them unconditionally would hand SX a count of -1 whenever width
+ * is a multiple of 8.  Leftovers never exceed 7, so a single SX_ADDV
+ * covers them.
+ *
+ * NOTE(review): full chunks use a count of 31 and advance by 128 bytes
+ * while leftovers use part - 1 with part counted in pixels - confirm the
+ * units expected by SX_LDUQ0 / SX_STUQ0 against the SX documentation.
+ */
+void CG14Comp_Add32(Cg14Ptr p,
+                   uint32_t src, uint32_t srcpitch,
+                   uint32_t dst, uint32_t dstpitch,
+                   int width, int height)
+{
+	int line;
+	uint32_t srcx, dstx;
+	int full, part, x;
+
+	ENTER;
+	full = width >> 3;	/* chunks of 8 */
+	part = width & 7;	/* leftovers */
+	/* we do this up to 8 pixels at a time */
+	for (line = 0; line < height; line++) {
+		srcx = src;
+		dstx = dst;
+		for (x = 0; x < full; x++) {
+			write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
+			write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_ADDV(8, 40, 72, 15));
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_ADDV(24, 56, 88, 15));
+			write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
+			srcx += 128;
+			dstx += 128;
+		}
+
+		/* do leftovers, if any */
+		if (part > 0) {
+			write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
+			write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_ADDV(8, 40, 72, part - 1));
+			write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
+		}
+
+		/* next line */
+		src += srcpitch;
+		dst += dstpitch;
+	}
+}
+
+/*
+ * Add a source rectangle to a destination rectangle of 8bit pixels.
+ *
+ * src and dst are split into an address with the low three bits cleared and
+ * the remaining 0..7 byte displacement, which is what the SX_LDB / SX_STBC
+ * macros are fed with. Each line is then processed in chunks of up to 32
+ * bytes: source bytes go to registers starting at 8, destination bytes to
+ * registers starting at 40, SX_ADDV puts the sums into registers starting at
+ * 72 and SX_STBC writes them back to the destination.
+ * NOTE(review): SX_STBC presumably clamps on store, matching the
+ * min(255, ...) in the software path - confirm against the SX documentation.
+ *
+ * p			driver private, handed to every SX access
+ * src, dst		address of the first source / destination pixel
+ * srcpitch, dstpitch	added to src / dst to get to the next line
+ * width, height	size of the rectangle in pixels
+ *
+ * With SX_ADD_SOFTWARE defined the addition is done by the CPU through p->fb
+ * instead. With SX_DEBUG defined every source line is additionally dumped
+ * via xf86Msg().
+ */
+void CG14Comp_Add8(Cg14Ptr p,
+                   uint32_t src, uint32_t srcpitch,
+                   uint32_t dst, uint32_t dstpitch,
+                   int width, int height)
+{
+	int line;
+	uint32_t srcx, dstx, srcoff, dstoff;
+	int pre, full, part, x;
+	uint8_t *d;
+	char buffer[256];
+	ENTER;
+
+	/* separate the 64bit aligned address from the byte displacement */
+	srcoff = src & 7;
+	src &= ~7;
+	dstoff = dst & 7;
+	dst &= ~7;
+	full = width >> 5;	/* chunks of 32 */
+	part = width & 31;	/* leftovers */
+
+#ifdef SX_DEBUG
+	xf86Msg(X_ERROR, "%d %d, %d x %d, %d %d\n", srcpitch, dstpitch,
+	    width, height, full, part);
+#endif
+	/* we do this up to 32 pixels at a time */
+	for (line = 0; line < height; line++) {
+		srcx = src;
+		dstx = dst;
+#ifdef SX_ADD_SOFTWARE
+		uint8_t *s = (uint8_t *)(p->fb + srcx + srcoff);
+		d = (uint8_t *)(p->fb + dstx + dstoff);
+		for (x = 0; x < width; x++) {
+			d[x] = min(255, s[x] + d[x]);
+		}
+#else
+		for (x = 0; x < full; x++) {
+			write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
+			write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
+			/* 32 registers, added 16 at a time */
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_ADDV(8, 40, 72, 15));
+			write_sx_reg(p, SX_INSTRUCTIONS,
+			    SX_ADDV(24, 56, 88, 15));
+			write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
+			srcx += 32;
+			dstx += 32;
+		}
+
+		if (part > 0) {
+			/* do leftovers */
+			write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
+			write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
+			if (part > 16) {
+				/* more than one ADDV worth of registers */
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_ADDV(8, 40, 72, 15));
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_ADDV(24, 56, 88, part - 17));
+			} else {
+				write_sx_reg(p, SX_INSTRUCTIONS,
+				    SX_ADDV(8, 40, 72, part - 1));
+			}
+			write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
+		}
+#endif
+#ifdef SX_DEBUG
+		/*
+		 * dump the source line as text; stop before the end of
+		 * buffer[] so that wide rectangles can't overrun it and
+		 * there is always room for the terminator
+		 */
+		d = (uint8_t *)(p->fb + src + srcoff);
+		for (x = 0; x < width && x < (int)sizeof(buffer) - 1; x++) {
+			buffer[x] = c[d[x]>>5];
+		}
+		buffer[x] = 0;
+		xf86Msg(X_ERROR, "%s\n", buffer);
+#endif
+		/* next line */
+		src += srcpitch;
+		dst += dstpitch;
+	}
+}
+
+/*
+ * Composite hook: pick the SX routine matching the operation stored in
+ * p->op and the mask / source format stored in the driver private, work out
+ * the start addresses of the rectangles involved and hand them over.
+ *
+ * PictOpOver	needs a mask, a8 goes to CG14Comp_Over8(), the 32bit formats
+ *		to CG14Comp_Over32(). The destination is addressed with four
+ *		bytes per pixel; srcX / srcY only show up in the debug output.
+ * PictOpAdd	goes by source format, a8 is handled by CG14Comp_Add8() with
+ *		one byte per pixel on both sides, the 32bit formats by
+ *		CG14Comp_Add32() with four.
+ *
+ * Anything else is only reported via xf86Msg(). In all cases the screen is
+ * marked as needing a sync before we return.
+ */
+void
+CG14Composite(PixmapPtr pDst, int srcX, int srcY,
+                              int maskX, int maskY,
+                              int dstX, int dstY,
+                              int width, int height)
+{
+	ScreenPtr pScreen = pDst->drawable.pScreen;
+	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
+	uint32_t base, pitch;
+	uint32_t dstaddr, mskaddr, srcaddr;
+
+	ENTER;
+	base = exaGetPixmapOffset(pDst);
+	pitch = exaGetPixmapPitch(pDst);
+
+	switch (p->op) {
+	case PictOpOver:
+		/* destination is always 32bit here */
+		dstaddr = base + (dstY * pitch) + (dstX << 2);
+		DPRINTF(X_ERROR, "Over %08x %08x, %d %d\n",
+		    p->mskformat, p->dstformat, srcX, srcY);
+		if (p->mskformat == PICT_a8) {
+			/* one byte per mask pixel */
+			mskaddr = p->mskoff +
+			    (maskY * p->mskpitch) + maskX;
+			CG14Comp_Over8(p, mskaddr, p->mskpitch,
+			    dstaddr, pitch, width, height);
+		} else if (p->mskformat == PICT_a8r8g8b8 ||
+		    p->mskformat == PICT_a8b8g8r8) {
+			/* four bytes per mask pixel */
+			mskaddr = p->mskoff +
+			    (maskY * p->mskpitch) +
+			    (maskX << 2);
+			CG14Comp_Over32(p, mskaddr, p->mskpitch,
+			    dstaddr, pitch, width, height);
+		} else {
+			xf86Msg(X_ERROR,
+			    "unsupported mask format\n");
+		}
+		break;
+
+	case PictOpAdd:
+		DPRINTF(X_ERROR, "Add %08x %08x\n",
+		    p->srcformat, p->dstformat);
+		if (p->srcformat == PICT_a8) {
+			/* 8bit on both ends */
+			srcaddr = p->srcoff +
+			    (srcY * p->srcpitch) + srcX;
+			dstaddr = base + (dstY * pitch) + dstX;
+			CG14Comp_Add8(p, srcaddr, p->srcpitch,
+			    dstaddr, pitch, width, height);
+		} else if (p->srcformat == PICT_a8r8g8b8 ||
+		    p->srcformat == PICT_x8r8g8b8) {
+			/* 32bit on both ends */
+			srcaddr = p->srcoff +
+			    (srcY * p->srcpitch) + (srcX << 2);
+			dstaddr = base + (dstY * pitch) +
+			    (dstX << 2);
+			CG14Comp_Add32(p, srcaddr, p->srcpitch,
+			    dstaddr, pitch, width, height);
+		} else {
+			xf86Msg(X_ERROR,
+			    "unsupported src format\n");
+		}
+		break;
+
+	default:
+		xf86Msg(X_ERROR, "unsupported op %d\n", p->op);
+	}
+	exaMarkSync(pScreen);
+}
+
+
+
+/*
+ * Set up EXA for this screen: allocate the driver record, describe the
+ * framebuffer memory, install our hooks, put the SX plane mask and ROP
+ * registers into a known state and register with EXA.
+ *
+ * Returns FALSE if the driver record can't be allocated, otherwise whatever
+ * exaDriverInit() returns.
+ */
+Bool
+CG14InitAccel(ScreenPtr pScreen)
+{
+	ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
+	Cg14Ptr p = GET_CG14_FROM_SCRN(pScrn);
+	ExaDriverPtr exa;
+
+	exa = exaDriverAlloc();
+	if (exa == NULL)
+		return FALSE;
+	p->pExa = exa;
+
+	exa->exa_major = EXA_VERSION_MAJOR;
+	exa->exa_minor = EXA_VERSION_MINOR;
+	exa->flags = EXA_OFFSCREEN_PIXMAPS |
+		     /*EXA_SUPPORTS_OFFSCREEN_OVERLAPS |*/
+		     EXA_MIXED_PIXMAPS;
+
+	/* memory layout - everything past the visible 32bit screen is ours */
+	exa->memoryBase = p->fb;
+	exa->memorySize = p->memsize;
+	exa->offScreenBase = p->width * p->height * 4;
+
+	/*
+	 * SX memory instructions go to 64bit aligned addresses and carry a
+	 * 3 bit displacement. Aligning both offset and pitch to 8 keeps that
+	 * displacement the same for every line of a column.
+	 */
+	exa->pixmapOffsetAlign = 8;
+	exa->pixmapPitchAlign = 8;
+
+	/*
+	 * SX has no notion of coordinates and therefore no real limit, but
+	 * EXA wants to see some numbers here
+	 */
+	exa->maxX = 4096;
+	exa->maxY = 4096;
+
+	/* basic acceleration hooks */
+	exa->WaitMarker = CG14WaitMarker;
+
+	exa->PrepareCopy = CG14PrepareCopy;
+	exa->Copy = CG14Copy;
+	exa->DoneCopy = CG14DoneCopy;
+
+	exa->PrepareSolid = CG14PrepareSolid;
+	exa->Solid = CG14Solid;
+	exa->DoneSolid = CG14DoneCopy;
+
+	/* render hooks only when the user asked for them */
+	if (p->use_xrender) {
+		exa->CheckComposite = CG14CheckComposite;
+		exa->PrepareComposite = CG14PrepareComposite;
+		exa->Composite = CG14Composite;
+		exa->DoneComposite = CG14DoneCopy;
+	}
+
+	/*
+	 * Provide memcpy-based UTS/DFS - EXA takes better optimized paths
+	 * when it doesn't have to fall back for lack of them.
+	 */
+	exa->UploadToScreen = CG14UploadToScreen;
+	exa->DownloadFromScreen = CG14DownloadFromScreen;
+
+	/* hardware init, remember what we wrote in the driver private */
+	write_sx_reg(p, SX_PLANEMASK, 0xffffffff);
+	p->last_mask = 0xffffffff;
+	write_sx_reg(p, SX_ROP_CONTROL, 0xcc);
+	p->last_rop = 0xcc;
+
+	return exaDriverInit(pScreen, exa);
+}

Reply via email to