Module Name: xsrc
Committed By: macallan
Date: Fri Dec 24 04:41:40 UTC 2021
Modified Files:
xsrc/external/mit/xf86-video-suncg14/dist/src: cg14.h cg14_accel.c
cg14_render.c
Log Message:
add macros to simplify issuing SX instructions, hide the address alignment /
displacement shenanigans SX needs, and make the code more closely resemble the
inline assembler source it arguably is
To generate a diff of this commit:
cvs rdiff -u -r1.14 -r1.15 \
xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h
cvs rdiff -u -r1.26 -r1.27 \
xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
cvs rdiff -u -r1.13 -r1.14 \
xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.14 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.15
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h:1.14 Wed Jul 24 16:07:59 2019
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14.h Fri Dec 24 04:41:40 2021
@@ -135,6 +135,9 @@ write_sx_io(Cg14Ptr p, int reg, uint32_t
p->queuecount++;
}
+#define sxi(inst) write_sx_reg(p, SX_INSTRUCTIONS, (inst))
+#define sxm(inst, addr, reg, count) write_sx_io(p, (addr) & ~7, inst((reg), (count), (addr) & 7))
+
Bool CG14SetupCursor(ScreenPtr);
Bool CG14InitAccel(ScreenPtr);
Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.26 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.27
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c:1.26 Sun Dec 19 04:50:27 2021
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_accel.c Fri Dec 24 04:41:40 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: cg14_accel.c,v 1.26 2021/12/19 04:50:27 macallan Exp $ */
+/* $NetBSD: cg14_accel.c,v 1.27 2021/12/24 04:41:40 macallan Exp $ */
/*
* Copyright (c) 2013 Michael Lorenz
* All rights reserved.
@@ -184,10 +184,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
d = dststart;
while ( count < w) {
num = min(32, w - count);
- write_sx_io(p, s,
- SX_LD(10, num - 1, s & 7));
- write_sx_io(p, d,
- SX_STM(10, num - 1, d & 7));
+ sxm(SX_LD, s, 10, num - 1);
+ sxm(SX_STM, d, 10, num - 1);
s += xinc;
d += xinc;
count += 32;
@@ -203,10 +201,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
d = dststart;
count = w;
for (i = 0; i < chunks; i++) {
- write_sx_io(p, s,
- SX_LD(10, 31, s & 7));
- write_sx_io(p, d,
- SX_STM(10, 31, d & 7));
+ sxm(SX_LD, s, 10, 31);
+ sxm(SX_STM, d, 10, 31);
s -= 128;
d -= 128;
count -= 32;
@@ -215,10 +211,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
if (count > 0) {
s += (32 - count) << 2;
d += (32 - count) << 2;
- write_sx_io(p, s,
- SX_LD(10, count - 1, s & 7));
- write_sx_io(p, d,
- SX_STM(10, count - 1, d & 7));
+ sxm(SX_LD, s, 10, count - 1);
+ sxm(SX_STM, d, 10, count - 1);
}
srcstart += srcinc;
dststart += dstinc;
@@ -234,21 +228,15 @@ CG14Copy32(PixmapPtr pDstPixmap,
d = dststart;
while ( count < w) {
num = min(32, w - count);
- write_sx_io(p, s,
- SX_LD(10, num - 1, s & 7));
- write_sx_io(p, d,
- SX_LD(42, num - 1, d & 7));
+ sxm(SX_LD, s, 10, num - 1);
+ sxm(SX_LD, d, 42, num - 1);
if (num > 16) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(26, 58, 90, num - 17));
+ sxi(SX_ROP(10, 42, 74, 15));
+ sxi(SX_ROP(26, 58, 90, num - 17));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, num - 1));
+ sxi(SX_ROP(10, 42, 74, num - 1));
}
- write_sx_io(p, d,
- SX_STM(74, num - 1, d & 7));
+ sxm(SX_STM, d, 74, num - 1);
s += xinc;
d += xinc;
count += 32;
@@ -264,14 +252,11 @@ CG14Copy32(PixmapPtr pDstPixmap,
d = dststart;
count = w;
for (i = 0; i < chunks; i++) {
- write_sx_io(p, s, SX_LD(10, 31, s & 7));
- write_sx_io(p, d, SX_LD(42, 31, d & 7));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(26, 58, 90, 15));
- write_sx_io(p, d,
- SX_STM(74, 31, d & 7));
+ sxm(SX_LD, s, 10, 31);
+ sxm(SX_LD, d, 42, 31);
+ sxi(SX_ROP(10, 42, 74, 15));
+ sxi(SX_ROP(26, 58, 90, 15));
+ sxm(SX_STM, d, 74, 31);
s -= 128;
d -= 128;
count -= 32;
@@ -280,22 +265,15 @@ CG14Copy32(PixmapPtr pDstPixmap,
if (count > 0) {
s += (32 - count) << 2;
d += (32 - count) << 2;
- write_sx_io(p, s,
- SX_LD(10, count - 1, s & 7));
- write_sx_io(p, d,
- SX_LD(42, count - 1, d & 7));
+ sxm(SX_LD, s, 10, count - 1);
+ sxm(SX_LD, d, 42, count - 1);
if (count > 16) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(26, 58, 90, count - 17));
+ sxi(SX_ROP(10, 42, 74, 15));
+ sxi(SX_ROP(26, 58, 90, count - 17));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, count - 1));
+ sxi(SX_ROP(10, 42, 74, count - 1));
}
-
- write_sx_io(p, d,
- SX_STM(74, count - 1, d & 7));
+ sxm(SX_STM, d, 74, count - 1);
}
srcstart += srcinc;
dststart += dstinc;
@@ -309,7 +287,8 @@ CG14Copy32(PixmapPtr pDstPixmap,
* copy with same alignment, left to right, no ROP
*/
static void
-CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+CG14Copy8_aligned_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
+ int srcpitch, int dstpitch)
{
int saddr, daddr, pre, cnt, wrds;
@@ -324,8 +303,8 @@ CG14Copy8_aligned_norop(Cg14Ptr p, int s
daddr = dststart;
cnt = w;
if (pre > 0) {
- write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7));
- write_sx_io(p, daddr & ~7, SX_STB(8, pre - 1, daddr & 7));
+ sxm(SX_LDB, saddr, 8, pre - 1);
+ sxm(SX_STB, daddr, 8, pre - 1);
saddr += pre;
daddr += pre;
cnt -= pre;
@@ -333,15 +312,15 @@ CG14Copy8_aligned_norop(Cg14Ptr p, int s
}
while (cnt > 3) {
wrds = min(32, cnt >> 2);
- write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7));
- write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7));
+ sxm(SX_LD, saddr, 8, wrds - 1);
+ sxm(SX_ST, daddr, 8, wrds - 1);
saddr += wrds << 2;
daddr += wrds << 2;
cnt -= wrds << 2;
}
if (cnt > 0) {
- write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7));
- write_sx_io(p, daddr & ~7, SX_STB(8, cnt - 1, daddr & 7));
+ sxm(SX_LDB, saddr, 8, cnt - 1);
+ sxm(SX_STB, daddr, 8, cnt - 1);
}
next:
srcstart += srcpitch;
@@ -354,7 +333,8 @@ next:
* copy with same alignment, left to right, ROP
*/
static void
-CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+CG14Copy8_aligned_rop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
+ int srcpitch, int dstpitch)
{
int saddr, daddr, pre, cnt, wrds;
@@ -369,10 +349,10 @@ CG14Copy8_aligned_rop(Cg14Ptr p, int src
daddr = dststart;
cnt = w;
if (pre > 0) {
- write_sx_io(p, saddr & ~7, SX_LDB(8, pre - 1, saddr & 7));
- write_sx_io(p, daddr & ~7, SX_LDB(40, pre - 1, daddr & 7));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, pre - 1));
- write_sx_io(p, daddr & ~7, SX_STB(72, pre - 1, daddr & 7));
+ sxm(SX_LDB, saddr, 8, pre - 1);
+ sxm(SX_LDB, daddr, 40, pre - 1);
+ sxi(SX_ROP(8, 40, 72, pre - 1));
+ sxm(SX_STB, daddr, 72, pre - 1);
saddr += pre;
daddr += pre;
cnt -= pre;
@@ -380,23 +360,23 @@ CG14Copy8_aligned_rop(Cg14Ptr p, int src
}
while (cnt > 3) {
wrds = min(32, cnt >> 2);
- write_sx_io(p, saddr & ~7, SX_LD(8, wrds - 1, saddr & 7));
- write_sx_io(p, daddr & ~7, SX_LD(40, wrds - 1, daddr & 7));
+ sxm(SX_LD, saddr, 8, wrds - 1);
+ sxm(SX_LD, daddr, 40, wrds - 1);
if (cnt > 16) {
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, 15));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 56, 88, wrds - 17));
+ sxi(SX_ROP(8, 40, 72, 15));
+ sxi(SX_ROP(8, 56, 88, wrds - 17));
} else
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, wrds - 1));
- write_sx_io(p, daddr & ~7, SX_ST(72, wrds - 1, daddr & 7));
+ sxi(SX_ROP(8, 40, 72, wrds - 1));
+ sxm(SX_ST, daddr, 72, wrds - 1);
saddr += wrds << 2;
daddr += wrds << 2;
cnt -= wrds << 2;
}
if (cnt > 0) {
- write_sx_io(p, saddr & ~7, SX_LDB(8, cnt - 1, saddr & 7));
- write_sx_io(p, daddr & ~7, SX_LDB(40, cnt - 1, daddr & 7));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(8, 40, 72, cnt - 1));
- write_sx_io(p, daddr & ~7, SX_STB(72, cnt - 1, daddr & 7));
+ sxm(SX_LDB, saddr, 8, cnt - 1);
+ sxm(SX_LDB, daddr, 40, cnt - 1);
+ sxi(SX_ROP(8, 40, 72, cnt - 1));
+ sxm(SX_STB, daddr, 72, cnt - 1);
}
next:
srcstart += srcpitch;
@@ -459,38 +439,38 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst
daddr = dststart & ~3;
while (h > 0) {
- write_sx_io(p, daddr & ~7, SX_LD(80, wrds - 1, daddr & 7));
- write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7));
+ sxm(SX_LD, daddr, 80, wrds - 1);
+ sxm(SX_LD, saddr, sreg, swrds - 1);
if (wrds > 15) {
if (dist != 0) {
- write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16));
+ sxi(SX_FUNNEL_I(8, dist, 40, 15));
+ sxi(SX_FUNNEL_I(24, dist, 56, wrds - 16));
/* shifted source pixels are now at register 40+ */
ssreg = 40;
} else ssreg = 8;
if (pre != 0) {
/* mask out leading junk */
write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0));
+ sxi(SX_ROPB(ssreg, 80, 8, 0));
write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, 14));
+ sxi(SX_ROPB(ssreg + 1, 81, 9, 14));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 15));
+ sxi(SX_ROPB(ssreg, 80, 8, 15));
}
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 16, 96, 24, wrds - 16));
+ sxi(SX_ROPB(ssreg + 16, 96, 24, wrds - 16));
} else {
if (dist != 0) {
- write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds));
+ sxi(SX_FUNNEL_I(8, dist, 40, wrds));
ssreg = 40;
} else ssreg = 8;
if (pre != 0) {
/* mask out leading junk */
write_sx_reg(p, SX_QUEUED(R_MASK), lmask);
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, 0));
+ sxi(SX_ROPB(ssreg, 80, 8, 0));
write_sx_reg(p, SX_QUEUED(R_MASK), 0xffffffff);
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg + 1, 81, 9, wrds));
+ sxi(SX_ROPB(ssreg + 1, 81, 9, wrds));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, 8, wrds));
+ sxi(SX_ROPB(ssreg, 80, 8, wrds));
}
}
if (post != 0) {
@@ -502,15 +482,15 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst
* the left end but it's less annoying this way and
* the instruction count is the same
*/
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(7 + wrds, 7, 5, 0));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(79 + wrds, 6, 4, 0));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, 7 + wrds, 0));
+ sxi(SX_ANDS(7 + wrds, 7, 5, 0));
+ sxi(SX_ANDS(79 + wrds, 6, 4, 0));
+ sxi(SX_ORS(5, 4, 7 + wrds, 0));
}
#ifdef DEBUG
- write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7));
+ sxm(SX_ST, taddr, 40, wrds - 1);
taddr += dstpitch;
#endif
- write_sx_io(p, daddr & ~7, SX_ST(8, wrds - 1, daddr & 7));
+ sxm(SX_ST, daddr, 8, wrds - 1);
saddr += srcpitch;
daddr += dstpitch;
h--;
@@ -519,7 +499,8 @@ CG14Copy8_short_rop(Cg14Ptr p, int srcst
/* up to 124 pixels so direction doesn't matter, unaligned, straight copy */
static void
-CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h, int srcpitch, int dstpitch)
+CG14Copy8_short_norop(Cg14Ptr p, int srcstart, int dststart, int w, int h,
+ int srcpitch, int dstpitch)
{
int saddr, daddr, pre, dist, wrds, swrds, spre, sreg, restaddr, post;
int ssreg;
@@ -571,30 +552,30 @@ CG14Copy8_short_norop(Cg14Ptr p, int src
daddr = dststart & ~3;
while (h > 0) {
- write_sx_io(p, saddr & ~7, SX_LD(sreg, swrds - 1, saddr & 7));
+ sxm(SX_LD, saddr, sreg, swrds - 1);
if (wrds > 15) {
if (dist != 0) {
- write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, 15));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(24, dist, 56, wrds - 16));
- /* shifted source pixels are now at register 40+ */
+ sxi(SX_FUNNEL_I(8, dist, 40, 15));
+ sxi(SX_FUNNEL_I(24, dist, 56, wrds - 16));
+ /* shifted source pixels are now at reg 40+ */
ssreg = 40;
} else ssreg = 8;
if (pre != 0) {
/* read only the first word */
- write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7));
+ sxm(SX_LD, daddr, 80, 0);
/* mask out leading junk */
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0));
+ sxi(SX_ROPB(ssreg, 80, ssreg, 0));
}
} else {
if (dist != 0) {
- write_sx_reg(p, SX_INSTRUCTIONS, SX_FUNNEL_I(8, dist, 40, wrds));
+ sxi(SX_FUNNEL_I(8, dist, 40, wrds));
ssreg = 40;
} else ssreg = 8;
if (pre != 0) {
/* read only the first word */
- write_sx_io(p, daddr & ~7, SX_LD(80, 0, daddr & 7));
+ sxm(SX_LD, daddr, 80, 0);
/* mask out leading junk */
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROPB(ssreg, 80, ssreg, 0));
+ sxi(SX_ROPB(ssreg, 80, ssreg, 0));
}
}
if (post != 0) {
@@ -607,16 +588,16 @@ CG14Copy8_short_norop(Cg14Ptr p, int src
* the left end but it's less annoying this way and
* the instruction count is the same
*/
- write_sx_io(p, laddr & ~7, SX_LD(81, 0, laddr & 7));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(ssreg + wrds - 1, 7, 5, 0));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ANDS(81, 6, 4, 0));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(5, 4, ssreg + wrds - 1, 0));
+ sxm(SX_LD, laddr, 81, 0);
+ sxi(SX_ANDS(ssreg + wrds - 1, 7, 5, 0));
+ sxi(SX_ANDS(81, 6, 4, 0));
+ sxi(SX_ORS(5, 4, ssreg + wrds - 1, 0));
}
#ifdef DEBUG
- write_sx_io(p, taddr & ~7, SX_ST(40, wrds - 1, taddr & 7));
+ sxm(SX_ST, taddr, 40, wrds - 1);
taddr += dstpitch;
#endif
- write_sx_io(p, daddr & ~7, SX_ST(ssreg, wrds - 1, daddr & 7));
+ sxm(SX_ST, daddr, ssreg, wrds - 1);
saddr += srcpitch;
daddr += dstpitch;
h--;
@@ -663,10 +644,12 @@ CG14Copy8(PixmapPtr pDstPixmap,
if ((w < 125) && (w > 8)) {
switch (p->last_rop) {
case 0xcc:
- CG14Copy8_short_norop(p, srcstart, dststart, w, h, srcinc, dstinc);
+ CG14Copy8_short_norop(p,
+ srcstart, dststart, w, h, srcinc, dstinc);
break;
default:
- CG14Copy8_short_rop(p, srcstart, dststart, w, h, srcinc, dstinc);
+ CG14Copy8_short_rop(p,
+ srcstart, dststart, w, h, srcinc, dstinc);
}
return;
}
@@ -687,10 +670,12 @@ CG14Copy8(PixmapPtr pDstPixmap,
if (((srcstart & 3) == (dststart & 3)) && (xinc > 0)) {
switch (p->last_rop) {
case 0xcc:
- CG14Copy8_aligned_norop(p, srcstart, dststart, w, h, srcinc, dstinc);
+ CG14Copy8_aligned_norop(p,
+ srcstart, dststart, w, h, srcinc, dstinc);
break;
default:
- CG14Copy8_aligned_rop(p, srcstart, dststart, w, h, srcinc, dstinc);
+ CG14Copy8_aligned_rop(p,
+ srcstart, dststart, w, h, srcinc, dstinc);
}
return;
}
@@ -706,18 +691,22 @@ CG14Copy8(PixmapPtr pDstPixmap,
*/
if (w > 8) {
int next, wi, end = dststart + w;
- DPRINTF(X_ERROR, "%s %08x %08x %d\n", __func__, srcstart, dststart, w);
+ DPRINTF(X_ERROR, "%s %08x %08x %d\n",
+ __func__, srcstart, dststart, w);
if ((p->xdir < 0) && (srcoff == dstoff)) {
srcstart += w;
next = max((end - 120) & ~3, dststart);
wi = end - next;
srcstart -= wi;
while (wi > 0) {
- DPRINTF(X_ERROR, "%s RL %08x %08x %d\n", __func__, srcstart, next, wi);
+ DPRINTF(X_ERROR, "%s RL %08x %08x %d\n",
+ __func__, srcstart, next, wi);
if (p->last_rop == 0xcc) {
- CG14Copy8_short_norop(p, srcstart, next, wi, h, srcinc, dstinc);
+ CG14Copy8_short_norop(p, srcstart,
+ next, wi, h, srcinc, dstinc);
} else
- CG14Copy8_short_rop(p, srcstart, next, wi, h, srcinc, dstinc);
+ CG14Copy8_short_rop(p, srcstart,
+ next, wi, h, srcinc, dstinc);
end = next;
/*
* avoid extremely narrow copies so I don't
@@ -736,11 +725,16 @@ CG14Copy8(PixmapPtr pDstPixmap,
next = min(end, (dststart + 124) & ~3);
wi = next - dststart;
while (wi > 0) {
- DPRINTF(X_ERROR, "%s LR %08x %08x %d\n", __func__, srcstart, next, wi);
+ DPRINTF(X_ERROR, "%s LR %08x %08x %d\n",
+ __func__, srcstart, next, wi);
if (p->last_rop == 0xcc) {
- CG14Copy8_short_norop(p, srcstart, dststart, wi, h, srcinc, dstinc);
+ CG14Copy8_short_norop(p,
+ srcstart, dststart, wi, h,
+ srcinc, dstinc);
} else
- CG14Copy8_short_rop(p, srcstart, dststart, wi, h, srcinc, dstinc);
+ CG14Copy8_short_rop(p,
+ srcstart, dststart, wi, h,
+ srcinc, dstinc);
srcstart += wi;
dststart = next;
if ((end - dststart) < 140) {
@@ -769,10 +763,8 @@ CG14Copy8(PixmapPtr pDstPixmap,
d = dststart;
while ( count < w) {
num = min(32, w - count);
- write_sx_io(p, s,
- SX_LDB(10, num - 1, s & 7));
- write_sx_io(p, d,
- SX_STBM(10, num - 1, d & 7));
+ sxm(SX_LDB, s, 10, num - 1);
+ sxm(SX_STBM, d, 10, num - 1);
s += xinc;
d += xinc;
count += 32;
@@ -788,10 +780,8 @@ CG14Copy8(PixmapPtr pDstPixmap,
d = dststart;
count = w;
for (i = 0; i < chunks; i++) {
- write_sx_io(p, s,
- SX_LDB(10, 31, s & 7));
- write_sx_io(p, d,
- SX_STBM(10, 31, d & 7));
+ sxm(SX_LDB, s, 10, 31);
+ sxm(SX_STBM, d, 10, 31);
s -= 32;
d -= 32;
count -= 32;
@@ -800,10 +790,8 @@ CG14Copy8(PixmapPtr pDstPixmap,
if (count > 0) {
s += (32 - count);
d += (32 - count);
- write_sx_io(p, s,
- SX_LDB(10, count - 1, s & 7));
- write_sx_io(p, d,
- SX_STBM(10, count - 1, d & 7));
+ sxm(SX_LDB, s, 10, count - 1);
+ sxm(SX_STBM, d, 10, count - 1);
}
srcstart += srcinc;
dststart += dstinc;
@@ -819,21 +807,15 @@ CG14Copy8(PixmapPtr pDstPixmap,
d = dststart;
while ( count < w) {
num = min(32, w - count);
- write_sx_io(p, s,
- SX_LDB(10, num - 1, s & 7));
- write_sx_io(p, d,
- SX_LDB(42, num - 1, d & 7));
+ sxm(SX_LDB, s, 10, num - 1);
+ sxm(SX_LDB, d, 42, num - 1);
if (num > 16) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(26, 58, 90, num - 17));
+ sxi(SX_ROP(10, 42, 74, 15));
+ sxi(SX_ROP(26, 58, 90, num - 17));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, num - 1));
+ sxi(SX_ROP(10, 42, 74, num - 1));
}
- write_sx_io(p, d,
- SX_STBM(74, num - 1, d & 7));
+ sxm(SX_STBM, d, 74, num - 1);
s += xinc;
d += xinc;
count += 32;
@@ -849,14 +831,11 @@ CG14Copy8(PixmapPtr pDstPixmap,
d = dststart;
count = w;
for (i = 0; i < chunks; i++) {
- write_sx_io(p, s, SX_LDB(10, 31, s & 7));
- write_sx_io(p, d, SX_LDB(42, 31, d & 7));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(26, 58, 90, 15));
- write_sx_io(p, d,
- SX_STBM(74, 31, d & 7));
+ sxm(SX_LDB, s, 10, 31);
+ sxm(SX_LDB, d, 42, 31);
+ sxi(SX_ROP(10, 42, 74, 15));
+ sxi(SX_ROP(26, 58, 90, 15));
+ sxm(SX_STBM, d, 74, 31);
s -= 128;
d -= 128;
count -= 32;
@@ -865,22 +844,15 @@ CG14Copy8(PixmapPtr pDstPixmap,
if (count > 0) {
s += (32 - count);
d += (32 - count);
- write_sx_io(p, s,
- SX_LDB(10, count - 1, s & 7));
- write_sx_io(p, d,
- SX_LDB(42, count - 1, d & 7));
+ sxm(SX_LDB, s, 10, count - 1);
+ sxm(SX_LDB, d, 42, count - 1);
if (count > 16) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(26, 58, 90, count - 17));
+ sxi(SX_ROP(10, 42, 74, 15));
+ sxi(SX_ROP(26, 58, 90, count - 17));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, count - 1));
+ sxi(SX_ROP(10, 42, 74, count - 1));
}
-
- write_sx_io(p, d,
- SX_STBM(74, count - 1, d & 7));
+ sxm(SX_STBM, d, 74, count - 1);
}
srcstart += srcinc;
dststart += dstinc;
@@ -956,8 +928,7 @@ CG14Solid32(Cg14Ptr p, uint32_t start, u
while (x < w) {
ptr = start + (x << 2);
num = min(32, w - x);
- write_sx_io(p, ptr,
- SX_STS(8, num - 1, ptr & 7));
+ sxm(SX_STS, ptr, 8, num - 1);
x += 32;
}
start += pitch;
@@ -969,8 +940,7 @@ CG14Solid32(Cg14Ptr p, uint32_t start, u
/* alright, let's do actual ROP stuff */
/* first repeat the fill colour into 16 registers */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SELECT_S(8, 8, 10, 15));
+ sxi(SX_SELECT_S(8, 8, 10, 15));
for (line = 0; line < h; line++) {
x = 0;
@@ -978,24 +948,19 @@ CG14Solid32(Cg14Ptr p, uint32_t start, u
ptr = start + (x << 2);
num = min(32, w - x);
/* now suck fb data into registers */
- write_sx_io(p, ptr,
- SX_LD(42, num - 1, ptr & 7));
+ sxm(SX_LD, ptr, 42, num - 1);
/*
* ROP them with the fill data we left in 10
* non-memory ops can only have counts up to 16
*/
if (num <= 16) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, num - 1));
+ sxi(SX_ROP(10, 42, 74, num - 1));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 58, 90, num - 17));
+ sxi(SX_ROP(10, 42, 74, 15));
+ sxi(SX_ROP(10, 58, 90, num - 17));
}
/* and write the result back into memory */
- write_sx_io(p, ptr,
- SX_ST(74, num - 1, ptr & 7));
+ sxm(SX_ST, ptr, 74, num - 1);
x += 32;
}
start += pitch;
@@ -1020,7 +985,7 @@ CG14Solid8(Cg14Ptr p, uint32_t start, ui
cnt = w;
pre = min(pre, cnt);
if (pre) {
- write_sx_io(p, ptr & ~7, SX_STBS(8, pre - 1, ptr & 7));
+ sxm(SX_STBS, ptr, 8, pre - 1);
ptr += pre;
cnt -= pre;
if (cnt == 0) goto next;
@@ -1029,13 +994,13 @@ CG14Solid8(Cg14Ptr p, uint32_t start, ui
if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
while(cnt > 3) {
num = min(32, cnt >> 2);
- write_sx_io(p, ptr & ~7, SX_STS(8, num - 1, ptr & 7));
+ sxm(SX_STS, ptr, 8, num - 1);
ptr += num << 2;
cnt -= num << 2;
}
if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
if (cnt > 0) {
- write_sx_io(p, ptr & ~7, SX_STBS(8, cnt - 1, ptr & 7));
+ sxm(SX_STBS, ptr, 8, cnt - 1);
}
if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w);
next:
@@ -1048,17 +1013,16 @@ next:
/* alright, let's do actual ROP stuff */
/* first repeat the fill colour into 16 registers */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SELECT_S(8, 8, 10, 15));
+ sxi(SX_SELECT_S(8, 8, 10, 15));
for (line = 0; line < h; line++) {
ptr = start;
cnt = w;
pre = min(pre, cnt);
if (pre) {
- write_sx_io(p, ptr & ~7, SX_LDB(26, pre - 1, ptr & 7));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, pre - 1));
- write_sx_io(p, ptr & ~7, SX_STB(42, pre - 1, ptr & 7));
+ sxm(SX_LDB, ptr, 26, pre - 1);
+ sxi(SX_ROP(10, 26, 42, pre - 1));
+ sxm(SX_STB, ptr, 42, pre - 1);
ptr += pre;
cnt -= pre;
if (cnt == 0) goto next2;
@@ -1067,25 +1031,22 @@ next:
if (ptr & 3) xf86Msg(X_ERROR, "%s %x\n", __func__, ptr);
while(cnt > 3) {
num = min(32, cnt >> 2);
- write_sx_io(p, ptr & ~7, SX_LD(26, num - 1, ptr & 7));
+ sxm(SX_LD, ptr, 26, num - 1);
if (num <= 16) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 26, 58, num - 1));
+ sxi(SX_ROP(10, 26, 58, num - 1));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 26, 58, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ROP(10, 42, 74, num - 17));
+ sxi(SX_ROP(10, 26, 58, 15));
+ sxi(SX_ROP(10, 42, 74, num - 17));
}
- write_sx_io(p, ptr & ~7, SX_ST(58, num - 1, ptr & 7));
+ sxm(SX_ST, ptr, 58, num - 1);
ptr += num << 2;
cnt -= num << 2;
}
if (cnt > 3) xf86Msg(X_ERROR, "%s cnt %d\n", __func__, cnt);
if (cnt > 0) {
- write_sx_io(p, ptr & ~7, SX_LDB(26, cnt - 1, ptr & 7));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ROP(10, 26, 42, cnt - 1));
- write_sx_io(p, ptr & ~7, SX_STB(42, cnt - 1, ptr & 7));
+ sxm(SX_LDB, ptr, 26, cnt - 1);
+ sxi(SX_ROP(10, 26, 42, cnt - 1));
+ sxm(SX_STB, ptr, 42, cnt - 1);
}
if ((ptr + cnt) != (start + w)) xf86Msg(X_ERROR, "%s %x vs %x\n", __func__, ptr + cnt, start + w);
next2:
Index: xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c
diff -u xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.13 xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.14
--- xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c:1.13 Wed Jul 24 16:07:59 2019
+++ xsrc/external/mit/xf86-video-suncg14/dist/src/cg14_render.c Fri Dec 24 04:41:40 2021
@@ -1,4 +1,4 @@
-/* $NetBSD: cg14_render.c,v 1.13 2019/07/24 16:07:59 macallan Exp $ */
+/* $NetBSD: cg14_render.c,v 1.14 2021/12/24 04:41:40 macallan Exp $ */
/*
* Copyright (c) 2013 Michael Lorenz
* All rights reserved.
@@ -75,37 +75,30 @@ void CG14Comp_Over32Solid(Cg14Ptr p,
for (x = 0; x < width; x += 4) {
rest = width - x;
/* fetch 4 mask values */
- write_sx_io(p, mskx, SX_LDUQ0(12, 3, mskx & 7));
+ sxm(SX_LDUQ0, mskx, 12, 3);
/* fetch destination pixels */
- write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
+ sxm(SX_LDUQ0, dstx, 60, 3);
/* duplicate them for all channels */
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 16, 17, 2));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 20, 21, 2));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 24, 25, 2));
+ sxi(SX_ORS(0, 12, 13, 2));
+ sxi(SX_ORS(0, 16, 17, 2));
+ sxi(SX_ORS(0, 20, 21, 2));
+ sxi(SX_ORS(0, 24, 25, 2));
/* generate inverted alpha */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_XORS(12, 8, 28, 15));
+ sxi(SX_XORS(12, 8, 28, 15));
/* multiply source */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_MUL16X16SR8(8, 12, 44, 3));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_MUL16X16SR8(8, 16, 48, 3));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_MUL16X16SR8(8, 20, 52, 3));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_MUL16X16SR8(8, 24, 56, 3));
+ sxi(SX_MUL16X16SR8(8, 12, 44, 3));
+ sxi(SX_MUL16X16SR8(8, 16, 48, 3));
+ sxi(SX_MUL16X16SR8(8, 20, 52, 3));
+ sxi(SX_MUL16X16SR8(8, 24, 56, 3));
/* multiply dest */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_MUL16X16SR8(28, 60, 76, 15));
+ sxi(SX_MUL16X16SR8(28, 60, 76, 15));
/* add up */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(44, 76, 92, 15));
+ sxi(SX_ADDV(44, 76, 92, 15));
/* write back */
if (rest < 4) {
- write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7));
+ sxm(SX_STUQ0C, dstx, 92, rest - 1);
} else {
- write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
+ sxm(SX_STUQ0C, dstx, 92, 3);
}
dstx += 16;
mskx += 16;
@@ -118,7 +111,7 @@ void CG14Comp_Over32Solid(Cg14Ptr p,
/* nothing to do - all transparent */
} else if (m == 0xff) {
/* all opaque */
- write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
+ sxm(SX_STUQ0, dstx, 8, 0);
} else {
/* fetch alpha value, stick it into scam */
/* mask is in R[12:15] */
@@ -126,28 +119,22 @@ void CG14Comp_Over32Solid(Cg14Ptr p,
SX_LDUQ0(12, 0, mskx & 7));*/
write_sx_reg(p, SX_QUEUED(12), m);
/* fetch dst pixel */
- write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ORV(12, 0, R_SCAM, 0));
+ sxm(SX_LDUQ0, dstx, 20, 0);
+ sxi(SX_ORV(12, 0, R_SCAM, 0));
/*
* src * alpha + R0
* R[9:11] * SCAM + R0 -> R[17:19]
*/
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(9, 0, 17, 2));
+ sxi(SX_SAXP16X16SR8(9, 0, 17, 2));
/* invert SCAM */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_XORV(12, 8, R_SCAM, 0));
+ sxi(SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_XORV(12, 8, 13, 0));
+ sxi(SX_XORV(12, 8, 13, 0));
#endif
/* dst * (1 - alpha) + R[13:15] */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(21, 17, 25, 2));
- write_sx_io(p, dstx,
- SX_STUQ0C(24, 0, dstx & 7));
+ sxi(SX_SAXP16X16SR8(21, 17, 25, 2));
+ sxm(SX_STUQ0C, dstx, 24, 0);
}
dstx += 4;
mskx += 4;
@@ -181,37 +168,30 @@ void CG14Comp_Over8Solid(Cg14Ptr p,
for (x = 0; x < width; x += 4) {
rest = width - x;
/* fetch 4 mask values */
- write_sx_io(p, mskx, SX_LDB(12, 3, mskx & 7));
+ sxm(SX_LDB, mskx, 12, 3);
/* fetch destination pixels */
- write_sx_io(p, dstx, SX_LDUQ0(60, 3, dstx & 7));
+ sxm(SX_LDUQ0, dstx, 60, 3);
/* duplicate them for all channels */
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 13, 16, 3));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 14, 20, 3));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 15, 24, 3));
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(0, 12, 13, 2));
+ sxi(SX_ORS(0, 13, 16, 3));
+ sxi(SX_ORS(0, 14, 20, 3));
+ sxi(SX_ORS(0, 15, 24, 3));
+ sxi(SX_ORS(0, 12, 13, 2));
/* generate inverted alpha */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_XORS(12, 8, 28, 15));
+ sxi(SX_XORS(12, 8, 28, 15));
/* multiply source */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_MUL16X16SR8(8, 12, 44, 3));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_MUL16X16SR8(8, 16, 48, 3));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_MUL16X16SR8(8, 20, 52, 3));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_MUL16X16SR8(8, 24, 56, 3));
+ sxi(SX_MUL16X16SR8(8, 12, 44, 3));
+ sxi(SX_MUL16X16SR8(8, 16, 48, 3));
+ sxi(SX_MUL16X16SR8(8, 20, 52, 3));
+ sxi(SX_MUL16X16SR8(8, 24, 56, 3));
/* multiply dest */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_MUL16X16SR8(28, 60, 76, 15));
+ sxi(SX_MUL16X16SR8(28, 60, 76, 15));
/* add up */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(44, 76, 92, 15));
+ sxi(SX_ADDV(44, 76, 92, 15));
/* write back */
if (rest < 4) {
- write_sx_io(p, dstx, SX_STUQ0C(92, rest - 1, dstx & 7));
+ sxm(SX_STUQ0C, dstx, 92, rest - 1);
} else {
- write_sx_io(p, dstx, SX_STUQ0C(92, 3, dstx & 7));
+ sxm(SX_STUQ0C, dstx, 92, 3);
}
dstx += 16;
mskx += 4;
@@ -226,7 +206,7 @@ void CG14Comp_Over8Solid(Cg14Ptr p,
/* nothing to do - all transparent */
} else if (m == 0xff) {
/* all opaque */
- write_sx_io(p, dstx, SX_STUQ0(8, 0, dstx & 7));
+ sxm(SX_STUQ0, dstx, 8, 0);
} else {
/* fetch alpha value, stick it into scam */
/* mask is in R[12:15] */
@@ -234,28 +214,22 @@ void CG14Comp_Over8Solid(Cg14Ptr p,
SX_LDB(12, 0, mskx & 7));*/
write_sx_reg(p, SX_QUEUED(12), m);
/* fetch dst pixel */
- write_sx_io(p, dstx, SX_LDUQ0(20, 0, dstx & 7));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ORV(12, 0, R_SCAM, 0));
+ sxm(SX_LDUQ0, dstx, 20, 0);
+ sxi(SX_ORV(12, 0, R_SCAM, 0));
/*
* src * alpha + R0
* R[9:11] * SCAM + R0 -> R[17:19]
*/
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(9, 0, 17, 2));
+ sxi(SX_SAXP16X16SR8(9, 0, 17, 2));
/* invert SCAM */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_XORV(12, 8, R_SCAM, 0));
+ sxi(SX_XORV(12, 8, R_SCAM, 0));
#ifdef SX_DEBUG
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_XORV(12, 8, 13, 0));
+ sxi(SX_XORV(12, 8, 13, 0));
#endif
/* dst * (1 - alpha) + R[13:15] */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(21, 17, 25, 2));
- write_sx_io(p, dstx,
- SX_STUQ0C(24, 0, dstx & 7));
+ sxi(SX_SAXP16X16SR8(21, 17, 25, 2));
+ sxm(SX_STUQ0C, dstx, 24, 0);
}
dstx += 4;
mskx += 1;
@@ -287,30 +261,25 @@ void CG14Comp_Add32(Cg14Ptr p,
srcx = src;
dstx = dst;
for (x = 0; x < full; x++) {
- write_sx_io(p, srcx, SX_LDUQ0(8, 31, srcx & 7));
- write_sx_io(p, dstx, SX_LDUQ0(40, 31, dstx & 7));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(8, 40, 72, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(24, 56, 88, 15));
- write_sx_io(p, dstx, SX_STUQ0(72, 31, dstx & 7));
+ sxm(SX_LDUQ0, srcx, 8, 31);
+ sxm(SX_LDUQ0, dstx, 40, 31);
+ sxi(SX_ADDV(8, 40, 72, 15));
+ sxi(SX_ADDV(24, 56, 88, 15));
+ sxm(SX_STUQ0, dstx, 72, 31);
srcx += 128;
dstx += 128;
}
/* do leftovers */
- write_sx_io(p, srcx, SX_LDUQ0(8, part - 1, srcx & 7));
- write_sx_io(p, dstx, SX_LDUQ0(40, part - 1, dstx & 7));
+ sxm(SX_LDUQ0, srcx, 8, part - 1);
+ sxm(SX_LDUQ0, dstx, 40, part - 1);
if (part & 16) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(8, 40, 72, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(24, 56, 88, part - 17));
+ sxi(SX_ADDV(8, 40, 72, 15));
+ sxi(SX_ADDV(24, 56, 88, part - 17));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(8, 40, 72, part - 1));
+ sxi(SX_ADDV(8, 40, 72, part - 1));
}
- write_sx_io(p, dstx, SX_STUQ0(72, part - 1, dstx & 7));
+ sxm(SX_STUQ0, dstx, 72, part - 1);
/* next line */
src += srcpitch;
@@ -355,10 +324,8 @@ void CG14Comp_Add8(Cg14Ptr p,
for (x = 0; x < full; x++) {
write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
write_sx_io(p, dstx, SX_LDB(40, 31, dstoff));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(8, 40, 72, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(24, 56, 88, 15));
+ sxi(SX_ADDV(8, 40, 72, 15));
+ sxi(SX_ADDV(24, 56, 88, 15));
write_sx_io(p, dstx, SX_STBC(72, 31, dstoff));
srcx += 32;
dstx += 32;
@@ -369,13 +336,10 @@ void CG14Comp_Add8(Cg14Ptr p,
write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
write_sx_io(p, dstx, SX_LDB(40, part - 1, dstoff));
if (part > 16) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(8, 40, 72, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(24, 56, 88, part - 17));
+ sxi(SX_ADDV(8, 40, 72, 15));
+ sxi(SX_ADDV(24, 56, 88, part - 17));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(8, 40, 72, part - 1));
+ sxi(SX_ADDV(8, 40, 72, part - 1));
}
write_sx_io(p, dstx, SX_STBC(72, part - 1, dstoff));
}
@@ -426,10 +390,8 @@ void CG14Comp_Add8_32(Cg14Ptr p,
write_sx_io(p, srcx, SX_LDB(8, 31, srcoff));
/* load alpha from destination */
write_sx_io(p, dstx, SX_LDUC0(40, 31, dstoff));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(8, 40, 72, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(24, 56, 88, 15));
+ sxi(SX_ADDV(8, 40, 72, 15));
+ sxi(SX_ADDV(24, 56, 88, 15));
/* write clamped values back into dest alpha */
write_sx_io(p, dstx, SX_STUC0C(72, 31, dstoff));
srcx += 32;
@@ -441,13 +403,10 @@ void CG14Comp_Add8_32(Cg14Ptr p,
write_sx_io(p, srcx, SX_LDB(8, part - 1, srcoff));
write_sx_io(p, dstx, SX_LDUC0(40, part - 1, dstoff));
if (part > 16) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(8, 40, 72, 15));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(24, 56, 88, part - 17));
+ sxi(SX_ADDV(8, 40, 72, 15));
+ sxi(SX_ADDV(24, 56, 88, part - 17));
} else {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ADDV(8, 40, 72, part - 1));
+ sxi(SX_ADDV(8, 40, 72, part - 1));
}
write_sx_io(p, dstx, SX_STUC0C(72, part - 1, dstoff));
}
@@ -488,31 +447,24 @@ void CG14Comp_Over32(Cg14Ptr p,
continue;
}
/* fetch source pixels */
- write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+ sxm(SX_LDUQ0, srcx, 12, num - 1);
if (flip) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_GATHER(13, 4, 40, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_GATHER(15, 4, 44, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SCATTER(40, 4, 15, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SCATTER(44, 4, 13, num - 1));
+ sxi(SX_GATHER(13, 4, 40, num - 1));
+ sxi(SX_GATHER(15, 4, 44, num - 1));
+ sxi(SX_SCATTER(40, 4, 15, num - 1));
+ sxi(SX_SCATTER(44, 4, 13, num - 1));
}
/* fetch dst pixels */
- write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+ sxm(SX_LDUQ0, dstx, 44, num - 1);
/* now process up to 4 pixels */
for (i = 0; i < num; i++) {
int ii = i << 2;
/* write inverted alpha into SCAM */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_XORS(12 + ii, 8, R_SCAM, 0));
+ sxi(SX_XORS(12 + ii, 8, R_SCAM, 0));
/* dst * (1 - alpha) + src */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(44 + ii, 12 + ii, 76 + ii, 3));
+ sxi(SX_SAXP16X16SR8(44 + ii, 12 + ii, 76 + ii, 3));
}
- write_sx_io(p, dstx,
- SX_STUQ0C(76, num - 1, dstx & 7));
+ sxm(SX_STUQ0C, dstx, 76, num - 1);
srcx += 16;
dstx += 16;
}
@@ -546,39 +498,30 @@ void CG14Comp_Over32Mask(Cg14Ptr p,
continue;
}
/* fetch source pixels */
- write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+ sxm(SX_LDUQ0, srcx, 12, num - 1);
if (flip) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_GATHER(13, 4, 40, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_GATHER(15, 4, 44, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SCATTER(40, 4, 15, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SCATTER(44, 4, 13, num - 1));
+ sxi(SX_GATHER(13, 4, 40, num - 1));
+ sxi(SX_GATHER(15, 4, 44, num - 1));
+ sxi(SX_SCATTER(40, 4, 15, num - 1));
+ sxi(SX_SCATTER(44, 4, 13, num - 1));
}
/* fetch mask */
- write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7));
+ sxm(SX_LDB, mskx, 28, num - 1);
/* fetch dst pixels */
- write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+ sxm(SX_LDUQ0, dstx, 44, num - 1);
/* now process up to 4 pixels */
for (i = 0; i < num; i++) {
int ii = i << 2;
/* mask alpha to SCAM */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ORS(28 + i, 0, R_SCAM, 0));
+ sxi(SX_ORS(28 + i, 0, R_SCAM, 0));
/* src * alpha */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
+ sxi(SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
/* write inverted alpha into SCAM */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_XORS(28 + i, 8, R_SCAM, 0));
+ sxi(SX_XORS(28 + i, 8, R_SCAM, 0));
/* dst * (1 - alpha) + R[60:] */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
+ sxi(SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
}
- write_sx_io(p, dstx,
- SX_STUQ0C(76, num - 1, dstx & 7));
+ sxm(SX_STUQ0C, dstx, 76, num - 1);
srcx += 16;
mskx += 4;
dstx += 16;
@@ -602,7 +545,7 @@ void CG14Comp_Over32Mask_noalpha(Cg14Ptr
write_sx_reg(p, SX_QUEUED(8), 0xff);
write_sx_reg(p, SX_QUEUED(9), 0xff);
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1));
+ sxi(SX_ORS(8, 0, 10, 1));
for (line = 0; line < height; line++) {
srcx = src;
mskx = msk;
@@ -616,42 +559,32 @@ void CG14Comp_Over32Mask_noalpha(Cg14Ptr
continue;
}
/* fetch source pixels */
- write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+ sxm(SX_LDUQ0, srcx, 12, num - 1);
if (flip) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_GATHER(13, 4, 40, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_GATHER(15, 4, 44, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SCATTER(40, 4, 15, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SCATTER(44, 4, 13, num - 1));
+ sxi(SX_GATHER(13, 4, 40, num - 1));
+ sxi(SX_GATHER(15, 4, 44, num - 1));
+ sxi(SX_SCATTER(40, 4, 15, num - 1));
+ sxi(SX_SCATTER(44, 4, 13, num - 1));
}
/* fetch mask */
- write_sx_io(p, mskx, SX_LDB(28, num - 1, mskx & 7));
+ sxm(SX_LDB, mskx, 28, num - 1);
/* fetch dst pixels */
- write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+ sxm(SX_LDUQ0, dstx, 44, num - 1);
/* set src alpha to 0xff */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SCATTER(8, 4, 12, num - 1));
+ sxi(SX_SCATTER(8, 4, 12, num - 1));
/* now process up to 4 pixels */
for (i = 0; i < num; i++) {
int ii = i << 2;
/* mask alpha to SCAM */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ORS(28 + i, 0, R_SCAM, 0));
+ sxi(SX_ORS(28 + i, 0, R_SCAM, 0));
/* src * alpha */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
+ sxi(SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
/* write inverted alpha into SCAM */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_XORS(28 + i, 8, R_SCAM, 0));
+ sxi(SX_XORS(28 + i, 8, R_SCAM, 0));
/* dst * (1 - alpha) + R[60:] */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
+ sxi(SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
}
- write_sx_io(p, dstx,
- SX_STUQ0C(76, num - 1, dstx & 7));
+ sxm(SX_STUQ0C, dstx, 76, num - 1);
srcx += 16;
mskx += 4;
dstx += 16;
@@ -675,7 +608,7 @@ void CG14Comp_Over32Mask32_noalpha(Cg14P
write_sx_reg(p, SX_QUEUED(8), 0xff);
write_sx_reg(p, SX_QUEUED(9), 0xff);
- write_sx_reg(p, SX_INSTRUCTIONS, SX_ORS(8, 0, 10, 1));
+ sxi(SX_ORS(8, 0, 10, 1));
for (line = 0; line < height; line++) {
srcx = src;
mskx = msk;
@@ -689,42 +622,32 @@ void CG14Comp_Over32Mask32_noalpha(Cg14P
continue;
}
/* fetch source pixels */
- write_sx_io(p, srcx, SX_LDUQ0(12, num - 1, srcx & 7));
+ sxm(SX_LDUQ0, srcx, 12, num - 1);
if (flip) {
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_GATHER(13, 4, 40, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_GATHER(15, 4, 44, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SCATTER(40, 4, 15, num - 1));
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SCATTER(44, 4, 13, num - 1));
+ sxi(SX_GATHER(13, 4, 40, num - 1));
+ sxi(SX_GATHER(15, 4, 44, num - 1));
+ sxi(SX_SCATTER(40, 4, 15, num - 1));
+ sxi(SX_SCATTER(44, 4, 13, num - 1));
}
/* fetch mask */
- write_sx_io(p, mskx, SX_LDUQ0(28, num - 1, mskx & 7));
+ sxm(SX_LDUQ0, mskx, 28, num - 1);
/* fetch dst pixels */
- write_sx_io(p, dstx, SX_LDUQ0(44, num - 1, dstx & 7));
+ sxm(SX_LDUQ0, dstx, 44, num - 1);
/* set src alpha to 0xff */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SCATTER(8, 4, 12, num - 1));
+ sxi(SX_SCATTER(8, 4, 12, num - 1));
/* now process up to 4 pixels */
for (i = 0; i < num; i++) {
int ii = i << 2;
/* mask alpha to SCAM */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_ORS(28 + ii, 0, R_SCAM, 0));
+ sxi(SX_ORS(28 + ii, 0, R_SCAM, 0));
/* src * alpha */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
+ sxi(SX_SAXP16X16SR8(12 + ii, 0, 60 + ii, 3));
/* write inverted alpha into SCAM */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_XORS(28 + ii, 8, R_SCAM, 0));
+ sxi(SX_XORS(28 + ii, 8, R_SCAM, 0));
/* dst * (1 - alpha) + R[60:] */
- write_sx_reg(p, SX_INSTRUCTIONS,
- SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
+ sxi(SX_SAXP16X16SR8(44 + ii, 60 + ii, 76 + ii, 3));
}
- write_sx_io(p, dstx,
- SX_STUQ0C(76, num - 1, dstx & 7));
+ sxm(SX_STUQ0C, dstx, 76, num - 1);
srcx += 16;
mskx += 16;
dstx += 16;