I have a Sharp Zaurus C3100, where X normally runs rotated 90 degrees, using a shadow framebuffer. I've been hacking on the code that blits a rotated shadow onto the display to make it faster, and came up with the included patch.
Blitting in rotated mode is about 4x the previous speed. Non-rotated copies are about the same speed; maybe up to 10% slower for small rectangles (on the Zaurus). The idea is to copy the area in blocks of 32x32 pixels, to reduce the number of cache misses, which are unavoidable when walking either the source or the destination bitmap across the scanlines. 16x16, 24x24, and 32x32 yield about the same result, so I chose 32x32 since it seems best for the non-rotated modes. Any comments on this patch? I have a question myself about the original code: This is the function call to get the address in the destination frame buffer to write to: win = (FbBits *) (*pBuf->window) (pScreen, scr_y, scr_x << 2, SHADOW_WINDOW_WRITE, &winSize, pBuf->closure); The "scr_x << 2" part seems, to me, to assume that sizeof(FbBits) == 4. Am I missing something, or is this really correct? Anyway, my patch does not make this problem either better or worse, but this is a chance to fix it if it is a bug... Staffan Index: programs/Xserver/miext/shadow/shrotate.c =================================================================== RCS file: /scratch/openbsd/cvs/XF4/xc/programs/Xserver/miext/shadow/shrotate.c,v retrieving revision 1.2 diff -u -r1.2 shrotate.c --- programs/Xserver/miext/shadow/shrotate.c 3 Nov 2004 00:09:54 -0000 1.2 +++ programs/Xserver/miext/shadow/shrotate.c 20 Sep 2005 23:07:58 -0000 @@ -45,6 +45,106 @@ #define TOP_TO_BOTTOM 2 #define BOTTOM_TO_TOP -2 + +static void +shadowUpdateRotatePackedSubRectangle(shadowBufPtr pBuf, + FbBits *shaLine, int shaFirstShift, + int shaStepOverX, int shaStepOverY, + int shaStepDownX, int shaStepDownY, + int shaBpp, FbBits shaMask, + ScreenPtr pScreen, + int scr_x1, int scr_y, + int scr_h, int scr_w, + int pixelsPerBits) +{ + FbBits *sha; + int shaShift; + int scr_x; + int w; + + /* + * Copy the bits, always write across the physical frame buffer + * to take advantage of write combining. 
+ */ + while (scr_h--) + { + int p; + FbBits bits; + FbBits *win; + int i; + CARD32 winSize; + + sha = shaLine; + shaShift = shaFirstShift; + w = scr_w; + scr_x = scr_x1 * shaBpp >> FB_SHIFT; + + while (w) + { + /* + * Map some of this line + */ + win = (FbBits *) (*pBuf->window) (pScreen, + scr_y, + scr_x << 2, + SHADOW_WINDOW_WRITE, + &winSize, + pBuf->closure); + i = (winSize >> 2); + if (i > w) + i = w; + w -= i; + scr_x += i; + /* + * Copy the portion of the line mapped + */ + while (i--) + { + bits = 0; + p = pixelsPerBits; + /* + * Build one word of output from multiple inputs + */ + while (p--) + { + bits = FbScrLeft(bits, shaBpp); + bits |= FbScrRight (*sha, shaShift) & shaMask; + + shaShift -= shaStepOverX; + if (shaShift >= FB_UNIT) + { + shaShift -= FB_UNIT; + sha--; + } + else if (shaShift < 0) + { + shaShift += FB_UNIT; + sha++; + } + sha += shaStepOverY; + } + *win++ = bits; + } + } + scr_y++; + shaFirstShift -= shaStepDownX; + if (shaFirstShift >= FB_UNIT) + { + shaFirstShift -= FB_UNIT; + shaLine--; + } + else if (shaFirstShift < 0) + { + shaFirstShift += FB_UNIT; + shaLine++; + } + shaLine += shaStepDownY; + } +} + +#define BLOCKSIZE_HEIGHT 32 +#define BLOCKSIZE_WIDTH 32 + void shadowUpdateRotatePacked (ScreenPtr pScreen, shadowBufPtr pBuf) @@ -61,7 +161,6 @@ int sha_x1 = 0, sha_y1 = 0; int scr_x1 = 0, scr_x2 = 0, scr_y1 = 0, scr_y2 = 0, scr_w, scr_h; int scr_x, scr_y; - int w; int pixelsPerBits; int pixelsMask; FbStride shaStepOverY = 0, shaStepDownY = 0; @@ -221,86 +320,46 @@ ((sha_x1 * shaBpp) >> FB_SHIFT)); /* - * Copy the bits, always write across the physical frame buffer - * to take advantage of write combining. + * Copy in blocks of size BLOCKSIZE_WIDTH x BLOCKSIZE_HEIGHT + * to reduce the number of cache misses when rotating 90 or + * 270 degrees. 
*/ - while (scr_h--) + for (scr_y = scr_y1; scr_y < scr_y2; scr_y += BLOCKSIZE_HEIGHT) { - int p; - FbBits bits; - FbBits *win; - int i; - CARD32 winSize; - sha = shaLine; shaShift = shaFirstShift; - w = scr_w; - scr_x = scr_x1 * shaBpp >> FB_SHIFT; - while (w) + for (scr_x = scr_x1; scr_x < scr_x2; scr_x += BLOCKSIZE_WIDTH) { - /* - * Map some of this line - */ - win = (FbBits *) (*pBuf->window) (pScreen, - scr_y, - scr_x << 2, - SHADOW_WINDOW_WRITE, - &winSize, - pBuf->closure); - i = (winSize >> 2); - if (i > w) - i = w; - w -= i; - scr_x += i; - /* - * Copy the portion of the line mapped - */ - while (i--) - { - bits = 0; - p = pixelsPerBits; - /* - * Build one word of output from multiple inputs - * - * Note that for 90/270 rotations, this will walk - * down the shadow hitting each scanline once. - * This is probably not very efficient. - */ - while (p--) - { - bits = FbScrLeft(bits, shaBpp); - bits |= FbScrRight (*sha, shaShift) & shaMask; + int h = BLOCKSIZE_HEIGHT; + int w = BLOCKSIZE_WIDTH; - shaShift -= shaStepOverX; - if (shaShift >= FB_UNIT) - { - shaShift -= FB_UNIT; - sha--; - } - else if (shaShift < 0) - { - shaShift += FB_UNIT; - sha++; - } - sha += shaStepOverY; - } - *win++ = bits; - } - } - scr_y++; - shaFirstShift -= shaStepDownX; - if (shaFirstShift >= FB_UNIT) - { - shaFirstShift -= FB_UNIT; - shaLine--; - } - else if (shaFirstShift < 0) - { - shaFirstShift += FB_UNIT; - shaLine++; + if (scr_y + h > scr_y2) + h = scr_y2 - scr_y; + if (scr_x + w > scr_x2) + w = scr_x2 - scr_x; + w = (w * shaBpp) >> FB_SHIFT; + + shadowUpdateRotatePackedSubRectangle + (pBuf, + sha, shaShift, + shaStepOverX, shaStepOverY, + shaStepDownX, shaStepDownY, + shaBpp, shaMask, + pScreen, + scr_x, scr_y, + h, w, + pixelsPerBits); + + shaShift -= BLOCKSIZE_WIDTH * shaStepOverX; + sha += BLOCKSIZE_WIDTH * shaStepOverY; + sha -= (shaShift >> FB_SHIFT); + shaShift &= FB_MASK; } - shaLine += shaStepDownY; + shaFirstShift -= BLOCKSIZE_HEIGHT * shaStepDownX; + shaLine += 
BLOCKSIZE_HEIGHT * shaStepDownY; + shaLine -= (shaFirstShift >> FB_SHIFT); + shaFirstShift &= FB_MASK; } } } _______________________________________________ Devel mailing list Devel@XFree86.Org http://XFree86.Org/mailman/listinfo/devel