[Pixman] [PATCH 03/14] More general BILINEAR=>NEAREST reduction
Generalize and simplify the code that reduces BILINEAR to NEAREST so that the reduction happens for all affine transformations where t00...t12 are integers and (t00 + t01) and (t10 + t11) are both odd. This is a sufficient condition for the resulting transformed coordinates to be exactly at the center of a pixel so that BILINEAR becomes identical to NEAREST. V2: Address some comments by Bill Spitzak Signed-off-by: Søren Sandmann--- pixman/pixman-image.c | 66 +-- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c index 1ff1a49..681864e 100644 --- a/pixman/pixman-image.c +++ b/pixman/pixman-image.c @@ -335,37 +335,47 @@ compute_image_info (pixman_image_t *image) { flags |= FAST_PATH_NEAREST_FILTER; } - else if ( - /* affine and integer translation components in matrix ... */ - ((flags & FAST_PATH_AFFINE_TRANSFORM) && -!pixman_fixed_frac (image->common.transform->matrix[0][2] | -image->common.transform->matrix[1][2])) && - ( - /* ... combined with a simple rotation */ - (flags & (FAST_PATH_ROTATE_90_TRANSFORM | - FAST_PATH_ROTATE_180_TRANSFORM | - FAST_PATH_ROTATE_270_TRANSFORM)) || - /* ... or combined with a simple non-rotated translation */ - (image->common.transform->matrix[0][0] == pixman_fixed_1 && -image->common.transform->matrix[1][1] == pixman_fixed_1 && -image->common.transform->matrix[0][1] == 0 && -image->common.transform->matrix[1][0] == 0) - ) - ) + else if (flags & FAST_PATH_AFFINE_TRANSFORM) { - /* FIXME: there are some affine-test failures, showing that -* handling of BILINEAR and NEAREST filter is not quite -* equivalent when getting close to 32K for the translation -* components of the matrix. That's likely some bug, but for -* now just skip BILINEAR->NEAREST optimization in this case. + /* Suppose the transform is +* +*[ t00, t01, t02 ] +*[ t10, t11, t12 ] +*[ 0, 0, 1 ] +* +* and the destination coordinates are (n + 0.5, m + 0.5). 
Then +* the transformed x coordinate is: +* +* tx = t00 * (n + 0.5) + t01 * (m + 0.5) + t02 +*= t00 * n + t01 * m + t02 + (t00 + t01) * 0.5 +* +* which implies that if t00, t01 and t02 are all integers +* and (t00 + t01) is odd, then tx will be an integer plus 0.5, +* which means a BILINEAR filter will reduce to NEAREST. The same +* applies in the y direction */ - pixman_fixed_t magic_limit = pixman_int_to_fixed (3); - if (image->common.transform->matrix[0][2] <= magic_limit && - image->common.transform->matrix[1][2] <= magic_limit && - image->common.transform->matrix[0][2] >= -magic_limit && - image->common.transform->matrix[1][2] >= -magic_limit) + pixman_fixed_t (*t)[3] = image->common.transform->matrix; + + if ((pixman_fixed_frac ( +t[0][0] | t[0][1] | t[0][2] | +t[1][0] | t[1][1] | t[1][2]) == 0) && + (pixman_fixed_to_int ( + (t[0][0] + t[0][1]) & (t[1][0] + t[1][1])) % 2) == 1) { - flags |= FAST_PATH_NEAREST_FILTER; + /* FIXME: there are some affine-test failures, showing that +* handling of BILINEAR and NEAREST filter is not quite +* equivalent when getting close to 32K for the translation +* components of the matrix. That's likely some bug, but for +* now just skip BILINEAR->NEAREST optimization in this case. +*/ + pixman_fixed_t magic_limit = pixman_int_to_fixed (3); + if (image->common.transform->matrix[0][2] <= magic_limit && + image->common.transform->matrix[1][2] <= magic_limit && + image->common.transform->matrix[0][2] >= -magic_limit && + image->common.transform->matrix[1][2] >= -magic_limit) + { + flags |= FAST_PATH_NEAREST_FILTER; + } } } break; -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 13/14] pixman-filter: Nested polynomial for cubic
From: Bill Spitzak v11: Restored range checks Signed-off-by: Bill Spitzak Reviewed-by: Oded Gabbay --- pixman/pixman-filter.c | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c index 4abd05f..db4ab6e 100644 --- a/pixman/pixman-filter.c +++ b/pixman/pixman-filter.c @@ -109,14 +109,16 @@ general_cubic (double x, double B, double C) if (ax < 1) { - return ((12 - 9 * B - 6 * C) * ax * ax * ax + - (-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6; + return (((12 - 9 * B - 6 * C) * ax + +(-18 + 12 * B + 6 * C)) * ax * ax + + (6 - 2 * B)) / 6; } -else if (ax >= 1 && ax < 2) +else if (ax < 2) { - return ((-B - 6 * C) * ax * ax * ax + - (6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) * - ax + (8 * B + 24 * C)) / 6; + return ((((-B - 6 * C) * ax + + (6 * B + 30 * C)) * ax + +(-12 * B - 48 * C)) * ax + + (8 * B + 24 * C)) / 6; } else { -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] Bill Spitzak patches
Hi, The following patch series contains those of Bill's patches that I think are ready to be pushed to master, plus some other related changes that I also think are ready. 01-03: These are patches to do more BILINEAR->NEAREST filter reductions. They were inspired by a similar patch in Bill's series, but these patches do the reduction in more cases and include tests. 04: Compute the filter size from a transformed ellipse. 05-06: UI fixes to demos/scale. 07: gnuplot output. This is based on Bill's gnuplot patch, but I rewrote the logic in pixman-filter.c to generate correct coordinates, and added a big comment explaining how the phase interleaving works. 08: Reduce malloc()/free()/memcpy(). 09: Correct Simpson's integration. 10: Integral splitting is only necessary for the LINEAR filter. I rebased this so that it doesn't depend on the changes to integral() from Bill's series, and made the comment in the code match the new code. 11: Speed up BOX/BOX. I rebased this and removed the normalization. 12: Fix several issues related to normalization. This patch fixes several normalization issues including the one fixed in Bill's series. 13: Nested polynomial for cubic. 14: Made Gaussian a bit wider. In the patches where I made changes, I have generally retained Bill as the author if the patch still contained a substantial block of code that was written by Bill. Those that I rewrote completely (the BILINEAR=>NEAREST and the normalization ones), I have put myself as author. However, in all cases I'm happy enough to put either me or Bill as the author. If anyone has strong opinions about this, let me know. In all the patches I have also reformatted the commit log so that it fits within 80 characters. With the exception of the scale->rscale one, I think the remaining patches in Bill's series should not be accepted, though it is possible that a new series rebased on top of this will reveal that I missed something. 
Søren ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 11/14] pixman-filter: Speed up BOX/BOX filter
The convolution of two BOX filters is simply the length of the interval where both are non-zero, so we can simply return width from the integral() function because the integration region has already been restricted to be such that both functions are non-zero on it. This is both faster and more accurate than doing numerical integration. This patch is based on one by Bill Spitzak https://lists.freedesktop.org/archives/pixman/2016-March/004446.html with these changes: - Rebased to not assume any changes in the arguments to integral(). - Dropped the multiplication by scale. - Added more details in the commit message. Signed-off-by: Søren Sandmann --- pixman/pixman-filter.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c index c868723..32aaa9a 100644 --- a/pixman/pixman-filter.c +++ b/pixman/pixman-filter.c @@ -160,11 +160,15 @@ integral (pixman_kernel_t kernel1, double x1, pixman_kernel_t kernel2, double scale, double x2, double width) { +if (kernel1 == PIXMAN_KERNEL_BOX && kernel2 == PIXMAN_KERNEL_BOX) +{ + return width; +} /* The LINEAR filter is not differentiable at 0, so if the * integration interval crosses zero, break it into two * separate integrals. */ -if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0) +else if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0) { return integral (kernel1, x1, kernel2, scale, x2, - x1) + -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 02/14] Add new test of filter reduction from BILINEAR to NEAREST
This new test tests a bunch of bilinear downscalings, where many have a transformation such that the BILINEAR filter can be reduced to NEAREST (and many don't). A CRC32 is computed for all the resulting images and compared to a known-good value for both 4-bit and 7-bit interpolation. V2: Remove leftover comment, some minor formatting fixes, use a timestamp as the PRNG seed. Signed-off-by: Søren Sandmann--- test/Makefile.sources| 1 + test/filter-reduction-test.c | 112 +++ 2 files changed, 113 insertions(+) create mode 100644 test/filter-reduction-test.c diff --git a/test/Makefile.sources b/test/Makefile.sources index 5d55e67..0a56231 100644 --- a/test/Makefile.sources +++ b/test/Makefile.sources @@ -21,6 +21,7 @@ TESTPROGRAMS = \ gradient-crash-test \ pixel-test\ matrix-test \ + filter-reduction-test \ composite-traps-test \ region-contains-test \ glyph-test\ diff --git a/test/filter-reduction-test.c b/test/filter-reduction-test.c new file mode 100644 index 000..705fa4b --- /dev/null +++ b/test/filter-reduction-test.c @@ -0,0 +1,112 @@ +#include +#include +#include "utils.h" + +static const pixman_fixed_t entries[] = +{ +pixman_double_to_fixed (-1.0), +pixman_double_to_fixed (-0.5), +pixman_double_to_fixed (-1/3.0), +pixman_double_to_fixed (0.0), +pixman_double_to_fixed (0.5), +pixman_double_to_fixed (1.0), +pixman_double_to_fixed (1.5), +pixman_double_to_fixed (2.0), +pixman_double_to_fixed (3.0), +}; + +#define SIZE 12 + +static uint32_t +test_scale (const pixman_transform_t *xform, uint32_t crc) +{ +uint32_t *srcbuf, *dstbuf; +pixman_image_t *src, *dest; + +srcbuf = malloc (SIZE * SIZE * 4); +prng_randmemset (srcbuf, SIZE * SIZE * 4, 0); +src = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, SIZE, SIZE, srcbuf, SIZE * 4); + +dstbuf = malloc (SIZE * SIZE * 4); +prng_randmemset (dstbuf, SIZE * SIZE * 4, 0); +dest = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, SIZE, SIZE, dstbuf, SIZE * 4); + +pixman_image_set_transform (src, xform); +pixman_image_set_repeat (src, 
PIXMAN_REPEAT_NORMAL); +pixman_image_set_filter (src, PIXMAN_FILTER_BILINEAR, NULL, 0); + +image_endian_swap (src); +image_endian_swap (dest); + +pixman_image_composite (PIXMAN_OP_SRC, + src, NULL, dest, + 0, 0, 0, 0, 0, 0, + SIZE, SIZE); + +crc = compute_crc32_for_image (crc, dest); + +pixman_image_unref (src); +pixman_image_unref (dest); + +free (srcbuf); +free (dstbuf); + +return crc; +} + +#if BILINEAR_INTERPOLATION_BITS == 7 +#define CHECKSUM 0x02169677 +#elif BILINEAR_INTERPOLATION_BITS == 4 +#define CHECKSUM 0xE44B29AC +#else +#define CHECKSUM 0x +#endif + +int +main (int argc, const char *argv[]) +{ +const pixman_fixed_t *end = entries + ARRAY_LENGTH (entries); +const pixman_fixed_t *t0, *t1, *t2, *t3, *t4, *t5; +uint32_t crc = 0; + +prng_srand (0x56EA1DBD); + +for (t0 = entries; t0 < end; ++t0) +{ + for (t1 = entries; t1 < end; ++t1) + { + for (t2 = entries; t2 < end; ++t2) + { + for (t3 = entries; t3 < end; ++t3) + { + for (t4 = entries; t4 < end; ++t4) + { + for (t5 = entries; t5 < end; ++t5) + { + pixman_transform_t xform = { + { { *t0, *t1, *t2 }, + { *t3, *t4, *t5 }, + { 0, 0, pixman_fixed_1 } } + }; + + crc = test_scale (, crc); + } + } + } + } + } +} + +if (crc != CHECKSUM) +{ + printf ("filter-reduction-test failed! (checksum=0x%08X, expected 0x%08X)\n", crc, CHECKSUM); + return 1; +} +else +{ + printf ("filter-reduction-test passed (checksum=0x%08X)\n", crc); + return 0; +} +} -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 07/14] pixman-image: Added enable-gnuplot config to view filters in gnuplot
From: Bill SpitzakIf enable-gnuplot is configured, then you can pipe the output of a pixman-using program to gnuplot and get a continuously-updated plot of the horizontal filter. This works well with demos/scale to test the filter generation. The plot is all the different subposition filters shuffled together. This is misleading in a few cases: IMPULSE.BOX - goes up and down as the subfilters have different numbers of non-zero samples IMPULSE.TRIANGLE - somewhat crooked for the same reason 1-wide filters - looks triangular, but a 1-wide box would be more accurate Changes by Søren: Rewrote the pixman-filter.c part to - make it generate correct coordinates - add a comment on how coordinates are generated - in rounding.txt, add a ceil() variant of the first-sample formula - make the gnuplot output slightly prettier v7: First time this ability was included v8: Use config option Moved code to the filter generator Modified scale demo to not call filter generator a second time. v10: Only print if successful generation of plots Use #ifdef, not #if v11: small whitespace fixes Signed-off-by: Bill Spitzak Signed-off-by: Søren Sandmann --- configure.ac | 13 ++ pixman/pixman-filter.c | 115 + pixman/rounding.txt| 1 + 3 files changed, 129 insertions(+) diff --git a/configure.ac b/configure.ac index 6b2134e..e833e45 100644 --- a/configure.ac +++ b/configure.ac @@ -834,6 +834,19 @@ fi AC_SUBST(PIXMAN_TIMERS) dnl === +dnl gnuplot + +AC_ARG_ENABLE(gnuplot, + [AC_HELP_STRING([--enable-gnuplot], + [enable output of filters that can be piped to gnuplot [default=no]])], + [enable_gnuplot=$enableval], [enable_gnuplot=no]) + +if test $enable_gnuplot = yes ; then + AC_DEFINE(PIXMAN_GNUPLOT, 1, [enable output that can be piped to gnuplot]) +fi +AC_SUBST(PIXMAN_GNUPLOT) + +dnl === dnl GTK+ AC_ARG_ENABLE(gtk, diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c index b2bf53f..af46a43 100644 --- a/pixman/pixman-filter.c +++ b/pixman/pixman-filter.c @@ -297,6 +297,117 @@ create_1d_filter 
(int *width, return params; } +#ifdef PIXMAN_GNUPLOT + +/* If enable-gnuplot is configured, then you can pipe the output of a + * pixman-using program to gnuplot and get a continuously-updated plot + * of the horizontal filter. This works well with demos/scale to test + * the filter generation. + * + * The plot is all the different subposition filters shuffled + * together. This is misleading in a few cases: + * + * IMPULSE.BOX - goes up and down as the subfilters have different + * numbers of non-zero samples + * IMPULSE.TRIANGLE - somewhat crooked for the same reason + * 1-wide filters - looks triangular, but a 1-wide box would be more + * accurate + */ +static void +gnuplot_filter (int width, int n_phases, const pixman_fixed_t* p) +{ +double step; +int i, j; +int first; + +step = 1.0 / n_phases; + +printf ("set style line 1 lc rgb '#0060ad' lt 1 lw 0.5 pt 7 pi 1 ps 0.5\n"); +printf ("plot '-' with linespoints ls 1\n"); + +/* The position of the first sample of the phase corresponding to + * frac is given by: + * + * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + * + * We have to find the frac that minimizes this expression. + * + * For odd widths, we have + * + * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + * = ceil (frac) + K - frac + * = 1 + K - frac + * + * for some K, so this is minimized when frac is maximized and + * strictly growing with frac. So for odd widths, we can simply + * start at the last phase and go backwards. + * + * For even widths, we have + * + * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac + * = ceil (frac - 0.5) + K - frac + * + * The graph for this function (ignoring K) looks like this: + * + *0.5 + * ||\ + * || \ + * || \ + * 0 || \ + * |\ | + * | \ | + * | \ | + * -0.5 | \| + * - + * 00.5 1 + * + * So in this case we need to start with the phase whose frac is + * less than, but as close as possible to 0.5, then go backwards + * until we hit the first phase, then wrap around to the last + * phase and continue backwards. 
+ * + * Which phase is as close as possible 0.5? The locations of the + * sampling point corresponding to the kth phase is given by + * 1/(2 * n_phases) + k / n_phases: + * + * 1/(2 * n_phases) + k / n_phases = 0.5 +
[Pixman] [PATCH 10/14] pixman-filter: integral splitting is only needed for triangle filter
From: Bill Spitzak Only the triangle (LINEAR) filter is not differentiable at 0. The other filters resemble a cubic closely enough that Simpson's integration works without splitting. Changes by Søren: Rebase without the changes to the integral function, update comment to match the new code. Signed-off-by: Bill Spitzak Signed-off-by: Søren Sandmann Reviewed-by: Søren Sandmann --- pixman/pixman-filter.c | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c index 8d4872a..c868723 100644 --- a/pixman/pixman-filter.c +++ b/pixman/pixman-filter.c @@ -160,18 +160,17 @@ integral (pixman_kernel_t kernel1, double x1, pixman_kernel_t kernel2, double scale, double x2, double width) { -/* If the integration interval crosses zero, break it into - * two separate integrals. This ensures that filters such - * as LINEAR that are not differentiable at 0 will still - * integrate properly. +/* The LINEAR filter is not differentiable at 0, so if the + * integration interval crosses zero, break it into two + * separate integrals. */ -if (x1 < 0 && x1 + width > 0) +if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0) { return integral (kernel1, x1, kernel2, scale, x2, - x1) + integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1); } -else if (x2 < 0 && x2 + width > 0) +else if (kernel2 == PIXMAN_KERNEL_LINEAR && x2 < 0 && x2 + width > 0) { return integral (kernel1, x1, kernel2, scale, x2, - x2) + -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 04/14] demos/scale: Compute filter size using boundary of xformed ellipse
From: Bill SpitzakInstead of using the boundary of xformed rectangle, use the boundary of xformed ellipse. This is much more accurate and less blurry. In particular the filtering does not change as the image is rotated. Signed-off-by: Bill Spitzak Reviewed-by: Oded Gabbay Reviewed-by: Soren Sandmann --- demos/scale.c | 102 +++--- 1 file changed, 61 insertions(+), 41 deletions(-) diff --git a/demos/scale.c b/demos/scale.c index d00307e..0995ad0 100644 --- a/demos/scale.c +++ b/demos/scale.c @@ -55,50 +55,70 @@ get_widget (app_t *app, const char *name) return widget; } -static double -min4 (double a, double b, double c, double d) -{ -double m1, m2; - -m1 = MIN (a, b); -m2 = MIN (c, d); -return MIN (m1, m2); -} - -static double -max4 (double a, double b, double c, double d) -{ -double m1, m2; - -m1 = MAX (a, b); -m2 = MAX (c, d); -return MAX (m1, m2); -} - +/* Figure out the boundary of a diameter=1 circle transformed into an ellipse + * by trans. Proof that this is the correct calculation: + * + * Transform x,y to u,v by this matrix calculation: + * + * |u| |a c| |x| + * |v| = |b d|*|y| + * + * Horizontal component: + * + * u = ax+cy (1) + * + * For each x,y on a radius-1 circle (p is angle to the point): + * + * x^2+y^2 = 1 + * x = cos(p) + * y = sin(p) + * dx/dp = -sin(p) = -y + * dy/dp = cos(p) = x + * + * Figure out derivative of (1) relative to p: + * + * du/dp = a(dx/dp) + c(dy/dp) + *= -ay + cx + * + * The min and max u are when du/dp is zero: + * + * -ay + cx = 0 + * cx = ay + * c = ay/x (2) + * y = cx/a (3) + * + * Substitute (2) into (1) and simplify: + * + * u = ax + ay^2/x + *= a(x^2+y^2)/x + *= a/x (because x^2+y^2 = 1) + * x = a/u (4) + * + * Substitute (4) into (3) and simplify: + * + * y = c(a/u)/a + * y = c/u (5) + * + * Square (4) and (5) and add: + * + * x^2+y^2 = (a^2+c^2)/u^2 + * + * But x^2+y^2 is 1: + * + * 1 = (a^2+c^2)/u^2 + * u^2 = a^2+c^2 + * u = hypot(a,c) + * + * Similarily the max/min of v is at: + * + * v = hypot(b,d) + * + */ static 
void compute_extents (pixman_f_transform_t *trans, double *sx, double *sy) { -double min_x, max_x, min_y, max_y; -pixman_f_vector_t v[4] = -{ - { { 1, 1, 1 } }, - { { -1, 1, 1 } }, - { { -1, -1, 1 } }, - { { 1, -1, 1 } }, -}; - -pixman_f_transform_point (trans, [0]); -pixman_f_transform_point (trans, [1]); -pixman_f_transform_point (trans, [2]); -pixman_f_transform_point (trans, [3]); - -min_x = min4 (v[0].v[0], v[1].v[0], v[2].v[0], v[3].v[0]); -max_x = max4 (v[0].v[0], v[1].v[0], v[2].v[0], v[3].v[0]); -min_y = min4 (v[0].v[1], v[1].v[1], v[2].v[1], v[3].v[1]); -max_y = max4 (v[0].v[1], v[1].v[1], v[2].v[1], v[3].v[1]); - -*sx = (max_x - min_x) / 2.0; -*sy = (max_y - min_y) / 2.0; +*sx = hypot (trans->m[0][0], trans->m[0][1]) / trans->m[2][2]; +*sy = hypot (trans->m[1][0], trans->m[1][1]) / trans->m[2][2]; } typedef struct -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 06/14] demos/scale: Default to locked axis
From: Bill Spitzak Signed-off-by: Bill Spitzak Reviewed-by: Søren Sandmann --- demos/scale.ui | 1 + 1 file changed, 1 insertion(+) diff --git a/demos/scale.ui b/demos/scale.ui index f6f6e89..d498d26 100644 --- a/demos/scale.ui +++ b/demos/scale.ui @@ -177,6 +177,7 @@ id="lock_checkbutton"> Lock X and Y Dimensions 0.0 + True False -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 09/14] pixman-filter: Correct Simpsons integration
From: Bill Spitzak Simpson's rule uses cubic curve fitting, with 3 samples defining each cubic. This makes the weights of the samples follow the pattern 1,4,2,4,2,...,4,1, with the result then divided by 3. The previous code was using weights of 1,2,0,6,0,6,...,2,1. With this fix the integration is accurate enough that the number of samples could be reduced a lot. Multiples of 12 seem to work best. v7: Merged with patch to reduce from 128 samples to 16 v9: Changed samples from 16 to 12 v10: Fixed rebase error that made it not compile v11: minor whitespace change v14: more whitespace changes Signed-off-by: Bill Spitzak Reviewed-by: Oded Gabbay Reviewed-by: Søren Sandmann --- pixman/pixman-filter.c | 21 +++-- 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c index dd5176d..8d4872a 100644 --- a/pixman/pixman-filter.c +++ b/pixman/pixman-filter.c @@ -189,13 +189,19 @@ integral (pixman_kernel_t kernel1, double x1, } else { - /* Integration via Simpson's rule */ -#define N_SEGMENTS 128 + /* Integration via Simpson's rule +* See http://www.intmath.com/integration/6-simpsons-rule.php +* 12 segments (6 cubic approximations) seems to produce best +* result for lanczos3.linear, which was the combination that +* showed the most errors. This makes sense as the lanczos3 +* filter is 6 wide. 
+*/ +#define N_SEGMENTS 12 #define SAMPLE(a1, a2) \ (filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale)) double s = 0.0; - double h = width / (double)N_SEGMENTS; + double h = width / N_SEGMENTS; int i; s = SAMPLE (x1, x2); @@ -204,11 +210,14 @@ integral (pixman_kernel_t kernel1, double x1, { double a1 = x1 + h * i; double a2 = x2 + h * i; + s += 4 * SAMPLE (a1, a2); + } + for (i = 2; i < N_SEGMENTS; i += 2) + { + double a1 = x1 + h * i; + double a2 = x2 + h * i; s += 2 * SAMPLE (a1, a2); - - if (i >= 2 && i < N_SEGMENTS - 1) - s += 4 * SAMPLE (a1, a2); } s += SAMPLE (x1 + width, x2 + width); -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 01/14] pixman-fast-path.c: Pick NEAREST affine fast paths before BILINEAR ones
When a BILINEAR filter is reduced to NEAREST, it is possible for both types of fast paths to run; in this case, the NEAREST ones should be preferred as that is the simpler filter. Signed-off-by: Soren Sandmann--- pixman/pixman-fast-path.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index 53d4a1f..b4daa26 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -3258,9 +3258,9 @@ static const pixman_iter_info_t fast_iters[] = }, #define AFFINE_FAST_PATHS(name, format, repeat) \ -SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ +NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ BILINEAR_AFFINE_FAST_PATH(name, format, repeat)\ -NEAREST_AFFINE_FAST_PATH(name, format, repeat) +SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 08/14] pixman-filter: reduce amount of malloc/free/memcpy to generate filter
From: Bill SpitzakRearranged so that the entire block of memory for the filter pair is allocated first, and then filled in. Previous version allocated and freed two temporary buffers for each filter and did an extra memcpy. v8: small refactor to remove the filter_width function v10: Restored filter_width function but with arguments changed to match later patches v11: Removed unused arg and pointer from filter_width function Whitespace fixes. Signed-off-by: Bill Spitzak Reviewed-by: Oded Gabbay Acked-by: Søren Sandmann --- pixman/pixman-filter.c | 56 +- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c index af46a43..dd5176d 100644 --- a/pixman/pixman-filter.c +++ b/pixman/pixman-filter.c @@ -217,25 +217,17 @@ integral (pixman_kernel_t kernel1, double x1, } } -static pixman_fixed_t * -create_1d_filter (int *width, +static void +create_1d_filter (int width, pixman_kernel_t reconstruct, pixman_kernel_t sample, double scale, - int n_phases) + int n_phases, + pixman_fixed_t *p) { -pixman_fixed_t *params, *p; double step; -double size; int i; -size = scale * filters[sample].width + filters[reconstruct].width; -*width = ceil (size); - -p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t)); -if (!params) -return NULL; - step = 1.0 / n_phases; for (i = 0; i < n_phases; ++i) @@ -250,8 +242,8 @@ create_1d_filter (int *width, * and sample positions. 
*/ - x1 = ceil (frac - *width / 2.0 - 0.5); -x2 = x1 + *width; + x1 = ceil (frac - width / 2.0 - 0.5); + x2 = x1 + width; total = 0; for (x = x1; x < x2; ++x) @@ -279,7 +271,7 @@ create_1d_filter (int *width, } /* Normalize */ - p -= *width; + p -= width; total = 1 / total; new_total = 0; for (x = x1; x < x2; ++x) @@ -291,10 +283,15 @@ create_1d_filter (int *width, } if (new_total != pixman_fixed_1) - *(p - *width / 2) += (pixman_fixed_1 - new_total); + *(p - width / 2) += (pixman_fixed_1 - new_total); } +} -return params; + +static int +filter_width (pixman_kernel_t reconstruct, pixman_kernel_t sample, double size) +{ +return ceil (filters[reconstruct].width + size * filters[sample].width); } #ifdef PIXMAN_GNUPLOT @@ -424,38 +421,31 @@ pixman_filter_create_separable_convolution (int *n_values, { double sx = fabs (pixman_fixed_to_double (scale_x)); double sy = fabs (pixman_fixed_to_double (scale_y)); -pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL; +pixman_fixed_t *params; int subsample_x, subsample_y; int width, height; +width = filter_width (reconstruct_x, sample_x, sx); subsample_x = (1 << subsample_bits_x); -subsample_y = (1 << subsample_bits_y); -horz = create_1d_filter (, reconstruct_x, sample_x, sx, subsample_x); -vert = create_1d_filter (, reconstruct_y, sample_y, sy, subsample_y); +height = filter_width (reconstruct_y, sample_y, sy); +subsample_y = (1 << subsample_bits_y); -if (!horz || !vert) -goto out; - *n_values = 4 + width * subsample_x + height * subsample_y; params = malloc (*n_values * sizeof (pixman_fixed_t)); if (!params) -goto out; + return NULL; params[0] = pixman_int_to_fixed (width); params[1] = pixman_int_to_fixed (height); params[2] = pixman_int_to_fixed (subsample_bits_x); params[3] = pixman_int_to_fixed (subsample_bits_y); -memcpy (params + 4, horz, - width * subsample_x * sizeof (pixman_fixed_t)); -memcpy (params + 4 + width * subsample_x, vert, - height * subsample_y * sizeof (pixman_fixed_t)); - -out: -free (horz); -free 
(vert); +create_1d_filter (width, reconstruct_x, sample_x, sx, subsample_x, + params + 4); +create_1d_filter (height, reconstruct_y, sample_y, sy, subsample_y, + params + 4 + width * subsample_x); #ifdef PIXMAN_GNUPLOT gnuplot_filter(width, subsample_x, params + 4); -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCHv2 3/3] More general BILINEAR=>NEAREST reduction
Generalize and simplify the code that reduces BILINEAR to NEAREST so that all the reduction happens for all affine transformations where t00..t12 are integers and (t00 + t01) and (t10 + t11) are both odd. This is a sufficient condition for the resulting transformed coordinates to be exactly at the center of a pixel so that BILINEAR becomes identical to NEAREST. V2: Address some comments by Bill Spitzak Signed-off-by: Søren Sandmann--- pixman/pixman-image.c | 66 +-- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c index 1ff1a49..681864e 100644 --- a/pixman/pixman-image.c +++ b/pixman/pixman-image.c @@ -335,37 +335,47 @@ compute_image_info (pixman_image_t *image) { flags |= FAST_PATH_NEAREST_FILTER; } - else if ( - /* affine and integer translation components in matrix ... */ - ((flags & FAST_PATH_AFFINE_TRANSFORM) && -!pixman_fixed_frac (image->common.transform->matrix[0][2] | -image->common.transform->matrix[1][2])) && - ( - /* ... combined with a simple rotation */ - (flags & (FAST_PATH_ROTATE_90_TRANSFORM | - FAST_PATH_ROTATE_180_TRANSFORM | - FAST_PATH_ROTATE_270_TRANSFORM)) || - /* ... or combined with a simple non-rotated translation */ - (image->common.transform->matrix[0][0] == pixman_fixed_1 && -image->common.transform->matrix[1][1] == pixman_fixed_1 && -image->common.transform->matrix[0][1] == 0 && -image->common.transform->matrix[1][0] == 0) - ) - ) + else if (flags & FAST_PATH_AFFINE_TRANSFORM) { - /* FIXME: there are some affine-test failures, showing that -* handling of BILINEAR and NEAREST filter is not quite -* equivalent when getting close to 32K for the translation -* components of the matrix. That's likely some bug, but for -* now just skip BILINEAR->NEAREST optimization in this case. + /* Suppose the transform is +* +*[ t00, t01, t02 ] +*[ t10, t11, t12 ] +*[ 0, 0, 1 ] +* +* and the destination coordinates are (n + 0.5, m + 0.5). 
Then +* the transformed x coordinate is: +* +* tx = t00 * (n + 0.5) + t01 * (m + 0.5) + t02 +*= t00 * n + t01 * m + t02 + (t00 + t01) * 0.5 +* +* which implies that if t00, t01 and t02 are all integers +* and (t00 + t01) is odd, then tx will be an integer plus 0.5, +* which means a BILINEAR filter will reduce to NEAREST. The same +* applies in the y direction */ - pixman_fixed_t magic_limit = pixman_int_to_fixed (3); - if (image->common.transform->matrix[0][2] <= magic_limit && - image->common.transform->matrix[1][2] <= magic_limit && - image->common.transform->matrix[0][2] >= -magic_limit && - image->common.transform->matrix[1][2] >= -magic_limit) + pixman_fixed_t (*t)[3] = image->common.transform->matrix; + + if ((pixman_fixed_frac ( +t[0][0] | t[0][1] | t[0][2] | +t[1][0] | t[1][1] | t[1][2]) == 0) && + (pixman_fixed_to_int ( + (t[0][0] + t[0][1]) & (t[1][0] + t[1][1])) % 2) == 1) { - flags |= FAST_PATH_NEAREST_FILTER; + /* FIXME: there are some affine-test failures, showing that +* handling of BILINEAR and NEAREST filter is not quite +* equivalent when getting close to 32K for the translation +* components of the matrix. That's likely some bug, but for +* now just skip BILINEAR->NEAREST optimization in this case. +*/ + pixman_fixed_t magic_limit = pixman_int_to_fixed (3); + if (image->common.transform->matrix[0][2] <= magic_limit && + image->common.transform->matrix[1][2] <= magic_limit && + image->common.transform->matrix[0][2] >= -magic_limit && + image->common.transform->matrix[1][2] >= -magic_limit) + { + flags |= FAST_PATH_NEAREST_FILTER; + } } } break; -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCHv2 1/3] pixman-fast-path.c: Pick NEAREST affine fast paths before BILINEAR ones
When a BILINEAR filter is reduced to NEAREST, it is possible for both types of fast paths to run; in this case, the NEAREST ones should be preferred as that is the simpler filter. Signed-off-by: Soren Sandmann--- pixman/pixman-fast-path.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index 53d4a1f..b4daa26 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -3258,9 +3258,9 @@ static const pixman_iter_info_t fast_iters[] = }, #define AFFINE_FAST_PATHS(name, format, repeat) \ -SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) \ +NEAREST_AFFINE_FAST_PATH(name, format, repeat) \ BILINEAR_AFFINE_FAST_PATH(name, format, repeat)\ -NEAREST_AFFINE_FAST_PATH(name, format, repeat) +SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat) AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD) AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE) -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCHv2 2/3] Add new test of filter reduction from BILINEAR to NEAREST
This new test tests a bunch of bilinear downscalings, where many have a transformation such that the BILINEAR filter can be reduced to NEAREST (and many don't). A CRC32 is computed for all the resulting images and compared to a known-good value for both 4-bit and 7-bit interpolation. V2: Remove leftover comment, some minor formatting fixes, use a timestamp as the PRNG seed. Signed-off-by: Søren Sandmann--- test/Makefile.sources| 1 + test/filter-reduction-test.c | 112 +++ 2 files changed, 113 insertions(+) create mode 100644 test/filter-reduction-test.c diff --git a/test/Makefile.sources b/test/Makefile.sources index 5d55e67..0a56231 100644 --- a/test/Makefile.sources +++ b/test/Makefile.sources @@ -21,6 +21,7 @@ TESTPROGRAMS = \ gradient-crash-test \ pixel-test\ matrix-test \ + filter-reduction-test \ composite-traps-test \ region-contains-test \ glyph-test\ diff --git a/test/filter-reduction-test.c b/test/filter-reduction-test.c new file mode 100644 index 000..705fa4b --- /dev/null +++ b/test/filter-reduction-test.c @@ -0,0 +1,112 @@ +#include +#include +#include "utils.h" + +static const pixman_fixed_t entries[] = +{ +pixman_double_to_fixed (-1.0), +pixman_double_to_fixed (-0.5), +pixman_double_to_fixed (-1/3.0), +pixman_double_to_fixed (0.0), +pixman_double_to_fixed (0.5), +pixman_double_to_fixed (1.0), +pixman_double_to_fixed (1.5), +pixman_double_to_fixed (2.0), +pixman_double_to_fixed (3.0), +}; + +#define SIZE 12 + +static uint32_t +test_scale (const pixman_transform_t *xform, uint32_t crc) +{ +uint32_t *srcbuf, *dstbuf; +pixman_image_t *src, *dest; + +srcbuf = malloc (SIZE * SIZE * 4); +prng_randmemset (srcbuf, SIZE * SIZE * 4, 0); +src = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, SIZE, SIZE, srcbuf, SIZE * 4); + +dstbuf = malloc (SIZE * SIZE * 4); +prng_randmemset (dstbuf, SIZE * SIZE * 4, 0); +dest = pixman_image_create_bits ( + PIXMAN_a8r8g8b8, SIZE, SIZE, dstbuf, SIZE * 4); + +pixman_image_set_transform (src, xform); +pixman_image_set_repeat (src, 
PIXMAN_REPEAT_NORMAL); +pixman_image_set_filter (src, PIXMAN_FILTER_BILINEAR, NULL, 0); + +image_endian_swap (src); +image_endian_swap (dest); + +pixman_image_composite (PIXMAN_OP_SRC, + src, NULL, dest, + 0, 0, 0, 0, 0, 0, + SIZE, SIZE); + +crc = compute_crc32_for_image (crc, dest); + +pixman_image_unref (src); +pixman_image_unref (dest); + +free (srcbuf); +free (dstbuf); + +return crc; +} + +#if BILINEAR_INTERPOLATION_BITS == 7 +#define CHECKSUM 0x02169677 +#elif BILINEAR_INTERPOLATION_BITS == 4 +#define CHECKSUM 0xE44B29AC +#else +#define CHECKSUM 0x00000000 +#endif + +int +main (int argc, const char *argv[]) +{ +const pixman_fixed_t *end = entries + ARRAY_LENGTH (entries); +const pixman_fixed_t *t0, *t1, *t2, *t3, *t4, *t5; +uint32_t crc = 0; + +prng_srand (0x56EA1DBD); + +for (t0 = entries; t0 < end; ++t0) +{ + for (t1 = entries; t1 < end; ++t1) + { + for (t2 = entries; t2 < end; ++t2) + { + for (t3 = entries; t3 < end; ++t3) + { + for (t4 = entries; t4 < end; ++t4) + { + for (t5 = entries; t5 < end; ++t5) + { + pixman_transform_t xform = { + { { *t0, *t1, *t2 }, + { *t3, *t4, *t5 }, + { 0, 0, pixman_fixed_1 } } + }; + + crc = test_scale (&xform, crc); + } + } + } + } + } +} + +if (crc != CHECKSUM) +{ + printf ("filter-reduction-test failed! (checksum=0x%08X, expected 0x%08X)\n", crc, CHECKSUM); + return 1; +} +else +{ + printf ("filter-reduction-test passed (checksum=0x%08X)\n", crc); + return 0; +} +} -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCHv2 0/3] More general reduction of BILINEAR to NEAREST
This series addresses the comments by Bill and also changes pixman-fast-path.c so that it picks NEAREST fast paths before BILINEAR. (I noticed this because the new filter-reduction-test.c failed to detect a bug that I deliberately introduced). Søren ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 1/2] Add new test of filter reduction from BILINEAR to NEAREST
This new test tests a bunch of bilinear downscalings, where many have a transformation such that the BILINEAR filter can be reduced to NEAREST (and many don't). A CRC32 is computed for all the resulting images and compared to a known-good value for both 4-bit and 7-bit interpolation. Signed-off-by: Søren Sandmann--- test/Makefile.sources| 1 + test/filter-reduction-test.c | 119 +++ 2 files changed, 120 insertions(+) create mode 100644 test/filter-reduction-test.c diff --git a/test/Makefile.sources b/test/Makefile.sources index 5d55e67..0a56231 100644 --- a/test/Makefile.sources +++ b/test/Makefile.sources @@ -21,6 +21,7 @@ TESTPROGRAMS = \ gradient-crash-test \ pixel-test\ matrix-test \ + filter-reduction-test \ composite-traps-test \ region-contains-test \ glyph-test\ diff --git a/test/filter-reduction-test.c b/test/filter-reduction-test.c new file mode 100644 index 000..72b3142 --- /dev/null +++ b/test/filter-reduction-test.c @@ -0,0 +1,119 @@ +/* + * Test program, which can detect some problems with nearest neighbour + * and bilinear scaling in pixman. Testing is done by running lots + * of random SRC and OVER compositing operations a8r8g8b8, x8a8r8g8b8 + * and r5g6b5 color formats. + * + * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in + * the case of test failure. 
+ */ +#include +#include +#include "utils.h" + +static const pixman_fixed_t entries[] = +{ +pixman_double_to_fixed (-1.0), +pixman_double_to_fixed (-0.5), +pixman_double_to_fixed (-1/3.0), +pixman_double_to_fixed (0.0), +pixman_double_to_fixed (0.5), +pixman_double_to_fixed (1.0), +pixman_double_to_fixed (1.5), +pixman_double_to_fixed (2.0), +pixman_double_to_fixed (3.0), +}; + +#define SIZE 12 + +static uint32_t +test_scale (const pixman_transform_t *xform, uint32_t crc) +{ +uint32_t *srcbuf, *dstbuf; +pixman_image_t *src, *dest; + +srcbuf = malloc (SIZE * SIZE * 4); +prng_randmemset (srcbuf, SIZE * SIZE * 4, 0); +src = pixman_image_create_bits (PIXMAN_a8r8g8b8, SIZE, SIZE, srcbuf, SIZE * 4); + +dstbuf = malloc (SIZE * SIZE * 4); +prng_randmemset (dstbuf, SIZE * SIZE * 4, 0); +dest = pixman_image_create_bits (PIXMAN_a8r8g8b8, SIZE, SIZE, dstbuf, SIZE * 4); + +pixman_image_set_transform (src, xform); +pixman_image_set_repeat (src, PIXMAN_REPEAT_NORMAL); +pixman_image_set_filter (src, PIXMAN_FILTER_BILINEAR, NULL, 0); + +image_endian_swap (src); +image_endian_swap (dest); + +pixman_image_composite (PIXMAN_OP_SRC, + src, NULL, dest, + 0, 0, 0, 0, 0, 0, + SIZE, SIZE); + +crc = compute_crc32_for_image (crc, dest); + +pixman_image_unref (src); +pixman_image_unref (dest); + +free (srcbuf); +free (dstbuf); + +return crc; +} + +#if BILINEAR_INTERPOLATION_BITS == 7 +#define CHECKSUM 0x40BDEAC4 +#elif BILINEAR_INTERPOLATION_BITS == 4 +#define CHECKSUM 0xF8245E72 +#else +#define CHECKSUM 0x +#endif + +int +main (int argc, const char *argv[]) +{ +const pixman_fixed_t *end = entries + ARRAY_LENGTH (entries); +const pixman_fixed_t *t0, *t1, *t2, *t3, *t4, *t5; +uint32_t crc = 0; + +prng_srand (0xcafebabe); + +for (t0 = entries; t0 < end; ++t0) +{ + for (t1 = entries; t1 < end; ++t1) + { + for (t2 = entries; t2 < end; ++t2) + { + for (t3 = entries; t3 < end; ++t3) + { + for (t4 = entries; t4 < end; ++t4) + { + for (t5 = entries; t5 < end; ++t5) + { + pixman_transform_t xform = { 
+ { { *t0, *t1, *t2 }, + { *t3, *t4, *t5 }, + { 0, 0, pixman_fixed_1 } } + }; + + crc = test_scale (&xform, crc); + } + } + } + } + } +} + +if (crc != CHECKSUM) +{ + printf ("filter-reduction-test failed! (checksum=%08X, expected %08X)\n", crc, CHECKSUM); + return 1; +} +else +{ + printf ("filter-reduction-test passed (checksum=%08X)\n", crc); + return 0; +} +} -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] More general BILINEAR to NEAREST reduction
The following two patches generalize the reduction of BILINEAR to NEAREST based on the formula mentioned here: https://lists.freedesktop.org/archives/pixman/2010-August/000321.html Søren ___ Pixman mailing list Pixman@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH] Fix comment about BILINEAR_INTERPOLATION_BITS to say < 8 rather than <= 8
Since a4c79d695d52c94647b1aff7 the constant BILINEAR_INTERPOLATION_BITS must be strictly less than 8, so fix the comment to say this, and also add a COMPILE_TIME_ASSERT in the bilinear fetcher in pixman-fast-path.c --- pixman/pixman-fast-path.c | 2 ++ pixman/pixman-private.h | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index c6e43de..a9b7d3a 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -2343,6 +2343,8 @@ fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask) int32_t dist_y; int i; +COMPILE_TIME_ASSERT(BILINEAR_INTERPOLATION_BITS < 8); + fx = info->x; ux = iter->image->common.transform->matrix[0][0]; diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index fdc966a..73108a0 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -7,7 +7,7 @@ * The defines which are shared between C and assembly code */ -/* bilinear interpolation precision (must be <= 8) */ +/* bilinear interpolation precision (must be < 8) */ #define BILINEAR_INTERPOLATION_BITS 7 #define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS) -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 0/5] Some changes to Color Dodge and Color Burn
The overall goal of the following patches is to make it more obvious how the blend mode code relates to the specifications. To that end, the comment for each blend routine is updated with some math that shows how we go from specification to a formula that can deal with premultiplied alpha, and the code is updated to follow the math as much as possible. The blend routines for Color Dodge and Color Burn are rewritten to match the derived formulas. In the case of Color Dodge, this in some sense makes the code less correct because the new code can now underflow the unsigned variables when the source pixel is superluminescent, while the old code was careful to clamp to zero. In the case of Color Burn, I believe the new code is a net improvement since the old code could underflow whereas the new code can't. The reason I don't care too much about the code being correct is that the blend mode code in general has a number of issues that make it not work very well, especially when superluminescent pixels are involved. For one, the use of unsigned variables is wrong; for another, the macros use a non-saturating sum that can sometimes overflow. I have some plans to fix the blend modes for real, but for now the only thing I want to accomplish with this patch set is to make the connection between code and formulas clear. Soren ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 1/5] pixman-combine32.c: Formatting fixes
Fix a bunch of spacing issues. --- pixman/pixman-combine32.c | 112 +++--- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c index 3ac7576..be3cfdf 100644 --- a/pixman/pixman-combine32.c +++ b/pixman/pixman-combine32.c @@ -142,12 +142,12 @@ combine_mask (const uint32_t *src, const uint32_t *mask, int i) static void combine_clear (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { -memset (dest, 0, width * sizeof(uint32_t)); +memset (dest, 0, width * sizeof (uint32_t)); } static void @@ -155,7 +155,7 @@ combine_dst (pixman_implementation_t *imp, pixman_op_t op, uint32_t * dest, const uint32_t * src, -const uint32_t * mask, +const uint32_t * mask, int width) { return; @@ -164,9 +164,9 @@ combine_dst (pixman_implementation_t *imp, static void combine_src_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -189,9 +189,9 @@ combine_src_u (pixman_implementation_t *imp, static void combine_over_u (pixman_implementation_t *imp, pixman_op_t op, -uint32_t *dest, -const uint32_t * src, -const uint32_t * mask, +uint32_t * dest, +const uint32_t * src, +const uint32_t * mask, int width) { int i; @@ -254,9 +254,9 @@ combine_over_u (pixman_implementation_t *imp, static void combine_over_reverse_u (pixman_implementation_t *imp, pixman_op_t op, -uint32_t *dest, -const uint32_t * src, -const uint32_t * mask, +uint32_t * dest, +const uint32_t * src, +const uint32_t * mask, int width) { int i; @@ -274,9 +274,9 @@ combine_over_reverse_u (pixman_implementation_t *imp, static void combine_in_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const 
uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -293,9 +293,9 @@ combine_in_u (pixman_implementation_t *imp, static void combine_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -313,9 +313,9 @@ combine_in_reverse_u (pixman_implementation_t *imp, static void combine_out_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const uint32_t * src, + const uint32_t * mask, int width) { int i; @@ -332,9 +332,9 @@ combine_out_u (pixman_implementation_t *imp, static void combine_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op, - uint32_t *dest, - const uint32_t * src, - const uint32_t * mask, + uint32_t * dest, + const
[Pixman] [PATCH 2/5] pixman-combine32: Improve documentation for blend mode operators
This commit overhauls the comments in pixman-combine32.c regarding blend modes: - Add a link to the PDF supplement that clarifies the specification of ColorBurn and ColorDodge - Clarify how the formulas for premultiplied colors are derived from the ones in the PDF specifications - Write out the derivation of the formulas in each blend routine --- pixman/pixman-combine32.c | 330 -- 1 file changed, 204 insertions(+), 126 deletions(-) diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c index be3cfdf..ae9eed4 100644 --- a/pixman/pixman-combine32.c +++ b/pixman/pixman-combine32.c @@ -463,32 +463,59 @@ combine_saturate_u (pixman_implementation_t *imp, } } + /* * PDF blend modes: + * * The following blend modes have been taken from the PDF ISO 32000 * specification, which at this point in time is available from - * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf - * The relevant chapters are 11.3.5 and 11.3.6. + * + * http://www.adobe.com/devnet/pdf/pdf_reference.html + * + * The specific documents of interest are the PDF spec itself: + * + * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf + * + * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat + * 9.1 and Reader 9.1: + * + * http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf + * + * that clarifies the specifications for blend modes ColorDodge and + * ColorBurn. + * * The formula for computing the final pixel color given in 11.3.6 is: - * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) - * with B() being the blend function. - * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs - * - * These blend modes should match the SVG filter draft specification, as - * it has been designed to mirror ISO 32000. 
Note that at the current point - * no released draft exists that shows this, as the formulas have not been - * updated yet after the release of ISO 32000. - * - * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and - * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an - * argument. Note that this implementation operates on premultiplied colors, - * while the PDF specification does not. Therefore the code uses the formula - * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as) + * + * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs) + * + * with B() is the blend function. When B(Cb, Cs) = Cs, this formula + * reduces to the regular OVER operator. + * + * Cs and Cb are not premultiplied, so in our implementation we instead + * use: + * + * cr = (1 – αs) × cb + (1 – αb) × cs + αb × αs × B (cb/αb, cs/αs) + * + * where cr, cs, and cb are premultiplied colors, and where the + * + * αb × αs × B(cb/αb, cs/αs) + * + * part is first arithmetically simplified under the assumption that αb + * and αs are not 0, and then updated to produce a meaningful result when + * they are. 
+ * + * For all the blend mode operators, the alpha channel is given by + * + * αr = αs + αb – αb × αs */ /* * Multiply - * B(Dca, ad, Sca, as) = Dca.Sca + * + * ad * as * B(d / ad, s / as) + *= ad * as * d/ad * s/as + *= d * s + * */ static void combine_multiply_u (pixman_implementation_t *imp, @@ -608,7 +635,10 @@ combine_multiply_ca (pixman_implementation_t *imp, /* * Screen - * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca + * + * ad * as * B(d/ad, s/as) + *= ad * as * (d/ad + s/as - s/as * d/ad) + *= ad * s + as * d - s * d */ static inline uint32_t blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) @@ -620,11 +650,25 @@ PDF_SEPARABLE_BLEND_MODE (screen) /* * Overlay - * B(Dca, Da, Sca, Sa) = - * if 2.Dca < Da - * 2.Sca.Dca - * otherwise - * Sa.Da - 2.(Da - Dca).(Sa - Sca) + * + * ad * as * B(d/ad, s/as) + * = ad * as * Hardlight (s, d) + * = if (d / ad < 0.5) + * as * ad * Multiply (s/as, 2 * d/ad) + * else + * as * ad * Screen (s/as, 2 * d / ad - 1) + * = if (d < 0.5 * ad) + * as * ad * s/as * 2 * d /ad + * else + * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1)) + * = if (2 * d < ad) + * 2 * s * d + * else + * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1) + * = if (2 * d < ad) + * 2 * s * d + * else + * as * ad - 2 * (ad - d) * (as - s) */ static inline uint32_t blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) @@ -642,7 +686,10 @@ PDF_SEPARABLE_BLEND_MODE (overlay) /* * Darken - * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa) + * + * ad * as * B(d/ad, s/as) + * = ad * as * MIN(d/ad, s/as) + * = MIN (as * d, ad * s) */ static inline uint32_t blend_darken (uint32_t dca, uint32_t da, uint32_t sca,
[Pixman] [PATCH 4/5] Make ColorDodge code follow the math closer
Change blend_color_dodge() to follow the math in the comment more closely. Note, the new code here is in some sense worse than the old code because it can now underflow the unsigned variables when the source is superluminescent and (as - s) is therefore negative. The old code was careful to clamp to 0. But for superluminescent variables we really need the ability for the blend function to become negative, and so the solution to the underflow problem is to just use signed variables. The use of unsigned variables is a general problem in all of the blend mode code that will have to be solved later. The CRC32 values in thread-test and blitters-test are updated to account for the changes in output. --- pixman/pixman-combine32.c | 15 +++ test/blitters-test.c | 2 +- test/thread-test.c| 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c index e0a6a98..af059eb 100644 --- a/pixman/pixman-combine32.c +++ b/pixman/pixman-combine32.c @@ -742,15 +742,14 @@ PDF_SEPARABLE_BLEND_MODE (lighten) static inline uint32_t blend_color_dodge (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { -if (s >= as) -{ - return d == 0 ? 
0 : DIV_ONE_UN8 (as * ad); -} +if (d == 0) +return 0; +else if (as * d >= ad * (as - s)) + return DIV_ONE_UN8 (as * ad); +else if (as - s == 0) +return DIV_ONE_UN8 (as * ad); else -{ - uint32_t r = d * as / (as - s); - return DIV_ONE_UN8 (as * MIN (r, ad)); -} +return DIV_ONE_UN8 (as * ((d * as) / ((as - s)))); } PDF_SEPARABLE_BLEND_MODE (color_dodge) diff --git a/test/blitters-test.c b/test/blitters-test.c index 920cbbb..396b5b5 100644 --- a/test/blitters-test.c +++ b/test/blitters-test.c @@ -394,6 +394,6 @@ main (int argc, const char *argv[]) } return fuzzer_test_main(blitters, 200, - 0xAC8FDA98, + 0x6A783AD5, test_composite, argc, argv); } diff --git a/test/thread-test.c b/test/thread-test.c index f24c31d..a0c7819 100644 --- a/test/thread-test.c +++ b/test/thread-test.c @@ -181,7 +181,7 @@ main (void) crc32 = compute_crc32 (0, crc32s, sizeof crc32s); -#define EXPECTED 0xFD497D8D +#define EXPECTED 0x12F4B484 if (crc32 != EXPECTED) { -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 3/5] pixman-combine32: Rename a number of variables from sa/sca to as/s
There are no semantic changes, just variables renames. The motivation for these renames is so that the names are shorter and better match the one used in the comments. --- pixman/pixman-combine32.c | 199 +++--- 1 file changed, 99 insertions(+), 100 deletions(-) diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c index ae9eed4..e0a6a98 100644 --- a/pixman/pixman-combine32.c +++ b/pixman/pixman-combine32.c @@ -641,9 +641,9 @@ combine_multiply_ca (pixman_implementation_t *imp, *= ad * s + as * d - s * d */ static inline uint32_t -blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_screen (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { -return DIV_ONE_UN8 (sca * da + dca * sa - sca * dca); +return DIV_ONE_UN8 (s * ad + d * as - s * d); } PDF_SEPARABLE_BLEND_MODE (screen) @@ -671,15 +671,16 @@ PDF_SEPARABLE_BLEND_MODE (screen) * as * ad - 2 * (ad - d) * (as - s) */ static inline uint32_t -blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_overlay (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { -uint32_t rca; +uint32_t r; -if (2 * dca da) - rca = 2 * sca * dca; +if (2 * d ad) + r = 2 * s * d; else - rca = sa * da - 2 * (da - dca) * (sa - sca); -return DIV_ONE_UN8 (rca); + r = as * ad - 2 * (ad - d) * (as - s); + +return DIV_ONE_UN8 (r); } PDF_SEPARABLE_BLEND_MODE (overlay) @@ -692,12 +693,11 @@ PDF_SEPARABLE_BLEND_MODE (overlay) * = MIN (as * d, ad * s) */ static inline uint32_t -blend_darken (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_darken (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { -uint32_t s, d; +s = ad * s; +d = as * d; -s = sca * da; -d = dca * sa; return DIV_ONE_UN8 (s d ? 
d : s); } @@ -711,12 +711,11 @@ PDF_SEPARABLE_BLEND_MODE (darken) * = MAX (as * d, ad * s) */ static inline uint32_t -blend_lighten (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_lighten (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { -uint32_t s, d; - -s = sca * da; -d = dca * sa; +s = ad * s; +d = as * d; + return DIV_ONE_UN8 (s d ? s : d); } @@ -741,16 +740,16 @@ PDF_SEPARABLE_BLEND_MODE (lighten) * */ static inline uint32_t -blend_color_dodge (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_color_dodge (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { -if (sca = sa) +if (s = as) { - return dca == 0 ? 0 : DIV_ONE_UN8 (sa * da); + return d == 0 ? 0 : DIV_ONE_UN8 (as * ad); } else { - uint32_t rca = dca * sa / (sa - sca); - return DIV_ONE_UN8 (sa * MIN (rca, da)); + uint32_t r = d * as / (as - s); + return DIV_ONE_UN8 (as * MIN (r, ad)); } } @@ -777,16 +776,16 @@ PDF_SEPARABLE_BLEND_MODE (color_dodge) * ad * as - as * as * (ad - d) / s */ static inline uint32_t -blend_color_burn (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_color_burn (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { -if (sca == 0) +if (s == 0) { - return dca da ? 0 : DIV_ONE_UN8 (sa * da); + return d ad ? 
0 : DIV_ONE_UN8 (as * ad); } else { - uint32_t rca = (da - dca) * sa / sca; - return DIV_ONE_UN8 (sa * (MAX (rca, da) - rca)); + uint32_t r = (ad - d) * as / s; + return DIV_ONE_UN8 (as * (MAX (r, ad) - r)); } } @@ -810,12 +809,12 @@ PDF_SEPARABLE_BLEND_MODE (color_burn) * as * ad - 2 * (ad - d) * (as - s) */ static inline uint32_t -blend_hard_light (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa) +blend_hard_light (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { -if (2 * sca sa) - return DIV_ONE_UN8 (2 * sca * dca); +if (2 * s as) + return DIV_ONE_UN8 (2 * s * d); else - return DIV_ONE_UN8 (sa * da - 2 * (da - dca) * (sa - sca)); + return DIV_ONE_UN8 (as * ad - 2 * (ad - d) * (as - s)); } PDF_SEPARABLE_BLEND_MODE (hard_light) @@ -838,38 +837,38 @@ PDF_SEPARABLE_BLEND_MODE (hard_light) * d * as + (sqrt (d * ad) - d) * (2 * s - as); */ static inline uint32_t -blend_soft_light (uint32_t dca_org, - uint32_t da_org, - uint32_t sca_org, - uint32_t sa_org) -{ -double dca = dca_org * (1.0 / MASK); -double da = da_org * (1.0 / MASK); -double sca = sca_org * (1.0 / MASK); -double sa = sa_org * (1.0 / MASK); -double rca; - -if (2 * sca sa) +blend_soft_light (uint32_t d_org, + uint32_t ad_org, + uint32_t s_org, + uint32_t as_org) +{ +double d = d_org * (1.0 / MASK); +double ad = ad_org * (1.0 / MASK); +double s = s_org * (1.0 / MASK); +double as = as_org * (1.0 / MASK); +double r; + +if (2 * s as) { - if (da ==
[Pixman] [PATCH 5/5] Make code for color burn follow the math more closely
For superluminescent destinations, the old code could underflow in uint32_t r = (ad - d) * as / s; when (ad - d) was negative. The new code avoids this problem (and therefore causes changes in the checksums of thread-test and blitters-test), but it is likely still buggy due to the use of unsigned variables and other issues in the blend mode code. --- pixman/pixman-combine32.c | 15 +++ test/blitters-test.c | 2 +- test/thread-test.c| 2 +- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c index af059eb..54946cc 100644 --- a/pixman/pixman-combine32.c +++ b/pixman/pixman-combine32.c @@ -777,15 +777,14 @@ PDF_SEPARABLE_BLEND_MODE (color_dodge) static inline uint32_t blend_color_burn (uint32_t d, uint32_t ad, uint32_t s, uint32_t as) { -if (s == 0) -{ - return d < ad ? 0 : DIV_ONE_UN8 (as * ad); -} +if (d >= ad) + return DIV_ONE_UN8 (ad * as); +else if (as * ad - as * d >= ad * s) + return 0; +else if (s == 0) + return 0; else -{ - uint32_t r = (ad - d) * as / s; - return DIV_ONE_UN8 (as * (MAX (r, ad) - r)); -} + return DIV_ONE_UN8 (ad * as - (as * as * (ad - d)) / s); } PDF_SEPARABLE_BLEND_MODE (color_burn) diff --git a/test/blitters-test.c b/test/blitters-test.c index 396b5b5..ea03f47 100644 --- a/test/blitters-test.c +++ b/test/blitters-test.c @@ -394,6 +394,6 @@ main (int argc, const char *argv[]) } return fuzzer_test_main(blitters, 200, - 0x6A783AD5, + 0xE0A07495, test_composite, argc, argv); } diff --git a/test/thread-test.c b/test/thread-test.c index a0c7819..71b84f0 100644 --- a/test/thread-test.c +++ b/test/thread-test.c @@ -181,7 +181,7 @@ main (void) crc32 = compute_crc32 (0, crc32s, sizeof crc32s); -#define EXPECTED 0x12F4B484 +#define EXPECTED 0xE299B18E if (crc32 != EXPECTED) { -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 2/3] Move generated affine fetchers into pixman-fast-path.c
From: Søren Sandmann Pedersen s...@redhat.com The generated fetchers for NEAREST, BILINEAR, and SEPARABLE_CONVOLUTION filters are fast paths and so they belong in pixman-fast-path.c --- pixman/pixman-bits-image.c | 530 pixman/pixman-fast-path.c | 530 2 files changed, 530 insertions(+), 530 deletions(-) diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c index 35247f9..f9121a3 100644 --- a/pixman/pixman-bits-image.c +++ b/pixman/pixman-bits-image.c @@ -505,472 +505,6 @@ bits_image_fetch_general (pixman_iter_t *iter, return buffer; } -typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x); - -static force_inline void -bits_image_fetch_separable_convolution_affine (pixman_image_t * image, - int offset, - int line, - int width, - uint32_t * buffer, - const uint32_t * mask, - - convert_pixel_t convert_pixel, - pixman_format_code_t format, - pixman_repeat_t repeat_mode) -{ -bits_image_t *bits = image-bits; -pixman_fixed_t *params = image-common.filter_params; -int cwidth = pixman_fixed_to_int (params[0]); -int cheight = pixman_fixed_to_int (params[1]); -int x_off = ((cwidth 16) - pixman_fixed_1) 1; -int y_off = ((cheight 16) - pixman_fixed_1) 1; -int x_phase_bits = pixman_fixed_to_int (params[2]); -int y_phase_bits = pixman_fixed_to_int (params[3]); -int x_phase_shift = 16 - x_phase_bits; -int y_phase_shift = 16 - y_phase_bits; -pixman_fixed_t vx, vy; -pixman_fixed_t ux, uy; -pixman_vector_t v; -int k; - -/* reference point is the center of the pixel */ -v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2; -v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2; -v.vector[2] = pixman_fixed_1; - -if (!pixman_transform_point_3d (image-common.transform, v)) - return; - -ux = image-common.transform-matrix[0][0]; -uy = image-common.transform-matrix[1][0]; - -vx = v.vector[0]; -vy = v.vector[1]; - -for (k = 0; k width; ++k) -{ - pixman_fixed_t *y_params; - int satot, srtot, sgtot, sbtot; - pixman_fixed_t x, y; - int32_t x1, x2, y1, 
y2; - int32_t px, py; - int i, j; - - if (mask !mask[k]) - goto next; - - /* Round x and y to the middle of the closest phase before continuing. This -* ensures that the convolution matrix is aligned right, since it was -* positioned relative to a particular phase (and not relative to whatever -* exact fraction we happen to get here). -*/ - x = ((vx x_phase_shift) x_phase_shift) + ((1 x_phase_shift) 1); - y = ((vy y_phase_shift) y_phase_shift) + ((1 y_phase_shift) 1); - - px = (x 0x) x_phase_shift; - py = (y 0x) y_phase_shift; - - x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off); - y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off); - x2 = x1 + cwidth; - y2 = y1 + cheight; - - satot = srtot = sgtot = sbtot = 0; - - y_params = params + 4 + (1 x_phase_bits) * cwidth + py * cheight; - - for (i = y1; i y2; ++i) - { - pixman_fixed_t fy = *y_params++; - - if (fy) - { - pixman_fixed_t *x_params = params + 4 + px * cwidth; - - for (j = x1; j x2; ++j) - { - pixman_fixed_t fx = *x_params++; - int rx = j; - int ry = i; - - if (fx) - { - pixman_fixed_t f; - uint32_t pixel, mask; - uint8_t *row; - - mask = PIXMAN_FORMAT_A (format)? 0 : 0xff00; - - if (repeat_mode != PIXMAN_REPEAT_NONE) - { - repeat (repeat_mode, rx, bits-width); - repeat (repeat_mode, ry, bits-height); - - row = (uint8_t *)bits-bits + bits-rowstride * 4 * ry; - pixel = convert_pixel (row, rx) | mask; - } - else - { - if (rx 0 || ry 0 || rx = bits-width || ry = bits-height) - { - pixel = 0; - } - else - { - row
[Pixman] [PATCH 1/3] {scaling, affine, composite-traps}-test: Use compute_crc32_for_image()
By using this function instead of compute_crc32() the alpha masking code and the call to image_endian_swap() are not duplicated. --- test/affine-test.c | 12 ++-- test/composite-traps-test.c | 11 +-- test/scaling-test.c | 12 ++-- 3 files changed, 5 insertions(+), 30 deletions(-) diff --git a/test/affine-test.c b/test/affine-test.c index c1649ed..3a37d7f 100644 --- a/test/affine-test.c +++ b/test/affine-test.c @@ -273,15 +273,8 @@ test_composite (int testnum, pixman_image_composite (op, src_img, NULL, dst_img, src_x, src_y, 0, 0, dst_x, dst_y, w, h); -if (dst_fmt == PIXMAN_x8r8g8b8) -{ - /* ignore unused part */ - for (i = 0; i dst_stride * dst_height / 4; i++) - dstbuf[i] = 0xFF; -} - -image_endian_swap (dst_img); - +crc32 = compute_crc32_for_image (0, dst_img); + if (verbose) { int j; @@ -298,7 +291,6 @@ test_composite (int testnum, pixman_image_unref (src_img); pixman_image_unref (dst_img); -crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height); free (srcbuf); free (dstbuf); diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c index 2983eae..34ae340 100644 --- a/test/composite-traps-test.c +++ b/test/composite-traps-test.c @@ -214,14 +214,7 @@ test_composite (int testnum, pixman_composite_trapezoids (op, src_img, dst_img, mask_format, src_x, src_y, dst_x, dst_y, n_traps, traps); -if (dst_format == PIXMAN_x8r8g8b8) -{ - /* ignore unused part */ - for (i = 0; i dst_stride * dst_height / 4; i++) - dst_bits[i] = 0xFF; -} - -image_endian_swap (dst_img); +crc32 = compute_crc32_for_image (0, dst_img); if (verbose) { @@ -236,8 +229,6 @@ test_composite (int testnum, } } -crc32 = compute_crc32 (0, dst_bits, dst_stride * dst_height); - fence_free (dst_bits); pixman_image_unref (src_img); diff --git a/test/scaling-test.c b/test/scaling-test.c index b4142a7..04ecb63 100644 --- a/test/scaling-test.c +++ b/test/scaling-test.c @@ -340,15 +340,8 @@ test_composite (int testnum, pixman_image_composite (op, src_img, mask_img, dst_img, src_x, src_y, mask_x, 
mask_y, dst_x, dst_y, w, h); -if (dst_fmt == PIXMAN_x8r8g8b8 || dst_fmt == PIXMAN_x8b8g8r8) -{ - /* ignore unused part */ - for (i = 0; i dst_stride * dst_height / 4; i++) - dstbuf[i] = 0xFF; -} - -image_endian_swap (dst_img); - +crc32 = compute_crc32_for_image (0, dst_img); + if (verbose) { int j; @@ -366,7 +359,6 @@ test_composite (int testnum, pixman_image_unref (mask_img); pixman_image_unref (dst_img); -crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height); free (srcbuf); free (maskbuf); free (dstbuf); -- 1.7.1 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 3/3] test: Test negative strides
Pixman supports negative strides, but up until now they haven't been tested outside of stress-test. This commit adds testing of negative strides to blitters-test, scaling-test, affine-test, rotate-test, and composite-traps-test. --- test/affine-test.c | 22 -- test/blitters-test.c| 12 +++- test/composite-traps-test.c | 23 +++ test/rotate-test.c | 17 + test/scaling-test.c | 31 +-- test/utils.c|6 ++ 6 files changed, 98 insertions(+), 13 deletions(-) diff --git a/test/affine-test.c b/test/affine-test.c index 03d296f..8e19023 100644 --- a/test/affine-test.c +++ b/test/affine-test.c @@ -80,6 +80,18 @@ test_composite (int testnum, prng_randmemset (srcbuf, src_stride * src_height, 0); prng_randmemset (dstbuf, dst_stride * dst_height, 0); +if (prng_rand_n (2) == 0) +{ + srcbuf += (src_stride / 4) * (src_height - 1); + src_stride = - src_stride; +} + +if (prng_rand_n (2) == 0) +{ + dstbuf += (dst_stride / 4) * (dst_height - 1); + dst_stride = - dst_stride; +} + src_fmt = src_bpp == 4 ? (prng_rand_n (2) == 0 ? 
PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5; @@ -281,6 +293,12 @@ test_composite (int testnum, pixman_image_unref (src_img); pixman_image_unref (dst_img); +if (src_stride 0) + srcbuf += (src_stride / 4) * (src_height - 1); + +if (dst_stride 0) + dstbuf += (dst_stride / 4) * (dst_height - 1); + free (srcbuf); free (dstbuf); @@ -289,9 +307,9 @@ test_composite (int testnum, } #if BILINEAR_INTERPOLATION_BITS == 7 -#define CHECKSUM 0xBC00B1DF +#define CHECKSUM 0xBE724CFE #elif BILINEAR_INTERPOLATION_BITS == 4 -#define CHECKSUM 0xA227306B +#define CHECKSUM 0x79BBE501 #else #define CHECKSUM 0x #endif diff --git a/test/blitters-test.c b/test/blitters-test.c index 2120daf..af94835 100644 --- a/test/blitters-test.c +++ b/test/blitters-test.c @@ -57,6 +57,13 @@ create_random_image (pixman_format_code_t *allowed_formats, prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF); } +/* test negative stride */ +if (prng_rand_n (4) == 0) +{ + buf += (stride / 4) * (height - 1); + stride = - stride; +} + img = pixman_image_create_bits (fmt, width, height, buf, stride); if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_COLOR) @@ -89,6 +96,9 @@ free_random_image (uint32_t initcrc, if (fmt != PIXMAN_null) crc32 = compute_crc32_for_image (initcrc, img); +if (img-bits.rowstride 0) + data += img-bits.rowstride * (img-bits.height - 1); + pixman_image_unref (img); free (data); @@ -385,6 +395,6 @@ main (int argc, const char *argv[]) } return fuzzer_test_main(blitters, 200, - 0x0CF3283B, + 0xAC8FDA98, test_composite, argc, argv); } diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c index 44d5012..86a0355 100644 --- a/test/composite-traps-test.c +++ b/test/composite-traps-test.c @@ -97,19 +97,25 @@ test_composite (int testnum, int src_width = prng_rand_n (MAX_SRC_WIDTH) + 1; int src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1; int src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * src_bpp; - uint32_t *bits; + uint32_t *bits, *orig; src_x = -(src_width / 
4) + prng_rand_n (src_width * 3 / 2); src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2); src_stride = (src_stride + 3) ~3; - bits = (uint32_t *)make_random_bytes (src_stride * src_height); + orig = bits = (uint32_t *)make_random_bytes (src_stride * src_height); + if (prng_rand_n (2) == 0) + { + bits += (src_stride / 4) * (src_height - 1); + src_stride = - src_stride; + } + src_img = pixman_image_create_bits ( src_format, src_width, src_height, bits, src_stride); - pixman_image_set_destroy_function (src_img, destroy_bits, bits); + pixman_image_set_destroy_function (src_img, destroy_bits, orig); if (prng_rand_n (8) == 0) { @@ -153,6 +159,12 @@ test_composite (int testnum, dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height); + if (prng_rand_n (2) == 0) + { + dst_bits += (dst_stride / 4) * (dst_height - 1); + dst_stride = - dst_stride; + } + dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2); dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2); @@ -219,6 +231,9 @@ test_composite (int testnum, if (verbose) print_image (dst_img); +if (dst_stride 0) + dst_bits +=
[Pixman] [PATCH 2/3] test: Share the image printing code
The affine-test, blitters-test, and scaling-test all have the ability to print out the bytes of the destination image. Share this code by moving it to utils.c. At the same time make the code work correctly with negative strides. --- test/affine-test.c | 12 +--- test/blitters-test.c| 19 +-- test/composite-traps-test.c | 12 +--- test/scaling-test.c | 12 +--- test/utils.c| 32 test/utils.h|4 6 files changed, 40 insertions(+), 51 deletions(-) diff --git a/test/affine-test.c b/test/affine-test.c index 3a37d7f..03d296f 100644 --- a/test/affine-test.c +++ b/test/affine-test.c @@ -276,17 +276,7 @@ test_composite (int testnum, crc32 = compute_crc32_for_image (0, dst_img); if (verbose) -{ - int j; - - for (i = 0; i dst_height; i++) - { - for (j = 0; j dst_stride; j++) - printf (%02X , *((uint8_t *)dstbuf + i * dst_stride + j)); - - printf (\n); - } -} + print_image (dst_img); pixman_image_unref (src_img); pixman_image_unref (dst_img); diff --git a/test/blitters-test.c b/test/blitters-test.c index a2c6ff4..2120daf 100644 --- a/test/blitters-test.c +++ b/test/blitters-test.c @@ -222,7 +222,6 @@ static pixman_format_code_t mask_fmt_list[] = { uint32_t test_composite (int testnum, int verbose) { -int i; pixman_image_t *src_img = NULL; pixman_image_t *dst_img = NULL; pixman_image_t *mask_img = NULL; @@ -355,23 +354,7 @@ test_composite (int testnum, int verbose) src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h); if (verbose) -{ - int j; - - printf (---\n); - for (i = 0; i dst_height; i++) - { - for (j = 0; j dst_stride; j++) - { - if (j == (dst_width * PIXMAN_FORMAT_BPP (dst_fmt) + 7) / 8) - printf (| ); - - printf (%02X , *((uint8_t *)dstbuf + i * dst_stride + j)); - } - printf (\n); - } - printf (---\n); -} + print_image (dst_img); free_random_image (0, src_img, PIXMAN_null); crc32 = free_random_image (0, dst_img, dst_fmt); diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c index 34ae340..44d5012 100644 --- a/test/composite-traps-test.c +++ 
b/test/composite-traps-test.c @@ -217,17 +217,7 @@ test_composite (int testnum, crc32 = compute_crc32_for_image (0, dst_img); if (verbose) -{ - int j; - - for (i = 0; i dst_height; i++) - { - for (j = 0; j dst_stride; j++) - printf (%02X , *((uint8_t *)dst_bits + i * dst_stride + j)); - - printf (\n); - } -} + print_image (dst_img); fence_free (dst_bits); diff --git a/test/scaling-test.c b/test/scaling-test.c index 04ecb63..0778d2d 100644 --- a/test/scaling-test.c +++ b/test/scaling-test.c @@ -343,17 +343,7 @@ test_composite (int testnum, crc32 = compute_crc32_for_image (0, dst_img); if (verbose) -{ - int j; - - for (i = 0; i dst_height; i++) - { - for (j = 0; j dst_stride; j++) - printf (%02X , *((uint8_t *)dstbuf + i * dst_stride + j)); - - printf (\n); - } -} + print_image (dst_img); pixman_image_unref (src_img); pixman_image_unref (mask_img); diff --git a/test/utils.c b/test/utils.c index 3d1ba22..a693f30 100644 --- a/test/utils.c +++ b/test/utils.c @@ -238,6 +238,38 @@ compute_crc32_for_image (uint32_tcrc32, return crc32; } +void +print_image (pixman_image_t *image) +{ +int i, j; +int width, height, stride; +pixman_format_code_t format; +uint8_t *buffer; + +width = pixman_image_get_width (image); +height = pixman_image_get_height (image); +stride = pixman_image_get_stride (image); +format = pixman_image_get_format (image); +buffer = (uint8_t *)pixman_image_get_data (image); + +if (stride 0) + stride = - stride; + +printf (---\n); +for (i = 0; i height; i++) +{ + for (j = 0; j stride; j++) + { + if (j == (width * PIXMAN_FORMAT_BPP (format) + 7) / 8) + printf (| ); + + printf (%02X , *((uint8_t *)buffer + i * stride + j)); + } + printf (\n); +} +printf (---\n); +} + /* perform endian conversion of pixel data */ void diff --git a/test/utils.h b/test/utils.h index c278151..28b7193 100644 --- a/test/utils.h +++ b/test/utils.h @@ -63,6 +63,10 @@ uint32_t compute_crc32_for_image (uint32_tin_crc32, pixman_image_t *image); +/* Print the image in hexadecimal */ +void 
+print_image (pixman_image_t *image); + /* Returns TRUE if running on a little endian system */ static force_inline pixman_bool_t --
[Pixman] [PATCHv2 0/3] SSSE3 iterator for bilinear scaling
Here is a new version of the bilinear scaler that addresses Matt's and Siarhei's comments and also uses movdqu instead of movdqa for the writes to iter->buffer. This ensures that the iterator doesn't impose new alignment restrictions that could interfere with the direct-to-destination optimizations. Even with movdqu there is still a benefit from aligning the iter->buffer, so I'm keeping the patch that aligns the buffers in pixman-general. Soren ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCHv2 3/3] ssse3: Add iterator for separable bilinear scaling
This new iterator uses the SSSE3 instructions pmaddubsw and pabsw to implement a fast iterator for bilinear scaling. There is a graph here recording the per-pixel time for various bilinear scaling algorithms as reported by scaling-bench: http://people.freedesktop.org/~sandmann/ssse3.v2/ssse3.v2.png As the graph shows, this new iterator is clearly faster than the existing C iterator, and when used with an SSE2 combiner, it is also faster than the existing SSE2 fast paths for upscaling, though not for downscaling. Another graph: http://people.freedesktop.org/~sandmann/ssse3.v2/movdqu.png shows the difference between writing to iter->buffer with movdqa, movdqu on an aligned buffer, and movdqu on a deliberately unaligned buffer. Since the differences are very small, the patch here avoids using movdqa because imposing alignment restrictions on iter->buffer may interfere with other optimizations, such as writing directly to the destination image. The data was measured with scaling-bench on a Sandy Bridge Core i3-2350M @ 2.3GHz and is available in this directory: http://people.freedesktop.org/~sandmann/ssse3.v2/ where there is also a Gnumeric spreadsheet ssse3.v2.gnumeric containing the per-pixel values and the graph. V2: - Use uintptr_t instead of unsigned long in the ALIGN macro - Use _mm_storel_epi64 instead of _mm_cvtsi128_si64 as the latter form is not available on x86-32.
- Use _mm_storeu_si128() instead of _mm_store_si128() to avoid imposing alignment requirements on iter-buffer --- pixman/pixman-ssse3.c | 312 ++ 1 file changed, 312 insertions(+) diff --git a/pixman/pixman-ssse3.c b/pixman/pixman-ssse3.c index 19d71e7..34763e2 100644 --- a/pixman/pixman-ssse3.c +++ b/pixman/pixman-ssse3.c @@ -35,6 +35,316 @@ #include pixman-private.h #include pixman-inlines.h +typedef struct +{ +inty; +uint64_t * buffer; +} line_t; + +typedef struct +{ +line_t line0; +line_t line1; +pixman_fixed_t y; +pixman_fixed_t x; +uint64_t data[1]; +} bilinear_info_t; + +static void +ssse3_fetch_horizontal (bits_image_t *image, line_t *line, + int y, pixman_fixed_t x, pixman_fixed_t ux, int n) +{ +uint32_t *bits = image-bits + y * image-rowstride; +__m128i vx = _mm_set_epi16 ( + - (x + 1), x, - (x + 1), x, + - (x + ux + 1), x + ux, - (x + ux + 1), x + ux); +__m128i vux = _mm_set_epi16 ( + - 2 * ux, 2 * ux, - 2 * ux, 2 * ux, + - 2 * ux, 2 * ux, - 2 * ux, 2 * ux); +__m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0); +__m128i *b = (__m128i *)line-buffer; +__m128i vrl0, vrl1; + +while ((n -= 2) = 0) +{ + __m128i vw, vr, s; + + vrl1 = _mm_loadl_epi64 ( + (__m128i *)(bits + pixman_fixed_to_int (x + ux))); + /* vrl1: R1, L1 */ + +final_pixel: + vrl0 = _mm_loadl_epi64 ( + (__m128i *)(bits + pixman_fixed_to_int (x))); + /* vrl0: R0, L0 */ + + /* The weights are based on vx which is a vector of +* +*- (x + 1), x, - (x + 1), x, +* - (x + ux + 1), x + ux, - (x + ux + 1), x + ux +* +* so the 16 bit weights end up like this: +* +*iw0, w0, iw0, w0, iw1, w1, iw1, w1 +* +* and after shifting and packing, we get these bytes: +* +*iw0, w0, iw0, w0, iw1, w1, iw1, w1, +*iw0, w0, iw0, w0, iw1, w1, iw1, w1, +* +* which means the first and the second input pixel +* have to be interleaved like this: +* +*la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, +*lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 +* +* before maddubsw can be used. 
+*/ + + vw = _mm_add_epi16 ( + vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS)); + /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1 +*/ + + vw = _mm_packus_epi16 (vw, vw); + /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1, +* iw0, w0, iw0, w0, iw1, w1, iw1, w1 +*/ + vx = _mm_add_epi16 (vx, vux); + + x += 2 * ux; + + vr = _mm_unpacklo_epi16 (vrl1, vrl0); + /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */ + + s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2)); + /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */ + + vr = _mm_unpackhi_epi8 (vr, s); + /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, +* lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 +*/ + + vr = _mm_maddubs_epi16 (vr, vw); + + /* When the weight is 0, the inverse weight is +* 128 which can't be represented in a signed byte. +* As a result maddubsw computes the following: +* +* r = l * -128 + r * 0 +* +* rather than the desired +* +* r = l * 128 + r
[Pixman] [PATCHv2 1/3] general: Ensure that iter buffers are aligned to 16 bytes
At the moment iter buffers are only guaranteed to be aligned to a 4 byte boundary. SIMD implementations benefit from the buffers being aligned to 16 bytes, so ensure this is the case. V2: - Use uintptr_t instead of unsigned long - allocate 3 * SCANLINE_BUFFER_LENGTH byte on stack rather than just SCANLINE_BUFFER_LENGTH - use sizeof (stack_scanline_buffer) instead of SCANLINE_BUFFER_LENGTH to determine overflow --- pixman/pixman-general.c | 22 +++--- pixman/pixman-private.h | 3 +++ pixman/pixman-utils.c | 9 + 3 files changed, 27 insertions(+), 7 deletions(-) diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c index 6310bff..a653fa7 100644 --- a/pixman/pixman-general.c +++ b/pixman/pixman-general.c @@ -114,7 +114,7 @@ general_composite_rect (pixman_implementation_t *imp, pixman_composite_info_t *info) { PIXMAN_COMPOSITE_ARGS (info); -uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8]; +uint8_t stack_scanline_buffer[3 * SCANLINE_BUFFER_LENGTH]; uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer; uint8_t *src_buffer, *mask_buffer, *dest_buffer; pixman_iter_t src_iter, mask_iter, dest_iter; @@ -137,17 +137,25 @@ general_composite_rect (pixman_implementation_t *imp, Bpp = 16; } -if (width * Bpp SCANLINE_BUFFER_LENGTH) +#define ALIGN(addr)\ +((uint8_t *)uintptr_t)(addr)) + 15) (~15))) + +src_buffer = ALIGN (scanline_buffer); +mask_buffer = ALIGN (src_buffer + width * Bpp); +dest_buffer = ALIGN (mask_buffer + width * Bpp); + +if (ALIGN (dest_buffer + width * Bpp) + scanline_buffer + sizeof (stack_scanline_buffer)) { - scanline_buffer = pixman_malloc_abc (width, 3, Bpp); + scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 32 * 3); if (!scanline_buffer) return; -} -src_buffer = scanline_buffer; -mask_buffer = src_buffer + width * Bpp; -dest_buffer = mask_buffer + width * Bpp; + src_buffer = ALIGN (scanline_buffer); + mask_buffer = ALIGN (src_buffer + width * Bpp); + dest_buffer = ALIGN (mask_buffer + width * Bpp); +} if 
(width_flag == ITER_WIDE) { diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index 120196d..535117d 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -787,6 +787,9 @@ pixman_malloc_ab (unsigned int n, unsigned int b); void * pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c); +void * +pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c); + pixman_bool_t _pixman_multiply_overflows_size (size_t a, size_t b); diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c index 98723a8..4a3a835 100644 --- a/pixman/pixman-utils.c +++ b/pixman/pixman-utils.c @@ -49,6 +49,15 @@ _pixman_addition_overflows_int (unsigned int a, unsigned int b) } void * +pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c) +{ +if (!b || a = INT32_MAX / b || (a * b) INT32_MAX - c) + return NULL; + +return malloc (a * b + c); +} + +void * pixman_malloc_ab (unsigned int a, unsigned int b) { -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCHv2 2/3] Add empty SSSE3 implementation
This commit adds a new, empty SSSE3 implementation and the associated build system support. configure.ac: detect whether the compiler understands SSSE3 intrinsics and set up the required CFLAGS Makefile.am:Add libpixman-ssse3.la pixman-x86.c: Add X86_SSSE3 feature flag and detect it in detect_cpu_features(). pixman-ssse3.c: New file with an empty SSSE3 implementation V2: Remove SSSE3_LDFLAGS since it isn't necessary unless Solaris support is added. --- configure.ac| 45 pixman/Makefile.am | 12 pixman/pixman-private.h | 5 + pixman/pixman-ssse3.c | 50 + pixman/pixman-x86.c | 15 +-- 5 files changed, 125 insertions(+), 2 deletions(-) create mode 100644 pixman/pixman-ssse3.c diff --git a/configure.ac b/configure.ac index daf4062..263c63e 100644 --- a/configure.ac +++ b/configure.ac @@ -437,6 +437,50 @@ fi AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes) dnl === +dnl Check for SSSE3 + +if test x$SSSE3_CFLAGS = x ; then +SSSE3_CFLAGS=-mssse3 -Winline +fi + +have_ssse3_intrinsics=no +AC_MSG_CHECKING(whether to use SSSE3 intrinsics) +xserver_save_CFLAGS=$CFLAGS +CFLAGS=$SSSE3_CFLAGS $CFLAGS + +AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ +#include mmintrin.h +#include xmmintrin.h +#include emmintrin.h +#include tmmintrin.h +int main () { +__m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c; +c = _mm_maddubs_epi16 (a, b); +return 0; +}]])], have_ssse3_intrinsics=yes) +CFLAGS=$xserver_save_CFLAGS + +AC_ARG_ENABLE(ssse3, + [AC_HELP_STRING([--disable-ssse3], + [disable SSSE3 fast paths])], + [enable_ssse3=$enableval], [enable_ssse3=auto]) + +if test $enable_ssse3 = no ; then + have_ssse3_intrinsics=disabled +fi + +if test $have_ssse3_intrinsics = yes ; then + AC_DEFINE(USE_SSSE3, 1, [use SSSE3 compiler intrinsics]) +fi + +AC_MSG_RESULT($have_ssse3_intrinsics) +if test $enable_ssse3 = yes test $have_ssse3_intrinsics = no ; then + AC_MSG_ERROR([SSSE3 intrinsics not detected]) +fi + +AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes) + +dnl === dnl Other special 
flags needed when building code using MMX or SSE instructions case $host_os in solaris*) @@ -471,6 +515,7 @@ AC_SUBST(MMX_CFLAGS) AC_SUBST(MMX_LDFLAGS) AC_SUBST(SSE2_CFLAGS) AC_SUBST(SSE2_LDFLAGS) +AC_SUBST(SSSE3_CFLAGS) dnl === dnl Check for VMX/Altivec diff --git a/pixman/Makefile.am b/pixman/Makefile.am index b9ea754..b376d9a 100644 --- a/pixman/Makefile.am +++ b/pixman/Makefile.am @@ -52,6 +52,18 @@ libpixman_1_la_LIBADD += libpixman-sse2.la ASM_CFLAGS_sse2=$(SSE2_CFLAGS) endif +# ssse3 code +if USE_SSSE3 +noinst_LTLIBRARIES += libpixman-ssse3.la +libpixman_ssse3_la_SOURCES = \ + pixman-ssse3.c +libpixman_ssse3_la_CFLAGS = $(SSSE3_CFLAGS) +libpixman_1_la_LDFLAGS += $(SSSE3_LDFLAGS) +libpixman_1_la_LIBADD += libpixman-ssse3.la + +ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS) +endif + # arm simd code if USE_ARM_SIMD noinst_LTLIBRARIES += libpixman-arm-simd.la diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index 535117d..6ca13b2 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -593,6 +593,11 @@ pixman_implementation_t * _pixman_implementation_create_sse2 (pixman_implementation_t *fallback); #endif +#ifdef USE_SSSE3 +pixman_implementation_t * +_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback); +#endif + #ifdef USE_ARM_SIMD pixman_implementation_t * _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback); diff --git a/pixman/pixman-ssse3.c b/pixman/pixman-ssse3.c new file mode 100644 index 000..19d71e7 --- /dev/null +++ b/pixman/pixman-ssse3.c @@ -0,0 +1,50 @@ +/* + * Copyright © 2013 Soren Sandmann Pedersen + * Copyright © 2013 Red Hat, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
[Pixman] [PATCH 2/2] ssse3: Add iterator for separable bilinear scaling
This new iterator uses the SSSE3 instructions pmaddubsw and pabsw to implement a fast iterator for bilinear scaling. There is a graph here recording the per-pixel time for various bilinear scaling algorithms as reported by scaling-bench: http://people.freedesktop.org/~sandmann/ssse3/ssse3.png As the graph shows, this new iterator is clearly faster than the existing C iterator, and when used with an SSE2 combiner, it is also faster than the existing SSE2 fast paths except for the lowest scaling ratios. The data was measured on an Ivy Bridge i7-3520M @ 2.0GHz and is available in this directory: http://people.freedesktop.org/~sandmann/ssse3/ where there is also a Gnumeric spreadsheet ssse3.gnumeric containing the per-pixel values and the graph. --- pixman/pixman-ssse3.c | 312 + 1 files changed, 312 insertions(+), 0 deletions(-) diff --git a/pixman/pixman-ssse3.c b/pixman/pixman-ssse3.c index 19d71e7..98d929b 100644 --- a/pixman/pixman-ssse3.c +++ b/pixman/pixman-ssse3.c @@ -35,6 +35,316 @@ #include pixman-private.h #include pixman-inlines.h +typedef struct +{ +inty; +uint64_t * buffer; +} line_t; + +typedef struct +{ +line_t line0; +line_t line1; +pixman_fixed_t y; +pixman_fixed_t x; +uint64_t data[1]; +} bilinear_info_t; + +static void +ssse3_fetch_horizontal (bits_image_t *image, line_t *line, + int y, pixman_fixed_t x, pixman_fixed_t ux, int n) +{ +uint32_t *bits = image-bits + y * image-rowstride; +__m128i vx = _mm_set_epi16 ( + - (x + 1), x, - (x + 1), x, + - (x + ux + 1), x + ux, - (x + ux + 1), x + ux); +__m128i vux = _mm_set_epi16 ( + - 2 * ux, 2 * ux, - 2 * ux, 2 * ux, + - 2 * ux, 2 * ux, - 2 * ux, 2 * ux); +__m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0); +__m128i *b = (__m128i *)line-buffer; +__m128i vrl0, vrl1; + +while ((n -= 2) = 0) +{ + __m128i vw, vr, s; + + vrl1 = _mm_loadl_epi64 ( + (__m128i *)(bits + pixman_fixed_to_int (x + ux))); + /* vrl1: R1, L1 */ + +final_pixel: + vrl0 = _mm_loadl_epi64 ( + (__m128i *)(bits + pixman_fixed_to_int (x))); 
+ /* vrl0: R0, L0 */ + + /* The weights are based on vx which is a vector of +* +*- (x + 1), x, - (x + 1), x, +* - (x + ux + 1), x + ux, - (x + ux + 1), x + ux +* +* so the 16 bit weights end up like this: +* +*iw0, w0, iw0, w0, iw1, w1, iw1, w1 +* +* and after shifting and packing, we get these bytes: +* +*iw0, w0, iw0, w0, iw1, w1, iw1, w1, +*iw0, w0, iw0, w0, iw1, w1, iw1, w1, +* +* which means the first and the second input pixel +* have to be interleaved like this: +* +*la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, +*lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 +* +* before maddubsw can be used. +*/ + + vw = _mm_add_epi16 ( + vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS)); + /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1 +*/ + + vw = _mm_packus_epi16 (vw, vw); + /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1, +* iw0, w0, iw0, w0, iw1, w1, iw1, w1 +*/ + vx = _mm_add_epi16 (vx, vux); + + x += 2 * ux; + + vr = _mm_unpacklo_epi16 (vrl1, vrl0); + /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */ + + s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2)); + /* s: lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */ + + vr = _mm_unpackhi_epi8 (vr, s); + /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1, +* lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1 +*/ + + vr = _mm_maddubs_epi16 (vr, vw); + + /* When the weight is 0, the inverse weight is +* 128 which can't be represented in a signed byte. +* As a result maddubsw computes the following: +* +* r = l * -128 + r * 0 +* +* rather than the desired +* +* r = l * 128 + r * 0 +* +* We fix this by taking the absolute value of the +* result. 
+*/ + vr = _mm_abs_epi16 (vr); + + /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */ + _mm_store_si128 (b++, vr); +} + +if (n == -1) +{ + vrl1 = _mm_setzero_si128(); + goto final_pixel; +} + +line-y = y; +} + +static uint32_t * +ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask) +{ +pixman_fixed_t fx, ux; +bilinear_info_t *info = iter-data; +line_t *line0, *line1; +int y0, y1; +int32_t dist_y; +__m128i vw; +int i; + +fx = info-x; +ux = iter-image-common.transform-matrix[0][0]; + +y0 = pixman_fixed_to_int (info-y); +y1 = y0 + 1; + +
[Pixman] [PATCH 02/11] Add ITER_WIDE iter flag
This will be useful for putting iterators into tables where they can be looked up by iterator flags. Without this flag, wide iterators can only be recognized by the absence of ITER_NARROW, which makes testing for a match difficult. --- pixman/pixman-general.c | 20 +--- pixman/pixman-private.h | 13 +++-- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c index a4935c7..c674ffa 100644 --- a/pixman/pixman-general.c +++ b/pixman/pixman-general.c @@ -116,7 +116,7 @@ general_composite_rect (pixman_implementation_t *imp, pixman_iter_t src_iter, mask_iter, dest_iter; pixman_combine_32_func_t compose; pixman_bool_t component_alpha; -iter_flags_t narrow, src_iter_flags; +iter_flags_t width_flag, src_iter_flags; int Bpp; int i; @@ -124,12 +124,12 @@ general_composite_rect (pixman_implementation_t *imp, (!mask_image || mask_image-common.flags FAST_PATH_NARROW_FORMAT) (dest_image-common.flags FAST_PATH_NARROW_FORMAT)) { - narrow = ITER_NARROW; + width_flag = ITER_NARROW; Bpp = 4; } else { - narrow = 0; + width_flag = ITER_WIDE; Bpp = 16; } @@ -145,7 +145,7 @@ general_composite_rect (pixman_implementation_t *imp, mask_buffer = src_buffer + width * Bpp; dest_buffer = mask_buffer + width * Bpp; -if (!narrow) +if (width_flag == ITER_WIDE) { /* To make sure there aren't any NANs in the buffers */ memset (src_buffer, 0, width * Bpp); @@ -154,7 +154,7 @@ general_composite_rect (pixman_implementation_t *imp, } /* src iter */ -src_iter_flags = narrow | op_flags[op].src | ITER_SRC; +src_iter_flags = width_flag | op_flags[op].src | ITER_SRC; _pixman_implementation_src_iter_init (imp-toplevel, src_iter, src_image, src_x, src_y, width, height, @@ -179,18 +179,16 @@ general_composite_rect (pixman_implementation_t *imp, _pixman_implementation_src_iter_init ( imp-toplevel, mask_iter, mask_image, mask_x, mask_y, width, height, mask_buffer, - ITER_SRC | narrow | (component_alpha? 
0 : ITER_IGNORE_RGB), + ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB), info-mask_flags); /* dest iter */ _pixman_implementation_dest_iter_init ( - imp-toplevel, dest_iter, - dest_image, dest_x, dest_y, width, height, dest_buffer, - ITER_DEST | narrow | op_flags[op].dst, - info-dest_flags); + imp-toplevel, dest_iter, dest_image, dest_x, dest_y, width, height, + dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info-dest_flags); compose = _pixman_implementation_lookup_combiner ( - imp-toplevel, op, component_alpha, narrow); + imp-toplevel, op, component_alpha, width_flag != ITER_WIDE); for (i = 0; i height; ++i) { diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index 9b6353e..0fe86ca 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -212,7 +212,8 @@ typedef void (* pixman_iter_write_back_t) (pixman_iter_t *iter); typedef enum { -ITER_NARROW = (1 0), +ITER_NARROW = (1 0), +ITER_WIDE = (1 1), /* Localized alpha is when the alpha channel is used only to compute * the alpha value of the destination. This means that the computation @@ -229,15 +230,15 @@ typedef enum * we can treat it as if it were ARGB, which means in some cases we can * avoid copying it to a temporary buffer. */ -ITER_LOCALIZED_ALPHA = (1 1), -ITER_IGNORE_ALPHA =(1 2), -ITER_IGNORE_RGB = (1 3), +ITER_LOCALIZED_ALPHA = (1 2), +ITER_IGNORE_ALPHA =(1 3), +ITER_IGNORE_RGB = (1 4), /* These indicate whether the iterator is for a source * or a destination image */ -ITER_SRC = (1 4), -ITER_DEST =(1 5) +ITER_SRC = (1 5), +ITER_DEST =(1 6) } iter_flags_t; struct pixman_iter_t -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 05/11] sse2: Replace the fetcher_info_t table with a pixman_iter_info_t table
Similar to the changes to noop, put all the iterators into a table of pixman_iter_info_t and then do a generic search of that table during iterator initialization. --- pixman/pixman-sse2.c | 64 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index 863bc18..344cc46 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -6340,47 +6340,53 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter-buffer; } -typedef struct +static void +iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) { -pixman_format_code_t format; -pixman_iter_get_scanline_t get_scanline; -} fetcher_info_t; +pixman_image_t *image = iter-image; +uint8_t *b = (uint8_t *)image-bits.bits; +int s = image-bits.rowstride * 4; + +iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) / 8; +iter-stride = s; +} -static const fetcher_info_t fetchers[] = +#define IMAGE_FLAGS\ +(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) + +static const pixman_iter_info_t sse2_iters[] = { -{ PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 }, -{ PIXMAN_r5g6b5, sse2_fetch_r5g6b5 }, -{ PIXMAN_a8, sse2_fetch_a8 }, -{ PIXMAN_null } +{ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL +}, +{ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + iter_init_bits_stride, sse2_fetch_r5g6b5, NULL +}, +{ PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + iter_init_bits_stride, sse2_fetch_a8, NULL +}, +{ PIXMAN_null }, }; static pixman_bool_t sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) { -pixman_image_t *image = iter-image; +const pixman_iter_info_t *info; -#define FLAGS \ -(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ - FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) - -if ((iter-iter_flags ITER_NARROW) - (iter-image_flags FLAGS) == FLAGS) +for (info = sse2_iters; info-format != 
PIXMAN_null; ++info) { - const fetcher_info_t *f; - - for (f = fetchers[0]; f-format != PIXMAN_null; f++) + if ((info-format == PIXMAN_any || +info-format == iter-image-common.extended_format_code) + (info-image_flags iter-image_flags) == info-image_flags + (info-iter_flags iter-iter_flags) == info-iter_flags) { - if (image-common.extended_format_code == f-format) - { - uint8_t *b = (uint8_t *)image-bits.bits; - int s = image-bits.rowstride * 4; - - iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (f-format) / 8; - iter-stride = s; + iter-get_scanline = info-get_scanline; + iter-write_back = info-write_back; - iter-get_scanline = f-get_scanline; - return TRUE; - } + if (info-initializer) + info-initializer (iter, info); + return TRUE; } } -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 06/11] mmx: Replace the fetcher_info_t table with a pixman_iter_info_t table
Similar to the SSE2 commit, information about the iterators is stored in a table of pixman_iter_info_t. --- pixman/pixman-mmx.c | 64 + 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c index 746ecd6..02ec998 100644 --- a/pixman/pixman-mmx.c +++ b/pixman/pixman-mmx.c @@ -3922,47 +3922,53 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter-buffer; } -typedef struct +static void +iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) { -pixman_format_code_t format; -pixman_iter_get_scanline_t get_scanline; -} fetcher_info_t; +pixman_image_t *image = iter-image; +uint8_t *b = (uint8_t *)image-bits.bits; +int s = image-bits.rowstride * 4; + +iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) / 8; +iter-stride = s; +} + +#define IMAGE_FLAGS\ +(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) -static const fetcher_info_t fetchers[] = +static const pixman_iter_info_t mmx_iters[] = { -{ PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 }, -{ PIXMAN_r5g6b5, mmx_fetch_r5g6b5 }, -{ PIXMAN_a8, mmx_fetch_a8 }, -{ PIXMAN_null } +{ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, + iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL +}, +{ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, + iter_init_bits_stride, mmx_fetch_r5g6b5, NULL +}, +{ PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, + iter_init_bits_stride, mmx_fetch_a8, NULL +}, +{ PIXMAN_null }, }; static pixman_bool_t mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) { -pixman_image_t *image = iter-image; - -#define FLAGS \ -(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ - FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) +const pixman_iter_info_t *info; -if ((iter-iter_flags ITER_NARROW) - (iter-image_flags FLAGS) == FLAGS) +for (info = mmx_iters; info-format != PIXMAN_null; ++info) { - const fetcher_info_t *f; - - for (f = fetchers[0]; f-format 
!= PIXMAN_null; f++) + if ((info-format == PIXMAN_any || +info-format == iter-image-common.extended_format_code) + (info-image_flags iter-image_flags) == info-image_flags + (info-iter_flags iter-iter_flags) == info-iter_flags) { - if (image-common.extended_format_code == f-format) - { - uint8_t *b = (uint8_t *)image-bits.bits; - int s = image-bits.rowstride * 4; + iter-get_scanline = info-get_scanline; + iter-write_back = info-write_back; - iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (f-format) / 8; - iter-stride = s; - - iter-get_scanline = f-get_scanline; - return TRUE; - } + if (info-initializer) + info-initializer (iter, info); + return TRUE; } } -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 07/11] fast: Replace the fetcher_info_t table with a pixman_iter_info_t table
Similar to the SSE2 and MMX patches, this commit replaces a table of fetcher_info_t with a table of pixman_iter_info_t, and similar to the noop patch, both fast_src_iter_init() and fast_dest_iter_init() are now doing exactly the same thing, so their code can be shared in a new function called fast_iter_init_common(). --- pixman/pixman-fast-path.c | 107 +++--- 1 file changed, 45 insertions(+), 62 deletions(-) diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index 247aea6..047675c 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -2261,46 +2261,55 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter) } } -typedef struct +static void +iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) { -pixman_format_code_t format; -pixman_iter_get_scanline_t get_scanline; -pixman_iter_write_back_t write_back; -} fetcher_info_t; +pixman_image_t *image = iter-image; +uint8_t *b = (uint8_t *)image-bits.bits; +int s = image-bits.rowstride * 4; + +iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) / 8; +iter-stride = s; +} + +#define IMAGE_FLAGS\ +(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ + FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) -static const fetcher_info_t fetchers[] = +static const pixman_iter_info_t fast_iters[] = { -{ PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, -{ PIXMAN_null } +{ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC, + iter_init_bits_stride, fast_fetch_r5g6b5, NULL }, + +{ PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST, + iter_init_bits_stride, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + +{ PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, + ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA, + iter_init_bits_stride, fast_dest_fetch_noop, fast_write_back_r5g6b5 }, + +{ PIXMAN_null }, }; static pixman_bool_t -fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +fast_iter_init_common 
(pixman_implementation_t *imp, pixman_iter_t *iter) { -pixman_image_t *image = iter-image; - -#define FLAGS \ -(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ - FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) +const pixman_iter_info_t *info; -if ((iter-iter_flags ITER_NARROW) - (iter-image_flags FLAGS) == FLAGS) +for (info = fast_iters; info-format != PIXMAN_null; ++info) { - const fetcher_info_t *f; - - for (f = fetchers[0]; f-format != PIXMAN_null; f++) + if ((info-format == PIXMAN_any || +info-format == iter-image-common.extended_format_code) + (info-image_flags iter-image_flags) == info-image_flags + (info-iter_flags iter-iter_flags) == info-iter_flags) { - if (image-common.extended_format_code == f-format) - { - uint8_t *b = (uint8_t *)image-bits.bits; - int s = image-bits.rowstride * 4; - - iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (f-format) / 8; - iter-stride = s; + iter-get_scanline = info-get_scanline; + iter-write_back = info-write_back; - iter-get_scanline = f-get_scanline; - return TRUE; - } + if (info-initializer) + info-initializer (iter, info); + return TRUE; } } @@ -2308,42 +2317,16 @@ fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) } static pixman_bool_t -fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) { -pixman_image_t *image = iter-image; - -if ((iter-iter_flags ITER_NARROW) - (iter-image_flags FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS) -{ - const fetcher_info_t *f; - - for (f = fetchers[0]; f-format != PIXMAN_null; f++) - { - if (image-common.extended_format_code == f-format) - { - uint8_t *b = (uint8_t *)image-bits.bits; - int s = image-bits.rowstride * 4; - - iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (f-format) / 8; - iter-stride = s; - - if ((iter-iter_flags (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == - (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) - { - iter-get_scanline = 
fast_dest_fetch_noop; - } - else - { - iter-get_scanline = f-get_scanline; - } - iter-write_back = f-write_back;
[Pixman] [PATCH 09/11] Add _pixman_implementation_iter_init() and use instead of _src/_dest_init()
A new field, 'iter_info', is added to the implementation struct, and all the implementations store a pointer to their iterator tables in it. A new function, _pixman_implementation_iter_init(), is then added that searches those tables, and the new function is called in pixman-general.c and pixman-image.c instead of the old _pixman_implementation_src_init() and _pixman_implementation_dest_init(). --- pixman/pixman-fast-path.c | 1 + pixman/pixman-general.c| 12 pixman/pixman-image.c | 2 +- pixman/pixman-implementation.c | 63 ++ pixman/pixman-mmx.c| 1 + pixman/pixman-noop.c | 1 + pixman/pixman-private.h| 13 + pixman/pixman-sse2.c | 1 + 8 files changed, 88 insertions(+), 6 deletions(-) diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index 047675c..d5f707f 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -2336,6 +2336,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback) imp-fill = fast_path_fill; imp-src_iter_init = fast_src_iter_init; imp-dest_iter_init = fast_dest_iter_init; +imp-iter_info = fast_iters; return imp; } diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c index 91e33c4..c469a81 100644 --- a/pixman/pixman-general.c +++ b/pixman/pixman-general.c @@ -196,9 +196,10 @@ general_composite_rect (pixman_implementation_t *imp, /* src iter */ src_iter_flags = width_flag | op_flags[op].src | ITER_SRC; -_pixman_implementation_src_iter_init (imp-toplevel, src_iter, src_image, - src_x, src_y, width, height, - src_buffer, src_iter_flags, info-src_flags); +_pixman_implementation_iter_init (imp-toplevel, src_iter, src_image, + src_x, src_y, width, height, + src_buffer, src_iter_flags, + info-src_flags); /* mask iter */ if ((src_iter_flags (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == @@ -216,14 +217,14 @@ general_composite_rect (pixman_implementation_t *imp, mask_image-common.component_alpha PIXMAN_FORMAT_RGB (mask_image-bits.format); -_pixman_implementation_src_iter_init ( 
+_pixman_implementation_iter_init ( imp-toplevel, mask_iter, mask_image, mask_x, mask_y, width, height, mask_buffer, ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB), info-mask_flags); /* dest iter */ -_pixman_implementation_dest_iter_init ( +_pixman_implementation_iter_init ( imp-toplevel, dest_iter, dest_image, dest_x, dest_y, width, height, dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, info-dest_flags); @@ -263,6 +264,7 @@ _pixman_implementation_create_general (void) imp-src_iter_init = general_src_iter_init; imp-dest_iter_init = general_dest_iter_init; +imp-iter_info = general_iters; return imp; } diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c index 78c8610..4f9c2f9 100644 --- a/pixman/pixman-image.c +++ b/pixman/pixman-image.c @@ -920,7 +920,7 @@ _pixman_image_get_solid (pixman_implementation_t *imp, pixman_iter_t iter; otherwise: - _pixman_implementation_src_iter_init ( + _pixman_implementation_iter_init ( imp, iter, image, 0, 0, 1, 1, (uint8_t *)result, ITER_NARROW | ITER_SRC, image-common.flags); diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c index cfb82bb..4bdc836 100644 --- a/pixman/pixman-implementation.c +++ b/pixman/pixman-implementation.c @@ -285,6 +285,69 @@ _pixman_implementation_fill (pixman_implementation_t *imp, return FALSE; } +static uint32_t * +get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) +{ +return NULL; +} + +void +_pixman_implementation_iter_init (pixman_implementation_t *imp, + pixman_iter_t *iter, + pixman_image_t *image, + int x, + int y, + int width, + int height, + uint8_t *buffer, + iter_flags_t iter_flags, + uint32_t image_flags) +{ +pixman_format_code_t format; + +iter-image = image; +iter-buffer = (uint32_t *)buffer; +iter-x = x; +iter-y = y; +iter-width = width; +iter-height = height; +iter-iter_flags = iter_flags; +iter-image_flags = image_flags; + +if (!iter-image) +{ + iter-get_scanline =
[Pixman] [PATCH 11/11] Consolidate all the iter_init_bits_stride functions
The SSE2, MMX, and fast implementations all have a copy of the function iter_init_bits_stride that computes an image buffer and stride. Move that function to pixman-utils.c and share it among all the implementations. --- pixman/pixman-fast-path.c | 19 +-- pixman/pixman-mmx.c | 17 +++-- pixman/pixman-private.h | 3 +++ pixman/pixman-sse2.c | 17 +++-- pixman/pixman-utils.c | 11 +++ 5 files changed, 25 insertions(+), 42 deletions(-) diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c index 9af26af..3982dce 100644 --- a/pixman/pixman-fast-path.c +++ b/pixman/pixman-fast-path.c @@ -2261,17 +2261,6 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter) } } -static void -iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) -{ -pixman_image_t *image = iter-image; -uint8_t *b = (uint8_t *)image-bits.bits; -int s = image-bits.rowstride * 4; - -iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) / 8; -iter-stride = s; -} - #define IMAGE_FLAGS\ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) @@ -2279,15 +2268,17 @@ iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) static const pixman_iter_info_t fast_iters[] = { { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC, - iter_init_bits_stride, fast_fetch_r5g6b5, NULL }, + _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL }, { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST, - iter_init_bits_stride, fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, + _pixman_iter_init_bits_stride, + fast_fetch_r5g6b5, fast_write_back_r5g6b5 }, { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS, ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA, - iter_init_bits_stride, fast_dest_fetch_noop, fast_write_back_r5g6b5 }, + _pixman_iter_init_bits_stride, + fast_dest_fetch_noop, fast_write_back_r5g6b5 }, { PIXMAN_null }, }; diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c index 
861b856..c94d282 100644 --- a/pixman/pixman-mmx.c +++ b/pixman/pixman-mmx.c @@ -3922,17 +3922,6 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter-buffer; } -static void -iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) -{ -pixman_image_t *image = iter-image; -uint8_t *b = (uint8_t *)image-bits.bits; -int s = image-bits.rowstride * 4; - -iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) / 8; -iter-stride = s; -} - #define IMAGE_FLAGS\ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) @@ -3940,13 +3929,13 @@ iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) static const pixman_iter_info_t mmx_iters[] = { { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW, - iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL + _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL }, { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW, - iter_init_bits_stride, mmx_fetch_r5g6b5, NULL + _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL }, { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW, - iter_init_bits_stride, mmx_fetch_a8, NULL + _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL }, { PIXMAN_null }, }; diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index aa0a842..af4a0b6 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -652,6 +652,9 @@ _pixman_compute_composite_region32 (pixman_region32_t * region, uint32_t * _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask); +void +_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info); + /* These formats all have depth 0, so they * will never clash with any real ones */ diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index bc834b5..dde9235 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -6340,17 +6340,6 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask) return iter-buffer; } -static void 
-iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info) -{ -pixman_image_t *image = iter-image; -uint8_t *b = (uint8_t *)image-bits.bits; -int s = image-bits.rowstride * 4; - -iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) / 8; -iter-stride = s; -} - #define IMAGE_FLAGS\ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | \ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST) @@ -6358,13 +6347,13
[Pixman] [PATCH] gtk-utils.c: Use cairo in show_image() rather than GdkPixbuf
From: Søren Sandmann Pedersen s...@redhat.com GdkPixbufs are not premultiplied, so when using them to display pixman images, there are some unnecessary conversions going on: First the image is converted to non-premultiplied, and then GdkPixbuf premultiplies before sending the result to the X server. These conversions may cause the displayed image to not be exactly identical to the original. This patch just uses a cairo image surface instead, which avoids these conversions. Also make the comment about sRGB a little more concise. --- demos/gtk-utils.c | 53 + 1 file changed, 29 insertions(+), 24 deletions(-) diff --git a/demos/gtk-utils.c b/demos/gtk-utils.c index d7e946d..32d4aec 100644 --- a/demos/gtk-utils.c +++ b/demos/gtk-utils.c @@ -95,14 +95,31 @@ pixbuf_from_argb32 (uint32_t *bits, static gboolean on_expose (GtkWidget *widget, GdkEventExpose *expose, gpointer data) { -GdkPixbuf *pixbuf = data; +pixman_image_t *pimage = data; +int width = pixman_image_get_width (pimage); +int height = pixman_image_get_height (pimage); +int stride = pixman_image_get_stride (pimage); +cairo_surface_t *cimage; +cairo_format_t format; +cairo_t *cr; + +if (pixman_image_get_format (pimage) == PIXMAN_x8r8g8b8) + format = CAIRO_FORMAT_RGB24; +else + format = CAIRO_FORMAT_ARGB32; + +cimage = cairo_image_surface_create_for_data ( + (uint8_t *)pixman_image_get_data (pimage), + format, width, height, stride); -gdk_draw_pixbuf (widget-window, NULL, -pixbuf, 0, 0, 0, 0, -gdk_pixbuf_get_width (pixbuf), -gdk_pixbuf_get_height (pixbuf), -GDK_RGB_DITHER_NONE, -0, 0); +cr = gdk_cairo_create (widget-window); + +cairo_rectangle (cr, 0, 0, width, height); +cairo_set_source_surface (cr, cimage, 0, 0); +cairo_fill (cr); + +cairo_destroy (cr); +cairo_surface_destroy (cimage); return TRUE; } @@ -111,7 +128,6 @@ void show_image (pixman_image_t *image) { GtkWidget *window; -GdkPixbuf *pixbuf; int width, height; int argc; char **argv; @@ -132,22 +148,15 @@ show_image (pixman_image_t *image) format = 
pixman_image_get_format (image); -/* Three cases: - * - * - image is a8r8g8b8_sRGB: we will display without modification - *under the assumption that the monitor is sRGB - * - * - image is a8r8g8b8: we will display without modification - *under the assumption that whoever created the image - *probably did it wrong by using sRGB inputs - * - * - other: we will convert to a8r8g8b8 under the assumption that - *whoever created the image probably did it wrong. +/* We always display the image as if it contains sRGB data. That + * means that no conversion should take place when the image + * has the a8r8g8b8_sRGB format. */ switch (format) { case PIXMAN_a8r8g8b8_sRGB: case PIXMAN_a8r8g8b8: +case PIXMAN_x8r8g8b8: copy = pixman_image_ref (image); break; @@ -161,11 +170,7 @@ show_image (pixman_image_t *image) break; } -pixbuf = pixbuf_from_argb32 (pixman_image_get_data (copy), -width, height, -pixman_image_get_stride (copy)); - -g_signal_connect (window, expose_event, G_CALLBACK (on_expose), pixbuf); +g_signal_connect (window, expose_event, G_CALLBACK (on_expose), copy); g_signal_connect (window, delete_event, G_CALLBACK (gtk_main_quit), NULL); gtk_widget_show (window); -- 1.7.11.7 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [RFC, PATCH 0/8] Floating point pipeline
Hi, The following patches change the 64 bit pipeline to use single precision floating point channels instead. The main benefit of this is that we get more range and precision so that we can support HDR image formats such as half precision floating point argb. Unlike 16 bpc, single precision floating point is enough for most people's needs. A secondary benefit is that this floating point pipeline could serve as a reference implementation in the test suite such that we can relax the requirement for bit-exact operation. Finally, floating point may be a better match for some SIMD instruction sets such as AVX. Downsides include that some chips do not have hardware floating point, and that computation on four floating point channels may be slower than on four 16 bit channels, for example due to more cache pressure or because floating point is just slower. Also note that the linear->sRGB scanline store routine is probably slower with these patches because it now uses a binary search instead of a simple table lookup. If someone has better suggestions here, I'm definitely interested. Comments appreciated. Thanks, Soren ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 2/8] blitters-test: Prepare for floating point
From: Søren Sandmann Pedersen s...@redhat.com Comment out some formats in blitters-test that are going to rely on floating point in some upcoming patches. --- test/blitters-test.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/blitters-test.c b/test/blitters-test.c index 6a3cc86..a2a1ea9 100644 --- a/test/blitters-test.c +++ b/test/blitters-test.c @@ -172,10 +172,12 @@ static pixman_format_code_t img_fmt_list[] = { PIXMAN_x14r6g6b6, PIXMAN_r8g8b8, PIXMAN_b8g8r8, +#if 0 /* These are going to use floating point in the near future */ PIXMAN_x2r10g10b10, PIXMAN_a2r10g10b10, PIXMAN_x2b10g10r10, PIXMAN_a2b10g10r10, +#endif PIXMAN_a1r5g5b5, PIXMAN_x1r5g5b5, PIXMAN_a1b5g5r5, @@ -395,6 +397,6 @@ main (int argc, const char *argv[]) } return fuzzer_test_main(blitters, 200, - 0xA364B5BF, + 0x67951DE6, test_composite, argc, argv); } -- 1.7.11.4 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 1/8] glyph-test: Prepare for floating point
From: Søren Sandmann Pedersen s...@redhat.com In preparation for an upcoming change of the wide pipe to use floating point, comment out some formats in glyph-test that are going to be using floating point and update the CRC32 value to match. --- test/glyph-test.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/test/glyph-test.c b/test/glyph-test.c index 84de5aa..9dd5b41 100644 --- a/test/glyph-test.c +++ b/test/glyph-test.c @@ -30,10 +30,13 @@ static const pixman_format_code_t formats[] = PIXMAN_x14r6g6b6, PIXMAN_r8g8b8, PIXMAN_b8g8r8, +#if 0 +/* These use floating point */ PIXMAN_x2r10g10b10, PIXMAN_a2r10g10b10, PIXMAN_x2b10g10r10, PIXMAN_a2b10g10r10, +#endif PIXMAN_a1r5g5b5, PIXMAN_x1r5g5b5, PIXMAN_a1b5g5r5, @@ -329,7 +332,7 @@ test_glyphs (int testnum, int verbose) int main (int argc, const char *argv[]) { -return fuzzer_test_main (glyph, 3, -0x741CB2DB, +return fuzzer_test_main (glyph, 3, +0x79E74996, test_glyphs, argc, argv); } -- 1.7.11.4 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 3/8] Add pixman-combine-float.c
From: Søren Sandmann Pedersen s...@redhat.com This file contains floating point implementations of combiners for all pixman operators. These combiners operate on buffers containing single precision floating point pixels stored in (a, r, g, b) order. The combiners are added to the pixman_implementation_t struct, but nothing uses them yet. This commit incorporates a number of bug fixes contributed by Andrea Canciani. --- pixman/Makefile.sources | 1 + pixman/pixman-combine-float.c | 956 ++ pixman/pixman-general.c | 1 + pixman/pixman-private.h | 10 + 4 files changed, 968 insertions(+) create mode 100644 pixman/pixman-combine-float.c diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources index cf7040f..96540ec 100644 --- a/pixman/Makefile.sources +++ b/pixman/Makefile.sources @@ -5,6 +5,7 @@ libpixman_sources = \ pixman-bits-image.c \ pixman-combine32.c \ pixman-combine64.c \ + pixman-combine-float.c \ pixman-conical-gradient.c \ pixman-x86.c\ pixman-mips.c \ diff --git a/pixman/pixman-combine-float.c b/pixman/pixman-combine-float.c new file mode 100644 index 000..9617e24 --- /dev/null +++ b/pixman/pixman-combine-float.c @@ -0,0 +1,956 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2010, 2012 Soren Sandmann Pedersen + * Copyright © 2010, 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Author: Soren Sandmann Pedersen (sandm...@cs.au.dk) + */ + +#ifdef HAVE_CONFIG_H +#include config.h +#endif + +#include math.h +#include string.h +#include float.h + +#include pixman-private.h + +typedef float (* combine_channel_t) (float sa, float s, float da, float d); + +static force_inline void +combine_inner (pixman_bool_t component, + float *dest, const float *src, const float *mask, int n_pixels, + combine_channel_t combine_a, combine_channel_t combine_c) +{ +int i; + +if (!mask) +{ + for (i = 0; i 4 * n_pixels; i += 4) + { + float sa = src[i + 0]; + float sr = src[i + 1]; + float sg = src[i + 2]; + float sb = src[i + 3]; + + float da = dest[i + 0]; + float dr = dest[i + 1]; + float dg = dest[i + 2]; + float db = dest[i + 3]; + + dest[i + 0] = combine_a (sa, sa, da, da); + dest[i + 1] = combine_c (sa, sr, da, dr); + dest[i + 2] = combine_c (sa, sg, da, dg); + dest[i + 3] = combine_c (sa, sb, da, db); + } +} +else +{ + for (i = 0; i 4 * n_pixels; i += 4) + { + float sa, sr, sg, sb; + float ma, mr, mg, mb; + float da, dr, dg, db; + + sa = src[i + 0]; + sr = src[i + 1]; + sg = src[i + 2]; + sb = src[i + 3]; + + if (component) + { + ma = mask[i + 0]; + mr = mask[i + 1]; + mg = mask[i + 2]; + mb = mask[i + 3]; + + sr *= mr; + sg *= mg; + sb *= mb; + + ma *= sa; + mr *= sa; + mg *= sa; + mb *= sa; + + sa = ma; + } + else + { + ma = mask[i + 0]; + + sa *= ma; + sr *= ma; + sg *= ma; + sb *= ma; + + ma = mr = mg = mb = sa; + } + + da = dest[i + 0]; + dr = dest[i + 1]; + dg = 
dest[i + 2]; + db = dest[i + 3
[Pixman] [PATCH 5/8] pixman-access.c: Add floating point accessor functions
From: Søren Sandmann Pedersen s...@redhat.com Three new function pointer fields are added to bits_image_t: fetch_scanline_float fetch_pixel_float store_scanline_float similar to the existing 32 and 64 bit accessors. The fetcher_info_t struct in pixman_access similarly gets a new get_scanline_float field. For most formats, the new get_scanline_float field is set to a new function fetch_scanline_generic_float() that first uses the 32 bit scanline fetcher and then expands these pixels to floating point. For the 10 bpc formats, new floating point accessors are added that use pixman_unorm_to_float() and pixman_float_to_unorm() to convert back and forth. The PIXMAN_a8r8g8b8_sRGB format is handled with a 256-entry table that maps 8 bit sRGB channels to linear single precision floating point numbers. The sRGB->linear direction can then be done with a simple table lookup. The other direction is currently done with a 4096-entry table which works fine for 16 bit integers, but not so great for floating point. So instead this patch uses a binary search in the sRGB->linear table. The existing 32 bit accessors for the sRGB format are also converted to use this method. --- pixman/pixman-access.c | 619 ++--- pixman/pixman-bits-image.c | 40 ++- pixman/pixman-private.h| 8 + 3 files changed, 624 insertions(+), 43 deletions(-) diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c index 9feafc4..1eef621 100644 --- a/pixman/pixman-access.c +++ b/pixman/pixman-access.c @@ -31,6 +31,7 @@ #include stdlib.h #include string.h #include assert.h +#include math.h #include pixman-accessor.h #include pixman-private.h @@ -635,6 +636,231 @@ fetch_scanline_x2b10g10r10 (pixman_image_t *image, } } +/* Table mapping sRGB-encoded 8 bit numbers to linearly encoded + * floating point numbers. We assume that single precision + * floating point follows the IEEE 754 format. 
+ */ +static const uint32_t to_linear_u[256] = +{ +0x, 0x399f22b4, 0x3a1f22b4, 0x3a6eb40e, 0x3a9f22b4, 0x3ac6eb61, +0x3aeeb40e, 0x3b0b3e5d, 0x3b1f22b4, 0x3b33070b, 0x3b46eb61, 0x3b5b518a, +0x3b70f18a, 0x3b83e1c5, 0x3b8fe614, 0x3b9c87fb, 0x3ba9c9b5, 0x3bb7ad6d, +0x3bc63547, 0x3bd5635f, 0x3be539bd, 0x3bf5ba70, 0x3c0373b5, 0x3c0c6152, +0x3c15a703, 0x3c1f45bc, 0x3c293e68, 0x3c3391f4, 0x3c3e4149, 0x3c494d43, +0x3c54b6c7, 0x3c607eb1, 0x3c6ca5df, 0x3c792d22, 0x3c830aa8, 0x3c89af9e, +0x3c9085db, 0x3c978dc5, 0x3c9ec7c0, 0x3ca63432, 0x3cadd37d, 0x3cb5a601, +0x3cbdac20, 0x3cc5e639, 0x3cce54ab, 0x3cd6f7d2, 0x3cdfd00e, 0x3ce8ddb9, +0x3cf2212c, 0x3cfb9ac1, 0x3d02a569, 0x3d0798dc, 0x3d0ca7e4, 0x3d11d2ae, +0x3d171963, 0x3d1c7c2e, 0x3d21fb3a, 0x3d2796af, 0x3d2d4ebb, 0x3d332380, +0x3d39152b, 0x3d3f23e3, 0x3d454fd0, 0x3d4b991c, 0x3d51ffeb, 0x3d588466, +0x3d5f26b7, 0x3d65e6fe, 0x3d6cc564, 0x3d73c210, 0x3d7add25, 0x3d810b65, +0x3d84b793, 0x3d88732e, 0x3d8c3e48, 0x3d9018f4, 0x3d940343, 0x3d97fd48, +0x3d9c0714, 0x3da020b9, 0x3da44a48, 0x3da883d6, 0x3daccd70, 0x3db12728, +0x3db59110, 0x3dba0b38, 0x3dbe95b2, 0x3dc3308f, 0x3dc7dbe0, 0x3dcc97b4, +0x3dd1641c, 0x3dd6412a, 0x3ddb2eec, 0x3de02d75, 0x3de53cd3, 0x3dea5d16, +0x3def8e52, 0x3df4d091, 0x3dfa23e5, 0x3dff885e, 0x3e027f06, 0x3e05427f, +0x3e080ea2, 0x3e0ae376, 0x3e0dc104, 0x3e10a752, 0x3e139669, 0x3e168e50, +0x3e198f0e, 0x3e1c98ab, 0x3e1fab2e, 0x3e22c6a0, 0x3e25eb08, 0x3e29186a, +0x3e2c4ed0, 0x3e2f8e42, 0x3e32d6c4, 0x3e362861, 0x3e39831e, 0x3e3ce702, +0x3e405416, 0x3e43ca5e, 0x3e4749e4, 0x3e4ad2ae, 0x3e4e64c2, 0x3e520027, +0x3e55a4e6, 0x3e595303, 0x3e5d0a8a, 0x3e60cb7c, 0x3e6495e0, 0x3e6869bf, +0x3e6c4720, 0x3e702e08, 0x3e741e7f, 0x3e78188c, 0x3e7c1c34, 0x3e8014c0, +0x3e822039, 0x3e84308b, 0x3e8645b8, 0x3e885fc3, 0x3e8a7eb0, 0x3e8ca281, +0x3e8ecb3a, 0x3e90f8df, 0x3e932b72, 0x3e9562f6, 0x3e979f6f, 0x3e99e0e0, +0x3e9c274e, 0x3e9e72b8, 0x3ea0c322, 0x3ea31892, 0x3ea57308, 0x3ea7d28a, +0x3eaa3718, 0x3eaca0b7, 0x3eaf0f69, 0x3eb18332, 
0x3eb3fc16, 0x3eb67a15, +0x3eb8fd34, 0x3ebb8576, 0x3ebe12de, 0x3ec0a56e, 0x3ec33d2a, 0x3ec5da14, +0x3ec87c30, 0x3ecb2380, 0x3ecdd008, 0x3ed081ca, 0x3ed338c9, 0x3ed5f508, +0x3ed8b68a, 0x3edb7d52, 0x3ede4962, 0x3ee11abe, 0x3ee3f168, 0x3ee6cd64, +0x3ee9aeb6, 0x3eec955d, 0x3eef815d, 0x3ef272ba, 0x3ef56976, 0x3ef86594, +0x3efb6717, 0x3efe6e02, 0x3f00bd2b, 0x3f02460c, 0x3f03d1a5, 0x3f055ff8, +0x3f06f105, 0x3f0884ce, 0x3f0a1b54, 0x3f0bb499, 0x3f0d509f, 0x3f0eef65, +0x3f1090ef, 0x3f12353c, 0x3f13dc50, 0x3f15862a, 0x3f1732cc, 0x3f18e237, +0x3f1a946d, 0x3f1c4970, 0x3f1e013f, 0x3f1fbbde, 0x3f21794c, 0x3f23398c, +0x3f24fca0, 0x3f26c286, 0x3f288b42, 0x3f2a56d3, 0x3f2c253d, 0x3f2df680, +0x3f2fca9d, 0x3f31a195, 0x3f337b6a, 0x3f35581e, 0x3f3737b1, 0x3f391a24, +0x3f3aff7a, 0x3f3ce7b2, 0x3f3ed2d0, 0x3f40c0d2, 0x3f42b1bc, 0x3f44a58e, +0x3f469c49, 0x3f4895ee
[Pixman] [PATCH 01/10] pixman-cpu.c: Rename disabled to _pixman_disabled() and export it
From: Søren Sandmann Pedersen s...@redhat.com --- pixman/pixman-cpu.c | 22 +++--- pixman/pixman-private.h |2 ++ 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c index aa9036f..a0d2f8c 100644 --- a/pixman/pixman-cpu.c +++ b/pixman/pixman-cpu.c @@ -729,8 +729,8 @@ pixman_have_sse2 (void) #endif /* __amd64__ */ #endif -static pixman_bool_t -disabled (const char *name) +pixman_bool_t +_pixman_disabled (const char *name) { const char *env; @@ -767,44 +767,44 @@ _pixman_choose_implementation (void) imp = _pixman_implementation_create_general(); -if (!disabled (fast)) +if (!_pixman_disabled (fast)) imp = _pixman_implementation_create_fast_path (imp); #ifdef USE_X86_MMX -if (!disabled (mmx) pixman_have_mmx ()) +if (!_pixman_disabled (mmx) pixman_have_mmx ()) imp = _pixman_implementation_create_mmx (imp); #endif #ifdef USE_SSE2 -if (!disabled (sse2) pixman_have_sse2 ()) +if (!_pixman_disabled (sse2) pixman_have_sse2 ()) imp = _pixman_implementation_create_sse2 (imp); #endif #ifdef USE_ARM_SIMD -if (!disabled (arm-simd) pixman_have_arm_simd ()) +if (!_pixman_disabled (arm-simd) pixman_have_arm_simd ()) imp = _pixman_implementation_create_arm_simd (imp); #endif #ifdef USE_ARM_IWMMXT -if (!disabled (arm-iwmmxt) pixman_have_arm_iwmmxt ()) +if (!_pixman_disabled (arm-iwmmxt) pixman_have_arm_iwmmxt ()) imp = _pixman_implementation_create_mmx (imp); #endif #ifdef USE_LOONGSON_MMI -if (!disabled (loongson-mmi) pixman_have_loongson_mmi ()) +if (!_pixman_disabled (loongson-mmi) pixman_have_loongson_mmi ()) imp = _pixman_implementation_create_mmx (imp); #endif #ifdef USE_ARM_NEON -if (!disabled (arm-neon) pixman_have_arm_neon ()) +if (!_pixman_disabled (arm-neon) pixman_have_arm_neon ()) imp = _pixman_implementation_create_arm_neon (imp); #endif #ifdef USE_MIPS_DSPR2 -if (!disabled (mips-dspr2) pixman_have_mips_dspr2 ()) +if (!_pixman_disabled (mips-dspr2) pixman_have_mips_dspr2 ()) imp = _pixman_implementation_create_mips_dspr2 
(imp); #endif #ifdef USE_VMX -if (!disabled (vmx) pixman_have_vmx ()) +if (!_pixman_disabled (vmx) pixman_have_vmx ()) imp = _pixman_implementation_create_vmx (imp); #endif diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index 72e3b4f..89020c9 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -574,6 +574,8 @@ _pixman_implementation_create_vmx (pixman_implementation_t *fallback); pixman_implementation_t * _pixman_choose_implementation (void); +pixman_bool_t +_pixman_disabled (const char *name); /* -- 1.7.10.4 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 00/10] Cleanups to CPU detection
git://people.freedesktop.org/~sandmann/pixman in the branch cpudetectfiles. Hi, The following patches contain some cleanups to the CPU detection in general, and some improvements to the x86 specific parts in particular. I was looking at making use of some of the newer x86 SIMD instruction sets and realized that (a) we don't ever call cpuid on x86-64, we just assume that MMX and SSE2 are present, and (b) pixman-cpu.c is a royal mess. The following patches split pixman-cpu.c into four different files: pixman-arm.c, pixman-mips.c, pixman-ppc.c, and pixman-x86.c. All the files are still compiled on all architectures, but they have #ifdefs in them that make them no-ops on the ones that they are not specific to. The remaining bits of pixman-cpu.c are moved into pixman-implementation.c. There are also some cleanups to the logic for all architectures. In particular, all the have_feature() functions are gone and replaced with a single function that detects all the features that the CPU offers. This function is implemented by each #ifdef variation, and then this is called from shared code. The changes to x86 are the most involved. There is now a pixman_cpuid() function that uses inline assembly on GCC and the __cpuid intrinsic on MSVC. The assembly is written such that it will work on both 32 and 64 bit; the main change required was to save %ebx in %esi instead of on the stack. There is also a have_cpuid() function that detects the presence of cpuid. On MSVC, this simply returns TRUE, so an MSVC-compiled pixman will now not work on old 486s. I am very tempted to remove this on GCC as well and just require cpuid to be present for pixman to work. These two functions together make it possible to write the CPU detection code in plain C, rather than the #ifdef-ridden mess of assembly it used to be. I have tested the patches on ppc64, x86-64, x86-32, and on an ARM Cortex A8 running Linux, but more testing would definitely be appreciated. 
In particular if you use MSVC, MIPS, XO-1, or ARM on Android/iPhone. Thanks, Soren ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 07/10] Simplify MIPS CPU detection
From: Søren Sandmann Pedersen s...@redhat.com There is no reason to have pixman_have_feature functions when all they do is call pixman_have_mips_feature(). Instead rename pixman_have_mips_feature() to have_feature() and call it directly from _pixman_mips_get_implementations(). Also on non-Linux, just make have_feature() return FALSE. --- pixman/pixman-mips.c | 44 +--- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/pixman/pixman-mips.c b/pixman/pixman-mips.c index 9d3ee59..2b280c6 100644 --- a/pixman/pixman-mips.c +++ b/pixman/pixman-mips.c @@ -30,21 +30,18 @@ #include string.h #include stdlib.h -#if defined (__linux__) /* linux ELF */ - static pixman_bool_t -pixman_have_mips_feature (const char *search_string) +have_feature (const char *search_string) { -const char *file_name = /proc/cpuinfo; +#if defined (__linux__) /* linux ELF */ /* Simple detection of MIPS features at runtime for Linux. * It is based on /proc/cpuinfo, which reveals hardware configuration * to user-space applications. According to MIPS (early 2010), no similar * facility is universally available on the MIPS architectures, so it's up * to individual OSes to provide such. */ - +const char *file_name = /proc/cpuinfo; char cpuinfo_line[256]; - FILE *f = NULL; if ((f = fopen (file_name, r)) == NULL) @@ -60,51 +57,28 @@ pixman_have_mips_feature (const char *search_string) } fclose (f); +#endif -/* Did not find string in the proc file. */ +/* Did not find string in the proc file, or not Linux ELF. */ return FALSE; } -#if defined(USE_MIPS_DSPR2) -pixman_bool_t -pixman_have_mips_dspr2 (void) -{ - /* Only currently available MIPS core that supports DSPr2 is 74K. */ -return pixman_have_mips_feature (MIPS 74K); -} #endif -#if defined(USE_LOONGSON_MMI) -pixman_bool_t -pixman_have_loongson_mmi (void) -{ -/* I really don't know if some Loongson CPUs don't have MMI. 
*/ -return pixman_have_mips_feature (Loongson); -} -#endif - -#else /* linux ELF */ - -#define pixman_have_mips_dspr2() FALSE -#define pixman_have_loongson_mmi() FALSE - -#endif /* linux ELF */ - -#endif /* USE_MIPS_DSPR2 || USE_LOONGSON_MMI */ - pixman_implementation_t * _pixman_mips_get_implementations (pixman_implementation_t *imp) { #ifdef USE_LOONGSON_MMI -if (!_pixman_disabled (loongson-mmi) pixman_have_loongson_mmi ()) +/* I really don't know if some Loongson CPUs don't have MMI. */ +if (!_pixman_disabled (loongson-mmi) have_feature (Loongson)) imp = _pixman_implementation_create_mmx (imp); #endif #ifdef USE_MIPS_DSPR2 -if (!_pixman_disabled (mips-dspr2) pixman_have_mips_dspr2 ()) +/* Only currently available MIPS core that supports DSPr2 is 74K. */ +if (!_pixman_disabled (mips-dspr2) have_feature (MIPS 74K)) imp = _pixman_implementation_create_mips_dspr2 (imp); #endif return imp; } - -- 1.7.10.4 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 03/10] Move ARM specific CPU detection to a new file pixman-arm.c
From: Søren Sandmann Pedersen s...@redhat.com Similar to the x86 commit, this moves the ARM specific CPU detection to its own file which exports a pixman_arm_get_implementations() function that is supposed to be a noop on non-ARM. --- pixman/Makefile.sources |1 + pixman/pixman-arm.c | 295 +++ pixman/pixman-cpu.c | 254 +--- pixman/pixman-private.h |3 + 4 files changed, 300 insertions(+), 253 deletions(-) create mode 100644 pixman/pixman-arm.c diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources index 4e0137a..7f2b75f 100644 --- a/pixman/Makefile.sources +++ b/pixman/Makefile.sources @@ -8,6 +8,7 @@ libpixman_sources = \ pixman-conical-gradient.c \ pixman-cpu.c\ pixman-x86.c\ + pixman-arm.c\ pixman-edge.c \ pixman-edge-accessors.c \ pixman-fast-path.c \ diff --git a/pixman/pixman-arm.c b/pixman/pixman-arm.c new file mode 100644 index 000..6625d7f --- /dev/null +++ b/pixman/pixman-arm.c @@ -0,0 +1,295 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided as is + * without express or implied warranty. 
+ * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include config.h +#endif + +#include pixman-private.h + +#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT) + +#include string.h +#include stdlib.h + +#if defined(USE_ARM_SIMD) defined(_MSC_VER) +/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ +#include windows.h +#endif + +#if defined(__APPLE__) +#include TargetConditionals.h +#endif + +#if defined(_MSC_VER) + +#if defined(USE_ARM_SIMD) +extern int pixman_msvc_try_arm_simd_op (); + +pixman_bool_t +pixman_have_arm_simd (void) +{ +static pixman_bool_t initialized = FALSE; +static pixman_bool_t have_arm_simd = FALSE; + +if (!initialized) +{ + __try { + pixman_msvc_try_arm_simd_op (); + have_arm_simd = TRUE; + } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { + have_arm_simd = FALSE; + } + initialized = TRUE; +} + +return have_arm_simd; +} + +#endif /* USE_ARM_SIMD */ + +#if defined(USE_ARM_NEON) +extern int pixman_msvc_try_arm_neon_op (); + +pixman_bool_t +pixman_have_arm_neon (void) +{ +static pixman_bool_t initialized = FALSE; +static pixman_bool_t have_arm_neon = FALSE; + +if (!initialized) +{ + __try + { + pixman_msvc_try_arm_neon_op (); + have_arm_neon = TRUE; + } + __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) + { + have_arm_neon = FALSE; + } + initialized = TRUE; +} + +return have_arm_neon; +} + +#endif /* USE_ARM_NEON */ + +#elif (defined (__APPLE__) defined(TARGET_OS_IPHONE)) /* iOS (iPhone/iPad/iPod touch) */ + +/* Detection of ARM NEON on iOS is fairly simple because 
iOS binaries + * contain separate executable images for each processor architecture. + * So all we have to do is detect the armv7 architecture build. The + * operating system automatically runs the armv7 binary for armv7 devices + * and the armv6 binary for armv6 devices. + */ + +pixman_bool_t +pixman_have_arm_simd (void) +{ +#if defined(USE_ARM_SIMD) +return TRUE; +#else +return FALSE; +#endif +} + +pixman_bool_t +pixman_have_arm_neon (void) +{ +#if defined(USE_ARM_NEON) defined(__ARM_NEON__) +/* This is an armv7 cpu build */ +return TRUE; +#else +/* This is an armv6 cpu build */ +return FALSE; +#endif +} + +pixman_bool_t +pixman_have_arm_iwmmxt (void) +{ +#if defined(USE_ARM_IWMMXT) +return FALSE; +#else +return FALSE; +#endif +} + +#elif defined (__linux__) || defined(__ANDROID__) || defined(ANDROID) /* linux ELF or ANDROID */ + +static pixman_bool_t
[Pixman] [PATCH 02/10] Move x86 specific CPU detection to a new file pixman-x86.c
From: Søren Sandmann Pedersen s...@redhat.com Extract the x86 specific parts of pixman-cpu.c and put them in their own file called pixman-x86.c which exports one function pixman_x86_get_implementations() that creates the MMX and SSE2 implementations. This file is supposed to be compiled on all architectures, but pixman_x86_get_implementations() should be a noop on non-x86. --- pixman/Makefile.sources |1 + pixman/pixman-cpu.c | 250 + pixman/pixman-private.h |6 + pixman/pixman-x86.c | 282 +++ 4 files changed, 291 insertions(+), 248 deletions(-) create mode 100644 pixman/pixman-x86.c diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources index 11f959d..4e0137a 100644 --- a/pixman/Makefile.sources +++ b/pixman/Makefile.sources @@ -7,6 +7,7 @@ libpixman_sources = \ pixman-combine64.c \ pixman-conical-gradient.c \ pixman-cpu.c\ + pixman-x86.c\ pixman-edge.c \ pixman-edge-accessors.c \ pixman-fast-path.c \ diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c index a0d2f8c..0bfc90f 100644 --- a/pixman/pixman-cpu.c +++ b/pixman/pixman-cpu.c @@ -491,244 +491,6 @@ pixman_have_loongson_mmi (void) #endif /* USE_MIPS_DSPR2 || USE_LOONGSON_MMI */ -#if defined(USE_X86_MMX) || defined(USE_SSE2) -/* The CPU detection code needs to be in a file not compiled with - * -mmmx -msse, as gcc would generate CMOV instructions otherwise - * that would lead to SIGILL instructions on old CPUs that don't have - * it. 
- */ -#if !defined(__amd64__) !defined(__x86_64__) !defined(_M_AMD64) - -#ifdef HAVE_GETISAX -#include sys/auxv.h -#endif - -typedef enum -{ -NO_FEATURES = 0, -MMX = 0x1, -MMX_EXTENSIONS = 0x2, -SSE = 0x6, -SSE2 = 0x8, -CMOV = 0x10 -} cpu_features_t; - - -static unsigned int -detect_cpu_features (void) -{ -unsigned int features = 0; -unsigned int result = 0; - -#ifdef HAVE_GETISAX -if (getisax (result, 1)) -{ - if (result AV_386_CMOV) - features |= CMOV; - if (result AV_386_MMX) - features |= MMX; - if (result AV_386_AMD_MMX) - features |= MMX_EXTENSIONS; - if (result AV_386_SSE) - features |= SSE; - if (result AV_386_SSE2) - features |= SSE2; -} -#else -char vendor[13]; -#ifdef _MSC_VER -int vendor0 = 0, vendor1, vendor2; -#endif -vendor[0] = 0; -vendor[12] = 0; - -#ifdef __GNUC__ -/* see p. 118 of amd64 instruction set manual Vol3 */ -/* We need to be careful about the handling of %ebx and - * %esp here. We can't declare either one as clobbered - * since they are special registers (%ebx is the PIC - * register holding an offset to global data, %esp the - * stack pointer), so we need to make sure they have their - * original values when we access the output operands. 
- */ -__asm__ ( -pushf\n -pop %%eax\n -mov %%eax, %%ecx\n -xor $0x0020, %%eax\n -push %%eax\n -popf\n -pushf\n -pop %%eax\n -mov $0x0, %%edx\n -xor %%ecx, %%eax\n -jz 1f\n - -mov $0x, %%eax\n -push %%ebx\n -cpuid\n -mov %%ebx, %%eax\n -pop %%ebx\n -mov %%eax, %1\n -mov %%edx, %2\n -mov %%ecx, %3\n -mov $0x0001, %%eax\n -push %%ebx\n -cpuid\n -pop %%ebx\n -1:\n -mov %%edx, %0\n - : =r (result), -=m (vendor[0]), -=m (vendor[4]), -=m (vendor[8]) - : - : %eax, %ecx, %edx -); - -#elif defined (_MSC_VER) - -_asm { - pushfd - pop eax - mov ecx, eax - xor eax, 0020h - push eax - popfd - pushfd - pop eax - mov edx, 0 - xor eax, ecx - jz nocpuid - - mov eax, 0 - push ebx - cpuid - mov eax, ebx - pop ebx - mov vendor0, eax - mov vendor1, edx - mov vendor2, ecx - mov eax, 1 - push ebx - cpuid - pop ebx -nocpuid: - mov result, edx -} -memmove (vendor + 0, vendor0, 4); -memmove (vendor + 4, vendor1, 4); -memmove (vendor + 8, vendor2, 4); - -#else -# error unsupported compiler -#endif - -features = 0; -if (result) -{ - /* result now contains the standard feature bits */ - if (result (1 15)) - features |= CMOV; - if (result (1 23)) - features |= MMX; - if (result (1 25)) - features |= SSE; - if (result (1 26)) - features |= SSE2; - if ((features MMX) !(features SSE) - (strcmp (vendor, AuthenticAMD) == 0 || -strcmp (vendor, Geode by NSC) == 0
[Pixman] [PATCH 09/10] Simplifications to ARM CPU detection
From: Søren Sandmann Pedersen s...@redhat.com Organize pixman-arm.c such that each operating system/compiler exports a detect_cpu_features() function that returns a bitmask with the various features that we are interested in. A new function have_feature() then calls this function, caches the result, and return whether the given feature is available. The result is that all the pixman_have_arm_feature functions become redundant and can be deleted. --- pixman/pixman-arm.c | 244 ++- 1 file changed, 87 insertions(+), 157 deletions(-) diff --git a/pixman/pixman-arm.c b/pixman/pixman-arm.c index 6625d7f..23374e4 100644 --- a/pixman/pixman-arm.c +++ b/pixman/pixman-arm.c @@ -25,132 +25,83 @@ #include pixman-private.h +typedef enum +{ +ARM_V7 = (1 0), +ARM_V6 = (1 1), +ARM_VFP= (1 2), +ARM_NEON = (1 3), +ARM_IWMMXT = (1 4) +} arm_cpu_features_t; + #if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT) -#include string.h -#include stdlib.h +#if defined(_MSC_VER) -#if defined(USE_ARM_SIMD) defined(_MSC_VER) /* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */ #include windows.h -#endif -#if defined(__APPLE__) -#include TargetConditionals.h -#endif - -#if defined(_MSC_VER) - -#if defined(USE_ARM_SIMD) +extern int pixman_msvc_try_arm_neon_op (); extern int pixman_msvc_try_arm_simd_op (); -pixman_bool_t -pixman_have_arm_simd (void) +static arm_cpu_features_t +detect_cpu_features (void) { -static pixman_bool_t initialized = FALSE; -static pixman_bool_t have_arm_simd = FALSE; +arm_cpu_features_t features = 0; -if (!initialized) +__try +{ + pixman_msvc_try_arm_simd_op (); + features |= ARM_V6; +} +__except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { - __try { - pixman_msvc_try_arm_simd_op (); - have_arm_simd = TRUE; - } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { - have_arm_simd = FALSE; - } - initialized = TRUE; } -return have_arm_simd; -} - -#endif /* USE_ARM_SIMD */ - -#if defined(USE_ARM_NEON) -extern int 
pixman_msvc_try_arm_neon_op (); - -pixman_bool_t -pixman_have_arm_neon (void) -{ -static pixman_bool_t initialized = FALSE; -static pixman_bool_t have_arm_neon = FALSE; - -if (!initialized) +__try +{ + pixman_msvc_try_arm_neon_op (); + features |= ARM_NEON; +} +__except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) { - __try - { - pixman_msvc_try_arm_neon_op (); - have_arm_neon = TRUE; - } - __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) - { - have_arm_neon = FALSE; - } - initialized = TRUE; } -return have_arm_neon; +return features; } -#endif /* USE_ARM_NEON */ - -#elif (defined (__APPLE__) defined(TARGET_OS_IPHONE)) /* iOS (iPhone/iPad/iPod touch) */ - -/* Detection of ARM NEON on iOS is fairly simple because iOS binaries - * contain separate executable images for each processor architecture. - * So all we have to do is detect the armv7 architecture build. The - * operating system automatically runs the armv7 binary for armv7 devices - * and the armv6 binary for armv6 devices. - */ +#elif defined(__APPLE__) defined(TARGET_OS_IPHONE) /* iOS */ -pixman_bool_t -pixman_have_arm_simd (void) -{ -#if defined(USE_ARM_SIMD) -return TRUE; -#else -return FALSE; -#endif -} +#include TargetConditionals.h -pixman_bool_t -pixman_have_arm_neon (void) +static arm_cpu_features_t +detect_cpu_features (void) { -#if defined(USE_ARM_NEON) defined(__ARM_NEON__) -/* This is an armv7 cpu build */ -return TRUE; -#else -/* This is an armv6 cpu build */ -return FALSE; +arm_cpu_features_t features = 0; + +features |= ARM_V6; + +/* Detection of ARM NEON on iOS is fairly simple because iOS binaries + * contain separate executable images for each processor architecture. + * So all we have to do is detect the armv7 architecture build. The + * operating system automatically runs the armv7 binary for armv7 devices + * and the armv6 binary for armv6 devices. 
+ */ +#if defined(__ARM_NEON__) +features |= ARM_NEON; #endif -} -pixman_bool_t -pixman_have_arm_iwmmxt (void) -{ -#if defined(USE_ARM_IWMMXT) -return FALSE; -#else -return FALSE; -#endif +return features; } -#elif defined (__linux__) || defined(__ANDROID__) || defined(ANDROID) /* linux ELF or ANDROID */ - -static pixman_bool_t arm_has_v7 = FALSE; -static pixman_bool_t arm_has_v6 = FALSE; -static pixman_bool_t arm_has_vfp = FALSE; -static pixman_bool_t arm_has_neon = FALSE; -static pixman_bool_t arm_has_iwmmxt = FALSE; -static pixman_bool_t arm_tests_initialized = FALSE; - -#if defined(__ANDROID__) || defined(ANDROID) /* Android device
[Pixman] [PATCH 05/10] Move MIPS specific CPU detection to its own file, pixman-mips.c
From: Søren Sandmann Pedersen s...@redhat.com --- pixman/Makefile.sources |1 + pixman/pixman-cpu.c | 77 + pixman/pixman-mips.c| 110 +++ pixman/pixman-private.h |3 ++ 4 files changed, 115 insertions(+), 76 deletions(-) create mode 100644 pixman/pixman-mips.c diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources index 414ac02..73758ff 100644 --- a/pixman/Makefile.sources +++ b/pixman/Makefile.sources @@ -8,6 +8,7 @@ libpixman_sources = \ pixman-conical-gradient.c \ pixman-cpu.c\ pixman-x86.c\ + pixman-mips.c \ pixman-arm.c\ pixman-ppc.c\ pixman-edge.c \ diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c index 914f116..5cef480 100644 --- a/pixman/pixman-cpu.c +++ b/pixman/pixman-cpu.c @@ -22,76 +22,10 @@ #ifdef HAVE_CONFIG_H #include config.h #endif - -#include string.h #include stdlib.h #include pixman-private.h -#if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI) - -#if defined (__linux__) /* linux ELF */ - -static pixman_bool_t -pixman_have_mips_feature (const char *search_string) -{ -const char *file_name = /proc/cpuinfo; -/* Simple detection of MIPS features at runtime for Linux. - * It is based on /proc/cpuinfo, which reveals hardware configuration - * to user-space applications. According to MIPS (early 2010), no similar - * facility is universally available on the MIPS architectures, so it's up - * to individual OSes to provide such. - */ - -char cpuinfo_line[256]; - -FILE *f = NULL; - -if ((f = fopen (file_name, r)) == NULL) -return FALSE; - -while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL) -{ -if (strstr (cpuinfo_line, search_string) != NULL) -{ -fclose (f); -return TRUE; -} -} - -fclose (f); - -/* Did not find string in the proc file. */ -return FALSE; -} - -#if defined(USE_MIPS_DSPR2) -pixman_bool_t -pixman_have_mips_dspr2 (void) -{ - /* Only currently available MIPS core that supports DSPr2 is 74K. 
*/ -return pixman_have_mips_feature (MIPS 74K); -} -#endif - -#if defined(USE_LOONGSON_MMI) -pixman_bool_t -pixman_have_loongson_mmi (void) -{ -/* I really don't know if some Loongson CPUs don't have MMI. */ -return pixman_have_mips_feature (Loongson); -} -#endif - -#else /* linux ELF */ - -#define pixman_have_mips_dspr2() FALSE -#define pixman_have_loongson_mmi() FALSE - -#endif /* linux ELF */ - -#endif /* USE_MIPS_DSPR2 || USE_LOONGSON_MMI */ - pixman_bool_t _pixman_disabled (const char *name) { @@ -136,16 +70,7 @@ _pixman_choose_implementation (void) imp = _pixman_x86_get_implementations (imp); imp = _pixman_arm_get_implementations (imp); imp = _pixman_ppc_get_implementations (imp); - -#ifdef USE_LOONGSON_MMI -if (!_pixman_disabled (loongson-mmi) pixman_have_loongson_mmi ()) - imp = _pixman_implementation_create_mmx (imp); -#endif - -#ifdef USE_MIPS_DSPR2 -if (!_pixman_disabled (mips-dspr2) pixman_have_mips_dspr2 ()) - imp = _pixman_implementation_create_mips_dspr2 (imp); -#endif +imp = _pixman_mips_get_implementations (imp); imp = _pixman_implementation_create_noop (imp); diff --git a/pixman/pixman-mips.c b/pixman/pixman-mips.c new file mode 100644 index 000..9d3ee59 --- /dev/null +++ b/pixman/pixman-mips.c @@ -0,0 +1,110 @@ +/* + * Copyright © 2000 SuSE, Inc. + * Copyright © 2007 Red Hat, Inc. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided as is + * without express or implied warranty. 
+ * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +#ifdef HAVE_CONFIG_H +#include config.h +#endif + +#include pixman-private.h + +#if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI) + +#include string.h +#include stdlib.h + +#if defined
[Pixman] [PATCH 06/10] Move the remaining bits of pixman-cpu into pixman-implementation.c
From: Søren Sandmann Pedersen s...@redhat.com --- pixman/Makefile.sources|1 - pixman/pixman-cpu.c| 79 pixman/pixman-implementation.c | 51 ++ 3 files changed, 51 insertions(+), 80 deletions(-) delete mode 100644 pixman/pixman-cpu.c diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources index 73758ff..6472994 100644 --- a/pixman/Makefile.sources +++ b/pixman/Makefile.sources @@ -6,7 +6,6 @@ libpixman_sources = \ pixman-combine32.c \ pixman-combine64.c \ pixman-conical-gradient.c \ - pixman-cpu.c\ pixman-x86.c\ pixman-mips.c \ pixman-arm.c\ diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c deleted file mode 100644 index 5cef480..000 --- a/pixman/pixman-cpu.c +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright © 2000 SuSE, Inc. - * Copyright © 2007 Red Hat, Inc. - * - * Permission to use, copy, modify, distribute, and sell this software and its - * documentation for any purpose is hereby granted without fee, provided that - * the above copyright notice appear in all copies and that both that - * copyright notice and this permission notice appear in supporting - * documentation, and that the name of SuSE not be used in advertising or - * publicity pertaining to distribution of the software without specific, - * written prior permission. SuSE makes no representations about the - * suitability of this software for any purpose. It is provided as is - * without express or implied warranty. - * - * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE - * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
- */ -#ifdef HAVE_CONFIG_H -#include config.h -#endif -#include stdlib.h - -#include pixman-private.h - -pixman_bool_t -_pixman_disabled (const char *name) -{ -const char *env; - -if ((env = getenv (PIXMAN_DISABLE))) -{ - do - { - const char *end; - int len; - - if ((end = strchr (env, ' '))) - len = end - env; - else - len = strlen (env); - - if (strlen (name) == len strncmp (name, env, len) == 0) - { - printf (pixman: Disabled %s implementation\n, name); - return TRUE; - } - - env += len; - } - while (*env++); -} - -return FALSE; -} - -pixman_implementation_t * -_pixman_choose_implementation (void) -{ -pixman_implementation_t *imp; - -imp = _pixman_implementation_create_general(); - -if (!_pixman_disabled (fast)) - imp = _pixman_implementation_create_fast_path (imp); - -imp = _pixman_x86_get_implementations (imp); -imp = _pixman_arm_get_implementations (imp); -imp = _pixman_ppc_get_implementations (imp); -imp = _pixman_mips_get_implementations (imp); - -imp = _pixman_implementation_create_noop (imp); - -return imp; -} - diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c index c769ab8..77d0906 100644 --- a/pixman/pixman-implementation.c +++ b/pixman/pixman-implementation.c @@ -223,3 +223,54 @@ _pixman_implementation_dest_iter_init (pixman_implementation_t *imp, (*imp-dest_iter_init) (imp, iter); } + +pixman_bool_t +_pixman_disabled (const char *name) +{ +const char *env; + +if ((env = getenv (PIXMAN_DISABLE))) +{ + do + { + const char *end; + int len; + + if ((end = strchr (env, ' '))) + len = end - env; + else + len = strlen (env); + + if (strlen (name) == len strncmp (name, env, len) == 0) + { + printf (pixman: Disabled %s implementation\n, name); + return TRUE; + } + + env += len; + } + while (*env++); +} + +return FALSE; +} + +pixman_implementation_t * +_pixman_choose_implementation (void) +{ +pixman_implementation_t *imp; + +imp = _pixman_implementation_create_general(); + +if (!_pixman_disabled (fast)) + imp = 
_pixman_implementation_create_fast_path (imp); + +imp = _pixman_x86_get_implementations (imp); +imp = _pixman_arm_get_implementations (imp); +imp = _pixman_ppc_get_implementations (imp); +imp = _pixman_mips_get_implementations (imp); + +imp = _pixman_implementation_create_noop (imp); + +return imp; +} -- 1.7.10.4 ___ Pixman mailing list Pixman
[Pixman] [PATCH 04/10] Move PowerPC specific CPU detection to its own file pixman-ppc.c
From: Søren Sandmann Pedersen s...@redhat.com --- pixman/Makefile.sources |1 + pixman/pixman-cpu.c | 165 +--- pixman/pixman-ppc.c | 192 +++ pixman/pixman-private.h |3 + 4 files changed, 197 insertions(+), 164 deletions(-) create mode 100644 pixman/pixman-ppc.c diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources index 7f2b75f..414ac02 100644 --- a/pixman/Makefile.sources +++ b/pixman/Makefile.sources @@ -9,6 +9,7 @@ libpixman_sources = \ pixman-cpu.c\ pixman-x86.c\ pixman-arm.c\ + pixman-ppc.c\ pixman-edge.c \ pixman-edge-accessors.c \ pixman-fast-path.c \ diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c index 319d71f..914f116 100644 --- a/pixman/pixman-cpu.c +++ b/pixman/pixman-cpu.c @@ -26,167 +26,8 @@ #include string.h #include stdlib.h -#if defined(__APPLE__) -#include TargetConditionals.h -#endif - #include pixman-private.h -#ifdef USE_VMX - -/* The CPU detection code needs to be in a file not compiled with - * -maltivec -mabi=altivec, as gcc would try to save vector register - * across function calls causing SIGILL on cpus without Altivec/vmx. 
- */ -static pixman_bool_t initialized = FALSE; -static volatile pixman_bool_t have_vmx = TRUE; - -#ifdef __APPLE__ -#include sys/sysctl.h - -static pixman_bool_t -pixman_have_vmx (void) -{ -if (!initialized) -{ - size_t length = sizeof(have_vmx); - int error = - sysctlbyname (hw.optional.altivec, have_vmx, length, NULL, 0); - - if (error) - have_vmx = FALSE; - - initialized = TRUE; -} -return have_vmx; -} - -#elif defined (__OpenBSD__) -#include sys/param.h -#include sys/sysctl.h -#include machine/cpu.h - -static pixman_bool_t -pixman_have_vmx (void) -{ -if (!initialized) -{ - int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; - size_t length = sizeof(have_vmx); - int error = - sysctl (mib, 2, have_vmx, length, NULL, 0); - - if (error != 0) - have_vmx = FALSE; - - initialized = TRUE; -} -return have_vmx; -} - -#elif defined (__linux__) -#include sys/types.h -#include sys/stat.h -#include fcntl.h -#include unistd.h -#include stdio.h -#include linux/auxvec.h -#include asm/cputable.h - -static pixman_bool_t -pixman_have_vmx (void) -{ -if (!initialized) -{ - char fname[64]; - unsigned long buf[64]; - ssize_t count = 0; - pid_t pid; - int fd, i; - - pid = getpid (); - snprintf (fname, sizeof(fname) - 1, /proc/%d/auxv, pid); - - fd = open (fname, O_RDONLY); - if (fd = 0) - { - for (i = 0; i = (count / sizeof(unsigned long)); i += 2) - { - /* Read more if buf is empty... */ - if (i == (count / sizeof(unsigned long))) - { - count = read (fd, buf, sizeof(buf)); - if (count = 0) - break; - i = 0; - } - - if (buf[i] == AT_HWCAP) - { - have_vmx = !!(buf[i + 1] PPC_FEATURE_HAS_ALTIVEC); - initialized = TRUE; - break; - } - else if (buf[i] == AT_NULL) - { - break; - } - } - close (fd); - } -} -if (!initialized) -{ - /* Something went wrong. Assume 'no' rather than playing - fragile tricks with catching SIGILL. 
*/ - have_vmx = FALSE; - initialized = TRUE; -} - -return have_vmx; -} - -#else /* !__APPLE__ !__OpenBSD__ !__linux__ */ -#include signal.h -#include setjmp.h - -static jmp_buf jump_env; - -static void -vmx_test (intsig, - siginfo_t *si, - void * unused) -{ -longjmp (jump_env, 1); -} - -static pixman_bool_t -pixman_have_vmx (void) -{ -struct sigaction sa, osa; -int jmp_result; - -if (!initialized) -{ - sa.sa_flags = SA_SIGINFO; - sigemptyset (sa.sa_mask); - sa.sa_sigaction = vmx_test; - sigaction (SIGILL, sa, osa); - jmp_result = setjmp (jump_env); - if (jmp_result == 0) - { - asm volatile ( vor 0, 0, 0 ); - } - sigaction (SIGILL, osa, NULL); - have_vmx = (jmp_result == 0); - initialized = TRUE; -} -return have_vmx; -} - -#endif /* __APPLE__ */ -#endif /* USE_VMX */ - #if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI) #if defined (__linux__) /* linux ELF */ @@ -294,6 +135,7 @@ _pixman_choose_implementation (void) imp = _pixman_x86_get_implementations (imp); imp = _pixman_arm_get_implementations (imp); +imp
[Pixman] [PATCH 10/10] Simplify CPU detection on PPC.
From: Søren Sandmann Pedersen s...@redhat.com Get rid of the initialized and have_vmx static variables in pixman-ppc.c There is no point to them since CPU detection only happens once per process. On Linux, just read /proc/self/auxv instead of generating the filename with getpid() and don't bother with the stack buffer. Instead just read the aux entries one by one. --- pixman/pixman-ppc.c | 113 +-- 1 file changed, 38 insertions(+), 75 deletions(-) diff --git a/pixman/pixman-ppc.c b/pixman/pixman-ppc.c index 786f204..f1bea1e 100644 --- a/pixman/pixman-ppc.c +++ b/pixman/pixman-ppc.c @@ -31,26 +31,20 @@ * -maltivec -mabi=altivec, as gcc would try to save vector register * across function calls causing SIGILL on cpus without Altivec/vmx. */ -static pixman_bool_t initialized = FALSE; -static volatile pixman_bool_t have_vmx = TRUE; - #ifdef __APPLE__ #include sys/sysctl.h static pixman_bool_t pixman_have_vmx (void) { -if (!initialized) -{ - size_t length = sizeof(have_vmx); - int error = - sysctlbyname (hw.optional.altivec, have_vmx, length, NULL, 0); +size_t length = sizeof(have_vmx); +int error, have_mmx; - if (error) - have_vmx = FALSE; +sysctlbyname (hw.optional.altivec, have_vmx, length, NULL, 0); + +if (error) + return FALSE; - initialized = TRUE; -} return have_vmx; } @@ -62,22 +56,20 @@ pixman_have_vmx (void) static pixman_bool_t pixman_have_vmx (void) { -if (!initialized) -{ - int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; - size_t length = sizeof(have_vmx); - int error = - sysctl (mib, 2, have_vmx, length, NULL, 0); +int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC }; +size_t length = sizeof(have_vmx); +int error, have_vmx; - if (error != 0) - have_vmx = FALSE; +error = sysctl (mib, 2, have_vmx, length, NULL, 0); + +if (error != 0) + return FALSE; - initialized = TRUE; -} return have_vmx; } #elif defined (__linux__) + #include sys/types.h #include sys/stat.h #include fcntl.h @@ -89,51 +81,27 @@ pixman_have_vmx (void) static pixman_bool_t pixman_have_vmx (void) { -if 
(!initialized) +int have_vmx = FALSE; +int fd; +struct { - char fname[64]; - unsigned long buf[64]; - ssize_t count = 0; - pid_t pid; - int fd, i; + unsigned long type; + unsigned long value; +} aux; - pid = getpid (); - snprintf (fname, sizeof(fname) - 1, /proc/%d/auxv, pid); - - fd = open (fname, O_RDONLY); - if (fd = 0) +fd = open (/proc/self/auxv, O_RDONLY); +if (fd = 0) +{ + while (read (fd, aux, sizeof (aux)) == sizeof (aux)) { - for (i = 0; i = (count / sizeof(unsigned long)); i += 2) + if (aux.type == AT_HWCAP (aux.value PPC_FEATURE_HAS_ALTIVEC)) { - /* Read more if buf is empty... */ - if (i == (count / sizeof(unsigned long))) - { - count = read (fd, buf, sizeof(buf)); - if (count = 0) - break; - i = 0; - } - - if (buf[i] == AT_HWCAP) - { - have_vmx = !!(buf[i + 1] PPC_FEATURE_HAS_ALTIVEC); - initialized = TRUE; - break; - } - else if (buf[i] == AT_NULL) - { - break; - } + have_vmx = TRUE; + break; } - close (fd); } -} -if (!initialized) -{ - /* Something went wrong. Assume 'no' rather than playing - fragile tricks with catching SIGILL. */ - have_vmx = FALSE; - initialized = TRUE; + + close (fd); } return have_vmx; @@ -159,22 +127,17 @@ pixman_have_vmx (void) struct sigaction sa, osa; int jmp_result; -if (!initialized) +sa.sa_flags = SA_SIGINFO; +sigemptyset (sa.sa_mask); +sa.sa_sigaction = vmx_test; +sigaction (SIGILL, sa, osa); +jmp_result = setjmp (jump_env); +if (jmp_result == 0) { - sa.sa_flags = SA_SIGINFO; - sigemptyset (sa.sa_mask); - sa.sa_sigaction = vmx_test; - sigaction (SIGILL, sa, osa); - jmp_result = setjmp (jump_env); - if (jmp_result == 0) - { - asm volatile ( vor 0, 0, 0 ); - } - sigaction (SIGILL, osa, NULL); - have_vmx = (jmp_result == 0); - initialized = TRUE; + asm volatile ( vor 0, 0, 0 ); } -return have_vmx; +sigaction (SIGILL, osa, NULL); +return (jmp_result == 0); } #endif /* __APPLE__ */ -- 1.7.10.4 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 08/10] Cleanups and simplifications in x86 CPU feature detection
From: Søren Sandmann Pedersen s...@redhat.com A new function pixman_cpuid() is added that runs the cpuid instruction and returns the results. On GCC this function uses inline assembly that is written such that it will work on both 32 and 64 bit. Compared to the old code, the only difference is %ebx is saved in %esi instead of on the stack. Saving 32 bit registers on a 64 bit stack is difficult or impossible because in 64 bit mode, the push and pop instructions work on 64 bit registers. On MSVC, the function calls the __cpuid intrinsic. There is also a new function called have_cpuid() which detects whether cpuid is available. On x86-64 and MSVC, it simply returns TRUE; on x86-32 bit, it checks whether the 22nd bit of eflags can be modified. On MSVC this does have the consequence that pixman will no longer work on CPUs without cpuid (i.e., older than 486 and some 486 models). These two functions together make it possible to write a generic detect_cpu_features() in plain C. This function is then used in a new have_feature() function that checks whether a specific set of feature bits is available. Aside from the cleanups and simplifications, the main benefit from this patch is that pixman now can do feature detection on x86-64, so that newer instruction sets such as SSSE3 and SSE4.1 can be used. (And apparently the assumption that x86-64 CPUs always have MMX and SSE2 is no longer correct: Knight's Corner is x86-64, but doesn't have them). --- pixman/pixman-x86.c | 311 +-- 1 file changed, 129 insertions(+), 182 deletions(-) diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c index 52ad3df..84590d2 100644 --- a/pixman/pixman-x86.c +++ b/pixman/pixman-x86.c @@ -32,30 +32,25 @@ * that would lead to SIGILL instructions on old CPUs that don't have * it. 
*/ -#if !defined(__amd64__) !defined(__x86_64__) !defined(_M_AMD64) - -#ifdef HAVE_GETISAX -#include sys/auxv.h -#endif typedef enum { -NO_FEATURES = 0, -MMX = 0x1, -MMX_EXTENSIONS = 0x2, -SSE = 0x6, -SSE2 = 0x8, -CMOV = 0x10 +X86_MMX= (1 0), +X86_MMX_EXTENSIONS = (1 1), +X86_SSE= (1 2) | X86_MMX_EXTENSIONS, +X86_SSE2 = (1 3), +X86_CMOV = (1 4) } cpu_features_t; +#ifdef HAVE_GETISAX -static unsigned int +#include sys/auxv.h + +static cpu_features_t detect_cpu_features (void) { -unsigned int features = 0; -unsigned int result = 0; - -#ifdef HAVE_GETISAX +cpu_features_t features; + if (getisax (result, 1)) { if (result AV_386_CMOV) @@ -69,15 +64,47 @@ detect_cpu_features (void) if (result AV_386_SSE2) features |= SSE2; } + +return features; +} + +#else + +static pixman_bool_t +have_cpuid (void) +{ +#if defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64) || defined (_MSC_VER) + +return TRUE; + +#elif defined (__GNUC__) +uint32_t result; + +__asm__ volatile ( +pushf\n\t +pop %%eax\n\t +mov %%eax, %%ecx \n\t +xor $0x0020, %%eax \n\t +push %%eax \n\t +popf \n\t +pushf\n\t +pop %%eax\n\t +xor %%ecx, %%eax \n\t + mov %%eax, %0 \n\t + : =r (result) + : + : %eax, %ecx); + +return !!result; + #else -char vendor[13]; -#ifdef _MSC_VER -int vendor0 = 0, vendor1, vendor2; +#error Unknown compiler #endif -vendor[0] = 0; -vendor[12] = 0; - -#ifdef __GNUC__ +} + +static void +pixman_cpuid (uint32_t feature, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) +{ /* see p. 118 of amd64 instruction set manual Vol3 */ /* We need to be careful about the handling of %ebx and * %esp here. We can't declare either one as clobbered @@ -86,195 +113,115 @@ detect_cpu_features (void) * stack pointer), so we need to make sure they have their * original values when we access the output operands. 
*/ -__asm__ ( -pushf\n -pop %%eax\n -mov %%eax, %%ecx\n -xor $0x0020, %%eax\n -push %%eax\n -popf\n -pushf\n -pop %%eax\n -mov $0x0, %%edx\n -xor %%ecx, %%eax\n -jz 1f\n - -mov $0x, %%eax\n -push %%ebx\n -cpuid\n -mov %%ebx, %%eax\n -pop %%ebx\n -mov %%eax, %1\n -mov %%edx, %2\n -mov %%ecx, %3\n -mov $0x0001, %%eax\n -push %%ebx\n -cpuid\n -pop %%ebx\n -1:\n -mov %%edx, %0\n - : =r (result), - =m (vendor[0]), - =m (vendor[4]), - =m (vendor[8]) - : - : %eax, %ecx, %edx -); - +#if defined
[Pixman] NOOP implementation
The following patches add a noop implementation, which is used as topmost in the implementation hierarchy. It is supposed to contain iterators and compositing routines that don't do anything. For example, there is a compositing fast path for the DST operator. This is useful because it allows more CPU specific iterators to be added without worrying about them being selected ahead of noop iterators. Soren ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 1/6] Add a noop implementation.
From: Søren Sandmann Pedersen s...@redhat.com This new implementation is ahead of all other implementations in the fallback chain and is supposed to contain operations that don't require any work. For example, it might contain a fast path for the DST operator that doesn't actually do anything. --- pixman/Makefile.am |1 + pixman/pixman-cpu.c |2 ++ pixman/pixman-noop.c| 45 + pixman/pixman-private.h |3 +++ 4 files changed, 51 insertions(+), 0 deletions(-) create mode 100644 pixman/pixman-noop.c diff --git a/pixman/Makefile.am b/pixman/Makefile.am index be08266..1e20bb0 100644 --- a/pixman/Makefile.am +++ b/pixman/Makefile.am @@ -20,6 +20,7 @@ libpixman_1_la_SOURCES = \ pixman-combine64.h \ pixman-general.c\ pixman.c\ + pixman-noop.c \ pixman-fast-path.c \ pixman-fast-path.h \ pixman-solid-fill.c \ diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c index 0e14ecb..973ed54 100644 --- a/pixman/pixman-cpu.c +++ b/pixman/pixman-cpu.c @@ -606,6 +606,8 @@ _pixman_choose_implementation (void) imp = _pixman_implementation_create_vmx (imp); #endif +imp = _pixman_implementation_create_noop (imp); + return imp; } diff --git a/pixman/pixman-noop.c b/pixman/pixman-noop.c new file mode 100644 index 000..50bbfb0 --- /dev/null +++ b/pixman/pixman-noop.c @@ -0,0 +1,45 @@ +/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ +/* + * Copyright © 2011 Red Hat, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the Software), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#ifdef HAVE_CONFIG_H +#include config.h +#endif +#include string.h +#include stdlib.h +#include pixman-private.h +#include pixman-combine32.h +#include pixman-fast-path.h + +static const pixman_fast_path_t noop_fast_paths[] = +{ +{ PIXMAN_OP_NONE }, +}; + +pixman_implementation_t * +_pixman_implementation_create_noop (pixman_implementation_t *fallback) +{ +pixman_implementation_t *imp = + _pixman_implementation_create (fallback, noop_fast_paths); + +return imp; +} diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index 60060a9..2996907 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -523,6 +523,9 @@ _pixman_implementation_create_general (void); pixman_implementation_t * _pixman_implementation_create_fast_path (pixman_implementation_t *fallback); +pixman_implementation_t * +_pixman_implementation_create_noop (pixman_implementation_t *fallback); + #ifdef USE_MMX pixman_implementation_t * _pixman_implementation_create_mmx (pixman_implementation_t *fallback); -- 1.7.4 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 3/6] Move noop dest fetching to noop implementation
From: Søren Sandmann Pedersen s...@redhat.com It will at some point become useful to have CPU specific destination iterators. However, a problem with that is that such iterators should not be used if we can composite directly in the destination image. By moving the noop destination iterator to the noop implementation, we can ensure that it will be chosen before any CPU specific iterator. --- pixman/pixman-bits-image.c | 31 +-- pixman/pixman-noop.c | 32 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c index 835ecfb..4e9ed14 100644 --- a/pixman/pixman-bits-image.c +++ b/pixman/pixman-bits-image.c @@ -1462,43 +1462,22 @@ dest_write_back_wide (pixman_iter_t *iter) iter-y++; } -static void -dest_write_back_direct (pixman_iter_t *iter) -{ -iter-buffer += iter-image-bits.rowstride; -} - void _pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter) { if (iter-flags ITER_NARROW) { - if (((image-common.flags - (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) == -(FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) - (image-bits.format == PIXMAN_a8r8g8b8 || -(image-bits.format == PIXMAN_x8r8g8b8 - (iter-flags ITER_LOCALIZED_ALPHA + if ((iter-flags (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == + (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) { - iter-buffer = image-bits.bits + iter-y * image-bits.rowstride + iter-x; - iter-get_scanline = _pixman_iter_get_scanline_noop; - iter-write_back = dest_write_back_direct; } else { - if ((iter-flags (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) == - (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) - { - iter-get_scanline = _pixman_iter_get_scanline_noop; - } - else - { - iter-get_scanline = dest_get_scanline_narrow; - } - - iter-write_back = dest_write_back_narrow; + iter-get_scanline = dest_get_scanline_narrow; } + + iter-write_back = dest_write_back_narrow; } else { diff --git a/pixman/pixman-noop.c b/pixman/pixman-noop.c index 673a02a..d753843 100644 --- 
a/pixman/pixman-noop.c +++ b/pixman/pixman-noop.c @@ -48,6 +48,36 @@ noop_composite (pixman_implementation_t *imp, return; } +static void +dest_write_back_direct (pixman_iter_t *iter) +{ +iter-buffer += iter-image-bits.rowstride; +} + +static void +noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ +pixman_image_t *image = iter-image; +uint32_t image_flags = image-common.flags; +uint32_t iter_flags = iter-flags; + +if ((image_flags FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS + (iter_flags ITER_NARROW) == ITER_NARROW + ((image-common.extended_format_code == PIXMAN_a8r8g8b8)|| +(image-common.extended_format_code == PIXMAN_x8r8g8b8 + (iter_flags (ITER_LOCALIZED_ALPHA) +{ + iter-buffer = image-bits.bits + iter-y * image-bits.rowstride + iter-x; + + iter-get_scanline = _pixman_iter_get_scanline_noop; + iter-write_back = dest_write_back_direct; +} +else +{ + (* imp-delegate-dest_iter_init) (imp-delegate, iter); +} +} + static const pixman_fast_path_t noop_fast_paths[] = { { PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, noop_composite }, @@ -60,5 +90,7 @@ _pixman_implementation_create_noop (pixman_implementation_t *fallback) pixman_implementation_t *imp = _pixman_implementation_create (fallback, noop_fast_paths); +imp-dest_iter_init = noop_dest_iter_init; + return imp; } -- 1.7.4 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 5/6] Move NULL iterator into pixman-noop.c
From: Søren Sandmann Pedersen s...@redhat.com Iterating a NULL image returns NULL for all scanlines. This may as well be done in the noop iterator. --- pixman/pixman-implementation.c | 12 +--- pixman/pixman-noop.c | 24 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c index f1d3f99..2706ceb 100644 --- a/pixman/pixman-implementation.c +++ b/pixman/pixman-implementation.c @@ -241,12 +241,6 @@ _pixman_implementation_fill (pixman_implementation_t *imp, return (*imp-fill) (imp, bits, stride, bpp, x, y, width, height, xor); } -static uint32_t * -get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) -{ -return NULL; -} - void _pixman_implementation_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter, @@ -266,11 +260,7 @@ _pixman_implementation_src_iter_init (pixman_implementation_t *imp, iter-height = height; iter-flags = flags; -if (!image) -{ - iter-get_scanline = get_scanline_null; -} -else if ((flags (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == +if ((flags (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) == (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) { iter-get_scanline = _pixman_iter_get_scanline_noop; diff --git a/pixman/pixman-noop.c b/pixman/pixman-noop.c index 5dc528d..75ecf0b 100644 --- a/pixman/pixman-noop.c +++ b/pixman/pixman-noop.c @@ -64,22 +64,30 @@ noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) return result; } +static uint32_t * +get_scanline_null (pixman_iter_t *iter, const uint32_t *mask) +{ +return NULL; +} + static void noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) { pixman_image_t *image = iter-image; -uint32_t iter_flags = iter-flags; -uint32_t image_flags = image-common.flags; #define FLAGS \ (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM) -if ((iter_flags ITER_NARROW) - (image_flags FLAGS) == FLAGS - iter-x = 0 iter-y = 0 - iter-x + iter-width = image-bits.width - iter-y + iter-height = image-bits.height - 
image-common.extended_format_code == PIXMAN_a8r8g8b8) +if (!image) +{ + iter-get_scanline = get_scanline_null; +} +else if ((iter-flags ITER_NARROW) +(image-common.flags FLAGS) == FLAGS +iter-x = 0 iter-y = 0 +iter-x + iter-width = image-bits.width +iter-y + iter-height = image-bits.height +image-common.extended_format_code == PIXMAN_a8r8g8b8) { iter-buffer = image-bits.bits + iter-y * image-bits.rowstride + iter-x; -- 1.7.4 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 4/6] Add a noop src iterator
From: Søren Sandmann Pedersen s...@redhat.com When the image is a8r8g8b8 and not transformed, and the fetched rectangle is within the image bounds, scanlines can be fetched by simply returning a pointer instead of copying the bits. --- pixman/pixman-noop.c | 39 +++ 1 files changed, 39 insertions(+), 0 deletions(-) diff --git a/pixman/pixman-noop.c b/pixman/pixman-noop.c index d753843..5dc528d 100644 --- a/pixman/pixman-noop.c +++ b/pixman/pixman-noop.c @@ -54,6 +54,44 @@ dest_write_back_direct (pixman_iter_t *iter) iter-buffer += iter-image-bits.rowstride; } +static uint32_t * +noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask) +{ +uint32_t *result = iter-buffer; + +iter-buffer += iter-image-bits.rowstride; + +return result; +} + +static void +noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) +{ +pixman_image_t *image = iter-image; +uint32_t iter_flags = iter-flags; +uint32_t image_flags = image-common.flags; + +#define FLAGS \ +(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM) + +if ((iter_flags ITER_NARROW) + (image_flags FLAGS) == FLAGS + iter-x = 0 iter-y = 0 + iter-x + iter-width = image-bits.width + iter-y + iter-height = image-bits.height + image-common.extended_format_code == PIXMAN_a8r8g8b8) +{ + iter-buffer = + image-bits.bits + iter-y * image-bits.rowstride + iter-x; + + iter-get_scanline = noop_get_scanline; +} +else +{ + (* imp-delegate-src_iter_init) (imp-delegate, iter); +} +} + static void noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter) { @@ -90,6 +128,7 @@ _pixman_implementation_create_noop (pixman_implementation_t *fallback) pixman_implementation_t *imp = _pixman_implementation_create (fallback, noop_fast_paths); +imp-src_iter_init = noop_src_iter_init; imp-dest_iter_init = noop_dest_iter_init; return imp; -- 1.7.4 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] Fix
I forgot to CC pixman@lists.freedesktop.org on the following patch. The patch is necessary to make trapezoid rendering directly to X windows work and also makes the pixman_composite_trapezoids() API more similar to pixman_image_composite(). See this thread: http://lists.x.org/archives/xorg-devel/2011-March/021056.html for context. Soren ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
Re: [Pixman] [PATCH] Add forgotten _mm_empty() calls in the SSE2 fetchers.
Here is a patch series that removes all use of MMX from pixman-sse2.c. This avoids all the emms issues and is likely also a speedup on Windows x64, where MMX intrinsics are not supported and therefore had to be emulated. b/configure.ac|2 b/pixman/pixman-sse2.c| 1601 +- pixman/pixman-x64-mmx-emulation.h | 263 -- 3 files changed, 572 insertions(+), 1294 deletions(-) Søren ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 4/8] sse2: Don't compile pixman-sse2.c with -mmmx anymore
From: Søren Sandmann Pedersen s...@redhat.com It's not necessary now that the file doesn't use MMX instructions. --- configure.ac |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/configure.ac b/configure.ac index 5242799..8d96647 100644 --- a/configure.ac +++ b/configure.ac @@ -326,7 +326,7 @@ if test x$SSE2_CFLAGS = x ; then SSE2_CFLAGS=-xarch=sse2 fi else - SSE2_CFLAGS=-mmmx -msse2 -Winline + SSE2_CFLAGS=-msse2 -Winline fi fi -- 1.7.3.1 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 6/8] sse2: Delete obsolete or redundant comments
From: Søren Sandmann Pedersen s...@redhat.com --- pixman/pixman-sse2.c | 137 -- 1 files changed, 0 insertions(+), 137 deletions(-) diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index 0753b6d..286dea8 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -46,12 +46,6 @@ # include pixman-x64-mmx-emulation.h #endif -#ifdef USE_SSE2 - -/* - * Locals - */ - static __m128i mask_0080; static __m128i mask_00ff; static __m128i mask_0101; @@ -69,9 +63,6 @@ static __m128i mask_blue; static __m128i mask_565_fix_rb; static __m128i mask_565_fix_g; -/* -- - * SSE2 Inlines - */ static force_inline __m128i unpack_32_1x128 (uint32_t data) { @@ -389,10 +380,6 @@ save_128_unaligned (__m128i* dst, _mm_storeu_si128 (dst, data); } -/* -- - * MMX inlines - */ - static force_inline __m128i load_32_1x128 (uint32_t data) { @@ -486,9 +473,6 @@ expand565_16_1x128 (uint16_t pixel) return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ()); } -/* - * Compose Core transformations - */ static force_inline uint32_t core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst) { @@ -2365,9 +2349,6 @@ sse2_combine_add_ca (pixman_implementation_t *imp, } } -/* --- - * fb_compose_setup_sSE2 - */ static force_inline __m128i create_mask_16_128 (uint16_t mask) { @@ -2387,10 +2368,6 @@ create_mask_2x32_128 (uint32_t mask0, } #endif -/* --- - * composite_over_n_ - */ - static void sse2_composite_over_n_ (pixman_implementation_t *imp, pixman_op_t op, @@ -2470,9 +2447,6 @@ sse2_composite_over_n_ (pixman_implementation_t *imp, } } -/* - - * composite_over_n_0565 - */ static void sse2_composite_over_n_0565 (pixman_implementation_t *imp, pixman_op_t op, @@ -2558,9 +2532,6 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp, } -/* -- - * composite_add_n___ca - */ static void sse2_composite_add_n___ca (pixman_implementation_t *imp, pixman_op_t op, @@ -2684,10 +2655,6 @@ sse2_composite_add_n___ca (pixman_implementation_t *imp, } -/* --- - * composite_over_n___ca - */ - static void 
sse2_composite_over_n___ca (pixman_implementation_t *imp, pixman_op_t op, @@ -2811,10 +2778,6 @@ sse2_composite_over_n___ca (pixman_implementation_t *imp, } -/*- - * composite_over__n_ - */ - static void sse2_composite_over__n_ (pixman_implementation_t *imp, pixman_op_t op, @@ -2929,10 +2892,6 @@ sse2_composite_over__n_ (pixman_implementation_t *imp, } -/*- - * composite_over__n_ - */ - static void sse2_composite_src_x888_ (pixman_implementation_t *imp, pixman_op_t op, @@ -3001,9 +2960,6 @@ sse2_composite_src_x888_ (pixman_implementation_t *imp, } -/* - - * composite_over_x888_n_ - */ static void sse2_composite_over_x888_n_ (pixman_implementation_t *imp, pixman_op_t op, @@ -3105,9 +3061,6 @@ sse2_composite_over_x888_n_ (pixman_implementation_t *imp, } -/* - * composite_over__ - */ static void sse2_composite_over__ (pixman_implementation_t *imp, pixman_op_t op, @@ -3144,9 +3097,6 @@ sse2_composite_over__ (pixman_implementation_t *imp, } } -/* -- - * composite_over__0565 - */ static force_inline uint16_t composite_over__0565pixel (uint32_t src, uint16_t dst) { @@ -3188,15 +3138,6 @@ sse2_composite_over__0565 (pixman_implementation_t *imp, PIXMAN_IMAGE_GET_LINE ( src_image, src_x, src_y, uint32_t, src_stride, src_line, 1); -#if 0 -/* FIXME - * - * I copy the code from MMX one and keep the fixme. - * If it's a problem there, probably is a problem here. - */ -assert
[Pixman] [PATCH 8/8] sse2: Minor coding style cleanups.
From: Søren Sandmann Pedersen s...@redhat.com Also make pixman_fill_sse2() static. --- pixman/pixman-sse2.c | 18 -- 1 files changed, 12 insertions(+), 6 deletions(-) diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c index 0509613..88287b4 100644 --- a/pixman/pixman-sse2.c +++ b/pixman/pixman-sse2.c @@ -2587,7 +2587,8 @@ sse2_composite_add_n___ca (pixman_implementation_t *imp, mmx_dest = unpack_32_1x128 (d); *pd = pack_1x128_32 ( - _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), mmx_dest)); + _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), + mmx_dest)); } pd++; @@ -2635,7 +2636,8 @@ sse2_composite_add_n___ca (pixman_implementation_t *imp, mmx_dest = unpack_32_1x128 (d); *pd = pack_1x128_32 ( - _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), mmx_dest)); + _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), + mmx_dest)); } pd++; @@ -,7 +3335,7 @@ sse2_composite_over_n_8_ (pixman_implementation_t *imp, } -pixman_bool_t +static pixman_bool_t pixman_fill_sse2 (uint32_t *bits, int stride, int bpp, @@ -4886,7 +4888,8 @@ sse2_composite_over_x888_8_ (pixman_implementation_t *imp, while (w = 4) { m = *(uint32_t*) mask; -xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff00); +xmm_src = _mm_or_si128 ( + load_128_unaligned ((__m128i*)src), mask_ff00); if (m == 0x) { @@ -4902,9 +4905,12 @@ sse2_composite_over_x888_8_ (pixman_implementation_t *imp, unpack_128_2x128 (xmm_mask, xmm_mask_lo, xmm_mask_hi); unpack_128_2x128 (xmm_dst, xmm_dst_lo, xmm_dst_hi); -expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, xmm_mask_lo, xmm_mask_hi); +expand_alpha_rev_2x128 ( + xmm_mask_lo, xmm_mask_hi, xmm_mask_lo, xmm_mask_hi); -in_over_2x128 (xmm_src_lo, xmm_src_hi, mask_00ff, mask_00ff, xmm_mask_lo, xmm_mask_hi, xmm_dst_lo, xmm_dst_hi); +in_over_2x128 (xmm_src_lo, xmm_src_hi, + mask_00ff, mask_00ff, xmm_mask_lo, xmm_mask_hi, + xmm_dst_lo, xmm_dst_hi); save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi)); } -- 1.7.3.1 ___ Pixman 
mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 2/6] Add a test program for pixman_composite_trapezoids().
From: Søren Sandmann Pedersen s...@redhat.com A CRC32 based test program to check that pixman_composite_trapezoids() actually works. --- test/Makefile.am|5 + test/composite-traps-test.c | 253 +++ 2 files changed, 258 insertions(+), 0 deletions(-) create mode 100644 test/composite-traps-test.c diff --git a/test/Makefile.am b/test/Makefile.am index 8d8471d..0b7d05c 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -17,6 +17,7 @@ TESTPROGRAMS =\ gradient-crash-test \ alphamap\ stress-test \ + composite-traps-test\ blitters-test \ scaling-test\ affine-test \ @@ -52,6 +53,10 @@ blitters_test_LDADD = $(TEST_LDADD) blitters_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ blitters_test_SOURCES = blitters-test.c utils.c utils.h +composite_traps_test_LDADD = $(TEST_LDADD) +composite_traps_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ +composite_traps_test_SOURCES = composite-traps-test.c utils.c utils.h + scaling_test_LDADD = $(TEST_LDADD) scaling_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@ scaling_test_SOURCES = scaling-test.c utils.c utils.h diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c new file mode 100644 index 000..8f32778 --- /dev/null +++ b/test/composite-traps-test.c @@ -0,0 +1,253 @@ +/* Based loosely on scaling-test */ + +#include assert.h +#include stdlib.h +#include stdio.h +#include utils.h + +#define MAX_SRC_WIDTH 48 +#define MAX_SRC_HEIGHT 48 +#define MAX_DST_WIDTH 48 +#define MAX_DST_HEIGHT 48 +#define MAX_STRIDE 4 + +static pixman_format_code_t formats[] = +{ +PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_r5g6b5, PIXMAN_a1, PIXMAN_a4 +}; + +static pixman_format_code_t mask_formats[] = +{ +PIXMAN_a1, PIXMAN_a4, PIXMAN_a8, +}; + +static pixman_op_t operators[] = +{ +PIXMAN_OP_OVER, PIXMAN_OP_ADD, PIXMAN_OP_SRC, PIXMAN_OP_IN +}; + +#define RANDOM_ELT(array) \ +((array)[lcg_rand_n(ARRAY_LENGTH((array)))]) + +static void +destroy_bits (pixman_image_t *image, void *data) +{ +fence_free (data); +} + +static pixman_fixed_t +random_fixed (int n) +{ +return 
lcg_rand_N (n 16); +} + +/* + * Composite operation with pseudorandom images + */ +uint32_t +test_composite (int testnum, + int verbose) +{ +inti; +pixman_image_t * src_img; +pixman_image_t * dst_img; +pixman_region16_t clip; +intdst_width, dst_height; +intdst_stride; +intdst_x, dst_y; +intdst_bpp; +pixman_op_top; +uint32_t * dst_bits; +uint32_t crc32; +pixman_format_code_t mask_format, dst_format; +pixman_trapezoid_t *traps; +int src_x, src_y; +int n_traps; + +static pixman_color_t colors[] = +{ + { 0x, 0x, 0x, 0x }, + { 0x, 0x, 0x, 0x }, + { 0xabcd, 0xabcd, 0x, 0xabcd }, + { 0x, 0x, 0x, 0x }, + { 0x0101, 0x0101, 0x0101, 0x0101 }, + { 0x, 0x, 0x, 0x }, +}; + +FLOAT_REGS_CORRUPTION_DETECTOR_START (); + +lcg_srand (testnum); + +op = RANDOM_ELT (operators); +mask_format = RANDOM_ELT (mask_formats); + +/* Create source image */ + +if (lcg_rand_n (4) == 0) +{ + src_img = pixman_image_create_solid_fill ( + (colors[lcg_rand_n (ARRAY_LENGTH (colors))])); + + src_x = 10; + src_y = 234; +} +else +{ + pixman_format_code_t src_format = RANDOM_ELT(formats); + int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8; + int src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1; + int src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1; + int src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp; + uint32_t *bits; + + src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2); + src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2); + + src_stride = (src_stride + 3) ~3; + + bits = (uint32_t *)make_random_bytes (src_stride * src_height); + + src_img = pixman_image_create_bits ( + src_format, src_width, src_height, bits, src_stride); + + pixman_image_set_destroy_function (src_img, destroy_bits, bits); + + if (lcg_rand_n (8) == 0) + { + pixman_box16_t clip_boxes[2]; + intn = lcg_rand_n (2) + 1; + + for (i = 0; i n; i++) + { + clip_boxes[i].x1 = lcg_rand_n (src_width); + clip_boxes[i].y1 = lcg_rand_n (src_height); + clip_boxes[i].x2 = + clip_boxes[i].x1 + lcg_rand_n (src_width - 
clip_boxes[i].x1); + clip_boxes[i].y2 = + clip_boxes[i].y1
[Pixman] [PATCH 3/6] Add support for triangles to pixman.
From: Søren Sandmann Pedersen s...@redhat.com The Render X extension can draw triangles as well as trapezoids, but the implementation has always converted them to trapezoids. This patch moves the X server's triangle conversion code into pixman, where we can reuse the pixman_composite_trapezoid() code. --- pixman/pixman-trap.c | 136 ++ pixman/pixman.h | 15 ++ 2 files changed, 151 insertions(+), 0 deletions(-) diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c index ecec5d4..2675773 100644 --- a/pixman/pixman-trap.c +++ b/pixman/pixman-trap.c @@ -1,4 +1,5 @@ /* + * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc. * Copyright © 2004 Keith Packard * * Permission to use, copy, modify, distribute, and sell this software and its @@ -25,6 +26,7 @@ #endif #include stdio.h +#include stdlib.h #include pixman-private.h /* @@ -471,3 +473,137 @@ pixman_composite_trapezoids (pixman_op_t op, pixman_image_unref (tmp); } + +static int +greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b) +{ +if (a-y == b-y) + return a-x b-x; +return a-y b-y; +} + +/* + * Note that the definition of this function is a bit odd because + * of the X coordinate space (y increasing downwards). 
+ */ +static int +clockwise (const pixman_point_fixed_t *ref, + const pixman_point_fixed_t *a, + const pixman_point_fixed_t *b) +{ +pixman_point_fixed_t ad, bd; + +ad.x = a-x - ref-x; +ad.y = a-y - ref-y; +bd.x = b-x - ref-x; +bd.y = b-y - ref-y; + +return ((pixman_fixed_32_32_t) bd.y * ad.x - + (pixman_fixed_32_32_t) ad.y * bd.x) 0; +} + +static void +triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps) +{ +const pixman_point_fixed_t *top, *left, *right, *tmp; + +top = tri-p1; +left = tri-p2; +right = tri-p3; + +if (greater_y (top, left)) +{ + tmp = left; + left = top; + top = tmp; +} + +if (greater_y (top, right)) +{ + tmp = right; + right = top; + top = tmp; +} + +if (clockwise (top, right, left)) +{ + tmp = right; + right = left; + left = tmp; +} + +/* + * Two cases: + * + * + + + */ \ / \ + * / \ / \ + * / + + \ + * /-- --\ + * / -- -- \ + */ --- --- \ + * +-- --+ + */ + +traps-top = top-y; +traps-left.p1 = *top; +traps-left.p2 = *left; +traps-right.p1 = *top; +traps-right.p2 = *right; + +if (right-y left-y) + traps-bottom = right-y; +else + traps-bottom = left-y; + +traps++; + +*traps = *(traps - 1); + +if (right-y left-y) +{ + traps-top = right-y; + traps-bottom = left-y; + traps-right.p1 = *right; + traps-right.p2 = *left; +} +else +{ + traps-top = left-y; + traps-bottom = right-y; + traps-left.p1 = *left; + traps-left.p2 = *right; +} +} + +PIXMAN_EXPORT void +pixman_composite_triangles (pixman_op_top, + pixman_image_t *src, + pixman_image_t *dst, + pixman_format_code_tmask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_tris, + const pixman_triangle_t * tris) +{ +pixman_trapezoid_t *trapezoids; +int i; + +if (n_tris = 0) + return; + +trapezoids = malloc (2 * n_tris * sizeof (pixman_trapezoid_t)); +if (!trapezoids) + return; + +for (i = 0; i n_tris; ++i) + triangle_to_trapezoids ((tris[i]), trapezoids + 2 * i); + +pixman_composite_trapezoids (op, src, dst, mask_format, +x_src, y_src, x_dst, y_dst, +n_tris * 2, 
trapezoids); + +free (trapezoids); +} diff --git a/pixman/pixman.h b/pixman/pixman.h index 52ab8a5..7d28e78 100644 --- a/pixman/pixman.h +++ b/pixman/pixman.h @@ -868,6 +868,7 @@ typedef struct pixman_edge pixman_edge_t; typedef struct pixman_trapezoid pixman_trapezoid_t; typedef struct pixman_trap pixman_trap_t; typedef struct pixman_span_fix pixman_span_fix_t; +typedef struct pixman_triangle pixman_triangle_t; /* * An edge structure. This represents a single polygon edge @@ -895,6 +896,10 @@ struct pixman_trapezoid pixman_line_fixed_tleft, right; }; +struct
[Pixman] [PATCH 5/6] Optimize adding opaque trapezoids onto a8 destination.
From: Søren Sandmann Pedersen s...@redhat.com When the source is opaque and the destination is alpha only, we can avoid the temporary mask and just add the trapezoids directly. --- pixman/pixman-trap.c | 133 - 1 files changed, 76 insertions(+), 57 deletions(-) diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c index 2675773..adf822c 100644 --- a/pixman/pixman-trap.c +++ b/pixman/pixman-trap.c @@ -399,10 +399,7 @@ pixman_composite_trapezoids (pixman_op_t op, intn_traps, const pixman_trapezoid_t * traps) { -pixman_image_t *tmp; -pixman_box32_t box; int i; -int x_rel, y_rel; if (n_traps = 0) return; @@ -410,68 +407,90 @@ pixman_composite_trapezoids (pixman_op_t op, _pixman_image_validate (src); _pixman_image_validate (dst); -box.x1 = INT32_MAX; -box.y1 = INT32_MAX; -box.x2 = INT32_MIN; -box.y2 = INT32_MIN; - -for (i = 0; i n_traps; ++i) +if (op == PIXMAN_OP_ADD + (src-common.flags FAST_PATH_IS_OPAQUE) + (mask_format == dst-common.extended_format_code) + !(dst-common.have_clip_region)) { - const pixman_trapezoid_t *trap = (traps[i]); - int y1, y2; - - if (!pixman_trapezoid_valid (trap)) - continue; + for (i = 0; i n_traps; ++i) + { + const pixman_trapezoid_t *trap = (traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (dst, trap, 0, 0); + } +} +else +{ + pixman_image_t *tmp; + pixman_box32_t box; + int x_rel, y_rel; - y1 = pixman_fixed_to_int (trap-top); - if (y1 box.y1) - box.y1 = y1; + box.x1 = INT32_MAX; + box.y1 = INT32_MAX; + box.x2 = INT32_MIN; + box.y2 = INT32_MIN; - y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap-bottom)); - if (y2 box.y2) - box.y2 = y2; - + for (i = 0; i n_traps; ++i) + { + const pixman_trapezoid_t *trap = (traps[i]); + int y1, y2; + + if (!pixman_trapezoid_valid (trap)) + continue; + + y1 = pixman_fixed_to_int (trap-top); + if (y1 box.y1) + box.y1 = y1; + + y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap-bottom)); + if (y2 box.y2) + box.y2 = y2; + #define EXTEND_MIN(x) \ - if 
(pixman_fixed_to_int ((x)) box.x1) \ - box.x1 = pixman_fixed_to_int ((x)); + if (pixman_fixed_to_int ((x)) box.x1) \ + box.x1 = pixman_fixed_to_int ((x)); #define EXTEND_MAX(x) \ - if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) box.x2) \ - box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x))); - + if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) box.x2) \ + box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x))); + #define EXTEND(x) \ - EXTEND_MIN(x); \ - EXTEND_MAX(x); - - EXTEND(trap-left.p1.x); - EXTEND(trap-left.p2.x); - EXTEND(trap-right.p1.x); - EXTEND(trap-right.p2.x); -} - -if (box.x1 = box.x2 || box.y1 = box.y2) - return; - -tmp = pixman_image_create_bits ( - mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1); + EXTEND_MIN(x); \ + EXTEND_MAX(x); + + EXTEND(trap-left.p1.x); + EXTEND(trap-left.p2.x); + EXTEND(trap-right.p1.x); + EXTEND(trap-right.p2.x); + } -for (i = 0; i n_traps; ++i) -{ - const pixman_trapezoid_t *trap = (traps[i]); - - if (!pixman_trapezoid_valid (trap)) - continue; - - pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1); + if (box.x1 = box.x2 || box.y1 = box.y2) + return; + + tmp = pixman_image_create_bits ( + mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1); + + for (i = 0; i n_traps; ++i) + { + const pixman_trapezoid_t *trap = (traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1); + } + + x_rel = box.x1 + x_src - x_dst; + y_rel = box.y1 + y_src - y_dst; + + pixman_image_composite (op, src, tmp, dst, + x_rel, y_rel
[Pixman] [PATCH 6/6] Add new public function pixman_add_triangles()
From: Søren Sandmann Pedersen s...@redhat.com This allows some more code to be deleted from the X server. The implementation consists of converting to trapezoids, and is shared with pixman_composite_triangles(). --- pixman/pixman-trap.c | 61 - pixman/pixman.h |7 +- 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c index adf822c..2957a2b 100644 --- a/pixman/pixman-trap.c +++ b/pixman/pixman-trap.c @@ -595,6 +595,25 @@ triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t *traps) } } +static pixman_trapezoid_t * +convert_triangles (int n_tris, const pixman_triangle_t *tris) +{ +pixman_trapezoid_t *traps; +int i; + +if (n_tris = 0) + return NULL; + +traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t)); +if (!traps) + return NULL; + +for (i = 0; i n_tris; ++i) + triangle_to_trapezoids ((tris[i]), traps + 2 * i); + +return traps; +} + PIXMAN_EXPORT void pixman_composite_triangles (pixman_op_top, pixman_image_t *src, @@ -607,22 +626,32 @@ pixman_composite_triangles (pixman_op_t op, int n_tris, const pixman_triangle_t * tris) { -pixman_trapezoid_t *trapezoids; -int i; +pixman_trapezoid_t *traps; -if (n_tris = 0) - return; - -trapezoids = malloc (2 * n_tris * sizeof (pixman_trapezoid_t)); -if (!trapezoids) - return; +if ((traps = convert_triangles (n_tris, tris))) +{ + pixman_composite_trapezoids (op, src, dst, mask_format, +x_src, y_src, x_dst, y_dst, +n_tris * 2, traps); + + free (traps); +} +} -for (i = 0; i n_tris; ++i) - triangle_to_trapezoids ((tris[i]), trapezoids + 2 * i); - -pixman_composite_trapezoids (op, src, dst, mask_format, -x_src, y_src, x_dst, y_dst, -n_tris * 2, trapezoids); - -free (trapezoids); +PIXMAN_EXPORT void +pixman_add_triangles (pixman_image_t *image, + int32_t x_off, + int32_t y_off, + int n_tris, + const pixman_triangle_t *tris) +{ +pixman_trapezoid_t *traps; + +if ((traps = convert_triangles (n_tris, tris))) +{ + pixman_add_trapezoids (image, x_off, 
y_off, + n_tris * 2, traps); + + free (traps); +} } diff --git a/pixman/pixman.h b/pixman/pixman.h index 7d28e78..1305bc1 100644 --- a/pixman/pixman.h +++ b/pixman/pixman.h @@ -975,7 +975,12 @@ void pixman_composite_triangles (pixman_op_t op, int y_dst, int n_tris, const pixman_triangle_t *tris); - +void pixman_add_triangles (pixman_image_t *image, + int32_t x_off, + int32_t y_off, + int n_tris, + const pixman_triangle_t *tris); + PIXMAN_END_DECLS #endif /* PIXMAN_H__ */ -- 1.7.3.1 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 2/3] Move miTriangles to fb as fbTriangles().
From: Søren Sandmann Pedersen s...@redhat.com The fb version simply calls the new pixman_composite_triangles(). This allows us to get rid of miCreateAlphaPicture(). Signed-off-by: Søren Sandmann s...@redhat.com --- fb/fbpict.c |1 + fb/fbpict.h | 10 + fb/fbtrap.c | 109 +++ render/mipict.c |2 +- render/mipict.h | 17 - render/mitrap.c | 49 - render/mitri.c | 59 -- 7 files changed, 90 insertions(+), 157 deletions(-) diff --git a/fb/fbpict.c b/fb/fbpict.c index 6e66db8..312f3df 100644 --- a/fb/fbpict.c +++ b/fb/fbpict.c @@ -367,6 +367,7 @@ fbPictureInit (ScreenPtr pScreen, PictFormatPtr formats, int nformats) ps-Trapezoids = fbTrapezoids; ps-AddTraps = fbAddTraps; ps-AddTriangles = fbAddTriangles; +ps-Triangles = fbTriangles; return TRUE; } diff --git a/fb/fbpict.h b/fb/fbpict.h index 03d2665..b880ebb 100644 --- a/fb/fbpict.h +++ b/fb/fbpict.h @@ -75,4 +75,14 @@ fbTrapezoids (CARD8 op, int ntrap, xTrapezoid*traps); +extern _X_EXPORT void +fbTriangles (CARD8 op, +PicturePtr pSrc, +PicturePtr pDst, +PictFormatPtr maskFormat, +INT16 xSrc, +INT16 ySrc, +intntris, +xTriangle *tris); + #endif /* _FBPICT_H_ */ diff --git a/fb/fbtrap.c b/fb/fbtrap.c index 687de55..3b197b4 100644 --- a/fb/fbtrap.c +++ b/fb/fbtrap.c @@ -157,51 +157,56 @@ fbAddTriangles (PicturePtr pPicture, } } +typedef void (* CompositeShapesFunc) (pixman_op_t op, + pixman_image_t *src, + pixman_image_t *dst, + pixman_format_code_t mask_format, + int x_src, int y_src, + int x_dst, int y_dst, + int n_shapes, const uint8_t *shapes); -void -fbTrapezoids (CARD8op, - PicturePtrpSrc, - PicturePtrpDst, - PictFormatPtr maskFormat, - INT16 xSrc, - INT16 ySrc, - int ntrap, - xTrapezoid*traps) +static void +fbShapes (CompositeShapesFunc composite, + pixman_op_t op, + PicturePtrpSrc, + PicturePtrpDst, + PictFormatPtr maskFormat, + int16_t xSrc, + int16_t ySrc, + int16_t xDst, + int16_t yDst, + int nshapes, + int shape_size, + const uint8_t * shapes) { pixman_image_t *src, *dst; int src_xoff, src_yoff; int dst_xoff, 
dst_yoff; -if (ntrap == 0) - return; - src = image_from_pict (pSrc, FALSE, src_xoff, src_yoff); dst = image_from_pict (pDst, TRUE, dst_xoff, dst_yoff); if (src dst) { pixman_format_code_t format; - int x_dst, y_dst; - int i; - x_dst = traps[0].left.p1.x 16; - y_dst = traps[0].left.p1.y 16; - if (!maskFormat) { + int i; + if (pDst-polyEdge == PolyEdgeSharp) format = PIXMAN_a1; else format = PIXMAN_a8; - for (i = 0; i ntrap; ++i) + for (i = 0; i nshapes; ++i) { - pixman_composite_trapezoids (op, src, dst, format, -xSrc + src_xoff, -ySrc + src_yoff, -x_dst + dst_xoff, -y_dst + dst_yoff, -1, (pixman_trapezoid_t *)traps++); + composite (op, src, dst, format, + xSrc + src_xoff, + ySrc + src_yoff, + xDst + dst_xoff, + yDst + dst_yoff, + 1, shapes + i * shape_size); } } else @@ -221,16 +226,58 @@ fbTrapezoids (CARD8 op, format = PIXMAN_a8; break; } - - pixman_composite_trapezoids (op, src, dst, format, -xSrc + src_xoff, -ySrc + src_yoff, -x_dst + dst_xoff, -y_dst + dst_yoff, -ntrap, (pixman_trapezoid_t *)traps); + + composite (op, src, dst, format, + xSrc + src_xoff, + ySrc + src_yoff
[Pixman] [PATCH 3/3] Implement fbAddTriangles() in terms of pixman_add_triangles().
From: Søren Sandmann Pedersen s...@redhat.com This allows the remaining triangle-to-trap conversion code to be deleted. Signed-off-by: Søren Sandmann s...@redhat.com --- fb/fbtrap.c | 91 ++- 1 files changed, 9 insertions(+), 82 deletions(-) diff --git a/fb/fbtrap.c b/fb/fbtrap.c index 3b197b4..2554fcc 100644 --- a/fb/fbtrap.c +++ b/fb/fbtrap.c @@ -65,32 +65,6 @@ fbRasterizeTrapezoid (PicturePtrpPicture, free_pixman_pict (pPicture, image); } -static int -_GreaterY (xPointFixed *a, xPointFixed *b) -{ -if (a-y == b-y) - return a-x b-x; -return a-y b-y; -} - -/* - * Note that the definition of this function is a bit odd because - * of the X coordinate space (y increasing downwards). - */ -static int -_Clockwise (xPointFixed *ref, xPointFixed *a, xPointFixed *b) -{ -xPointFixedad, bd; - -ad.x = a-x - ref-x; -ad.y = a-y - ref-y; -bd.x = b-x - ref-x; -bd.y = b-y - ref-y; - -return ((xFixed_32_32) bd.y * ad.x - (xFixed_32_32) ad.y * bd.x) 0; -} - -/* FIXME -- this could be made more efficient */ void fbAddTriangles (PicturePtr pPicture, INT16 x_off, @@ -98,63 +72,16 @@ fbAddTriangles (PicturePtr pPicture, int ntri, xTriangle *tris) { -xPointFixed *top, *left, *right, *tmp; -xTrapezoid trap; +int image_xoff, image_yoff; +pixman_image_t *image = + image_from_pict (pPicture, FALSE, image_xoff, image_yoff); -for (; ntri; ntri--, tris++) -{ - top = tris-p1; - left = tris-p2; - right = tris-p3; - if (_GreaterY (top, left)) { - tmp = left; left = top; top = tmp; - } - if (_GreaterY (top, right)) { - tmp = right; right = top; top = tmp; - } - if (_Clockwise (top, right, left)) { - tmp = right; right = left; left = tmp; - } - - /* -* Two cases: -* -* + + -* / \ / \ -*/ \ / \ -* / + + \ -* /-- --\ -* / -- -- \ -*/ --- --- \ -* +-- --+ -*/ - - trap.top = top-y; - trap.left.p1 = *top; - trap.left.p2 = *left; - trap.right.p1 = *top; - trap.right.p2 = *right; - if (right-y left-y) - trap.bottom = right-y; - else - trap.bottom = left-y; - fbRasterizeTrapezoid (pPicture, trap, x_off, 
y_off); - if (right-y left-y) - { - trap.top = right-y; - trap.bottom = left-y; - trap.right.p1 = *right; - trap.right.p2 = *left; - } - else - { - trap.top = left-y; - trap.bottom = right-y; - trap.left.p1 = *left; - trap.left.p2 = *right; - } - fbRasterizeTrapezoid (pPicture, trap, x_off, y_off); -} +if (!image) + return; + +pixman_add_triangles (image, x_off, y_off, ntri, (pixman_triangle_t *)tris); + +free_pixman_pict (pPicture, image); } typedef void (* CompositeShapesFunc) (pixman_op_t op, -- 1.7.3.1 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH 1/3] Add pixman_composite_trapezoids().
This function is an implementation of the X server request Trapezoids. That request is what the X backend of cairo is using all the time; by moving it into pixman we can hopefully make it faster. --- pixman/pixman-trap.c | 87 ++ pixman/pixman.h | 12 ++- 2 files changed, 98 insertions(+), 1 deletions(-) diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c index 8353992..a924326 100644 --- a/pixman/pixman-trap.c +++ b/pixman/pixman-trap.c @@ -390,3 +390,90 @@ pixman_rasterize_trapezoid (pixman_image_t * image, pixman_rasterize_edges (image, l, r, t, b); } } + +PIXMAN_EXPORT void +pixman_composite_trapezoids (pixman_op_t op, +pixman_image_t * src, +pixman_image_t * dst, +pixman_format_code_t mask_format, +intx_src, +inty_src, +intx_dst, +inty_dst, +intn_traps, +pixman_trapezoid_t * traps) +{ +pixman_image_t *tmp; +pixman_box32_t box; +int i; +int x_rel, y_rel; + +if (n_traps = 0) + return; + +_pixman_image_validate (src); +_pixman_image_validate (dst); + +box.x1 = INT32_MAX; +box.y1 = INT32_MAX; +box.x2 = INT32_MIN; +box.y2 = INT32_MIN; + +for (i = 0; i n_traps; ++i) +{ + pixman_trapezoid_t *trap = (traps[i]); + int y1, y2; + + if (!pixman_trapezoid_valid (trap)) + continue; + + y1 = pixman_fixed_to_int (trap-top); + if (y1 box.y1) + box.y1 = y1; + + y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap-bottom)); + if (y2 box.y2) + box.y2 = y2; + +#define EXTEND_MIN(x) \ + if (pixman_fixed_to_int ((x)) box.x1) \ + box.x1 = pixman_fixed_to_int ((x)); +#define EXTEND_MAX(x) \ + if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) box.x2) \ + box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x))); + +#define EXTEND(x) \ + EXTEND_MIN(x); \ + EXTEND_MAX(x); + + EXTEND(trap-left.p1.x); + EXTEND(trap-left.p2.x); + EXTEND(trap-right.p1.x); + EXTEND(trap-right.p2.x); +} + +if (box.x1 = box.x2 || box.y1 = box.y2) + return; + +tmp = pixman_image_create_bits ( + mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1); + +for (i = 0; i n_traps; ++i) +{ + pixman_trapezoid_t *trap = 
(traps[i]); + + if (!pixman_trapezoid_valid (trap)) + continue; + + pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1); +} + +x_rel = box.x1 + x_src - x_dst; +y_rel = box.y1 + y_src - y_dst; + +pixman_image_composite (op, src, tmp, dst, + x_rel, y_rel, 0, 0, box.x1, box.y1, + box.x2 - box.x1, box.y2 - box.y1); + +pixman_image_unref (tmp); +} diff --git a/pixman/pixman.h b/pixman/pixman.h index b95d0e9..c2f7da3 100644 --- a/pixman/pixman.h +++ b/pixman/pixman.h @@ -950,7 +950,17 @@ void pixman_rasterize_trapezoid (pixman_image_t *image, const pixman_trapezoid_t *trap, intx_off, inty_off); - +void pixman_composite_trapezoids (pixman_op_top, + pixman_image_t *src, + pixman_image_t *dst, + pixman_format_code_t mask_format, + int x_src, + int y_src, + int x_dst, + int y_dst, + int n_traps, + pixman_trapezoid_t *traps); + PIXMAN_END_DECLS #endif /* PIXMAN_H__ */ -- 1.7.3.1 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH] Only try to compute the FAST_PATH_SAMPLES_COVER_CLIP for bits images
From: Søren Sandmann Pedersen s...@redhat.com It doesn't make sense in other cases, and the computation would make use of image-bits.{width,height} which lead to uninitialized memory accesses when the image wasn't of type BITS. --- pixman/pixman.c | 17 ++--- 1 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pixman/pixman.c b/pixman/pixman.c index 55c5981..ddd4935 100644 --- a/pixman/pixman.c +++ b/pixman/pixman.c @@ -787,14 +787,17 @@ analyze_extent (pixman_image_t *image, int x, int y, if (!compute_sample_extents (transform, ex, x, y, x_off, y_off, width, height)) return FALSE; -/* Check whether the non-expanded, transformed extent is entirely within - * the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is. - */ -ex = *extents; -if (compute_sample_extents (transform, ex, x, y, x_off, y_off, width, height)) +if (image-type == BITS) { - if (ex.x1 = 0 ex.y1 = 0 ex.x2 = image-bits.width ex.y2 = image-bits.height) - *flags |= FAST_PATH_SAMPLES_COVER_CLIP; + /* Check whether the non-expanded, transformed extent is entirely within +* the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is. +*/ + ex = *extents; + if (compute_sample_extents (transform, ex, x, y, x_off, y_off, width, height)) + { + if (ex.x1 = 0 ex.y1 = 0 ex.x2 = image-bits.width ex.y2 = image-bits.height) + *flags |= FAST_PATH_SAMPLES_COVER_CLIP; + } } return TRUE; -- 1.7.1.1 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
Re: [Pixman] [PATCH] ARM: NEON: don't hit general path for r5g6b5 OVER r5g6b5 operation
Would it be possible instead to add a new flag OPAQUE_SAMPLES that would be set whenever the image format is opaque, and then use it along with SAMPLES_COVER_CLIP to add the OPAQUE flag before strength reducing the operator? That would help all the backends, including the general one, and all the opaque image formats. OK, I'll try to see what can be done. I'm a bit worried about the applications using pixman in such a way that the pixels outside of the source image are also fetched and whether this will be handled efficiently with the new flag. I think the clip analysis is finally to the point that this can be done without introducing new bugs. See the following patch, and please let me know if it doesn't work for the r5g6b5 OVER r5g6b5 case. Thanks, Soren ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH] Introduce new FAST_PATH_SAMPLES_OPAQUE flag
From: Søren Sandmann Pedersen s...@redhat.com This flag is set whenever the pixels of a bits image don't have an alpha channel. Together with FAST_PATH_SAMPLES_COVER_CLIP it implies that the image effectively is opaque, so we can do operator reductions such as OVER-SRC. --- pixman/pixman-image.c | 10 ++ pixman/pixman-private.h |1 + pixman/pixman.c | 11 +++ 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c index 0b8bb3c..0e3601f 100644 --- a/pixman/pixman-image.c +++ b/pixman/pixman-image.c @@ -406,12 +406,14 @@ compute_image_info (pixman_image_t *image) } } - if (image-common.repeat != PIXMAN_REPEAT_NONE - !PIXMAN_FORMAT_A (image-bits.format) + if (!PIXMAN_FORMAT_A (image-bits.format) PIXMAN_FORMAT_TYPE (image-bits.format) != PIXMAN_TYPE_GRAY PIXMAN_FORMAT_TYPE (image-bits.format) != PIXMAN_TYPE_COLOR) { - flags |= FAST_PATH_IS_OPAQUE; + flags |= FAST_PATH_SAMPLES_OPAQUE; + + if (image-common.repeat != PIXMAN_REPEAT_NONE) + flags |= FAST_PATH_IS_OPAQUE; } if (source_image_needs_out_of_bounds_workaround (image-bits)) @@ -459,7 +461,7 @@ compute_image_info (pixman_image_t *image) image-common.filter == PIXMAN_FILTER_CONVOLUTION || image-common.component_alpha) { - flags = ~FAST_PATH_IS_OPAQUE; + flags = ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE); } image-common.flags = flags; diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h index c4e6bb8..dedea0b 100644 --- a/pixman/pixman-private.h +++ b/pixman/pixman-private.h @@ -571,6 +571,7 @@ _pixman_choose_implementation (void); #define FAST_PATH_BILINEAR_FILTER (1 20) #define FAST_PATH_NO_NORMAL_REPEAT (1 21) #define FAST_PATH_HAS_TRANSFORM(1 22) +#define FAST_PATH_SAMPLES_OPAQUE (1 23) #define FAST_PATH_PAD_REPEAT \ (FAST_PATH_NO_NONE_REPEAT | \ diff --git a/pixman/pixman.c b/pixman/pixman.c index e79e135..55c5981 100644 --- a/pixman/pixman.c +++ b/pixman/pixman.c @@ -884,6 +884,17 @@ do_composite (pixman_op_t op, if (!analyze_extent (mask, 
dest_x - mask_x, dest_y - mask_y, extents, mask_flags)) goto out; +/* If the clip is within the source samples, and the samples are opaque, + * then the source is effectively opaque. + */ +#define BOTH (FAST_PATH_SAMPLES_OPAQUE | FAST_PATH_SAMPLES_COVER_CLIP) + +if ((src_flags BOTH) == BOTH) + src_flags |= FAST_PATH_IS_OPAQUE; + +if ((mask_flags BOTH) == BOTH) + mask_flags |= FAST_PATH_IS_OPAQUE; + /* * Check if we can replace our operator by a simpler one * if the src or dest are opaque. The output operator should be -- 1.7.1.1 ___ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
Re: [Pixman] FAST_PATH_SAMPLES_COVER_CLIP flag fast_composite_scaled_nearest_*
Siarhei Siamashka siarhei.siamas...@gmail.com writes: Overall looks like a good fix, a few comments below. Thanks for the comments. I'll send a new patch with a long commit log as a follow-up to this message (provided I can make it work with git-send-email), but I'll reply to some specifics below. The main difference in the new patch is that the 16BIT_SAFE flag is gone entirely, and instead pixman will simply bail out in that case. +if (!transform) +{ +box-x1 = pixman_fixed_to_int (pixman_int_to_fixed (box-x1) + pixman_fixed_1 / 2 + x_off); +box-y1 = pixman_fixed_to_int (pixman_int_to_fixed (box-y1) + pixman_fixed_1 / 2 + y_off); +box-x2 = pixman_fixed_to_int (pixman_int_to_fixed (box-x2) - pixman_fixed_1 / 2 + x_off) + width + 1; +box-y2 = pixman_fixed_to_int (pixman_int_to_fixed (box-y2) - pixman_fixed_1 / 2 + y_off) + height + 1; +return TRUE; That's an interesting case. If I understand it correctly, without any transform at all, both NEAREST and BILINEAR filters should introduce no changes in the bounds. This is fine for NEAREST, but not for BILINEAR filter which gets the bounds expanded by 1. The bilinear filter is a bit special, because the sampling of extra rightmost pixels may technically happen according to formulas, but they get multiplied by zero anyway, so make no difference and are ignored by non-transformed fast paths. The non-transformed fast paths certainly make this assumption, but the general path will actually read these pixels, so we still have to account for them. If we start seeing a lot of cases where images have a bilinear filter, an identity transform, and we end up hitting the general case, we could look into it, but I don't think this is a very common case. 
+v[0].vector[0] = pixman_int_to_fixed (box->x1) + pixman_fixed_1 / 2; +v[0].vector[1] = pixman_int_to_fixed (box->y1) + pixman_fixed_1 / 2; +v[0].vector[2] = pixman_int_to_fixed (1); + +v[1].vector[0] = pixman_int_to_fixed (box->x2) - pixman_fixed_1 / 2; +v[1].vector[1] = pixman_int_to_fixed (box->y1) + pixman_fixed_1 / 2; +v[1].vector[2] = pixman_int_to_fixed (1); + +v[2].vector[0] = pixman_int_to_fixed (box->x2) - pixman_fixed_1 / 2; +v[2].vector[1] = pixman_int_to_fixed (box->y2) - pixman_fixed_1 / 2; +v[2].vector[2] = pixman_int_to_fixed (1); + +v[3].vector[0] = pixman_int_to_fixed (box->x1) + pixman_fixed_1 / 2; +v[3].vector[1] = pixman_int_to_fixed (box->y2) - pixman_fixed_1 / 2; +v[3].vector[2] = pixman_int_to_fixed (1); + +for (i = 0; i < 4; ++i) +{ + if (!pixman_transform_point (transform, &v[i])) + return FALSE; Still what about the subtle differences between pixman_transform_point() and pixman_transform_point_3d()? They are not exactly the same. Transformed fetchers and fast path functions are all using pixman_transform_point_3d(). It's true that they are not exactly the same. In the new patch, I have introduced a bit of slack in the computation of the source area (8 * pixman_fixed_e). This is hopefully enough to account for any rounding differences. The common special case where a NEAREST image is being scaled so that the source area exactly matches the image will still work because all we need in that case is for the computed bounds to be within [0, 0.5] and 8 * pixman_fixed_e is not even close to that. It does mean of course that if you scale an image very slightly down, the new code might decide to not set the FAST_PATH_SAMPLES_COVER_CLIP flag, but I'm not that concerned about this. I'd like to avoid relying on the two transformation functions being exactly the same because I think it should be considered legitimate to write compositing functions that transform in a different way if that's more efficient, even if it means slightly different results. 
Another (minor) issue is that pixman_transform_point() has division operations, which may be not very good for performance. We can't really avoid the divisions if the flags are to be set correctly when the transformation is projective. I realize we don't actually make use of the flags in that case, but it's still unpleasant to rely on the knowledge that there aren't any projective fast paths, in a place that should only have knowledge about a particular image. So basically, I think the flags should always be computed correctly, even if we know that an incorrect computation won't have any ill effects. + + x1 = pixman_fixed_to_int (v[i].vector[0] + x_off); + y1 = pixman_fixed_to_int (v[i].vector[1] + y_off); + x2 = x1 + width + 1; + y2 = y1 + height + 1; A minor performance improvement is possible. Addition of (width + 1) and (height + 1) to x2/y2 is done inside of the loop on each iteration here, 8 times total. If done after the loop just before returning, it would be 2 additions only. The new patch is quite different, but it does
[Pixman] [PATCH 1/2] Extend scaling-crash-test in various ways
From: Søren Sandmann Pedersen s...@redhat.com This extends scaling-crash-test to test some more things: - All combinations of NEAREST/BILINEAR/CONVOLUTION filters and NORMAL/PAD/REFLECT repeat modes. - Tests various scale factors very close to 1/7th such that the source area is very close to the edge of the source image. - The same things, only with scale factors very close to 1/32767th. - Enables the commented-out tests for accessing memory outside the source buffer. Also there is now a border around the source buffer which has a different color than the source buffer itself so that if we sample outside, it will show up. Finally, the test now allows the destination buffer to not be changed at all. This allows pixman to simply bail out in cases where the transformation is too strange. --- test/scaling-crash-test.c | 193 - 1 files changed, 139 insertions(+), 54 deletions(-) diff --git a/test/scaling-crash-test.c b/test/scaling-crash-test.c index 4ab01e3..7a94115 100644 --- a/test/scaling-crash-test.c +++ b/test/scaling-crash-test.c @@ -8,117 +8,202 @@ * We have a source image filled with solid color, set NORMAL or PAD repeat, * and some transform which results in nearest neighbour scaling. * - * The expected result is the destination image filled with this solid - * color. + * The expected result is either that the destination image filled with this solid + * color or, if the transformation is such that we can't composite anything at + * all, that nothing has changed in the destination. + * + * The surrounding memory of the source image is a different solid color so that + * we are sure to get failures if we access it. 
*/ static int -do_test (int32_t dst_size, -int32_tsrc_size, -int32_tsrc_offs, -int32_tscale_factor, -pixman_repeat_trepeat) +run_test (int32_t dst_width, + int32_t dst_height, + int32_t src_width, + int32_t src_height, + int32_t src_x, + int32_t src_y, + int32_t scale_x, + int32_t scale_y, + pixman_filter_t filter, + pixman_repeat_t repeat) { -int i; pixman_image_t * src_img; pixman_image_t * dst_img; pixman_transform_t transform; uint32_t * srcbuf; uint32_t * dstbuf; +pixman_box32_t box = { 0, 0, src_width, src_height }; +pixman_color_t color_cc = { 0x, 0x, 0x, 0x }; +int result; +int i; -srcbuf = (uint32_t *)malloc (src_size * 4); -dstbuf = (uint32_t *)malloc (dst_size * 4); +static const pixman_fixed_t kernel[] = +{ +#define D(f) (pixman_double_to_fixed (f) + 0x0001) + + pixman_int_to_fixed (5), + pixman_int_to_fixed (5), + D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), + D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), + D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), + D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), + D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0) +}; + +result = 0; -/* horizontal test */ -memset (srcbuf, 0xCC, src_size * 4); -memset (dstbuf, 0x33, dst_size * 4); +srcbuf = (uint32_t *)malloc ((src_width + 10) * (src_height + 10) * 4); +dstbuf = (uint32_t *)malloc (dst_width * dst_height * 4); + +memset (srcbuf, 0x88, src_width * src_height * 4); +memset (dstbuf, 0x33, dst_width * dst_height * 4); src_img = pixman_image_create_bits ( -PIXMAN_a8r8g8b8, src_size, 1, srcbuf, src_size * 4); +PIXMAN_a8r8g8b8, src_width, src_height, + srcbuf + (src_width + 10) * 5 + 5, (src_width + 10) * 4); + +pixman_image_fill_boxes (PIXMAN_OP_SRC, src_img, color_cc, 1, box); + dst_img = pixman_image_create_bits ( -PIXMAN_a8r8g8b8, dst_size, 1, dstbuf, dst_size * 4); +PIXMAN_a8r8g8b8, dst_width, dst_height, dstbuf, dst_width * 4); -pixman_transform_init_scale (transform, scale_factor, 65536); +pixman_transform_init_scale 
(transform, scale_x, scale_y); pixman_image_set_transform (src_img, transform); pixman_image_set_repeat (src_img, repeat); -pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0); +if (filter == PIXMAN_FILTER_CONVOLUTION) + pixman_image_set_filter (src_img, filter, kernel, 27); +else + pixman_image_set_filter (src_img, filter, NULL, 0); pixman_image_composite (PIXMAN_OP_SRC, src_img, NULL, dst_img, -src_offs, 0, 0, 0, 0, 0, dst_size, 1); +src_x, src_y, 0, 0, 0, 0, dst_width, dst_height); pixman_image_unref (src_img); pixman_image_unref (dst_img); -for (i = 0; i dst_size; i++) +for (i = 0; i dst_width
[Pixman] [PATCH] Cache the implementation along with the fast paths.
From: Søren Sandmann Pedersen s...@redhat.com When calling a fast path, we need to pass the corresponding implementation since it might contain information necessary to run the fast path. --- pixman/pixman.c | 26 -- 1 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pixman/pixman.c b/pixman/pixman.c index b76143f..4dfd3ae 100644 --- a/pixman/pixman.c +++ b/pixman/pixman.c @@ -563,7 +563,11 @@ compute_src_extents_flags (pixman_image_t *image, typedef struct { -pixman_fast_path_t cache [N_CACHED_FAST_PATHS]; +struct +{ + pixman_implementation_t * imp; + pixman_fast_path_t fast_path; +} cache [N_CACHED_FAST_PATHS]; } cache_t; PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); @@ -667,7 +671,7 @@ do_composite (pixman_implementation_t *imp, for (i = 0; i N_CACHED_FAST_PATHS; ++i) { - info = (cache-cache[i]); + info = (cache-cache[i].fast_path); /* Note that we check for equality here, not whether * the cached fast path matches. This is to prevent @@ -683,6 +687,7 @@ do_composite (pixman_implementation_t *imp, info-dest_flags == dest_flags info-func) { + imp = cache-cache[i].imp; goto found; } } @@ -745,14 +750,15 @@ found: while (i--) cache-cache[i + 1] = cache-cache[i]; - cache-cache[0].op = op; - cache-cache[0].src_format = src_format; - cache-cache[0].src_flags = src_flags; - cache-cache[0].mask_format = mask_format; - cache-cache[0].mask_flags = mask_flags; - cache-cache[0].dest_format = dest_format; - cache-cache[0].dest_flags = dest_flags; - cache-cache[0].func = func; + cache-cache[0].imp = imp; + cache-cache[0].fast_path.op = op; + cache-cache[0].fast_path.src_format = src_format; + cache-cache[0].fast_path.src_flags = src_flags; + cache-cache[0].fast_path.mask_format = mask_format; + cache-cache[0].fast_path.mask_flags = mask_flags; + cache-cache[0].fast_path.dest_format = dest_format; + cache-cache[0].fast_path.dest_flags = dest_flags; + cache-cache[0].fast_path.func = func; } out: -- 1.6.0.6 ___ Pixman mailing list 
Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman
[Pixman] [PATCH] Split the fast path caching into its own force_inline function
From: Søren Sandmann Pedersen s...@redhat.com The do_composite() function is a lot more readable this way. --- pixman/pixman.c | 200 +- 1 files changed, 107 insertions(+), 93 deletions(-) diff --git a/pixman/pixman.c b/pixman/pixman.c index 4dfd3ae..80a766a 100644 --- a/pixman/pixman.c +++ b/pixman/pixman.c @@ -572,9 +572,105 @@ typedef struct PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache); +static force_inline void +lookup_composite_function (pixman_op_t op, + pixman_format_code_t src_format, + uint32_t src_flags, + pixman_format_code_t mask_format, + uint32_t mask_flags, + pixman_format_code_t dest_format, + uint32_t dest_flags, + pixman_implementation_t**out_imp, + pixman_composite_func_t *out_func) +{ +pixman_implementation_t *imp; +cache_t *cache; +int i; + +/* Check cache for fast paths */ +cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache); + +for (i = 0; i N_CACHED_FAST_PATHS; ++i) +{ + const pixman_fast_path_t *info = (cache-cache[i].fast_path); + + /* Note that we check for equality here, not whether +* the cached fast path matches. This is to prevent +* us from selecting an overly general fast path +* when a more specific one would work. 
+*/ + if (info-op == op + info-src_format == src_format + info-mask_format == mask_format + info-dest_format == dest_format + info-src_flags == src_flags + info-mask_flags == mask_flags + info-dest_flags == dest_flags + info-func) + { + *out_imp = cache-cache[i].imp; + *out_func = cache-cache[i].fast_path.func; + + goto update_cache; + } +} + +for (imp = get_implementation (); imp != NULL; imp = imp-delegate) +{ + const pixman_fast_path_t *info = imp-fast_paths; + + while (info-op != PIXMAN_OP_NONE) + { + if ((info-op == op || info-op == PIXMAN_OP_any) + /* Formats */ + ((info-src_format == src_format) || +(info-src_format == PIXMAN_any)) + ((info-mask_format == mask_format) || +(info-mask_format == PIXMAN_any)) + ((info-dest_format == dest_format) || +(info-dest_format == PIXMAN_any)) + /* Flags */ + (info-src_flags src_flags) == info-src_flags + (info-mask_flags mask_flags) == info-mask_flags + (info-dest_flags dest_flags) == info-dest_flags) + { + *out_imp = imp; + *out_func = info-func; + + /* Set i to the last spot in the cache so that the +* move-to-front code below will work +*/ + i = N_CACHED_FAST_PATHS - 1; + + goto update_cache; + } + + ++info; + } +} +return; + +update_cache: +if (i) +{ + while (i--) + cache-cache[i + 1] = cache-cache[i]; + + cache-cache[0].imp = *out_imp; + cache-cache[0].fast_path.op = op; + cache-cache[0].fast_path.src_format = src_format; + cache-cache[0].fast_path.src_flags = src_flags; + cache-cache[0].fast_path.mask_format = mask_format; + cache-cache[0].fast_path.mask_flags = mask_flags; + cache-cache[0].fast_path.dest_format = dest_format; + cache-cache[0].fast_path.dest_flags = dest_flags; + cache-cache[0].fast_path.func = *out_func; +} +} + + static void -do_composite (pixman_implementation_t *imp, - pixman_op_t op, +do_composite (pixman_op_t op, pixman_image_t *src, pixman_image_t *mask, pixman_image_t *dest, @@ -598,9 +694,8 @@ do_composite (pixman_implementation_t *imp, uint32_t *dest_bits; int dest_dx, dest_dy; 
pixman_bool_t need_workaround; -const pixman_fast_path_t *info; -cache_t *cache; -int i; +pixman_implementation_t *imp; +pixman_composite_func_t func; src_format = src-common.extended_format_code; src_flags = src-common.flags; @@ -666,71 +761,12 @@ do_composite (pixman_implementation_t *imp, if (op == PIXMAN_OP_DST) return; -/* Check cache for fast paths */ -cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache); - -for (i = 0; i N_CACHED_FAST_PATHS; ++i) -{ - info = (cache-cache[i].fast_path