[Pixman] [PATCH 03/14] More general BILINEAR=>NEAREST reduction

2016-04-11 Thread Søren Sandmann Pedersen
Generalize and simplify the code that reduces BILINEAR to NEAREST so
that the reduction happens for all affine transformations where
t00...t12 are integers and (t00 + t01) and (t10 + t11) are both
odd. This is a sufficient condition for the resulting transformed
coordinates to be exactly at the center of a pixel so that BILINEAR
becomes identical to NEAREST.

V2: Address some comments by Bill Spitzak

Signed-off-by: Søren Sandmann 
---
 pixman/pixman-image.c | 66 +--
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 1ff1a49..681864e 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -335,37 +335,47 @@ compute_image_info (pixman_image_t *image)
{
flags |= FAST_PATH_NEAREST_FILTER;
}
-   else if (
-   /* affine and integer translation components in matrix ... */
-   ((flags & FAST_PATH_AFFINE_TRANSFORM) &&
-!pixman_fixed_frac (image->common.transform->matrix[0][2] |
-image->common.transform->matrix[1][2])) &&
-   (
-   /* ... combined with a simple rotation */
-   (flags & (FAST_PATH_ROTATE_90_TRANSFORM |
- FAST_PATH_ROTATE_180_TRANSFORM |
- FAST_PATH_ROTATE_270_TRANSFORM)) ||
-   /* ... or combined with a simple non-rotated translation */
-   (image->common.transform->matrix[0][0] == pixman_fixed_1 &&
-image->common.transform->matrix[1][1] == pixman_fixed_1 &&
-image->common.transform->matrix[0][1] == 0 &&
-image->common.transform->matrix[1][0] == 0)
-   )
-   )
+   else if (flags & FAST_PATH_AFFINE_TRANSFORM)
{
-   /* FIXME: there are some affine-test failures, showing that
-* handling of BILINEAR and NEAREST filter is not quite
-* equivalent when getting close to 32K for the translation
-* components of the matrix. That's likely some bug, but for
-* now just skip BILINEAR->NEAREST optimization in this case.
+   /* Suppose the transform is
+*
+*[ t00, t01, t02 ]
+*[ t10, t11, t12 ]
+*[   0,   0,   1 ]
+*
+* and the destination coordinates are (n + 0.5, m + 0.5). Then
+* the transformed x coordinate is:
+*
+* tx = t00 * (n + 0.5) + t01 * (m + 0.5) + t02
+*= t00 * n + t01 * m + t02 + (t00 + t01) * 0.5
+*
+* which implies that if t00, t01 and t02 are all integers
+* and (t00 + t01) is odd, then tx will be an integer plus 0.5,
+* which means a BILINEAR filter will reduce to NEAREST. The same
+* applies in the y direction
 */
-   pixman_fixed_t magic_limit = pixman_int_to_fixed (3);
-   if (image->common.transform->matrix[0][2] <= magic_limit  &&
-   image->common.transform->matrix[1][2] <= magic_limit  &&
-   image->common.transform->matrix[0][2] >= -magic_limit &&
-   image->common.transform->matrix[1][2] >= -magic_limit)
+   pixman_fixed_t (*t)[3] = image->common.transform->matrix;
+
+   if ((pixman_fixed_frac (
+t[0][0] | t[0][1] | t[0][2] |
+t[1][0] | t[1][1] | t[1][2]) == 0) &&
+   (pixman_fixed_to_int (
+   (t[0][0] + t[0][1]) & (t[1][0] + t[1][1])) % 2) == 1)
{
-   flags |= FAST_PATH_NEAREST_FILTER;
+   /* FIXME: there are some affine-test failures, showing that
+* handling of BILINEAR and NEAREST filter is not quite
+* equivalent when getting close to 32K for the translation
+* components of the matrix. That's likely some bug, but for
+* now just skip BILINEAR->NEAREST optimization in this case.
+*/
+   pixman_fixed_t magic_limit = pixman_int_to_fixed (3);
+   if (image->common.transform->matrix[0][2] <= magic_limit  &&
+   image->common.transform->matrix[1][2] <= magic_limit  &&
+   image->common.transform->matrix[0][2] >= -magic_limit &&
+   image->common.transform->matrix[1][2] >= -magic_limit)
+   {
+   flags |= FAST_PATH_NEAREST_FILTER;
+   }
}
}
break;
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 13/14] pixman-filter: Nested polynomial for cubic

2016-04-11 Thread Søren Sandmann Pedersen
From: Bill Spitzak 

v11: Restored range checks

Signed-off-by: Bill Spitzak 
Reviewed-by: Oded Gabbay 
---
 pixman/pixman-filter.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c
index 4abd05f..db4ab6e 100644
--- a/pixman/pixman-filter.c
+++ b/pixman/pixman-filter.c
@@ -109,14 +109,16 @@ general_cubic (double x, double B, double C)
 
 if (ax < 1)
 {
-   return ((12 - 9 * B - 6 * C) * ax * ax * ax +
-   (-18 + 12 * B + 6 * C) * ax * ax + (6 - 2 * B)) / 6;
+   return (((12 - 9 * B - 6 * C) * ax +
+(-18 + 12 * B + 6 * C)) * ax * ax +
+   (6 - 2 * B)) / 6;
 }
-else if (ax >= 1 && ax < 2)
+else if (ax < 2)
 {
-   return ((-B - 6 * C) * ax * ax * ax +
-   (6 * B + 30 * C) * ax * ax + (-12 * B - 48 * C) *
-   ax + (8 * B + 24 * C)) / 6;
+   return ((((-B - 6 * C) * ax +
+ (6 * B + 30 * C)) * ax +
+(-12 * B - 48 * C)) * ax +
+   (8 * B + 24 * C)) / 6;
 }
 else
 {
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] Bill Spitzak patches

2016-04-11 Thread Søren Sandmann Pedersen
Hi,

The following patch series contains those of Bill's patches that I
think are ready to be pushed to master, plus some other related
changes that I also think are ready.

01-03: These are patches to do more BILINEAR->NEAREST filter
   reductions. They were inspired by a similar patch in Bill's
   series, but these patches do the reduction in more cases and
   include tests.

04:Compute the filter size from a transformed ellipse.

05-06: UI fixes to demos/scale.

07:gnuplot output

   This is based on Bill's gnuplot patch, but I rewrote the logic
   in pixman-filter.c to generate correct coordinates, and added a
   big comment explaining how the phase interleaving works.

08:Reduce malloc()/free()/memcpy()

09:Correct Simpson's integration

10:Integral splitting is only necessary for the LINEAR filter

   I rebased this so that it doesn't depend on the changes to the
   integral() from Bill's series, and made the comment in the code
   match the new code.

11:Speed up BOX/BOX

   I rebased this and removed the normalization

12:Fix several issues related to normalization

   This patch fixes several normalization issues including the one
   fixed in Bill's series.

13:Nested polynomial for cubic

14:Made Gaussian a bit wider

In the patches where I made changes, I have generally retained Bill as
the author if the patch still contained a substantial block of code
that was written by Bill. Those that I rewrote completely (the
BILINEAR=>NEAREST and the normalization ones), I have put myself as
author.

However, in all cases I'm happy enough to put either me or Bill as the
author. If anyone has strong opinions about this, let me know.

In all the patches I have also reformatted the commit log so that it
fits within 80 characters.

With the exception of the scale->rscale one, I think the remaining
patches in Bill's series should not be accepted, though it is possible
that a new series rebased on top of this will reveal that I
missed something.


Søren
___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 11/14] pixman-filter: Speed up BOX/BOX filter

2016-04-11 Thread Søren Sandmann Pedersen
The convolution of two BOX filters is simply the length of the
interval where both are non-zero, so we can simply return width from
the integral() function because the integration region has already
been restricted to be such that both functions are non-zero on it.

This is both faster and more accurate than doing numerical integration.

This patch is based on one by Bill Spitzak

https://lists.freedesktop.org/archives/pixman/2016-March/004446.html

with these changes:

- Rebased to not assume any changes in the arguments to integral().

- Dropped the multiplication by scale

- Added more details in the commit message.

Signed-off-by: Søren Sandmann 
---
 pixman/pixman-filter.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c
index c868723..32aaa9a 100644
--- a/pixman/pixman-filter.c
+++ b/pixman/pixman-filter.c
@@ -160,11 +160,15 @@ integral (pixman_kernel_t kernel1, double x1,
  pixman_kernel_t kernel2, double scale, double x2,
  double width)
 {
+if (kernel1 == PIXMAN_KERNEL_BOX && kernel2 == PIXMAN_KERNEL_BOX)
+{
+   return width;
+}
 /* The LINEAR filter is not differentiable at 0, so if the
  * integration interval crosses zero, break it into two
  * separate integrals.
  */
-if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0)
+else if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0)
 {
return
integral (kernel1, x1, kernel2, scale, x2, - x1) +
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 02/14] Add new test of filter reduction from BILINEAR to NEAREST

2016-04-11 Thread Søren Sandmann Pedersen
This new test tests a bunch of bilinear downscalings, where many have
a transformation such that the BILINEAR filter can be reduced to
NEAREST (and many don't).

A CRC32 is computed for all the resulting images and compared to a
known-good value for both 4-bit and 7-bit interpolation.

V2: Remove leftover comment, some minor formatting fixes, use a
timestamp as the PRNG seed.

Signed-off-by: Søren Sandmann 
---
 test/Makefile.sources|   1 +
 test/filter-reduction-test.c | 112 +++
 2 files changed, 113 insertions(+)
 create mode 100644 test/filter-reduction-test.c

diff --git a/test/Makefile.sources b/test/Makefile.sources
index 5d55e67..0a56231 100644
--- a/test/Makefile.sources
+++ b/test/Makefile.sources
@@ -21,6 +21,7 @@ TESTPROGRAMS =  \
gradient-crash-test   \
pixel-test\
matrix-test   \
+   filter-reduction-test \
composite-traps-test  \
region-contains-test  \
glyph-test\
diff --git a/test/filter-reduction-test.c b/test/filter-reduction-test.c
new file mode 100644
index 000..705fa4b
--- /dev/null
+++ b/test/filter-reduction-test.c
@@ -0,0 +1,112 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+static const pixman_fixed_t entries[] =
+{
+pixman_double_to_fixed (-1.0),
+pixman_double_to_fixed (-0.5),
+pixman_double_to_fixed (-1/3.0),
+pixman_double_to_fixed (0.0),
+pixman_double_to_fixed (0.5),
+pixman_double_to_fixed (1.0),
+pixman_double_to_fixed (1.5),
+pixman_double_to_fixed (2.0),
+pixman_double_to_fixed (3.0),
+};
+
+#define SIZE 12
+
+static uint32_t
+test_scale (const pixman_transform_t *xform, uint32_t crc)
+{
+uint32_t *srcbuf, *dstbuf;
+pixman_image_t *src, *dest;
+
+srcbuf = malloc (SIZE * SIZE * 4);
+prng_randmemset (srcbuf, SIZE * SIZE * 4, 0);
+src = pixman_image_create_bits (
+   PIXMAN_a8r8g8b8, SIZE, SIZE, srcbuf, SIZE * 4);
+
+dstbuf = malloc (SIZE * SIZE * 4);
+prng_randmemset (dstbuf, SIZE * SIZE * 4, 0);
+dest = pixman_image_create_bits (
+   PIXMAN_a8r8g8b8, SIZE, SIZE, dstbuf, SIZE * 4);
+
+pixman_image_set_transform (src, xform);
+pixman_image_set_repeat (src, PIXMAN_REPEAT_NORMAL);
+pixman_image_set_filter (src, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
+image_endian_swap (src);
+image_endian_swap (dest);
+
+pixman_image_composite (PIXMAN_OP_SRC,
+   src, NULL, dest,
+   0, 0, 0, 0, 0, 0,
+   SIZE, SIZE);
+
+crc = compute_crc32_for_image (crc, dest);
+
+pixman_image_unref (src);
+pixman_image_unref (dest);
+
+free (srcbuf);
+free (dstbuf);
+
+return crc;
+}
+
+#if BILINEAR_INTERPOLATION_BITS == 7
+#define CHECKSUM 0x02169677
+#elif BILINEAR_INTERPOLATION_BITS == 4
+#define CHECKSUM 0xE44B29AC
+#else
+#define CHECKSUM 0x00000000
+#endif
+
+int
+main (int argc, const char *argv[])
+{
+const pixman_fixed_t *end = entries + ARRAY_LENGTH (entries);
+const pixman_fixed_t *t0, *t1, *t2, *t3, *t4, *t5;
+uint32_t crc = 0;
+
+prng_srand (0x56EA1DBD);
+
+for (t0 = entries; t0 < end; ++t0)
+{
+   for (t1 = entries; t1 < end; ++t1)
+   {
+   for (t2 = entries; t2 < end; ++t2)
+   {
+   for (t3 = entries; t3 < end; ++t3)
+   {
+   for (t4 = entries; t4 < end; ++t4)
+   {
+   for (t5 = entries; t5 < end; ++t5)
+   {
+   pixman_transform_t xform = {
+   { { *t0, *t1, *t2 },
+ { *t3, *t4, *t5 },
+ { 0, 0, pixman_fixed_1 } }
+   };
+
+   crc = test_scale (&xform, crc);
+   }
+   }
+   }
+   }
+   }
+}
+
+if (crc != CHECKSUM)
+{
+   printf ("filter-reduction-test failed! (checksum=0x%08X, expected 
0x%08X)\n", crc, CHECKSUM);
+   return 1;
+}
+else
+{
+   printf ("filter-reduction-test passed (checksum=0x%08X)\n", crc);
+   return 0;
+}
+}
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 07/14] pixman-image: Added enable-gnuplot config to view filters in gnuplot

2016-04-11 Thread Søren Sandmann Pedersen
From: Bill Spitzak 

If enable-gnuplot is configured, then you can pipe the output of a
pixman-using program to gnuplot and get a continuously-updated plot of
the horizontal filter. This works well with demos/scale to test the
filter generation.

The plot is all the different subposition filters shuffled
together. This is misleading in a few cases:

  IMPULSE.BOX - goes up and down as the subfilters have different
numbers of non-zero samples

  IMPULSE.TRIANGLE - somewhat crooked for the same reason

  1-wide filters - look triangular, but a 1-wide box would be more
   accurate

Changes by Søren: Rewrote the pixman-filter.c part to
 - make it generate correct coordinates
 - add a comment on how coordinates are generated
 - in rounding.txt, add a ceil() variant of the first-sample
   formula
 - make the gnuplot output slightly prettier

v7: First time this ability was included

v8: Use config option
Moved code to the filter generator
Modified scale demo to not call filter generator a second time.

v10: Only print if successful generation of plots
 Use #ifdef, not #if

v11: small whitespace fixes

Signed-off-by: Bill Spitzak 
Signed-off-by: Søren Sandmann 
---
 configure.ac   |  13 ++
 pixman/pixman-filter.c | 115 +
 pixman/rounding.txt|   1 +
 3 files changed, 129 insertions(+)

diff --git a/configure.ac b/configure.ac
index 6b2134e..e833e45 100644
--- a/configure.ac
+++ b/configure.ac
@@ -834,6 +834,19 @@ fi
 AC_SUBST(PIXMAN_TIMERS)
 
 dnl ===
+dnl gnuplot
+
+AC_ARG_ENABLE(gnuplot,
+   [AC_HELP_STRING([--enable-gnuplot],
+   [enable output of filters that can be piped to gnuplot 
[default=no]])],
+   [enable_gnuplot=$enableval], [enable_gnuplot=no])
+
+if test $enable_gnuplot = yes ; then
+   AC_DEFINE(PIXMAN_GNUPLOT, 1, [enable output that can be piped to gnuplot])
+fi
+AC_SUBST(PIXMAN_GNUPLOT)
+
+dnl ===
 dnl GTK+
 
 AC_ARG_ENABLE(gtk,
diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c
index b2bf53f..af46a43 100644
--- a/pixman/pixman-filter.c
+++ b/pixman/pixman-filter.c
@@ -297,6 +297,117 @@ create_1d_filter (int *width,
 return params;
 }
 
+#ifdef PIXMAN_GNUPLOT
+
+/* If enable-gnuplot is configured, then you can pipe the output of a
+ * pixman-using program to gnuplot and get a continuously-updated plot
+ * of the horizontal filter. This works well with demos/scale to test
+ * the filter generation.
+ *
+ * The plot is all the different subposition filters shuffled
+ * together. This is misleading in a few cases:
+ *
+ *  IMPULSE.BOX - goes up and down as the subfilters have different
+ *   numbers of non-zero samples
+ *  IMPULSE.TRIANGLE - somewhat crooked for the same reason
+ *  1-wide filters - looks triangular, but a 1-wide box would be more
+ *  accurate
+ */
+static void
+gnuplot_filter (int width, int n_phases, const pixman_fixed_t* p)
+{
+double step;
+int i, j;
+int first;
+
+step = 1.0 / n_phases;
+
+printf ("set style line 1 lc rgb '#0060ad' lt 1 lw 0.5 pt 7 pi 1 ps 
0.5\n");
+printf ("plot '-' with linespoints ls 1\n");
+
+/* The position of the first sample of the phase corresponding to
+ * frac is given by:
+ * 
+ * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
+ * 
+ * We have to find the frac that minimizes this expression.
+ * 
+ * For odd widths, we have
+ * 
+ * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
+ *   = ceil (frac) + K - frac
+ *   = 1 + K - frac
+ * 
+ * for some K, so this is minimized when frac is maximized and
+ * strictly growing with frac. So for odd widths, we can simply
+ * start at the last phase and go backwards.
+ * 
+ * For even widths, we have
+ * 
+ * ceil (frac - width / 2.0 - 0.5) + 0.5 - frac
+ *   = ceil (frac - 0.5) + K - frac
+ * 
+ * The graph for this function (ignoring K) looks like this:
+ * 
+ *0.5
+ *   ||\ 
+ *   || \ 
+ *   ||  \ 
+ * 0 ||   \ 
+ *   |\   |
+ *   | \  |
+ *   |  \ |
+ *  -0.5 |   \|
+ *   -
+ *   00.5   1
+ * 
+ * So in this case we need to start with the phase whose frac is
+ * less than, but as close as possible to 0.5, then go backwards
+ * until we hit the first phase, then wrap around to the last
+ * phase and continue backwards.
+ * 
+ * Which phase is as close as possible to 0.5? The location of the
+ * sampling point corresponding to the kth phase is given by
+ * 1/(2 * n_phases) + k / n_phases:
+ * 
+ * 1/(2 * n_phases) + k / n_phases = 0.5
+ 

[Pixman] [PATCH 10/14] pixman-filter: integral splitting is only needed for triangle filter

2016-04-11 Thread Søren Sandmann Pedersen
From: Bill Spitzak 

Only the triangle (LINEAR) filter has a kink at 0: it is continuous
there but not differentiable. The other filters resemble a cubic
closely enough that Simpson's integration works without splitting.

Changes by Søren: Rebase without the changes to the integral function,
update comment to match the new code.

Signed-off-by: Bill Spitzak 
Signed-off-by: Søren Sandmann 
Reviewed-by: Søren Sandmann 
---
 pixman/pixman-filter.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c
index 8d4872a..c868723 100644
--- a/pixman/pixman-filter.c
+++ b/pixman/pixman-filter.c
@@ -160,18 +160,17 @@ integral (pixman_kernel_t kernel1, double x1,
  pixman_kernel_t kernel2, double scale, double x2,
  double width)
 {
-/* If the integration interval crosses zero, break it into
- * two separate integrals. This ensures that filters such
- * as LINEAR that are not differentiable at 0 will still
- * integrate properly.
+/* The LINEAR filter is not differentiable at 0, so if the
+ * integration interval crosses zero, break it into two
+ * separate integrals.
  */
-if (x1 < 0 && x1 + width > 0)
+if (kernel1 == PIXMAN_KERNEL_LINEAR && x1 < 0 && x1 + width > 0)
 {
return
integral (kernel1, x1, kernel2, scale, x2, - x1) +
integral (kernel1, 0, kernel2, scale, x2 - x1, width + x1);
 }
-else if (x2 < 0 && x2 + width > 0)
+else if (kernel2 == PIXMAN_KERNEL_LINEAR && x2 < 0 && x2 + width > 0)
 {
return
integral (kernel1, x1, kernel2, scale, x2, - x2) +
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 04/14] demos/scale: Compute filter size using boundary of xformed ellipse

2016-04-11 Thread Søren Sandmann Pedersen
From: Bill Spitzak 

Instead of using the boundary of the transformed rectangle, use the
boundary of the transformed ellipse. This is much more accurate and
less blurry. In particular, the filtering does not change as the image
is rotated.

Signed-off-by: Bill Spitzak 
Reviewed-by: Oded Gabbay 
Reviewed-by: Soren Sandmann 
---
 demos/scale.c | 102 +++---
 1 file changed, 61 insertions(+), 41 deletions(-)

diff --git a/demos/scale.c b/demos/scale.c
index d00307e..0995ad0 100644
--- a/demos/scale.c
+++ b/demos/scale.c
@@ -55,50 +55,70 @@ get_widget (app_t *app, const char *name)
 return widget;
 }
 
-static double
-min4 (double a, double b, double c, double d)
-{
-double m1, m2;
-
-m1 = MIN (a, b);
-m2 = MIN (c, d);
-return MIN (m1, m2);
-}
-
-static double
-max4 (double a, double b, double c, double d)
-{
-double m1, m2;
-
-m1 = MAX (a, b);
-m2 = MAX (c, d);
-return MAX (m1, m2);
-}
-
+/* Figure out the boundary of a diameter=1 circle transformed into an ellipse
+ * by trans. Proof that this is the correct calculation:
+ *
+ * Transform x,y to u,v by this matrix calculation:
+ *
+ *  |u|   |a c| |x|
+ *  |v| = |b d|*|y|
+ *
+ * Horizontal component:
+ *
+ *  u = ax+cy (1)
+ *
+ * For each x,y on a radius-1 circle (p is angle to the point):
+ *
+ *  x^2+y^2 = 1
+ *  x = cos(p)
+ *  y = sin(p)
+ *  dx/dp = -sin(p) = -y
+ *  dy/dp = cos(p) = x
+ *
+ * Figure out derivative of (1) relative to p:
+ *
+ *  du/dp = a(dx/dp) + c(dy/dp)
+ *= -ay + cx
+ *
+ * The min and max u are when du/dp is zero:
+ *
+ *  -ay + cx = 0
+ *  cx = ay
+ *  c = ay/x  (2)
+ *  y = cx/a  (3)
+ *
+ * Substitute (2) into (1) and simplify:
+ *
+ *  u = ax + ay^2/x
+ *= a(x^2+y^2)/x
+ *= a/x (because x^2+y^2 = 1)
+ *  x = a/u (4)
+ *
+ * Substitute (4) into (3) and simplify:
+ *
+ *  y = c(a/u)/a
+ *  y = c/u (5)
+ *
+ * Square (4) and (5) and add:
+ *
+ *  x^2+y^2 = (a^2+c^2)/u^2
+ *
+ * But x^2+y^2 is 1:
+ *
+ *  1 = (a^2+c^2)/u^2
+ *  u^2 = a^2+c^2
+ *  u = hypot(a,c)
+ *
+ * Similarily the max/min of v is at:
+ *
+ *  v = hypot(b,d)
+ *
+ */
 static void
 compute_extents (pixman_f_transform_t *trans, double *sx, double *sy)
 {
-double min_x, max_x, min_y, max_y;
-pixman_f_vector_t v[4] =
-{
-   { { 1, 1, 1 } },
-   { { -1, 1, 1 } },
-   { { -1, -1, 1 } },
-   { { 1, -1, 1 } },
-};
-
-pixman_f_transform_point (trans, &v[0]);
-pixman_f_transform_point (trans, &v[1]);
-pixman_f_transform_point (trans, &v[2]);
-pixman_f_transform_point (trans, &v[3]);
-
-min_x = min4 (v[0].v[0], v[1].v[0], v[2].v[0], v[3].v[0]);
-max_x = max4 (v[0].v[0], v[1].v[0], v[2].v[0], v[3].v[0]);
-min_y = min4 (v[0].v[1], v[1].v[1], v[2].v[1], v[3].v[1]);
-max_y = max4 (v[0].v[1], v[1].v[1], v[2].v[1], v[3].v[1]);
-
-*sx = (max_x - min_x) / 2.0;
-*sy = (max_y - min_y) / 2.0;
+*sx = hypot (trans->m[0][0], trans->m[0][1]) / trans->m[2][2];
+*sy = hypot (trans->m[1][0], trans->m[1][1]) / trans->m[2][2];
 }
 
 typedef struct
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 06/14] demos/scale: Default to locked axis

2016-04-11 Thread Søren Sandmann Pedersen
From: Bill Spitzak 

Signed-off-by: Bill Spitzak 
Reviewed-by: Søren Sandmann 
---
 demos/scale.ui | 1 +
 1 file changed, 1 insertion(+)

diff --git a/demos/scale.ui b/demos/scale.ui
index f6f6e89..d498d26 100644
--- a/demos/scale.ui
+++ b/demos/scale.ui
@@ -177,6 +177,7 @@
  id="lock_checkbutton">
Lock X and Y 
Dimensions
0.0
+   True
  
   
 False
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 09/14] pixman-filter: Correct Simpsons integration

2016-04-11 Thread Søren Sandmann Pedersen
From: Bill Spitzak 

Simpson's rule uses cubic curve fitting, with 3 samples defining each
cubic. This gives the samples weights in the pattern
1,4,2,4,2,...,4,1, after which the result is divided by 3.

The previous code was using weights of 1,2,0,6,0,6...,2,1.

With this fix the integration is accurate enough that the number of
samples could be reduced a lot. Multiples of 12 seem to work best.

v7: Merged with patch to reduce from 128 samples to 16
v9: Changed samples from 16 to 12
v10: Fixed rebase error that made it not compile
v11: minor whitespace change
v14: more whitespace changes

Signed-off-by: Bill Spitzak 
Reviewed-by: Oded Gabbay 
Reviewed-by: Søren Sandmann 
---
 pixman/pixman-filter.c | 21 +++--
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c
index dd5176d..8d4872a 100644
--- a/pixman/pixman-filter.c
+++ b/pixman/pixman-filter.c
@@ -189,13 +189,19 @@ integral (pixman_kernel_t kernel1, double x1,
 }
 else
 {
-   /* Integration via Simpson's rule */
-#define N_SEGMENTS 128
+   /* Integration via Simpson's rule
+* See http://www.intmath.com/integration/6-simpsons-rule.php
+* 12 segments (6 cubic approximations) seems to produce best
+* result for lanczos3.linear, which was the combination that
+* showed the most errors.  This makes sense as the lanczos3
+* filter is 6 wide.
+*/
+#define N_SEGMENTS 12
 #define SAMPLE(a1, a2) \
(filters[kernel1].func ((a1)) * filters[kernel2].func ((a2) * scale))

double s = 0.0;
-   double h = width / (double)N_SEGMENTS;
+   double h = width / N_SEGMENTS;
int i;
 
s = SAMPLE (x1, x2);
@@ -204,11 +210,14 @@ integral (pixman_kernel_t kernel1, double x1,
{
double a1 = x1 + h * i;
double a2 = x2 + h * i;
+   s += 4 * SAMPLE (a1, a2);
+   }
 
+   for (i = 2; i < N_SEGMENTS; i += 2)
+   {
+   double a1 = x1 + h * i;
+   double a2 = x2 + h * i;
s += 2 * SAMPLE (a1, a2);
-
-   if (i >= 2 && i < N_SEGMENTS - 1)
-   s += 4 * SAMPLE (a1, a2);
}
 
s += SAMPLE (x1 + width, x2 + width);
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 01/14] pixman-fast-path.c: Pick NEAREST affine fast paths before BILINEAR ones

2016-04-11 Thread Søren Sandmann Pedersen
When a BILINEAR filter is reduced to NEAREST, it is possible for both
types of fast paths to run; in this case, the NEAREST ones should be
preferred as that is the simpler filter.

Signed-off-by: Soren Sandmann 
---
 pixman/pixman-fast-path.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 53d4a1f..b4daa26 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -3258,9 +3258,9 @@ static const pixman_iter_info_t fast_iters[] =
 },
 
 #define AFFINE_FAST_PATHS(name, format, repeat)
\
-SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)   \
+NEAREST_AFFINE_FAST_PATH(name, format, repeat) \
 BILINEAR_AFFINE_FAST_PATH(name, format, repeat)\
-NEAREST_AFFINE_FAST_PATH(name, format, repeat)
+SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)
 
 AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
 AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 08/14] pixman-filter: reduce amount of malloc/free/memcpy to generate filter

2016-04-11 Thread Søren Sandmann Pedersen
From: Bill Spitzak 

Rearranged so that the entire block of memory for the filter pair
is allocated first, and then filled in. Previous version allocated
and freed two temporary buffers for each filter and did an extra
memcpy.

v8: small refactor to remove the filter_width function

v10: Restored filter_width function but with arguments changed to
 match later patches

v11: Removed unused arg and pointer from filter_width function
 Whitespace fixes.

Signed-off-by: Bill Spitzak 
Reviewed-by: Oded Gabbay 
Acked-by: Søren Sandmann 
---
 pixman/pixman-filter.c | 56 +-
 1 file changed, 23 insertions(+), 33 deletions(-)

diff --git a/pixman/pixman-filter.c b/pixman/pixman-filter.c
index af46a43..dd5176d 100644
--- a/pixman/pixman-filter.c
+++ b/pixman/pixman-filter.c
@@ -217,25 +217,17 @@ integral (pixman_kernel_t kernel1, double x1,
 }
 }
 
-static pixman_fixed_t *
-create_1d_filter (int *width,
+static void
+create_1d_filter (int  width,
  pixman_kernel_t  reconstruct,
  pixman_kernel_t  sample,
  double   scale,
- int  n_phases)
+ int  n_phases,
+ pixman_fixed_t *p)
 {
-pixman_fixed_t *params, *p;
 double step;
-double size;
 int i;
 
-size = scale * filters[sample].width + filters[reconstruct].width;
-*width = ceil (size);
-
-p = params = malloc (*width * n_phases * sizeof (pixman_fixed_t));
-if (!params)
-return NULL;
-
 step = 1.0 / n_phases;
 
 for (i = 0; i < n_phases; ++i)
@@ -250,8 +242,8 @@ create_1d_filter (int *width,
 * and sample positions.
 */
 
-   x1 = ceil (frac - *width / 2.0 - 0.5);
-x2 = x1 + *width;
+   x1 = ceil (frac - width / 2.0 - 0.5);
+   x2 = x1 + width;
 
total = 0;
 for (x = x1; x < x2; ++x)
@@ -279,7 +271,7 @@ create_1d_filter (int *width,
 }
 
/* Normalize */
-   p -= *width;
+   p -= width;
 total = 1 / total;
 new_total = 0;
for (x = x1; x < x2; ++x)
@@ -291,10 +283,15 @@ create_1d_filter (int *width,
}
 
if (new_total != pixman_fixed_1)
-   *(p - *width / 2) += (pixman_fixed_1 - new_total);
+   *(p - width / 2) += (pixman_fixed_1 - new_total);
 }
+}
 
-return params;
+
+static int
+filter_width (pixman_kernel_t reconstruct, pixman_kernel_t sample, double size)
+{
+return ceil (filters[reconstruct].width + size * filters[sample].width);
 }
 
 #ifdef PIXMAN_GNUPLOT
@@ -424,38 +421,31 @@ pixman_filter_create_separable_convolution (int   
  *n_values,
 {
 double sx = fabs (pixman_fixed_to_double (scale_x));
 double sy = fabs (pixman_fixed_to_double (scale_y));
-pixman_fixed_t *horz = NULL, *vert = NULL, *params = NULL;
+pixman_fixed_t *params;
 int subsample_x, subsample_y;
 int width, height;
 
+width = filter_width (reconstruct_x, sample_x, sx);
 subsample_x = (1 << subsample_bits_x);
-subsample_y = (1 << subsample_bits_y);
 
-horz = create_1d_filter (&width, reconstruct_x, sample_x, sx, subsample_x);
-vert = create_1d_filter (&height, reconstruct_y, sample_y, sy, 
subsample_y);
+height = filter_width (reconstruct_y, sample_y, sy);
+subsample_y = (1 << subsample_bits_y);
 
-if (!horz || !vert)
-goto out;
-
 *n_values = 4 + width * subsample_x + height * subsample_y;
 
 params = malloc (*n_values * sizeof (pixman_fixed_t));
 if (!params)
-goto out;
+   return NULL;
 
 params[0] = pixman_int_to_fixed (width);
 params[1] = pixman_int_to_fixed (height);
 params[2] = pixman_int_to_fixed (subsample_bits_x);
 params[3] = pixman_int_to_fixed (subsample_bits_y);
 
-memcpy (params + 4, horz,
-   width * subsample_x * sizeof (pixman_fixed_t));
-memcpy (params + 4 + width * subsample_x, vert,
-   height * subsample_y * sizeof (pixman_fixed_t));
-
-out:
-free (horz);
-free (vert);
+create_1d_filter (width, reconstruct_x, sample_x, sx, subsample_x,
+ params + 4);
+create_1d_filter (height, reconstruct_y, sample_y, sy, subsample_y,
+ params + 4 + width * subsample_x);
 
 #ifdef PIXMAN_GNUPLOT
 gnuplot_filter(width, subsample_x, params + 4);
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCHv2 3/3] More general BILINEAR=>NEAREST reduction

2016-03-19 Thread Søren Sandmann Pedersen
Generalize and simplify the code that reduces BILINEAR to NEAREST so
that the reduction happens for all affine transformations where
t00..t12 are integers and (t00 + t01) and (t10 + t11) are both
odd. This is a sufficient condition for the resulting transformed
coordinates to be exactly at the center of a pixel so that BILINEAR
becomes identical to NEAREST.

V2: Address some comments by Bill Spitzak

Signed-off-by: Søren Sandmann 
---
 pixman/pixman-image.c | 66 +--
 1 file changed, 38 insertions(+), 28 deletions(-)

diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 1ff1a49..681864e 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -335,37 +335,47 @@ compute_image_info (pixman_image_t *image)
{
flags |= FAST_PATH_NEAREST_FILTER;
}
-   else if (
-   /* affine and integer translation components in matrix ... */
-   ((flags & FAST_PATH_AFFINE_TRANSFORM) &&
-!pixman_fixed_frac (image->common.transform->matrix[0][2] |
-image->common.transform->matrix[1][2])) &&
-   (
-   /* ... combined with a simple rotation */
-   (flags & (FAST_PATH_ROTATE_90_TRANSFORM |
- FAST_PATH_ROTATE_180_TRANSFORM |
- FAST_PATH_ROTATE_270_TRANSFORM)) ||
-   /* ... or combined with a simple non-rotated translation */
-   (image->common.transform->matrix[0][0] == pixman_fixed_1 &&
-image->common.transform->matrix[1][1] == pixman_fixed_1 &&
-image->common.transform->matrix[0][1] == 0 &&
-image->common.transform->matrix[1][0] == 0)
-   )
-   )
+   else if (flags & FAST_PATH_AFFINE_TRANSFORM)
{
-   /* FIXME: there are some affine-test failures, showing that
-* handling of BILINEAR and NEAREST filter is not quite
-* equivalent when getting close to 32K for the translation
-* components of the matrix. That's likely some bug, but for
-* now just skip BILINEAR->NEAREST optimization in this case.
+   /* Suppose the transform is
+*
+*[ t00, t01, t02 ]
+*[ t10, t11, t12 ]
+*[   0,   0,   1 ]
+*
+* and the destination coordinates are (n + 0.5, m + 0.5). Then
+* the transformed x coordinate is:
+*
+* tx = t00 * (n + 0.5) + t01 * (m + 0.5) + t02
+*= t00 * n + t01 * m + t02 + (t00 + t01) * 0.5
+*
+* which implies that if t00, t01 and t02 are all integers
+* and (t00 + t01) is odd, then tx will be an integer plus 0.5,
+* which means a BILINEAR filter will reduce to NEAREST. The same
+* applies in the y direction
 */
-   pixman_fixed_t magic_limit = pixman_int_to_fixed (3);
-   if (image->common.transform->matrix[0][2] <= magic_limit  &&
-   image->common.transform->matrix[1][2] <= magic_limit  &&
-   image->common.transform->matrix[0][2] >= -magic_limit &&
-   image->common.transform->matrix[1][2] >= -magic_limit)
+   pixman_fixed_t (*t)[3] = image->common.transform->matrix;
+
+   if ((pixman_fixed_frac (
+t[0][0] | t[0][1] | t[0][2] |
+t[1][0] | t[1][1] | t[1][2]) == 0) &&
+   (pixman_fixed_to_int (
+   (t[0][0] + t[0][1]) & (t[1][0] + t[1][1])) % 2) == 1)
{
-   flags |= FAST_PATH_NEAREST_FILTER;
+   /* FIXME: there are some affine-test failures, showing that
+* handling of BILINEAR and NEAREST filter is not quite
+* equivalent when getting close to 32K for the translation
+* components of the matrix. That's likely some bug, but for
+* now just skip BILINEAR->NEAREST optimization in this case.
+*/
+   pixman_fixed_t magic_limit = pixman_int_to_fixed (3);
+   if (image->common.transform->matrix[0][2] <= magic_limit  &&
+   image->common.transform->matrix[1][2] <= magic_limit  &&
+   image->common.transform->matrix[0][2] >= -magic_limit &&
+   image->common.transform->matrix[1][2] >= -magic_limit)
+   {
+   flags |= FAST_PATH_NEAREST_FILTER;
+   }
}
}
break;
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCHv2 1/3] pixman-fast-path.c: Pick NEAREST affine fast paths before BILINEAR ones

2016-03-19 Thread Søren Sandmann Pedersen
When a BILINEAR filter is reduced to NEAREST, it is possible for both
types of fast paths to run; in this case, the NEAREST ones should be
preferred as that is the simpler filter.

Signed-off-by: Soren Sandmann 
---
 pixman/pixman-fast-path.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 53d4a1f..b4daa26 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -3258,9 +3258,9 @@ static const pixman_iter_info_t fast_iters[] =
 },
 
 #define AFFINE_FAST_PATHS(name, format, repeat)
\
-SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)   \
+NEAREST_AFFINE_FAST_PATH(name, format, repeat) \
 BILINEAR_AFFINE_FAST_PATH(name, format, repeat)\
-NEAREST_AFFINE_FAST_PATH(name, format, repeat)
+SEPARABLE_CONVOLUTION_AFFINE_FAST_PATH(name, format, repeat)
 
 AFFINE_FAST_PATHS (pad_a8r8g8b8, a8r8g8b8, PAD)
 AFFINE_FAST_PATHS (none_a8r8g8b8, a8r8g8b8, NONE)
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCHv2 2/3] Add new test of filter reduction from BILINEAR to NEAREST

2016-03-19 Thread Søren Sandmann Pedersen
This new test tests a bunch of bilinear downscalings, where many have
a transformation such that the BILINEAR filter can be reduced to
NEAREST (and many don't).

A CRC32 is computed for all the resulting images and compared to a
known-good value for both 4-bit and 7-bit interpolation.

V2: Remove leftover comment, some minor formatting fixes, use a
timestamp as the PRNG seed.

Signed-off-by: Søren Sandmann 
---
 test/Makefile.sources|   1 +
 test/filter-reduction-test.c | 112 +++
 2 files changed, 113 insertions(+)
 create mode 100644 test/filter-reduction-test.c

diff --git a/test/Makefile.sources b/test/Makefile.sources
index 5d55e67..0a56231 100644
--- a/test/Makefile.sources
+++ b/test/Makefile.sources
@@ -21,6 +21,7 @@ TESTPROGRAMS =  \
gradient-crash-test   \
pixel-test\
matrix-test   \
+   filter-reduction-test \
composite-traps-test  \
region-contains-test  \
glyph-test\
diff --git a/test/filter-reduction-test.c b/test/filter-reduction-test.c
new file mode 100644
index 0000000..705fa4b
--- /dev/null
+++ b/test/filter-reduction-test.c
@@ -0,0 +1,112 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+static const pixman_fixed_t entries[] =
+{
+pixman_double_to_fixed (-1.0),
+pixman_double_to_fixed (-0.5),
+pixman_double_to_fixed (-1/3.0),
+pixman_double_to_fixed (0.0),
+pixman_double_to_fixed (0.5),
+pixman_double_to_fixed (1.0),
+pixman_double_to_fixed (1.5),
+pixman_double_to_fixed (2.0),
+pixman_double_to_fixed (3.0),
+};
+
+#define SIZE 12
+
+static uint32_t
+test_scale (const pixman_transform_t *xform, uint32_t crc)
+{
+uint32_t *srcbuf, *dstbuf;
+pixman_image_t *src, *dest;
+
+srcbuf = malloc (SIZE * SIZE * 4);
+prng_randmemset (srcbuf, SIZE * SIZE * 4, 0);
+src = pixman_image_create_bits (
+   PIXMAN_a8r8g8b8, SIZE, SIZE, srcbuf, SIZE * 4);
+
+dstbuf = malloc (SIZE * SIZE * 4);
+prng_randmemset (dstbuf, SIZE * SIZE * 4, 0);
+dest = pixman_image_create_bits (
+   PIXMAN_a8r8g8b8, SIZE, SIZE, dstbuf, SIZE * 4);
+
+pixman_image_set_transform (src, xform);
+pixman_image_set_repeat (src, PIXMAN_REPEAT_NORMAL);
+pixman_image_set_filter (src, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
+image_endian_swap (src);
+image_endian_swap (dest);
+
+pixman_image_composite (PIXMAN_OP_SRC,
+   src, NULL, dest,
+   0, 0, 0, 0, 0, 0,
+   SIZE, SIZE);
+
+crc = compute_crc32_for_image (crc, dest);
+
+pixman_image_unref (src);
+pixman_image_unref (dest);
+
+free (srcbuf);
+free (dstbuf);
+
+return crc;
+}
+
+#if BILINEAR_INTERPOLATION_BITS == 7
+#define CHECKSUM 0x02169677
+#elif BILINEAR_INTERPOLATION_BITS == 4
+#define CHECKSUM 0xE44B29AC
+#else
+#define CHECKSUM 0x00000000
+#endif
+
+int
+main (int argc, const char *argv[])
+{
+const pixman_fixed_t *end = entries + ARRAY_LENGTH (entries);
+const pixman_fixed_t *t0, *t1, *t2, *t3, *t4, *t5;
+uint32_t crc = 0;
+
+prng_srand (0x56EA1DBD);
+
+for (t0 = entries; t0 < end; ++t0)
+{
+   for (t1 = entries; t1 < end; ++t1)
+   {
+   for (t2 = entries; t2 < end; ++t2)
+   {
+   for (t3 = entries; t3 < end; ++t3)
+   {
+   for (t4 = entries; t4 < end; ++t4)
+   {
+   for (t5 = entries; t5 < end; ++t5)
+   {
+   pixman_transform_t xform = {
+   { { *t0, *t1, *t2 },
+ { *t3, *t4, *t5 },
+ { 0, 0, pixman_fixed_1 } }
+   };
+
+   crc = test_scale (&xform, crc);
+   }
+   }
+   }
+   }
+   }
+}
+
+if (crc != CHECKSUM)
+{
+   printf ("filter-reduction-test failed! (checksum=0x%08X, expected 
0x%08X)\n", crc, CHECKSUM);
+   return 1;
+}
+else
+{
+   printf ("filter-reduction-test passed (checksum=0x%08X)\n", crc);
+   return 0;
+}
+}
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCHv2 0/3] More general reduction of BILINEAR to NEAREST

2016-03-18 Thread Søren Sandmann Pedersen
This series addresses the comments by Bill and also changes
pixman-fast-path.c so that it picks NEAREST fast paths before
BILINEAR. (I noticed this because the new filter-reduction-test.c
failed to detect a bug that I deliberately introduced).

Søren

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 1/2] Add new test of filter reduction from BILINEAR to NEAREST

2016-03-15 Thread Søren Sandmann Pedersen
This new test tests a bunch of bilinear downscalings, where many have
a transformation such that the BILINEAR filter can be reduced to
NEAREST (and many don't).

A CRC32 is computed for all the resulting images and compared to a
known-good value for both 4-bit and 7-bit interpolation.

Signed-off-by: Søren Sandmann 
---
 test/Makefile.sources|   1 +
 test/filter-reduction-test.c | 119 +++
 2 files changed, 120 insertions(+)
 create mode 100644 test/filter-reduction-test.c

diff --git a/test/Makefile.sources b/test/Makefile.sources
index 5d55e67..0a56231 100644
--- a/test/Makefile.sources
+++ b/test/Makefile.sources
@@ -21,6 +21,7 @@ TESTPROGRAMS =  \
gradient-crash-test   \
pixel-test\
matrix-test   \
+   filter-reduction-test \
composite-traps-test  \
region-contains-test  \
glyph-test\
diff --git a/test/filter-reduction-test.c b/test/filter-reduction-test.c
new file mode 100644
index 0000000..72b3142
--- /dev/null
+++ b/test/filter-reduction-test.c
@@ -0,0 +1,119 @@
+/*
+ * Test program, which can detect some problems with nearest neighbour
+ * and bilinear scaling in pixman. Testing is done by running lots
+ * of random SRC and OVER compositing operations a8r8g8b8, x8a8r8g8b8
+ * and r5g6b5 color formats.
+ *
+ * Script 'fuzzer-find-diff.pl' can be used to narrow down the problem in
+ * the case of test failure.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+static const pixman_fixed_t entries[] =
+{
+pixman_double_to_fixed (-1.0),
+pixman_double_to_fixed (-0.5),
+pixman_double_to_fixed (-1/3.0),
+pixman_double_to_fixed (0.0),
+pixman_double_to_fixed (0.5),
+pixman_double_to_fixed (1.0),
+pixman_double_to_fixed (1.5),
+pixman_double_to_fixed (2.0),
+pixman_double_to_fixed (3.0),
+};
+
+#define SIZE 12
+
+static uint32_t
+test_scale (const pixman_transform_t *xform, uint32_t crc)
+{
+uint32_t *srcbuf, *dstbuf;
+pixman_image_t *src, *dest;
+
+srcbuf = malloc (SIZE * SIZE * 4);
+prng_randmemset (srcbuf, SIZE * SIZE * 4, 0);
+src = pixman_image_create_bits (PIXMAN_a8r8g8b8, SIZE, SIZE, srcbuf, SIZE 
* 4);
+
+dstbuf = malloc (SIZE * SIZE * 4);
+prng_randmemset (dstbuf, SIZE * SIZE * 4, 0);
+dest = pixman_image_create_bits (PIXMAN_a8r8g8b8, SIZE, SIZE, dstbuf, SIZE 
* 4);
+
+pixman_image_set_transform (src, xform);
+pixman_image_set_repeat (src, PIXMAN_REPEAT_NORMAL);
+pixman_image_set_filter (src, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
+image_endian_swap (src);
+image_endian_swap (dest);
+
+pixman_image_composite (PIXMAN_OP_SRC,
+   src, NULL, dest,
+   0, 0, 0, 0, 0, 0,
+   SIZE, SIZE);
+
+crc = compute_crc32_for_image (crc, dest);
+
+pixman_image_unref (src);
+pixman_image_unref (dest);
+
+free (srcbuf);
+free (dstbuf);
+
+return crc;
+}
+
+#if BILINEAR_INTERPOLATION_BITS == 7
+#define CHECKSUM 0x40BDEAC4
+#elif BILINEAR_INTERPOLATION_BITS == 4
+#define CHECKSUM 0xF8245E72
+#else
+#define CHECKSUM 0x00000000
+#endif
+
+int
+main (int argc, const char *argv[])
+{
+const pixman_fixed_t *end = entries + ARRAY_LENGTH (entries);
+const pixman_fixed_t *t0, *t1, *t2, *t3, *t4, *t5;
+uint32_t crc = 0;
+
+prng_srand (0xcafebabe);
+
+for (t0 = entries; t0 < end; ++t0)
+{
+   for (t1 = entries; t1 < end; ++t1)
+   {
+   for (t2 = entries; t2 < end; ++t2)
+   {
+   for (t3 = entries; t3 < end; ++t3)
+   {
+   for (t4 = entries; t4 < end; ++t4)
+   {
+   for (t5 = entries; t5 < end; ++t5)
+   {
+   pixman_transform_t xform = {
+   { { *t0, *t1, *t2 },
+ { *t3, *t4, *t5 },
+ { 0, 0, pixman_fixed_1 } }
+   };
+
+   crc = test_scale (&xform, crc);
+   }
+   }
+   }
+   }
+   }
+}
+
+if (crc != CHECKSUM)
+{
+   printf ("filter-reduction-test failed! (checksum=%08X, expected 
%08X)\n", crc, CHECKSUM);
+   return 1;
+}
+else
+{
+   printf ("filter-reduction-test passed (checksum=%08X)\n", crc);
+   return 0;
+}
+}
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] More general BILINEAR to NEAREST reduction

2016-03-15 Thread Søren Sandmann Pedersen
The following two patches generalize the reduction of BILINEAR to
NEAREST based on the formula mentioned here:

  https://lists.freedesktop.org/archives/pixman/2010-August/000321.html


Søren

___
Pixman mailing list
Pixman@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH] Fix comment about BILINEAR_INTERPOLATION_BITS to say < 8 rather than <= 8

2014-09-19 Thread Søren Sandmann Pedersen
Since a4c79d695d52c94647b1aff7 the constant
BILINEAR_INTERPOLATION_BITS must be strictly less than 8, so fix the
comment to say this, and also add a COMPILE_TIME_ASSERT in the
bilinear fetcher in pixman-fast-path.c
---
 pixman/pixman-fast-path.c | 2 ++
 pixman/pixman-private.h   | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index c6e43de..a9b7d3a 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -2343,6 +2343,8 @@ fast_fetch_bilinear_cover (pixman_iter_t *iter, const 
uint32_t *mask)
 int32_t dist_y;
 int i;
 
+COMPILE_TIME_ASSERT(BILINEAR_INTERPOLATION_BITS < 8);
+
 fx = info->x;
 ux = iter->image->common.transform->matrix[0][0];
 
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index fdc966a..73108a0 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -7,7 +7,7 @@
  * The defines which are shared between C and assembly code
  */
 
-/* bilinear interpolation precision (must be <= 8) */
+/* bilinear interpolation precision (must be < 8) */
 #define BILINEAR_INTERPOLATION_BITS 7
 #define BILINEAR_INTERPOLATION_RANGE (1 << BILINEAR_INTERPOLATION_BITS)
 
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 0/5] Some changes to Color Dodge and Color Burn

2013-10-05 Thread Søren Sandmann Pedersen
The overall goal of the following patches is to make it more obvious
how the blend mode code relates to the specifications. To that end,
the comment for each blend routine is updated with some math that
shows how we go from specification to a formula that can deal with
premultiplied alpha, and the code is updated to follow the math as
much as possible.

The blend routines for Color Dodge and Color Burn are rewritten to
match the derived formulas. In the case of Color Dodge, this in some
sense makes the code less correct because the new code can now
underflow the unsigned variables when the source pixel is
superluminescent, while the old code was careful to clamp to zero. In
the case of Color Burn, I believe the new code is a net improvement
since the old code could underflow whereas the new code can't.

The reason I don't care too much about the code being correct is that
the blend mode code in general has a number of issues that make it
not work very well, especially when superluminescent pixels are
involved. For one, the use of unsigned variables is wrong; for
another, the macros use a non-saturating sum that can sometimes
overflow.

I have some plans to fix the blend modes for real, but for now the
only thing I want to accomplish with this patch set, is to make the
connection between code and formulas clear.


Soren

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 1/5] pixman-combine32.c: Formatting fixes

2013-10-05 Thread Søren Sandmann Pedersen
Fix a bunch of spacing issues.
---
 pixman/pixman-combine32.c | 112 +++---
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c
index 3ac7576..be3cfdf 100644
--- a/pixman/pixman-combine32.c
+++ b/pixman/pixman-combine32.c
@@ -142,12 +142,12 @@ combine_mask (const uint32_t *src, const uint32_t *mask, 
int i)
 static void
 combine_clear (pixman_implementation_t *imp,
pixman_op_t  op,
-   uint32_t *dest,
-   const uint32_t *  src,
-   const uint32_t *  mask,
+   uint32_t *   dest,
+   const uint32_t * src,
+   const uint32_t * mask,
int  width)
 {
-memset (dest, 0, width * sizeof(uint32_t));
+memset (dest, 0, width * sizeof (uint32_t));
 }
 
 static void
@@ -155,7 +155,7 @@ combine_dst (pixman_implementation_t *imp,
 pixman_op_t  op,
 uint32_t *   dest,
 const uint32_t * src,
-const uint32_t *  mask,
+const uint32_t * mask,
 int  width)
 {
 return;
@@ -164,9 +164,9 @@ combine_dst (pixman_implementation_t *imp,
 static void
 combine_src_u (pixman_implementation_t *imp,
pixman_op_t  op,
-   uint32_t *dest,
-   const uint32_t *  src,
-   const uint32_t *  mask,
+   uint32_t *   dest,
+   const uint32_t * src,
+   const uint32_t * mask,
int  width)
 {
 int i;
@@ -189,9 +189,9 @@ combine_src_u (pixman_implementation_t *imp,
 static void
 combine_over_u (pixman_implementation_t *imp,
 pixman_op_t  op,
-uint32_t *dest,
-const uint32_t *  src,
-const uint32_t *  mask,
+uint32_t *   dest,
+const uint32_t * src,
+const uint32_t * mask,
 int  width)
 {
 int i;
@@ -254,9 +254,9 @@ combine_over_u (pixman_implementation_t *imp,
 static void
 combine_over_reverse_u (pixman_implementation_t *imp,
 pixman_op_t  op,
-uint32_t *dest,
-const uint32_t *  src,
-const uint32_t *  mask,
+uint32_t *   dest,
+const uint32_t * src,
+const uint32_t * mask,
 int  width)
 {
 int i;
@@ -274,9 +274,9 @@ combine_over_reverse_u (pixman_implementation_t *imp,
 static void
 combine_in_u (pixman_implementation_t *imp,
   pixman_op_t  op,
-  uint32_t *dest,
-  const uint32_t *  src,
-  const uint32_t *  mask,
+  uint32_t *   dest,
+  const uint32_t * src,
+  const uint32_t * mask,
   int  width)
 {
 int i;
@@ -293,9 +293,9 @@ combine_in_u (pixman_implementation_t *imp,
 static void
 combine_in_reverse_u (pixman_implementation_t *imp,
   pixman_op_t  op,
-  uint32_t *dest,
-  const uint32_t *  src,
-  const uint32_t *  mask,
+  uint32_t *   dest,
+  const uint32_t * src,
+  const uint32_t * mask,
   int  width)
 {
 int i;
@@ -313,9 +313,9 @@ combine_in_reverse_u (pixman_implementation_t *imp,
 static void
 combine_out_u (pixman_implementation_t *imp,
pixman_op_t  op,
-   uint32_t *dest,
-   const uint32_t *  src,
-   const uint32_t *  mask,
+   uint32_t *   dest,
+   const uint32_t * src,
+   const uint32_t * mask,
int  width)
 {
 int i;
@@ -332,9 +332,9 @@ combine_out_u (pixman_implementation_t *imp,
 static void
 combine_out_reverse_u (pixman_implementation_t *imp,
pixman_op_t  op,
-   uint32_t *dest,
-   const uint32_t *  src,
-   const uint32_t *  mask,
+   uint32_t *   dest,
+   const 

[Pixman] [PATCH 2/5] pixman-combine32: Improve documentation for blend mode operators

2013-10-05 Thread Søren Sandmann Pedersen
This commit overhauls the comments in pixman-combine32.c regarding
blend modes:

- Add a link to the PDF supplement that clarifies the specification of
  ColorBurn and ColorDodge

- Clarify how the formulas for premultiplied colors are derived from
  the ones in the PDF specifications

- Write out the derivation of the formulas in each blend routine
---
 pixman/pixman-combine32.c | 330 --
 1 file changed, 204 insertions(+), 126 deletions(-)

diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c
index be3cfdf..ae9eed4 100644
--- a/pixman/pixman-combine32.c
+++ b/pixman/pixman-combine32.c
@@ -463,32 +463,59 @@ combine_saturate_u (pixman_implementation_t *imp,
 }
 }
 
+
 /*
  * PDF blend modes:
+ *
  * The following blend modes have been taken from the PDF ISO 32000
  * specification, which at this point in time is available from
- * http://www.adobe.com/devnet/acrobat/pdfs/PDF32000_2008.pdf
- * The relevant chapters are 11.3.5 and 11.3.6.
+ *
+ * http://www.adobe.com/devnet/pdf/pdf_reference.html
+ *
+ * The specific documents of interest are the PDF spec itself:
+ *
+ * 
http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/PDF32000_2008.pdf
+ *
+ * chapters 11.3.5 and 11.3.6 and a later supplement for Adobe Acrobat
+ * 9.1 and Reader 9.1:
+ *
+ * 
http://wwwimages.adobe.com/www.adobe.com/content/dam/Adobe/en/devnet/pdf/pdfs/adobe_supplement_iso32000_1.pdf
+ *
+ * that clarifies the specifications for blend modes ColorDodge and
+ * ColorBurn.
+ *
  * The formula for computing the final pixel color given in 11.3.6 is:
- * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
- * with B() being the blend function.
- * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
- *
- * These blend modes should match the SVG filter draft specification, as
- * it has been designed to mirror ISO 32000. Note that at the current point
- * no released draft exists that shows this, as the formulas have not been
- * updated yet after the release of ISO 32000.
- *
- * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
- * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
- * argument. Note that this implementation operates on premultiplied colors,
- * while the PDF specification does not. Therefore the code uses the formula
- * Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
+ *
+ * αr × Cr = (1 – αs) × αb × Cb + (1 – αb) × αs × Cs + αb × αs × B(Cb, Cs)
+ *
+ * where B() is the blend function. When B(Cb, Cs) = Cs, this formula
+ * reduces to the regular OVER operator.
+ *
+ * Cs and Cb are not premultiplied, so in our implementation we instead
+ * use:
+ *
+ * cr = (1 – αs) × cb  +  (1 – αb) × cs  +  αb × αs × B (cb/αb, cs/αs)
+ *
+ * where cr, cs, and cb are premultiplied colors, and where the
+ *
+ * αb × αs × B(cb/αb, cs/αs)
+ *
+ * part is first arithmetically simplified under the assumption that αb
+ * and αs are not 0, and then updated to produce a meaningful result when
+ * they are.
+ *
+ * For all the blend mode operators, the alpha channel is given by
+ *
+ * αr = αs + αb – αb × αs
  */
 
 /*
  * Multiply
- * B(Dca, ad, Sca, as) = Dca.Sca
+ *
+ *  ad * as * B(d / ad, s / as)
+ *= ad * as * d/ad * s/as
+ *= d * s
+ *
  */
 static void
 combine_multiply_u (pixman_implementation_t *imp,
@@ -608,7 +635,10 @@ combine_multiply_ca (pixman_implementation_t *imp,
 
 /*
  * Screen
- * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
+ *
+ *  ad * as * B(d/ad, s/as)
+ *= ad * as * (d/ad + s/as - s/as * d/ad)
+ *= ad * s + as * d - s * d
  */
 static inline uint32_t
 blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
@@ -620,11 +650,25 @@ PDF_SEPARABLE_BLEND_MODE (screen)
 
 /*
  * Overlay
- * B(Dca, Da, Sca, Sa) =
- *   if 2.Dca < Da
- * 2.Sca.Dca
- *   otherwise
- * Sa.Da - 2.(Da - Dca).(Sa - Sca)
+ *
+ * ad * as * B(d/ad, s/as)
+ *   = ad * as * Hardlight (s, d)
+ *   = if (d / ad < 0.5)
+ * as * ad * Multiply (s/as, 2 * d/ad)
+ * else
+ * as * ad * Screen (s/as, 2 * d / ad - 1)
+ *   = if (d < 0.5 * ad)
+ * as * ad * s/as * 2 * d /ad
+ * else
+ * as * ad * (s/as + 2 * d / ad - 1 - s / as * (2 * d / ad - 1))
+ *   = if (2 * d < ad)
+ * 2 * s * d
+ * else
+ * ad * s + 2 * as * d - as * ad - ad * s * (2 * d / ad - 1)
+ *   = if (2 * d < ad)
+ * 2 * s * d
+ * else
+ * as * ad - 2 * (ad - d) * (as - s)
  */
 static inline uint32_t
 blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
@@ -642,7 +686,10 @@ PDF_SEPARABLE_BLEND_MODE (overlay)
 
 /*
  * Darken
- * B(Dca, Da, Sca, Sa) = min (Sca.Da, Dca.Sa)
+ *
+ * ad * as * B(d/ad, s/as)
+ *   = ad * as * MIN(d/ad, s/as)
+ *   = MIN (as * d, ad * s)
  */
 static inline uint32_t
 blend_darken (uint32_t dca, uint32_t da, uint32_t sca, 

[Pixman] [PATCH 4/5] Make ColorDodge code follow the math closer

2013-10-05 Thread Søren Sandmann Pedersen
Change blend_color_dodge() to follow the math in the comment more
closely.

Note, the new code here is in some sense worse than the old code
because it can now underflow the unsigned variables when the source is
superluminescent and (as - s) is therefore negative. The old code was
careful to clamp to 0.

But for superluminescent variables we really need the ability for the
blend function to become negative, and so the solution to the underflow
problem is to just use signed variables. The use of unsigned variables
is a general problem in all of the blend mode code that will have to
be solved later.

The CRC32 values in thread-test and blitters-test are updated to
account for the changes in output.
---
 pixman/pixman-combine32.c | 15 +++
 test/blitters-test.c  |  2 +-
 test/thread-test.c|  2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c
index e0a6a98..af059eb 100644
--- a/pixman/pixman-combine32.c
+++ b/pixman/pixman-combine32.c
@@ -742,15 +742,14 @@ PDF_SEPARABLE_BLEND_MODE (lighten)
 static inline uint32_t
 blend_color_dodge (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
 {
-if (s >= as)
-{
-   return d == 0 ? 0 : DIV_ONE_UN8 (as * ad);
-}
+if (d == 0)
+return 0;
+else if (as * d >= ad * (as - s))
+   return DIV_ONE_UN8 (as * ad);
+else if (as - s == 0)
+return DIV_ONE_UN8 (as * ad);
 else
-{
-   uint32_t r = d * as / (as - s);
-   return DIV_ONE_UN8 (as * MIN (r, ad));
-}
+return DIV_ONE_UN8 (as * ((d * as) / ((as - s))));
 }
 
 PDF_SEPARABLE_BLEND_MODE (color_dodge)
diff --git a/test/blitters-test.c b/test/blitters-test.c
index 920cbbb..396b5b5 100644
--- a/test/blitters-test.c
+++ b/test/blitters-test.c
@@ -394,6 +394,6 @@ main (int argc, const char *argv[])
 }
 
 return fuzzer_test_main(blitters, 200,
-   0xAC8FDA98,
+   0x6A783AD5,
test_composite, argc, argv);
 }
diff --git a/test/thread-test.c b/test/thread-test.c
index f24c31d..a0c7819 100644
--- a/test/thread-test.c
+++ b/test/thread-test.c
@@ -181,7 +181,7 @@ main (void)
 
 crc32 = compute_crc32 (0, crc32s, sizeof crc32s);
 
-#define EXPECTED 0xFD497D8D
+#define EXPECTED 0x12F4B484
 
 if (crc32 != EXPECTED)
 {
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 3/5] pixman-combine32: Rename a number of variables from sa/sca to as/s

2013-10-05 Thread Søren Sandmann Pedersen
There are no semantic changes, just variable renames. The motivation
for these renames is so that the names are shorter and better match
the one used in the comments.
---
 pixman/pixman-combine32.c | 199 +++---
 1 file changed, 99 insertions(+), 100 deletions(-)

diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c
index ae9eed4..e0a6a98 100644
--- a/pixman/pixman-combine32.c
+++ b/pixman/pixman-combine32.c
@@ -641,9 +641,9 @@ combine_multiply_ca (pixman_implementation_t *imp,
  *= ad * s + as * d - s * d
  */
 static inline uint32_t
-blend_screen (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+blend_screen (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
 {
-return DIV_ONE_UN8 (sca * da + dca * sa - sca * dca);
+return DIV_ONE_UN8 (s * ad + d * as - s * d);
 }
 
 PDF_SEPARABLE_BLEND_MODE (screen)
@@ -671,15 +671,16 @@ PDF_SEPARABLE_BLEND_MODE (screen)
  * as * ad - 2 * (ad - d) * (as - s)
  */
 static inline uint32_t
-blend_overlay (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+blend_overlay (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
 {
-uint32_t rca;
+uint32_t r;
 
-if (2 * dca < da)
-   rca = 2 * sca * dca;
+if (2 * d < ad)
+   r = 2 * s * d;
 else
-   rca = sa * da - 2 * (da - dca) * (sa - sca);
-return DIV_ONE_UN8 (rca);
+   r = as * ad - 2 * (ad - d) * (as - s);
+
+return DIV_ONE_UN8 (r);
 }
 
 PDF_SEPARABLE_BLEND_MODE (overlay)
@@ -692,12 +693,11 @@ PDF_SEPARABLE_BLEND_MODE (overlay)
  *   = MIN (as * d, ad * s)
  */
 static inline uint32_t
-blend_darken (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+blend_darken (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
 {
-uint32_t s, d;
+s = ad * s;
+d = as * d;
 
-s = sca * da;
-d = dca * sa;
 return DIV_ONE_UN8 (s > d ? d : s);
 }
 
@@ -711,12 +711,11 @@ PDF_SEPARABLE_BLEND_MODE (darken)
  *   = MAX (as * d, ad * s)
  */
 static inline uint32_t
-blend_lighten (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+blend_lighten (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
 {
-uint32_t s, d;
-
-s = sca * da;
-d = dca * sa;
+s = ad * s;
+d = as * d;
+
 return DIV_ONE_UN8 (s > d ? s : d);
 }
 
@@ -741,16 +740,16 @@ PDF_SEPARABLE_BLEND_MODE (lighten)
  *
  */
 static inline uint32_t
-blend_color_dodge (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+blend_color_dodge (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
 {
-if (sca >= sa)
+if (s >= as)
 {
-   return dca == 0 ? 0 : DIV_ONE_UN8 (sa * da);
+   return d == 0 ? 0 : DIV_ONE_UN8 (as * ad);
 }
 else
 {
-   uint32_t rca = dca * sa / (sa - sca);
-   return DIV_ONE_UN8 (sa * MIN (rca, da));
+   uint32_t r = d * as / (as - s);
+   return DIV_ONE_UN8 (as * MIN (r, ad));
 }
 }
 
@@ -777,16 +776,16 @@ PDF_SEPARABLE_BLEND_MODE (color_dodge)
  * ad * as  - as * as * (ad - d) / s
  */
 static inline uint32_t
-blend_color_burn (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+blend_color_burn (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
 {
-if (sca == 0)
+if (s == 0)
 {
-   return dca < da ? 0 : DIV_ONE_UN8 (sa * da);
+   return d < ad ? 0 : DIV_ONE_UN8 (as * ad);
 }
 else
 {
-   uint32_t rca = (da - dca) * sa / sca;
-   return DIV_ONE_UN8 (sa * (MAX (rca, da) - rca));
+   uint32_t r = (ad - d) * as / s;
+   return DIV_ONE_UN8 (as * (MAX (r, ad) - r));
 }
 }
 
@@ -810,12 +809,12 @@ PDF_SEPARABLE_BLEND_MODE (color_burn)
  * as * ad - 2 * (ad - d) * (as - s)
  */
 static inline uint32_t
-blend_hard_light (uint32_t dca, uint32_t da, uint32_t sca, uint32_t sa)
+blend_hard_light (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
 {
-if (2 * sca < sa)
-   return DIV_ONE_UN8 (2 * sca * dca);
+if (2 * s < as)
+   return DIV_ONE_UN8 (2 * s * d);
 else
-   return DIV_ONE_UN8 (sa * da - 2 * (da - dca) * (sa - sca));
+   return DIV_ONE_UN8 (as * ad - 2 * (ad - d) * (as - s));
 }
 
 PDF_SEPARABLE_BLEND_MODE (hard_light)
@@ -838,38 +837,38 @@ PDF_SEPARABLE_BLEND_MODE (hard_light)
  * d * as + (sqrt (d * ad) - d) * (2 * s - as);
  */
 static inline uint32_t
-blend_soft_light (uint32_t dca_org,
- uint32_t da_org,
- uint32_t sca_org,
- uint32_t sa_org)
-{
-double dca = dca_org * (1.0 / MASK);
-double da = da_org * (1.0 / MASK);
-double sca = sca_org * (1.0 / MASK);
-double sa = sa_org * (1.0 / MASK);
-double rca;
-
-if (2 * sca < sa)
+blend_soft_light (uint32_t d_org,
+ uint32_t ad_org,
+ uint32_t s_org,
+ uint32_t as_org)
+{
+double d = d_org * (1.0 / MASK);
+double ad = ad_org * (1.0 / MASK);
+double s = s_org * (1.0 / MASK);
+double as = as_org * (1.0 / MASK);
+double r;
+
+if (2 * s < as)
 {
-   if (da == 

[Pixman] [PATCH 5/5] Make code for color burn follow the math more closely

2013-10-05 Thread Søren Sandmann Pedersen
For superluminescent destinations, the old code could underflow in

uint32_t r = (ad - d) * as / s;

when (ad - d) was negative. The new code avoids this problem (and
therefore causes changes in the checksums of thread-test and
blitters-test), but it is likely still buggy due to the use of
unsigned variables and other issues in the blend mode code.
---
 pixman/pixman-combine32.c | 15 +++
 test/blitters-test.c  |  2 +-
 test/thread-test.c|  2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/pixman/pixman-combine32.c b/pixman/pixman-combine32.c
index af059eb..54946cc 100644
--- a/pixman/pixman-combine32.c
+++ b/pixman/pixman-combine32.c
@@ -777,15 +777,14 @@ PDF_SEPARABLE_BLEND_MODE (color_dodge)
 static inline uint32_t
 blend_color_burn (uint32_t d, uint32_t ad, uint32_t s, uint32_t as)
 {
-if (s == 0)
-{
-   return d < ad ? 0 : DIV_ONE_UN8 (as * ad);
-}
+if (d >= ad)
+   return DIV_ONE_UN8 (ad * as);
+else if (as * ad - as * d >= ad * s)
+   return 0;
+else if (s == 0)
+   return 0;
 else
-{
-   uint32_t r = (ad - d) * as / s;
-   return DIV_ONE_UN8 (as * (MAX (r, ad) - r));
-}
+   return DIV_ONE_UN8 (ad * as - (as * as * (ad - d)) / s);
 }
 
 PDF_SEPARABLE_BLEND_MODE (color_burn)
diff --git a/test/blitters-test.c b/test/blitters-test.c
index 396b5b5..ea03f47 100644
--- a/test/blitters-test.c
+++ b/test/blitters-test.c
@@ -394,6 +394,6 @@ main (int argc, const char *argv[])
 }
 
 return fuzzer_test_main(blitters, 200,
-   0x6A783AD5,
+   0xE0A07495,
test_composite, argc, argv);
 }
diff --git a/test/thread-test.c b/test/thread-test.c
index a0c7819..71b84f0 100644
--- a/test/thread-test.c
+++ b/test/thread-test.c
@@ -181,7 +181,7 @@ main (void)
 
 crc32 = compute_crc32 (0, crc32s, sizeof crc32s);
 
-#define EXPECTED 0x12F4B484
+#define EXPECTED 0xE299B18E
 
 if (crc32 != EXPECTED)
 {
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 2/3] Move generated affine fetchers into pixman-fast-path.c

2013-09-18 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

The generated fetchers for NEAREST, BILINEAR, and
SEPARABLE_CONVOLUTION filters are fast paths and so they belong in
pixman-fast-path.c
---
 pixman/pixman-bits-image.c |  530 
 pixman/pixman-fast-path.c  |  530 
 2 files changed, 530 insertions(+), 530 deletions(-)

diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index 35247f9..f9121a3 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -505,472 +505,6 @@ bits_image_fetch_general (pixman_iter_t  *iter,
 return buffer;
 }
 
-typedef uint32_t (* convert_pixel_t) (const uint8_t *row, int x);
-
-static force_inline void
-bits_image_fetch_separable_convolution_affine (pixman_image_t * image,
-  int  offset,
-  int  line,
-  int  width,
-  uint32_t *   buffer,
-  const uint32_t * mask,
-
-  convert_pixel_t  convert_pixel,
-  pixman_format_code_t format,
-  pixman_repeat_t  repeat_mode)
-{
-bits_image_t *bits = &image->bits;
-pixman_fixed_t *params = image->common.filter_params;
-int cwidth = pixman_fixed_to_int (params[0]);
-int cheight = pixman_fixed_to_int (params[1]);
-int x_off = ((cwidth << 16) - pixman_fixed_1) >> 1;
-int y_off = ((cheight << 16) - pixman_fixed_1) >> 1;
-int x_phase_bits = pixman_fixed_to_int (params[2]);
-int y_phase_bits = pixman_fixed_to_int (params[3]);
-int x_phase_shift = 16 - x_phase_bits;
-int y_phase_shift = 16 - y_phase_bits;
-pixman_fixed_t vx, vy;
-pixman_fixed_t ux, uy;
-pixman_vector_t v;
-int k;
-
-/* reference point is the center of the pixel */
-v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
-v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
-v.vector[2] = pixman_fixed_1;
-
-if (!pixman_transform_point_3d (image->common.transform, &v))
-   return;
-
-ux = image->common.transform->matrix[0][0];
-uy = image->common.transform->matrix[1][0];
-
-vx = v.vector[0];
-vy = v.vector[1];
-
-for (k = 0; k < width; ++k)
-{
-   pixman_fixed_t *y_params;
-   int satot, srtot, sgtot, sbtot;
-   pixman_fixed_t x, y;
-   int32_t x1, x2, y1, y2;
-   int32_t px, py;
-   int i, j;
-
-   if (mask && !mask[k])
-   goto next;
-
-   /* Round x and y to the middle of the closest phase before continuing. This
-* ensures that the convolution matrix is aligned right, since it was
-* positioned relative to a particular phase (and not relative to whatever
-* exact fraction we happen to get here).
-*/
-   x = ((vx >> x_phase_shift) << x_phase_shift) + ((1 << x_phase_shift) >> 1);
-   y = ((vy >> y_phase_shift) << y_phase_shift) + ((1 << y_phase_shift) >> 1);
-
-   px = (x & 0xffff) >> x_phase_shift;
-   py = (y & 0xffff) >> y_phase_shift;
-
-   x1 = pixman_fixed_to_int (x - pixman_fixed_e - x_off);
-   y1 = pixman_fixed_to_int (y - pixman_fixed_e - y_off);
-   x2 = x1 + cwidth;
-   y2 = y1 + cheight;
-
-   satot = srtot = sgtot = sbtot = 0;
-
-   y_params = params + 4 + (1 << x_phase_bits) * cwidth + py * cheight;
-
-   for (i = y1; i < y2; ++i)
-   {
-   pixman_fixed_t fy = *y_params++;
-
-   if (fy)
-   {
-   pixman_fixed_t *x_params = params + 4 + px * cwidth;
-
-   for (j = x1; j < x2; ++j)
-   {
-   pixman_fixed_t fx = *x_params++;
-   int rx = j;
-   int ry = i;
-   
-   if (fx)
-   {
-   pixman_fixed_t f;
-   uint32_t pixel, mask;
-   uint8_t *row;
-
-   mask = PIXMAN_FORMAT_A (format)? 0 : 0xff000000;
-
-   if (repeat_mode != PIXMAN_REPEAT_NONE)
-   {
-   repeat (repeat_mode, &rx, bits->width);
-   repeat (repeat_mode, &ry, bits->height);
-
-   row = (uint8_t *)bits->bits + bits->rowstride * 4 * ry;
-   pixel = convert_pixel (row, rx) | mask;
-   }
-   else
-   {
-   if (rx < 0 || ry < 0 || rx >= bits->width || ry >= bits->height)
-   {
-   pixel = 0;
-   }
-   else
-   {
-   row

[Pixman] [PATCH 1/3] {scaling, affine, composite-traps}-test: Use compute_crc32_for_image()

2013-09-12 Thread Søren Sandmann Pedersen
By using this function instead of compute_crc32() the alpha masking
code and the call to image_endian_swap() are not duplicated.
---
 test/affine-test.c  |   12 ++--
 test/composite-traps-test.c |   11 +--
 test/scaling-test.c |   12 ++--
 3 files changed, 5 insertions(+), 30 deletions(-)

diff --git a/test/affine-test.c b/test/affine-test.c
index c1649ed..3a37d7f 100644
--- a/test/affine-test.c
+++ b/test/affine-test.c
@@ -273,15 +273,8 @@ test_composite (int  testnum,
 pixman_image_composite (op, src_img, NULL, dst_img,
 src_x, src_y, 0, 0, dst_x, dst_y, w, h);
 
-if (dst_fmt == PIXMAN_x8r8g8b8)
-{
-   /* ignore unused part */
-   for (i = 0; i < dst_stride * dst_height / 4; i++)
-   dstbuf[i] &= 0xFFFFFF;
-}
-
-image_endian_swap (dst_img);
-
+crc32 = compute_crc32_for_image (0, dst_img);
+
 if (verbose)
 {
int j;
@@ -298,7 +291,6 @@ test_composite (int  testnum,
 pixman_image_unref (src_img);
 pixman_image_unref (dst_img);
 
-crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height);
 free (srcbuf);
 free (dstbuf);
 
diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
index 2983eae..34ae340 100644
--- a/test/composite-traps-test.c
+++ b/test/composite-traps-test.c
@@ -214,14 +214,7 @@ test_composite (int  testnum,
 pixman_composite_trapezoids (op, src_img, dst_img, mask_format,
 src_x, src_y, dst_x, dst_y, n_traps, traps);
 
-if (dst_format == PIXMAN_x8r8g8b8)
-{
-   /* ignore unused part */
-   for (i = 0; i < dst_stride * dst_height / 4; i++)
-   dst_bits[i] &= 0xFFFFFF;
-}
-
-image_endian_swap (dst_img);
+crc32 = compute_crc32_for_image (0, dst_img);
 
 if (verbose)
 {
@@ -236,8 +229,6 @@ test_composite (int  testnum,
}
 }
 
-crc32 = compute_crc32 (0, dst_bits, dst_stride * dst_height);
-
 fence_free (dst_bits);
 
 pixman_image_unref (src_img);
diff --git a/test/scaling-test.c b/test/scaling-test.c
index b4142a7..04ecb63 100644
--- a/test/scaling-test.c
+++ b/test/scaling-test.c
@@ -340,15 +340,8 @@ test_composite (int  testnum,
pixman_image_composite (op, src_img, mask_img, dst_img,
 src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h);
 
-if (dst_fmt == PIXMAN_x8r8g8b8 || dst_fmt == PIXMAN_x8b8g8r8)
-{
-   /* ignore unused part */
-   for (i = 0; i < dst_stride * dst_height / 4; i++)
-   dstbuf[i] &= 0xFFFFFF;
-}
-
-image_endian_swap (dst_img);
-
+crc32 = compute_crc32_for_image (0, dst_img);
+
 if (verbose)
 {
int j;
@@ -366,7 +359,6 @@ test_composite (int  testnum,
 pixman_image_unref (mask_img);
 pixman_image_unref (dst_img);
 
-crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height);
 free (srcbuf);
 free (maskbuf);
 free (dstbuf);
-- 
1.7.1

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 3/3] test: Test negative strides

2013-09-12 Thread Søren Sandmann Pedersen
Pixman supports negative strides, but up until now they haven't been
tested outside of stress-test. This commit adds testing of negative
strides to blitters-test, scaling-test, affine-test, rotate-test, and
composite-traps-test.
---
 test/affine-test.c  |   22 --
 test/blitters-test.c|   12 +++-
 test/composite-traps-test.c |   23 +++
 test/rotate-test.c  |   17 +
 test/scaling-test.c |   31 +--
 test/utils.c|6 ++
 6 files changed, 98 insertions(+), 13 deletions(-)

diff --git a/test/affine-test.c b/test/affine-test.c
index 03d296f..8e19023 100644
--- a/test/affine-test.c
+++ b/test/affine-test.c
@@ -80,6 +80,18 @@ test_composite (int  testnum,
 prng_randmemset (srcbuf, src_stride * src_height, 0);
 prng_randmemset (dstbuf, dst_stride * dst_height, 0);
 
+if (prng_rand_n (2) == 0)
+{
+   srcbuf += (src_stride / 4) * (src_height - 1);
+   src_stride = - src_stride;
+}
+
+if (prng_rand_n (2) == 0)
+{
+   dstbuf += (dst_stride / 4) * (dst_height - 1);
+   dst_stride = - dst_stride;
+}
+
 src_fmt = src_bpp == 4 ? (prng_rand_n (2) == 0 ?
   PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : 
PIXMAN_r5g6b5;
 
@@ -281,6 +293,12 @@ test_composite (int  testnum,
 pixman_image_unref (src_img);
 pixman_image_unref (dst_img);
 
+if (src_stride < 0)
+   srcbuf += (src_stride / 4) * (src_height - 1);
+
+if (dst_stride < 0)
+   dstbuf += (dst_stride / 4) * (dst_height - 1);
+
 free (srcbuf);
 free (dstbuf);
 
@@ -289,9 +307,9 @@ test_composite (int  testnum,
 }
 
 #if BILINEAR_INTERPOLATION_BITS == 7
-#define CHECKSUM 0xBC00B1DF
+#define CHECKSUM 0xBE724CFE
 #elif BILINEAR_INTERPOLATION_BITS == 4
-#define CHECKSUM 0xA227306B
+#define CHECKSUM 0x79BBE501
 #else
 #define CHECKSUM 0x00000000
 #endif
diff --git a/test/blitters-test.c b/test/blitters-test.c
index 2120daf..af94835 100644
--- a/test/blitters-test.c
+++ b/test/blitters-test.c
@@ -57,6 +57,13 @@ create_random_image (pixman_format_code_t *allowed_formats,
prng_randmemset (buf, stride * height, RANDMEMSET_MORE_00_AND_FF);
 }
 
+/* test negative stride */
+if (prng_rand_n (4) == 0)
+{
+   buf += (stride / 4) * (height - 1);
+   stride = - stride;
+}
+
 img = pixman_image_create_bits (fmt, width, height, buf, stride);
 
 if (PIXMAN_FORMAT_TYPE (fmt) == PIXMAN_TYPE_COLOR)
@@ -89,6 +96,9 @@ free_random_image (uint32_t initcrc,
 if (fmt != PIXMAN_null)
crc32 = compute_crc32_for_image (initcrc, img);
 
+if (img->bits.rowstride < 0)
+   data += img->bits.rowstride * (img->bits.height - 1);
+
 pixman_image_unref (img);
 free (data);
 
@@ -385,6 +395,6 @@ main (int argc, const char *argv[])
 }
 
 return fuzzer_test_main(blitters, 200,
-   0x0CF3283B,
+   0xAC8FDA98,
test_composite, argc, argv);
 }
diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
index 44d5012..86a0355 100644
--- a/test/composite-traps-test.c
+++ b/test/composite-traps-test.c
@@ -97,19 +97,25 @@ test_composite (int  testnum,
int src_width = prng_rand_n (MAX_SRC_WIDTH) + 1;
int src_height = prng_rand_n (MAX_SRC_HEIGHT) + 1;
int src_stride = src_width * src_bpp + prng_rand_n (MAX_STRIDE) * 
src_bpp;
-   uint32_t *bits;
+   uint32_t *bits, *orig;
 
src_x = -(src_width / 4) + prng_rand_n (src_width * 3 / 2);
src_y = -(src_height / 4) + prng_rand_n (src_height * 3 / 2);
 
src_stride = (src_stride + 3)  ~3;

-   bits = (uint32_t *)make_random_bytes (src_stride * src_height);
+   orig = bits = (uint32_t *)make_random_bytes (src_stride * src_height);
 
+   if (prng_rand_n (2) == 0)
+   {
+   bits += (src_stride / 4) * (src_height - 1);
+   src_stride = - src_stride;
+   }
+   
src_img = pixman_image_create_bits (
src_format, src_width, src_height, bits, src_stride);
 
-   pixman_image_set_destroy_function (src_img, destroy_bits, bits);
+   pixman_image_set_destroy_function (src_img, destroy_bits, orig);
 
if (prng_rand_n (8) == 0)
{
@@ -153,6 +159,12 @@ test_composite (int  testnum,

dst_bits = (uint32_t *)make_random_bytes (dst_stride * dst_height);
 
+   if (prng_rand_n (2) == 0)
+   {
+   dst_bits += (dst_stride / 4) * (dst_height - 1);
+   dst_stride = - dst_stride;
+   }
+   
dst_x = -(dst_width / 4) + prng_rand_n (dst_width * 3 / 2);
dst_y = -(dst_height / 4) + prng_rand_n (dst_height * 3 / 2);

@@ -219,6 +231,9 @@ test_composite (int  testnum,
 if (verbose)
print_image (dst_img);
 
+if (dst_stride < 0)
+   dst_bits += 

[Pixman] [PATCH 2/3] test: Share the image printing code

2013-09-12 Thread Søren Sandmann Pedersen
The affine-test, blitters-test, and scaling-test all have the ability
to print out the bytes of the destination image. Share this code by
moving it to utils.c.

At the same time make the code work correctly with negative strides.
---
 test/affine-test.c  |   12 +---
 test/blitters-test.c|   19 +--
 test/composite-traps-test.c |   12 +---
 test/scaling-test.c |   12 +---
 test/utils.c|   32 
 test/utils.h|4 
 6 files changed, 40 insertions(+), 51 deletions(-)

diff --git a/test/affine-test.c b/test/affine-test.c
index 3a37d7f..03d296f 100644
--- a/test/affine-test.c
+++ b/test/affine-test.c
@@ -276,17 +276,7 @@ test_composite (int  testnum,
 crc32 = compute_crc32_for_image (0, dst_img);
 
 if (verbose)
-{
-   int j;
-
-   for (i = 0; i < dst_height; i++)
-   {
-   for (j = 0; j < dst_stride; j++)
-   printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j));
-
-   printf ("\n");
-   }
-}
+   print_image (dst_img);
 
 pixman_image_unref (src_img);
 pixman_image_unref (dst_img);
diff --git a/test/blitters-test.c b/test/blitters-test.c
index a2c6ff4..2120daf 100644
--- a/test/blitters-test.c
+++ b/test/blitters-test.c
@@ -222,7 +222,6 @@ static pixman_format_code_t mask_fmt_list[] = {
 uint32_t
 test_composite (int testnum, int verbose)
 {
-int i;
 pixman_image_t *src_img = NULL;
 pixman_image_t *dst_img = NULL;
 pixman_image_t *mask_img = NULL;
@@ -355,23 +354,7 @@ test_composite (int testnum, int verbose)
src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h);
 
 if (verbose)
-{
-   int j;
-
-   printf ("---\n");
-   for (i = 0; i < dst_height; i++)
-   {
-   for (j = 0; j < dst_stride; j++)
-   {
-   if (j == (dst_width * PIXMAN_FORMAT_BPP (dst_fmt) + 7) / 8)
-   printf ("| ");
-
-   printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j));
-   }
-   printf ("\n");
-   }
-   printf ("---\n");
-}
+   print_image (dst_img);
 
 free_random_image (0, src_img, PIXMAN_null);
 crc32 = free_random_image (0, dst_img, dst_fmt);
diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
index 34ae340..44d5012 100644
--- a/test/composite-traps-test.c
+++ b/test/composite-traps-test.c
@@ -217,17 +217,7 @@ test_composite (int  testnum,
 crc32 = compute_crc32_for_image (0, dst_img);
 
 if (verbose)
-{
-   int j;
-   
-   for (i = 0; i < dst_height; i++)
-   {
-   for (j = 0; j < dst_stride; j++)
-   printf ("%02X ", *((uint8_t *)dst_bits + i * dst_stride + j));
-
-   printf ("\n");
-   }
-}
+   print_image (dst_img);
 
 fence_free (dst_bits);
 
diff --git a/test/scaling-test.c b/test/scaling-test.c
index 04ecb63..0778d2d 100644
--- a/test/scaling-test.c
+++ b/test/scaling-test.c
@@ -343,17 +343,7 @@ test_composite (int  testnum,
 crc32 = compute_crc32_for_image (0, dst_img);
 
 if (verbose)
-{
-   int j;
-   
-   for (i = 0; i < dst_height; i++)
-   {
-   for (j = 0; j < dst_stride; j++)
-   printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j));
-
-   printf ("\n");
-   }
-}
+   print_image (dst_img);
 
 pixman_image_unref (src_img);
 pixman_image_unref (mask_img);
diff --git a/test/utils.c b/test/utils.c
index 3d1ba22..a693f30 100644
--- a/test/utils.c
+++ b/test/utils.c
@@ -238,6 +238,38 @@ compute_crc32_for_image (uint32_tcrc32,
 return crc32;
 }
 
+void
+print_image (pixman_image_t *image)
+{
+int i, j;
+int width, height, stride;
+pixman_format_code_t format;
+uint8_t *buffer;
+
+width = pixman_image_get_width (image);
+height = pixman_image_get_height (image);
+stride = pixman_image_get_stride (image);
+format = pixman_image_get_format (image);
+buffer = (uint8_t *)pixman_image_get_data (image);
+
+if (stride < 0)
+   stride = - stride;
+
+printf ("---\n");
+for (i = 0; i < height; i++)
+{
+   for (j = 0; j < stride; j++)
+   {
+   if (j == (width * PIXMAN_FORMAT_BPP (format) + 7) / 8)
+   printf ("| ");
+
+   printf ("%02X ", *((uint8_t *)buffer + i * stride + j));
+   }
+   printf ("\n");
+}
+printf ("---\n");
+}
+
 /* perform endian conversion of pixel data
  */
 void
diff --git a/test/utils.h b/test/utils.h
index c278151..28b7193 100644
--- a/test/utils.h
+++ b/test/utils.h
@@ -63,6 +63,10 @@ uint32_t
 compute_crc32_for_image (uint32_tin_crc32,
 pixman_image_t *image);
 
+/* Print the image in hexadecimal */
+void
+print_image (pixman_image_t *image);
+
 /* Returns TRUE if running on a little endian system
  */
 static force_inline pixman_bool_t
-- 

[Pixman] [PATCHv2 0/3] SSSE3 iterator for bilinear scaling

2013-09-07 Thread Søren Sandmann Pedersen
Here is a new version of the bilinear scaler that addresses Matt's and
Siarhei's comments and also uses movdqu instead of movdqa for the
writes to iter->buffer. This ensures that the iterator doesn't impose
new alignment restrictions that could interfere with the
direct-to-destination optimizations.

Even with movdqu there is still a benefit from aligning the
iter->buffer, so I'm keeping the patch that aligns the buffers in
pixman-general.


Soren

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCHv2 3/3] ssse3: Add iterator for separable bilinear scaling

2013-09-07 Thread Søren Sandmann Pedersen
This new iterator uses the SSSE3 instructions pmaddubsw and pabsw to
implement a fast iterator for bilinear scaling.

There is a graph here recording the per-pixel time for various
bilinear scaling algorithms as reported by scaling-bench:

http://people.freedesktop.org/~sandmann/ssse3.v2/ssse3.v2.png

As the graph shows, this new iterator is clearly faster than the
existing C iterator, and when used with an SSE2 combiner, it is also
faster than the existing SSE2 fast paths for upscaling, though not for
downscaling.

Another graph:

http://people.freedesktop.org/~sandmann/ssse3.v2/movdqu.png

shows the difference between writing to iter->buffer with movdqa,
movdqu on an aligned buffer, and movdqu on a deliberately unaligned
buffer. Since the differences are very small, the patch here avoids
using movdqa because imposing alignment restrictions on iter->buffer
may interfere with other optimizations, such as writing directly to
the destination image.

The data was measured with scaling-bench on a Sandy Bridge Core
i3-2350M @ 2.3GHz and is available in this directory:

http://people.freedesktop.org/~sandmann/ssse3.v2/

where there is also a Gnumeric spreadsheet ssse3.v2.gnumeric
containing the per-pixel values and the graph.

V2:
- Use uintptr_t instead of unsigned long in the ALIGN macro
- Use _mm_storel_epi64 instead of _mm_cvtsi128_si64 as the latter form
  is not available on x86-32.
- Use _mm_storeu_si128() instead of _mm_store_si128() to avoid
  imposing alignment requirements on iter->buffer
---
 pixman/pixman-ssse3.c | 312 ++
 1 file changed, 312 insertions(+)

diff --git a/pixman/pixman-ssse3.c b/pixman/pixman-ssse3.c
index 19d71e7..34763e2 100644
--- a/pixman/pixman-ssse3.c
+++ b/pixman/pixman-ssse3.c
@@ -35,6 +35,316 @@
 #include "pixman-private.h"
 #include "pixman-inlines.h"
 
+typedef struct
+{
+int y;
+uint64_t * buffer;
+} line_t;
+
+typedef struct
+{
+line_t line0;
+line_t line1;
+pixman_fixed_t y;
+pixman_fixed_t x;
+uint64_t   data[1];
+} bilinear_info_t;
+
+static void
+ssse3_fetch_horizontal (bits_image_t *image, line_t *line,
+   int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
+{
+uint32_t *bits = image->bits + y * image->rowstride;
+__m128i vx = _mm_set_epi16 (
+   - (x + 1), x, - (x + 1), x,
+   - (x + ux + 1), x + ux,  - (x + ux + 1), x + ux);
+__m128i vux = _mm_set_epi16 (
+   - 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
+   - 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
+__m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0);
+__m128i *b = (__m128i *)line->buffer;
+__m128i vrl0, vrl1;
+
+while ((n -= 2) >= 0)
+{
+   __m128i vw, vr, s;
+
+   vrl1 = _mm_loadl_epi64 (
+   (__m128i *)(bits + pixman_fixed_to_int (x + ux)));
+   /* vrl1: R1, L1 */
+
+final_pixel:
+   vrl0 = _mm_loadl_epi64 (
+   (__m128i *)(bits + pixman_fixed_to_int (x)));
+   /* vrl0: R0, L0 */
+
+   /* The weights are based on vx which is a vector of 
+*
+*- (x + 1), x, - (x + 1), x,
+*  - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
+*
+* so the 16 bit weights end up like this:
+*
+*iw0, w0, iw0, w0, iw1, w1, iw1, w1
+*
+* and after shifting and packing, we get these bytes:
+*
+*iw0, w0, iw0, w0, iw1, w1, iw1, w1,
+*iw0, w0, iw0, w0, iw1, w1, iw1, w1,
+*
+* which means the first and the second input pixel 
+* have to be interleaved like this:
+*
+*la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
+*lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
+*
+* before maddubsw can be used.
+*/
+
+   vw = _mm_add_epi16 (
+   vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
+   /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1
+*/
+
+   vw = _mm_packus_epi16 (vw, vw);
+   /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
+* iw0, w0, iw0, w0, iw1, w1, iw1, w1
+*/
+   vx = _mm_add_epi16 (vx, vux);
+
+   x += 2 * ux;
+
+   vr = _mm_unpacklo_epi16 (vrl1, vrl0);
+   /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */
+
+   s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
+   /* s:  lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */
+
+   vr = _mm_unpackhi_epi8 (vr, s);
+   /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
+* lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
+*/
+
+   vr = _mm_maddubs_epi16 (vr, vw);
+
+   /* When the weight is 0, the inverse weight is
+* 128 which can't be represented in a signed byte.
+* As a result maddubsw computes the following:
+*
+* r = l * -128 + r * 0
+*
+* rather than the desired
+*
+* r = l * 128 + r 

[Pixman] [PATCHv2 1/3] general: Ensure that iter buffers are aligned to 16 bytes

2013-09-07 Thread Søren Sandmann Pedersen
At the moment iter buffers are only guaranteed to be aligned to a 4
byte boundary. SIMD implementations benefit from the buffers being
aligned to 16 bytes, so ensure this is the case.

V2:
- Use uintptr_t instead of unsigned long
- allocate 3 * SCANLINE_BUFFER_LENGTH bytes on the stack rather than just
  SCANLINE_BUFFER_LENGTH
- use sizeof (stack_scanline_buffer) instead of SCANLINE_BUFFER_LENGTH
  to determine overflow
---
 pixman/pixman-general.c | 22 +++---
 pixman/pixman-private.h |  3 +++
 pixman/pixman-utils.c   |  9 +
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
index 6310bff..a653fa7 100644
--- a/pixman/pixman-general.c
+++ b/pixman/pixman-general.c
@@ -114,7 +114,7 @@ general_composite_rect  (pixman_implementation_t *imp,
  pixman_composite_info_t *info)
 {
 PIXMAN_COMPOSITE_ARGS (info);
-uint64_t stack_scanline_buffer[(SCANLINE_BUFFER_LENGTH * 3 + 7) / 8];
+uint8_t stack_scanline_buffer[3 * SCANLINE_BUFFER_LENGTH];
 uint8_t *scanline_buffer = (uint8_t *) stack_scanline_buffer;
 uint8_t *src_buffer, *mask_buffer, *dest_buffer;
 pixman_iter_t src_iter, mask_iter, dest_iter;
@@ -137,17 +137,25 @@ general_composite_rect  (pixman_implementation_t *imp,
Bpp = 16;
 }
 
-if (width * Bpp > SCANLINE_BUFFER_LENGTH)
+#define ALIGN(addr) \
+((uint8_t *)((((uintptr_t)(addr)) + 15) & (~15)))
+
+src_buffer = ALIGN (scanline_buffer);
+mask_buffer = ALIGN (src_buffer + width * Bpp);
+dest_buffer = ALIGN (mask_buffer + width * Bpp);
+
+if (ALIGN (dest_buffer + width * Bpp) >
+   scanline_buffer + sizeof (stack_scanline_buffer))
 {
-   scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
+   scanline_buffer = pixman_malloc_ab_plus_c (width, Bpp * 3, 32 * 3);
 
if (!scanline_buffer)
return;
-}
 
-src_buffer = scanline_buffer;
-mask_buffer = src_buffer + width * Bpp;
-dest_buffer = mask_buffer + width * Bpp;
+   src_buffer = ALIGN (scanline_buffer);
+   mask_buffer = ALIGN (src_buffer + width * Bpp);
+   dest_buffer = ALIGN (mask_buffer + width * Bpp);
+}
 
 if (width_flag == ITER_WIDE)
 {
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 120196d..535117d 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -787,6 +787,9 @@ pixman_malloc_ab (unsigned int n, unsigned int b);
 void *
 pixman_malloc_abc (unsigned int a, unsigned int b, unsigned int c);
 
+void *
+pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c);
+
 pixman_bool_t
 _pixman_multiply_overflows_size (size_t a, size_t b);
 
diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index 98723a8..4a3a835 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -49,6 +49,15 @@ _pixman_addition_overflows_int (unsigned int a, unsigned int 
b)
 }
 
 void *
+pixman_malloc_ab_plus_c (unsigned int a, unsigned int b, unsigned int c)
+{
+if (!b || a >= INT32_MAX / b || (a * b) > INT32_MAX - c)
+   return NULL;
+
+return malloc (a * b + c);
+}
+
+void *
 pixman_malloc_ab (unsigned int a,
   unsigned int b)
 {
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCHv2 2/3] Add empty SSSE3 implementation

2013-09-07 Thread Søren Sandmann Pedersen
This commit adds a new, empty SSSE3 implementation and the associated
build system support.

configure.ac:   detect whether the compiler understands SSSE3
intrinsics and set up the required CFLAGS

Makefile.am:Add libpixman-ssse3.la

pixman-x86.c:   Add X86_SSSE3 feature flag and detect it in
detect_cpu_features().

pixman-ssse3.c: New file with an empty SSSE3 implementation

V2: Remove SSSE3_LDFLAGS since it isn't necessary unless Solaris
support is added.
---
 configure.ac| 45 
 pixman/Makefile.am  | 12 
 pixman/pixman-private.h |  5 +
 pixman/pixman-ssse3.c   | 50 +
 pixman/pixman-x86.c | 15 +--
 5 files changed, 125 insertions(+), 2 deletions(-)
 create mode 100644 pixman/pixman-ssse3.c

diff --git a/configure.ac b/configure.ac
index daf4062..263c63e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -437,6 +437,50 @@ fi
 AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes)
 
 dnl ===
+dnl Check for SSSE3
+
+if test "x$SSSE3_CFLAGS" = "x" ; then
+SSSE3_CFLAGS="-mssse3 -Winline"
+fi
+
+have_ssse3_intrinsics=no
+AC_MSG_CHECKING(whether to use SSSE3 intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$SSSE3_CFLAGS $CFLAGS"
+
+AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
+#include <mmintrin.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+#include <tmmintrin.h>
+int main () {
+__m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
+c = _mm_maddubs_epi16 (a, b);
+return 0;
+}]])], have_ssse3_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(ssse3,
+   [AC_HELP_STRING([--disable-ssse3],
+   [disable SSSE3 fast paths])],
+   [enable_ssse3=$enableval], [enable_ssse3=auto])
+
+if test $enable_ssse3 = no ; then
+   have_ssse3_intrinsics=disabled
+fi
+
+if test $have_ssse3_intrinsics = yes ; then
+   AC_DEFINE(USE_SSSE3, 1, [use SSSE3 compiler intrinsics])
+fi
+
+AC_MSG_RESULT($have_ssse3_intrinsics)
+if test $enable_ssse3 = yes && test $have_ssse3_intrinsics = no ; then
+   AC_MSG_ERROR([SSSE3 intrinsics not detected])
+fi
+
+AM_CONDITIONAL(USE_SSSE3, test $have_ssse3_intrinsics = yes)
+
+dnl ===
 dnl Other special flags needed when building code using MMX or SSE instructions
 case $host_os in
solaris*)
@@ -471,6 +515,7 @@ AC_SUBST(MMX_CFLAGS)
 AC_SUBST(MMX_LDFLAGS)
 AC_SUBST(SSE2_CFLAGS)
 AC_SUBST(SSE2_LDFLAGS)
+AC_SUBST(SSSE3_CFLAGS)
 
 dnl ===
 dnl Check for VMX/Altivec
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index b9ea754..b376d9a 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -52,6 +52,18 @@ libpixman_1_la_LIBADD += libpixman-sse2.la
 ASM_CFLAGS_sse2=$(SSE2_CFLAGS)
 endif
 
+# ssse3 code
+if USE_SSSE3
+noinst_LTLIBRARIES += libpixman-ssse3.la
+libpixman_ssse3_la_SOURCES = \
+   pixman-ssse3.c
+libpixman_ssse3_la_CFLAGS = $(SSSE3_CFLAGS)
+libpixman_1_la_LDFLAGS += $(SSSE3_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-ssse3.la
+
+ASM_CFLAGS_ssse3=$(SSSE3_CFLAGS)
+endif
+
 # arm simd code
 if USE_ARM_SIMD
 noinst_LTLIBRARIES += libpixman-arm-simd.la
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 535117d..6ca13b2 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -593,6 +593,11 @@ pixman_implementation_t *
 _pixman_implementation_create_sse2 (pixman_implementation_t *fallback);
 #endif
 
+#ifdef USE_SSSE3
+pixman_implementation_t *
+_pixman_implementation_create_ssse3 (pixman_implementation_t *fallback);
+#endif
+
 #ifdef USE_ARM_SIMD
 pixman_implementation_t *
 _pixman_implementation_create_arm_simd (pixman_implementation_t *fallback);
diff --git a/pixman/pixman-ssse3.c b/pixman/pixman-ssse3.c
new file mode 100644
index 0000000..19d71e7
--- /dev/null
+++ b/pixman/pixman-ssse3.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2013 Soren Sandmann Pedersen
+ * Copyright © 2013 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT 

[Pixman] [PATCH 2/2] ssse3: Add iterator for separable bilinear scaling

2013-08-29 Thread Søren Sandmann Pedersen
This new iterator uses the SSSE3 instructions pmaddubsw and pabsw to
implement a fast iterator for bilinear scaling.

There is a graph here recording the per-pixel time for various
bilinear scaling algorithms as reported by scaling-bench:

http://people.freedesktop.org/~sandmann/ssse3/ssse3.png

As the graph shows, this new iterator is clearly faster than the
existing C iterator, and when used with an SSE2 combiner, it is also
faster than the existing SSE2 fast paths except for the lowest scaling
ratios.

The data was measured on an Ivy Bridge i7-3520M @ 2.0GHz and is
available in this directory:

http://people.freedesktop.org/~sandmann/ssse3/

where there is also a Gnumeric spreadsheet ssse3.gnumeric containing
the per-pixel values and the graph.
---
 pixman/pixman-ssse3.c |  312 +
 1 files changed, 312 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-ssse3.c b/pixman/pixman-ssse3.c
index 19d71e7..98d929b 100644
--- a/pixman/pixman-ssse3.c
+++ b/pixman/pixman-ssse3.c
@@ -35,6 +35,316 @@
 #include pixman-private.h
 #include pixman-inlines.h
 
+typedef struct
+{
+inty;
+uint64_t * buffer;
+} line_t;
+
+typedef struct
+{
+line_t line0;
+line_t line1;
+pixman_fixed_t y;
+pixman_fixed_t x;
+uint64_t   data[1];
+} bilinear_info_t;
+
+static void
+ssse3_fetch_horizontal (bits_image_t *image, line_t *line,
+   int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
+{
+uint32_t *bits = image-bits + y * image-rowstride;
+__m128i vx = _mm_set_epi16 (
+   - (x + 1), x, - (x + 1), x,
+   - (x + ux + 1), x + ux,  - (x + ux + 1), x + ux);
+__m128i vux = _mm_set_epi16 (
+   - 2 * ux, 2 * ux, - 2 * ux, 2 * ux,
+   - 2 * ux, 2 * ux, - 2 * ux, 2 * ux);
+__m128i vaddc = _mm_set_epi16 (1, 0, 1, 0, 1, 0, 1, 0);
+__m128i *b = (__m128i *)line-buffer;
+__m128i vrl0, vrl1;
+
+while ((n -= 2) = 0)
+{
+   __m128i vw, vr, s;
+
+   vrl1 = _mm_loadl_epi64 (
+   (__m128i *)(bits + pixman_fixed_to_int (x + ux)));
+   /* vrl1: R1, L1 */
+
+final_pixel:
+   vrl0 = _mm_loadl_epi64 (
+   (__m128i *)(bits + pixman_fixed_to_int (x)));
+   /* vrl0: R0, L0 */
+
+   /* The weights are based on vx which is a vector of 
+*
+*- (x + 1), x, - (x + 1), x,
+*  - (x + ux + 1), x + ux, - (x + ux + 1), x + ux
+*
+* so the 16 bit weights end up like this:
+*
+*iw0, w0, iw0, w0, iw1, w1, iw1, w1
+*
+* and after shifting and packing, we get these bytes:
+*
+*iw0, w0, iw0, w0, iw1, w1, iw1, w1,
+*iw0, w0, iw0, w0, iw1, w1, iw1, w1,
+*
+* which means the first and the second input pixel 
+* have to be interleaved like this:
+*
+*la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
+*lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
+*
+* before maddubsw can be used.
+*/
+
+   vw = _mm_add_epi16 (
+   vaddc, _mm_srli_epi16 (vx, 16 - BILINEAR_INTERPOLATION_BITS));
+   /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1
+*/
+
+   vw = _mm_packus_epi16 (vw, vw);
+   /* vw: iw0, w0, iw0, w0, iw1, w1, iw1, w1,
+* iw0, w0, iw0, w0, iw1, w1, iw1, w1
+*/
+   vx = _mm_add_epi16 (vx, vux);
+
+   x += 2 * ux;
+
+   vr = _mm_unpacklo_epi16 (vrl1, vrl0);
+   /* vr: rar0, rar1, rgb0, rgb1, lar0, lar1, lgb0, lgb1 */
+
+   s = _mm_shuffle_epi32 (vr, _MM_SHUFFLE (1, 0, 3, 2));
+   /* s:  lar0, lar1, lgb0, lgb1, rar0, rar1, rgb0, rgb1 */
+
+   vr = _mm_unpackhi_epi8 (vr, s);
+   /* vr: la0, ra0, lr0, rr0, la1, ra1, lr1, rr1,
+* lg0, rg0, lb0, rb0, lg1, rg1, lb1, rb1
+*/
+
+   vr = _mm_maddubs_epi16 (vr, vw);
+
+   /* When the weight is 0, the inverse weight is
+* 128 which can't be represented in a signed byte.
+* As a result maddubsw computes the following:
+*
+* r = l * -128 + r * 0
+*
+* rather than the desired
+*
+* r = l * 128 + r * 0
+*
+* We fix this by taking the absolute value of the
+* result.
+*/
+   vr = _mm_abs_epi16 (vr);
+
+   /* vr: A0, R0, A1, R1, G0, B0, G1, B1 */
+   _mm_store_si128 (b++, vr);
+}
+
+if (n == -1)
+{
+   vrl1 = _mm_setzero_si128();
+   goto final_pixel;
+}
+
+line-y = y;
+}
+
+static uint32_t *
+ssse3_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)
+{
+pixman_fixed_t fx, ux;
+bilinear_info_t *info = iter-data;
+line_t *line0, *line1;
+int y0, y1;
+int32_t dist_y;
+__m128i vw;
+int i;
+
+fx = info-x;
+ux = iter-image-common.transform-matrix[0][0];
+
+y0 = pixman_fixed_to_int (info-y);
+y1 = y0 + 1;
+
+   

[Pixman] [PATCH 02/11] Add ITER_WIDE iter flag

2013-05-22 Thread Søren Sandmann Pedersen
This will be useful for putting iterators into tables where they can
be looked up by iterator flags. Without this flag, wide iterators can
only be recognized by the absence of ITER_NARROW, which makes testing
for a match difficult.
---
 pixman/pixman-general.c | 20 +---
 pixman/pixman-private.h | 13 +++--
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
index a4935c7..c674ffa 100644
--- a/pixman/pixman-general.c
+++ b/pixman/pixman-general.c
@@ -116,7 +116,7 @@ general_composite_rect  (pixman_implementation_t *imp,
 pixman_iter_t src_iter, mask_iter, dest_iter;
 pixman_combine_32_func_t compose;
 pixman_bool_t component_alpha;
-iter_flags_t narrow, src_iter_flags;
+iter_flags_t width_flag, src_iter_flags;
 int Bpp;
 int i;
 
@@ -124,12 +124,12 @@ general_composite_rect  (pixman_implementation_t *imp,
(!mask_image || mask_image-common.flags  FAST_PATH_NARROW_FORMAT) 
(dest_image-common.flags  FAST_PATH_NARROW_FORMAT))
 {
-   narrow = ITER_NARROW;
+   width_flag = ITER_NARROW;
Bpp = 4;
 }
 else
 {
-   narrow = 0;
+   width_flag = ITER_WIDE;
Bpp = 16;
 }
 
@@ -145,7 +145,7 @@ general_composite_rect  (pixman_implementation_t *imp,
 mask_buffer = src_buffer + width * Bpp;
 dest_buffer = mask_buffer + width * Bpp;
 
-if (!narrow)
+if (width_flag == ITER_WIDE)
 {
/* To make sure there aren't any NANs in the buffers */
memset (src_buffer, 0, width * Bpp);
@@ -154,7 +154,7 @@ general_composite_rect  (pixman_implementation_t *imp,
 }
 
 /* src iter */
-src_iter_flags = narrow | op_flags[op].src | ITER_SRC;
+src_iter_flags = width_flag | op_flags[op].src | ITER_SRC;
 
 _pixman_implementation_src_iter_init (imp-toplevel, src_iter, src_image,
  src_x, src_y, width, height,
@@ -179,18 +179,16 @@ general_composite_rect  (pixman_implementation_t *imp,
 _pixman_implementation_src_iter_init (
imp-toplevel, mask_iter,
mask_image, mask_x, mask_y, width, height, mask_buffer,
-   ITER_SRC | narrow | (component_alpha? 0 : ITER_IGNORE_RGB),
+   ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB),
info-mask_flags);
 
 /* dest iter */
 _pixman_implementation_dest_iter_init (
-   imp-toplevel, dest_iter,
-   dest_image, dest_x, dest_y, width, height, dest_buffer,
-   ITER_DEST | narrow | op_flags[op].dst,
-   info-dest_flags);
+   imp-toplevel, dest_iter, dest_image, dest_x, dest_y, width, height,
+   dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, 
info-dest_flags);
 
 compose = _pixman_implementation_lookup_combiner (
-   imp-toplevel, op, component_alpha, narrow);
+   imp-toplevel, op, component_alpha, width_flag != ITER_WIDE);
 
 for (i = 0; i  height; ++i)
 {
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 9b6353e..0fe86ca 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -212,7 +212,8 @@ typedef void  (* pixman_iter_write_back_t)   
(pixman_iter_t *iter);
 
 typedef enum
 {
-ITER_NARROW =  (1  0),
+ITER_NARROW =   (1  0),
+ITER_WIDE = (1  1),
 
 /* Localized alpha is when the alpha channel is used only to compute
  * the alpha value of the destination. This means that the computation
@@ -229,15 +230,15 @@ typedef enum
  * we can treat it as if it were ARGB, which means in some cases we can
  * avoid copying it to a temporary buffer.
  */
-ITER_LOCALIZED_ALPHA = (1  1),
-ITER_IGNORE_ALPHA =(1  2),
-ITER_IGNORE_RGB =  (1  3),
+ITER_LOCALIZED_ALPHA = (1  2),
+ITER_IGNORE_ALPHA =(1  3),
+ITER_IGNORE_RGB =  (1  4),
 
 /* These indicate whether the iterator is for a source
  * or a destination image
  */
-ITER_SRC = (1  4),
-ITER_DEST =(1  5)
+ITER_SRC = (1  5),
+ITER_DEST =(1  6)
 } iter_flags_t;
 
 struct pixman_iter_t
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 05/11] sse2: Replace the fetcher_info_t table with a pixman_iter_info_t table

2013-05-22 Thread Søren Sandmann Pedersen
Similar to the changes to noop, put all the iterators into a table of
pixman_iter_info_t and then do a generic search of that table during
iterator initialization.
---
 pixman/pixman-sse2.c | 64 
 1 file changed, 35 insertions(+), 29 deletions(-)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 863bc18..344cc46 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -6340,47 +6340,53 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t 
*mask)
 return iter-buffer;
 }
 
-typedef struct
+static void
+iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info)
 {
-pixman_format_code_t   format;
-pixman_iter_get_scanline_t get_scanline;
-} fetcher_info_t;
+pixman_image_t *image = iter-image;
+uint8_t *b = (uint8_t *)image-bits.bits;
+int s = image-bits.rowstride * 4;
+
+iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) 
/ 8;
+iter-stride = s;
+}
 
-static const fetcher_info_t fetchers[] =
+#define IMAGE_FLAGS\
+(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |   \
+ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+
+static const pixman_iter_info_t sse2_iters[] = 
 {
-{ PIXMAN_x8r8g8b8, sse2_fetch_x8r8g8b8 },
-{ PIXMAN_r5g6b5,   sse2_fetch_r5g6b5 },
-{ PIXMAN_a8,   sse2_fetch_a8 },
-{ PIXMAN_null }
+{ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
+  iter_init_bits_stride, sse2_fetch_x8r8g8b8, NULL
+},
+{ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
+  iter_init_bits_stride, sse2_fetch_r5g6b5, NULL
+},
+{ PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
+  iter_init_bits_stride, sse2_fetch_a8, NULL
+},
+{ PIXMAN_null },
 };
 
 static pixman_bool_t
 sse2_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 {
-pixman_image_t *image = iter-image;
+const pixman_iter_info_t *info;
 
-#define FLAGS  \
-(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |   \
- FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
-
-if ((iter-iter_flags  ITER_NARROW)   
-   (iter-image_flags  FLAGS) == FLAGS)
+for (info = sse2_iters; info-format != PIXMAN_null; ++info)
 {
-   const fetcher_info_t *f;
-
-   for (f = fetchers[0]; f-format != PIXMAN_null; f++)
+   if ((info-format == PIXMAN_any ||
+info-format == iter-image-common.extended_format_code)   
+   (info-image_flags  iter-image_flags) == info-image_flags 
+   (info-iter_flags  iter-iter_flags) == info-iter_flags)
{
-   if (image-common.extended_format_code == f-format)
-   {
-   uint8_t *b = (uint8_t *)image-bits.bits;
-   int s = image-bits.rowstride * 4;
-
-   iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP 
(f-format) / 8;
-   iter-stride = s;
+   iter-get_scanline = info-get_scanline;
+   iter-write_back = info-write_back;
 
-   iter-get_scanline = f-get_scanline;
-   return TRUE;
-   }
+   if (info-initializer)
+   info-initializer (iter, info);
+   return TRUE;
}
 }
 
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 06/11] mmx: Replace the fetcher_info_t table with a pixman_iter_info_t table

2013-05-22 Thread Søren Sandmann Pedersen
Similar to the SSE2 commit, information about the iterators is stored
in a table of pixman_iter_info_t.
---
 pixman/pixman-mmx.c | 64 +
 1 file changed, 35 insertions(+), 29 deletions(-)

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 746ecd6..02ec998 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -3922,47 +3922,53 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
 return iter-buffer;
 }
 
-typedef struct
+static void
+iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info)
 {
-pixman_format_code_t   format;
-pixman_iter_get_scanline_t get_scanline;
-} fetcher_info_t;
+pixman_image_t *image = iter-image;
+uint8_t *b = (uint8_t *)image-bits.bits;
+int s = image-bits.rowstride * 4;
+
+iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) 
/ 8;
+iter-stride = s;
+}
+
+#define IMAGE_FLAGS\
+(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |   \
+ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
 
-static const fetcher_info_t fetchers[] =
+static const pixman_iter_info_t mmx_iters[] = 
 {
-{ PIXMAN_x8r8g8b8, mmx_fetch_x8r8g8b8 },
-{ PIXMAN_r5g6b5,   mmx_fetch_r5g6b5 },
-{ PIXMAN_a8,   mmx_fetch_a8 },
-{ PIXMAN_null }
+{ PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
+  iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL
+},
+{ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
+  iter_init_bits_stride, mmx_fetch_r5g6b5, NULL
+},
+{ PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
+  iter_init_bits_stride, mmx_fetch_a8, NULL
+},
+{ PIXMAN_null },
 };
 
 static pixman_bool_t
 mmx_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 {
-pixman_image_t *image = iter-image;
-
-#define FLAGS  \
-(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |   \
- FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+const pixman_iter_info_t *info;
 
-if ((iter-iter_flags  ITER_NARROW)   
-   (iter-image_flags  FLAGS) == FLAGS)
+for (info = mmx_iters; info-format != PIXMAN_null; ++info)
 {
-   const fetcher_info_t *f;
-
-   for (f = fetchers[0]; f-format != PIXMAN_null; f++)
+   if ((info-format == PIXMAN_any ||
+info-format == iter-image-common.extended_format_code)   
+   (info-image_flags  iter-image_flags) == info-image_flags 
+   (info-iter_flags  iter-iter_flags) == info-iter_flags)
{
-   if (image-common.extended_format_code == f-format)
-   {
-   uint8_t *b = (uint8_t *)image-bits.bits;
-   int s = image-bits.rowstride * 4;
+   iter-get_scanline = info-get_scanline;
+   iter-write_back = info-write_back;
 
-   iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP 
(f-format) / 8;
-   iter-stride = s;
-
-   iter-get_scanline = f-get_scanline;
-   return TRUE;
-   }
+   if (info-initializer)
+   info-initializer (iter, info);
+   return TRUE;
}
 }
 
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 07/11] fast: Replace the fetcher_info_t table with a pixman_iter_info_t table

2013-05-22 Thread Søren Sandmann Pedersen
Similar to the SSE2 and MMX patches, this commit replaces a table of
fetcher_info_t with a table of pixman_iter_info_t, and similar to the
noop patch, both fast_src_iter_init() and fast_dest_iter_init() are
now doing exactly the same thing, so their code can be shared in a new
function called fast_iter_init_common().
---
 pixman/pixman-fast-path.c | 107 +++---
 1 file changed, 45 insertions(+), 62 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 247aea6..047675c 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -2261,46 +2261,55 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter)
 }
 }
 
-typedef struct
+static void
+iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info)
 {
-pixman_format_code_t   format;
-pixman_iter_get_scanline_t get_scanline;
-pixman_iter_write_back_t   write_back;
-} fetcher_info_t;
+pixman_image_t *image = iter-image;
+uint8_t *b = (uint8_t *)image-bits.bits;
+int s = image-bits.rowstride * 4;
+
+iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) 
/ 8;
+iter-stride = s;
+}
+
+#define IMAGE_FLAGS\
+(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |   \
+ FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
 
-static const fetcher_info_t fetchers[] =
+static const pixman_iter_info_t fast_iters[] = 
 {
-{ PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
-{ PIXMAN_null }
+{ PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC,
+  iter_init_bits_stride, fast_fetch_r5g6b5, NULL },
+
+{ PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
+  ITER_NARROW | ITER_DEST,
+  iter_init_bits_stride, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
+
+{ PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
+  ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA,
+  iter_init_bits_stride, fast_dest_fetch_noop, fast_write_back_r5g6b5 },
+
+{ PIXMAN_null },
 };
 
 static pixman_bool_t
-fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+fast_iter_init_common (pixman_implementation_t *imp, pixman_iter_t *iter)
 {
-pixman_image_t *image = iter-image;
-
-#define FLAGS  \
-(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |   \
- FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
+const pixman_iter_info_t *info;
 
-if ((iter-iter_flags  ITER_NARROW)   
-   (iter-image_flags  FLAGS) == FLAGS)
+for (info = fast_iters; info-format != PIXMAN_null; ++info)
 {
-   const fetcher_info_t *f;
-
-   for (f = fetchers[0]; f-format != PIXMAN_null; f++)
+   if ((info-format == PIXMAN_any ||
+info-format == iter-image-common.extended_format_code)   
+   (info-image_flags  iter-image_flags) == info-image_flags 
+   (info-iter_flags  iter-iter_flags) == info-iter_flags)
{
-   if (image-common.extended_format_code == f-format)
-   {
-   uint8_t *b = (uint8_t *)image-bits.bits;
-   int s = image-bits.rowstride * 4;
-
-   iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP 
(f-format) / 8;
-   iter-stride = s;
+   iter-get_scanline = info-get_scanline;
+   iter-write_back = info-write_back;
 
-   iter-get_scanline = f-get_scanline;
-   return TRUE;
-   }
+   if (info-initializer)
+   info-initializer (iter, info);
+   return TRUE;
}
 }
 
@@ -2308,42 +2317,16 @@ fast_src_iter_init (pixman_implementation_t *imp, 
pixman_iter_t *iter)
 }
 
 static pixman_bool_t
-fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 {
-pixman_image_t *image = iter-image;
-
-if ((iter-iter_flags  ITER_NARROW)   
-   (iter-image_flags  FAST_PATH_STD_DEST_FLAGS) == 
FAST_PATH_STD_DEST_FLAGS)
-{
-   const fetcher_info_t *f;
-
-   for (f = fetchers[0]; f-format != PIXMAN_null; f++)
-   {
-   if (image-common.extended_format_code == f-format)
-   {
-   uint8_t *b = (uint8_t *)image-bits.bits;
-   int s = image-bits.rowstride * 4;
-
-   iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP 
(f-format) / 8;
-   iter-stride = s;
-
-   if ((iter-iter_flags  (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) 
==
-   (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
-   {
-   iter-get_scanline = fast_dest_fetch_noop;
-   }
-   else
-   {
-   iter-get_scanline = f-get_scanline;
-   }
-   iter-write_back = f-write_back;

[Pixman] [PATCH 09/11] Add _pixman_implementation_iter_init() and use instead of _src/_dest_init()

2013-05-22 Thread Søren Sandmann Pedersen
A new field, 'iter_info', is added to the implementation struct, and
all the implementations store a pointer to their iterator tables in
it. A new function, _pixman_implementation_iter_init(), is then added
that searches those tables, and the new function is called in
pixman-general.c and pixman-image.c instead of the old
_pixman_implementation_src_iter_init() and
_pixman_implementation_dest_iter_init().
---
 pixman/pixman-fast-path.c  |  1 +
 pixman/pixman-general.c| 12 
 pixman/pixman-image.c  |  2 +-
 pixman/pixman-implementation.c | 63 ++
 pixman/pixman-mmx.c|  1 +
 pixman/pixman-noop.c   |  1 +
 pixman/pixman-private.h| 13 +
 pixman/pixman-sse2.c   |  1 +
 8 files changed, 88 insertions(+), 6 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 047675c..d5f707f 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -2336,6 +2336,7 @@ _pixman_implementation_create_fast_path 
(pixman_implementation_t *fallback)
 imp-fill = fast_path_fill;
 imp-src_iter_init = fast_src_iter_init;
 imp-dest_iter_init = fast_dest_iter_init;
+imp-iter_info = fast_iters;
 
 return imp;
 }
diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
index 91e33c4..c469a81 100644
--- a/pixman/pixman-general.c
+++ b/pixman/pixman-general.c
@@ -196,9 +196,10 @@ general_composite_rect  (pixman_implementation_t *imp,
 /* src iter */
 src_iter_flags = width_flag | op_flags[op].src | ITER_SRC;
 
-_pixman_implementation_src_iter_init (imp-toplevel, src_iter, src_image,
- src_x, src_y, width, height,
- src_buffer, src_iter_flags, 
info-src_flags);
+_pixman_implementation_iter_init (imp-toplevel, src_iter, src_image,
+  src_x, src_y, width, height,
+  src_buffer, src_iter_flags,
+  info-src_flags);
 
 /* mask iter */
 if ((src_iter_flags  (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
@@ -216,14 +217,14 @@ general_composite_rect  (pixman_implementation_t *imp,
 mask_image-common.component_alpha
 PIXMAN_FORMAT_RGB (mask_image-bits.format);
 
-_pixman_implementation_src_iter_init (
+_pixman_implementation_iter_init (
imp-toplevel, mask_iter,
mask_image, mask_x, mask_y, width, height, mask_buffer,
ITER_SRC | width_flag | (component_alpha? 0 : ITER_IGNORE_RGB),
info-mask_flags);
 
 /* dest iter */
-_pixman_implementation_dest_iter_init (
+_pixman_implementation_iter_init (
imp-toplevel, dest_iter, dest_image, dest_x, dest_y, width, height,
dest_buffer, ITER_DEST | width_flag | op_flags[op].dst, 
info-dest_flags);
 
@@ -263,6 +264,7 @@ _pixman_implementation_create_general (void)
 
 imp-src_iter_init = general_src_iter_init;
 imp-dest_iter_init = general_dest_iter_init;
+imp-iter_info = general_iters;
 
 return imp;
 }
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 78c8610..4f9c2f9 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -920,7 +920,7 @@ _pixman_image_get_solid (pixman_implementation_t *imp,
pixman_iter_t iter;
 
 otherwise:
-   _pixman_implementation_src_iter_init (
+   _pixman_implementation_iter_init (
imp, iter, image, 0, 0, 1, 1,
(uint8_t *)result,
ITER_NARROW | ITER_SRC, image-common.flags);
diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
index cfb82bb..4bdc836 100644
--- a/pixman/pixman-implementation.c
+++ b/pixman/pixman-implementation.c
@@ -285,6 +285,69 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
 return FALSE;
 }
 
+static uint32_t *
+get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
+{
+return NULL;
+}
+
+void
+_pixman_implementation_iter_init (pixman_implementation_t *imp,
+  pixman_iter_t   *iter,
+  pixman_image_t  *image,
+  int  x,
+  int  y,
+  int  width,
+  int  height,
+  uint8_t *buffer,
+  iter_flags_t iter_flags,
+  uint32_t image_flags)
+{
+pixman_format_code_t format;
+
+iter-image = image;
+iter-buffer = (uint32_t *)buffer;
+iter-x = x;
+iter-y = y;
+iter-width = width;
+iter-height = height;
+iter-iter_flags = iter_flags;
+iter-image_flags = image_flags;
+
+if (!iter-image)
+{
+   iter-get_scanline = 

[Pixman] [PATCH 11/11] Consolidate all the iter_init_bits_stride functions

2013-05-22 Thread Søren Sandmann Pedersen
The SSE2, MMX, and fast implementations all have a copy of the
function iter_init_bits_stride that computes an image buffer and
stride.

Move that function to pixman-utils.c and share it among all the
implementations.
---
 pixman/pixman-fast-path.c | 19 +--
 pixman/pixman-mmx.c   | 17 +++--
 pixman/pixman-private.h   |  3 +++
 pixman/pixman-sse2.c  | 17 +++--
 pixman/pixman-utils.c | 11 +++
 5 files changed, 25 insertions(+), 42 deletions(-)

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 9af26af..3982dce 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -2261,17 +2261,6 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter)
 }
 }
 
-static void
-iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info)
-{
-pixman_image_t *image = iter-image;
-uint8_t *b = (uint8_t *)image-bits.bits;
-int s = image-bits.rowstride * 4;
-
-iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) 
/ 8;
-iter-stride = s;
-}
-
 #define IMAGE_FLAGS\
 (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |   \
  FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
@@ -2279,15 +2268,17 @@ iter_init_bits_stride (pixman_iter_t *iter, const 
pixman_iter_info_t *info)
 static const pixman_iter_info_t fast_iters[] = 
 {
 { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW | ITER_SRC,
-  iter_init_bits_stride, fast_fetch_r5g6b5, NULL },
+  _pixman_iter_init_bits_stride, fast_fetch_r5g6b5, NULL },
 
 { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
   ITER_NARROW | ITER_DEST,
-  iter_init_bits_stride, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
+  _pixman_iter_init_bits_stride,
+  fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
 
 { PIXMAN_r5g6b5, FAST_PATH_STD_DEST_FLAGS,
   ITER_NARROW | ITER_DEST | ITER_IGNORE_RGB | ITER_IGNORE_ALPHA,
-  iter_init_bits_stride, fast_dest_fetch_noop, fast_write_back_r5g6b5 },
+  _pixman_iter_init_bits_stride,
+  fast_dest_fetch_noop, fast_write_back_r5g6b5 },
 
 { PIXMAN_null },
 };
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 861b856..c94d282 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -3922,17 +3922,6 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
 return iter-buffer;
 }
 
-static void
-iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info)
-{
-pixman_image_t *image = iter-image;
-uint8_t *b = (uint8_t *)image-bits.bits;
-int s = image-bits.rowstride * 4;
-
-iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) 
/ 8;
-iter-stride = s;
-}
-
 #define IMAGE_FLAGS\
 (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |   \
  FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
@@ -3940,13 +3929,13 @@ iter_init_bits_stride (pixman_iter_t *iter, const 
pixman_iter_info_t *info)
 static const pixman_iter_info_t mmx_iters[] = 
 {
 { PIXMAN_x8r8g8b8, IMAGE_FLAGS, ITER_NARROW,
-  iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL
+  _pixman_iter_init_bits_stride, mmx_fetch_x8r8g8b8, NULL
 },
 { PIXMAN_r5g6b5, IMAGE_FLAGS, ITER_NARROW,
-  iter_init_bits_stride, mmx_fetch_r5g6b5, NULL
+  _pixman_iter_init_bits_stride, mmx_fetch_r5g6b5, NULL
 },
 { PIXMAN_a8, IMAGE_FLAGS, ITER_NARROW,
-  iter_init_bits_stride, mmx_fetch_a8, NULL
+  _pixman_iter_init_bits_stride, mmx_fetch_a8, NULL
 },
 { PIXMAN_null },
 };
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index aa0a842..af4a0b6 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -652,6 +652,9 @@ _pixman_compute_composite_region32 (pixman_region32_t * 
region,
 uint32_t *
 _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
 
+void
+_pixman_iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t 
*info);
+
 /* These formats all have depth 0, so they
  * will never clash with any real ones
  */
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index bc834b5..dde9235 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -6340,17 +6340,6 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
 return iter-buffer;
 }
 
-static void
-iter_init_bits_stride (pixman_iter_t *iter, const pixman_iter_info_t *info)
-{
-pixman_image_t *image = iter-image;
-uint8_t *b = (uint8_t *)image-bits.bits;
-int s = image-bits.rowstride * 4;
-
-iter-bits = b + s * iter-y + iter-x * PIXMAN_FORMAT_BPP (info-format) 
/ 8;
-iter-stride = s;
-}
-
 #define IMAGE_FLAGS\
 (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |   \
  FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
@@ -6358,13 +6347,13 

[Pixman] [PATCH] gtk-utils.c: Use cairo in show_image() rather than GdkPixbuf

2013-02-15 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

GdkPixbufs are not premultiplied, so when using them to display pixman
images, there are some unnecessary conversions going on: First the image
is converted to non-premultiplied, and then GdkPixbuf premultiplies
before sending the result to the X server. These conversions may cause
the displayed image to not be exactly identical to the original.

This patch just uses a cairo image surface instead, which avoids these
conversions.

Also make the comment about sRGB a little more concise.
---
 demos/gtk-utils.c | 53 +
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/demos/gtk-utils.c b/demos/gtk-utils.c
index d7e946d..32d4aec 100644
--- a/demos/gtk-utils.c
+++ b/demos/gtk-utils.c
@@ -95,14 +95,31 @@ pixbuf_from_argb32 (uint32_t *bits,
 static gboolean
 on_expose (GtkWidget *widget, GdkEventExpose *expose, gpointer data)
 {
-GdkPixbuf *pixbuf = data;
+pixman_image_t *pimage = data;
+int width = pixman_image_get_width (pimage);
+int height = pixman_image_get_height (pimage);
+int stride = pixman_image_get_stride (pimage);
+cairo_surface_t *cimage;
+cairo_format_t format;
+cairo_t *cr;
+
+if (pixman_image_get_format (pimage) == PIXMAN_x8r8g8b8)
+   format = CAIRO_FORMAT_RGB24;
+else
+   format = CAIRO_FORMAT_ARGB32;
+
+cimage = cairo_image_surface_create_for_data (
+   (uint8_t *)pixman_image_get_data (pimage),
+   format, width, height, stride);
 
-gdk_draw_pixbuf (widget-window, NULL,
-pixbuf, 0, 0, 0, 0,
-gdk_pixbuf_get_width (pixbuf),
-gdk_pixbuf_get_height (pixbuf),
-GDK_RGB_DITHER_NONE,
-0, 0);
+cr = gdk_cairo_create (widget-window);
+
+cairo_rectangle (cr, 0, 0, width, height);
+cairo_set_source_surface (cr, cimage, 0, 0);
+cairo_fill (cr);
+
+cairo_destroy (cr);
+cairo_surface_destroy (cimage);
 
 return TRUE;
 }
@@ -111,7 +128,6 @@ void
 show_image (pixman_image_t *image)
 {
 GtkWidget *window;
-GdkPixbuf *pixbuf;
 int width, height;
 int argc;
 char **argv;
@@ -132,22 +148,15 @@ show_image (pixman_image_t *image)
 
 format = pixman_image_get_format (image);
 
-/* Three cases:
- *
- *  - image is a8r8g8b8_sRGB: we will display without modification
- *under the assumption that the monitor is sRGB
- *
- *  - image is a8r8g8b8: we will display without modification
- *under the assumption that whoever created the image
- *probably did it wrong by using sRGB inputs
- *
- *  - other: we will convert to a8r8g8b8 under the assumption that
- *whoever created the image probably did it wrong.
+/* We always display the image as if it contains sRGB data. That
+ * means that no conversion should take place when the image
+ * has the a8r8g8b8_sRGB format.
  */
 switch (format)
 {
 case PIXMAN_a8r8g8b8_sRGB:
 case PIXMAN_a8r8g8b8:
+case PIXMAN_x8r8g8b8:
copy = pixman_image_ref (image);
break;
 
@@ -161,11 +170,7 @@ show_image (pixman_image_t *image)
break;
 }
 
-pixbuf = pixbuf_from_argb32 (pixman_image_get_data (copy),
-width, height,
-pixman_image_get_stride (copy));
-
-g_signal_connect (window, expose_event, G_CALLBACK (on_expose), pixbuf);
+g_signal_connect (window, expose_event, G_CALLBACK (on_expose), copy);
 g_signal_connect (window, delete_event, G_CALLBACK (gtk_main_quit), 
NULL);
 
 gtk_widget_show (window);
-- 
1.7.11.7

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [RFC, PATCH 0/8] Floating point pipeline

2012-08-25 Thread Søren Sandmann Pedersen
Hi,

The following patches change the 64-bit pipeline to use single precision
floating point channels instead.

The main benefit of this is that we get more range and precision so
that we can support HDR image formats such as half precision floating
point argb. Unlike 16 bpc, single precision floating point is enough
for most people's needs.

A secondary benefit is that this floating point pipeline could serve
as a reference implementation in the test suite such that we can relax
the requirement for bit-exact operation. 

Finally, floating point may be a better match for some SIMD
instruction sets such as AVX.

Downsides include that some chips do not have hardware floating point,
and that computation on four floating point channels may be slower than
on four 16-bit channels, for example due to more cache pressure or
because floating point is just slower.

Also note that the linear-sRGB scanline store routine is probably
slower with these patches because it now uses a binary search instead
of a simple table lookup. If someone has better suggestions here, I'm
definitely interested.

Comments appreciated.


Thanks,
Soren

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 2/8] blitters-test: Prepare for floating point

2012-08-25 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

Comment out some formats in blitters-test that are going to rely on
floating point in some upcoming patches.
---
 test/blitters-test.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/blitters-test.c b/test/blitters-test.c
index 6a3cc86..a2a1ea9 100644
--- a/test/blitters-test.c
+++ b/test/blitters-test.c
@@ -172,10 +172,12 @@ static pixman_format_code_t img_fmt_list[] = {
 PIXMAN_x14r6g6b6,
 PIXMAN_r8g8b8,
 PIXMAN_b8g8r8,
+#if 0 /* These are going to use floating point in the near future */
 PIXMAN_x2r10g10b10,
 PIXMAN_a2r10g10b10,
 PIXMAN_x2b10g10r10,
 PIXMAN_a2b10g10r10,
+#endif
 PIXMAN_a1r5g5b5,
 PIXMAN_x1r5g5b5,
 PIXMAN_a1b5g5r5,
@@ -395,6 +397,6 @@ main (int argc, const char *argv[])
 }
 
 return fuzzer_test_main(blitters, 200,
-   0xA364B5BF,
+   0x67951DE6,
test_composite, argc, argv);
 }
-- 
1.7.11.4

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 1/8] glyph-test: Prepare for floating point

2012-08-25 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

In preparation for an upcoming change of the wide pipe to use floating
point, comment out some formats in glyph-test that are going to be
using floating point and update the CRC32 value to match.
---
 test/glyph-test.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/test/glyph-test.c b/test/glyph-test.c
index 84de5aa..9dd5b41 100644
--- a/test/glyph-test.c
+++ b/test/glyph-test.c
@@ -30,10 +30,13 @@ static const pixman_format_code_t formats[] =
 PIXMAN_x14r6g6b6,
 PIXMAN_r8g8b8,
 PIXMAN_b8g8r8,
+#if 0
+/* These use floating point */
 PIXMAN_x2r10g10b10,
 PIXMAN_a2r10g10b10,
 PIXMAN_x2b10g10r10,
 PIXMAN_a2b10g10r10,
+#endif
 PIXMAN_a1r5g5b5,
 PIXMAN_x1r5g5b5,
 PIXMAN_a1b5g5r5,
@@ -329,7 +332,7 @@ test_glyphs (int testnum, int verbose)
 int
 main (int argc, const char *argv[])
 {
-return fuzzer_test_main (glyph, 3,
-0x741CB2DB,
+return fuzzer_test_main (glyph, 3,   
+0x79E74996,
 test_glyphs, argc, argv);
 }
-- 
1.7.11.4

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 3/8] Add pixman-combine-float.c

2012-08-25 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

This file contains floating point implementations of combiners for all
pixman operators. These combiners operate on buffers containing single
precision floating point pixels stored in (a, r, g, b) order.

The combiners are added to the pixman_implementation_t struct, but
nothing uses them yet.

This commit incorporates a number of bug fixes contributed by Andrea
Canciani.
---
 pixman/Makefile.sources   |   1 +
 pixman/pixman-combine-float.c | 956 ++
 pixman/pixman-general.c   |   1 +
 pixman/pixman-private.h   |  10 +
 4 files changed, 968 insertions(+)
 create mode 100644 pixman/pixman-combine-float.c

diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index cf7040f..96540ec 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -5,6 +5,7 @@ libpixman_sources = \
pixman-bits-image.c \
pixman-combine32.c  \
pixman-combine64.c  \
+   pixman-combine-float.c  \
pixman-conical-gradient.c   \
pixman-x86.c\
pixman-mips.c   \
diff --git a/pixman/pixman-combine-float.c b/pixman/pixman-combine-float.c
new file mode 100644
index 000..9617e24
--- /dev/null
+++ b/pixman/pixman-combine-float.c
@@ -0,0 +1,956 @@
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2010, 2012 Soren Sandmann Pedersen
+ * Copyright © 2010, 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Soren Sandmann Pedersen (sandm...@cs.au.dk)
+ */
+
+#ifdef HAVE_CONFIG_H
+#include config.h
+#endif
+
+#include math.h
+#include string.h
+#include float.h
+
+#include pixman-private.h
+
+typedef float (* combine_channel_t) (float sa, float s, float da, float d);
+
+static force_inline void
+combine_inner (pixman_bool_t component,
+  float *dest, const float *src, const float *mask, int n_pixels,
+  combine_channel_t combine_a, combine_channel_t combine_c)
+{
+int i;
+
+if (!mask)
+{
+   for (i = 0; i  4 * n_pixels; i += 4)
+   {
+   float sa = src[i + 0];
+   float sr = src[i + 1];
+   float sg = src[i + 2];
+   float sb = src[i + 3];
+   
+   float da = dest[i + 0];
+   float dr = dest[i + 1];
+   float dg = dest[i + 2];
+   float db = dest[i + 3]; 
+   
+   dest[i + 0] = combine_a (sa, sa, da, da);
+   dest[i + 1] = combine_c (sa, sr, da, dr);
+   dest[i + 2] = combine_c (sa, sg, da, dg);
+   dest[i + 3] = combine_c (sa, sb, da, db);
+   }
+}
+else
+{
+   for (i = 0; i  4 * n_pixels; i += 4)
+   {
+   float sa, sr, sg, sb;
+   float ma, mr, mg, mb;
+   float da, dr, dg, db;
+   
+   sa = src[i + 0];
+   sr = src[i + 1];
+   sg = src[i + 2];
+   sb = src[i + 3];
+   
+   if (component)
+   {
+   ma = mask[i + 0];
+   mr = mask[i + 1];
+   mg = mask[i + 2];
+   mb = mask[i + 3];
+
+   sr *= mr;
+   sg *= mg;
+   sb *= mb;
+
+   ma *= sa;
+   mr *= sa;
+   mg *= sa;
+   mb *= sa;
+   
+   sa = ma;
+   }
+   else
+   {
+   ma = mask[i + 0];
+
+   sa *= ma;
+   sr *= ma;
+   sg *= ma;
+   sb *= ma;
+
+   ma = mr = mg = mb = sa;
+   }
+   
+   da = dest[i + 0];
+   dr = dest[i + 1];
+   dg = dest[i + 2];
+   db = dest[i + 3

[Pixman] [PATCH 5/8] pixman-access.c: Add floating point accessor functions

2012-08-25 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

Three new function pointer fields are added to bits_image_t:

  fetch_scanline_float
  fetch_pixel_float
  store_scanline_float

similar to the existing 32 and 64 bit accessors. The fetcher_info_t
struct in pixman_access similarly gets a new get_scanline_float field.

For most formats, the new get_scanline_float field is set to a new
function fetch_scanline_generic_float() that first calls the 32 bit
scanline fetcher and then expands these pixels to floating point.

For the 10 bpc formats, new floating point accessors are added that
use pixman_unorm_to_float() and pixman_float_to_unorm() to convert
back and forth.

The PIXMAN_a8r8g8b8_sRGB format is handled with a 256-entry table that
maps 8 bit sRGB channels to linear single precision floating point
numbers. The sRGB-to-linear direction can then be done with a simple
table lookup.

The other direction is currently done with a 4096-entry table, which
works fine for 16 bit integers, but not so great for floating
point. So instead this patch uses a binary search in the sRGB-to-linear
table. The existing 32 bit accessors for the sRGB format are also
converted to use this method.
---
 pixman/pixman-access.c | 619 ++---
 pixman/pixman-bits-image.c |  40 ++-
 pixman/pixman-private.h|   8 +
 3 files changed, 624 insertions(+), 43 deletions(-)

diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
index 9feafc4..1eef621 100644
--- a/pixman/pixman-access.c
+++ b/pixman/pixman-access.c
@@ -31,6 +31,7 @@
 #include stdlib.h
 #include string.h
 #include assert.h
+#include math.h
 
 #include pixman-accessor.h
 #include pixman-private.h
@@ -635,6 +636,231 @@ fetch_scanline_x2b10g10r10 (pixman_image_t *image,
 }
 }
 
+/* Table mapping sRGB-encoded 8 bit numbers to linearly encoded
+ * floating point numbers. We assume that single precision
+ * floating point follows the IEEE 754 format.
+ */
+static const uint32_t to_linear_u[256] =
+{
+0x, 0x399f22b4, 0x3a1f22b4, 0x3a6eb40e, 0x3a9f22b4, 0x3ac6eb61,
+0x3aeeb40e, 0x3b0b3e5d, 0x3b1f22b4, 0x3b33070b, 0x3b46eb61, 0x3b5b518a,
+0x3b70f18a, 0x3b83e1c5, 0x3b8fe614, 0x3b9c87fb, 0x3ba9c9b5, 0x3bb7ad6d,
+0x3bc63547, 0x3bd5635f, 0x3be539bd, 0x3bf5ba70, 0x3c0373b5, 0x3c0c6152,
+0x3c15a703, 0x3c1f45bc, 0x3c293e68, 0x3c3391f4, 0x3c3e4149, 0x3c494d43,
+0x3c54b6c7, 0x3c607eb1, 0x3c6ca5df, 0x3c792d22, 0x3c830aa8, 0x3c89af9e,
+0x3c9085db, 0x3c978dc5, 0x3c9ec7c0, 0x3ca63432, 0x3cadd37d, 0x3cb5a601,
+0x3cbdac20, 0x3cc5e639, 0x3cce54ab, 0x3cd6f7d2, 0x3cdfd00e, 0x3ce8ddb9,
+0x3cf2212c, 0x3cfb9ac1, 0x3d02a569, 0x3d0798dc, 0x3d0ca7e4, 0x3d11d2ae,
+0x3d171963, 0x3d1c7c2e, 0x3d21fb3a, 0x3d2796af, 0x3d2d4ebb, 0x3d332380,
+0x3d39152b, 0x3d3f23e3, 0x3d454fd0, 0x3d4b991c, 0x3d51ffeb, 0x3d588466,
+0x3d5f26b7, 0x3d65e6fe, 0x3d6cc564, 0x3d73c210, 0x3d7add25, 0x3d810b65,
+0x3d84b793, 0x3d88732e, 0x3d8c3e48, 0x3d9018f4, 0x3d940343, 0x3d97fd48,
+0x3d9c0714, 0x3da020b9, 0x3da44a48, 0x3da883d6, 0x3daccd70, 0x3db12728,
+0x3db59110, 0x3dba0b38, 0x3dbe95b2, 0x3dc3308f, 0x3dc7dbe0, 0x3dcc97b4,
+0x3dd1641c, 0x3dd6412a, 0x3ddb2eec, 0x3de02d75, 0x3de53cd3, 0x3dea5d16,
+0x3def8e52, 0x3df4d091, 0x3dfa23e5, 0x3dff885e, 0x3e027f06, 0x3e05427f,
+0x3e080ea2, 0x3e0ae376, 0x3e0dc104, 0x3e10a752, 0x3e139669, 0x3e168e50,
+0x3e198f0e, 0x3e1c98ab, 0x3e1fab2e, 0x3e22c6a0, 0x3e25eb08, 0x3e29186a,
+0x3e2c4ed0, 0x3e2f8e42, 0x3e32d6c4, 0x3e362861, 0x3e39831e, 0x3e3ce702,
+0x3e405416, 0x3e43ca5e, 0x3e4749e4, 0x3e4ad2ae, 0x3e4e64c2, 0x3e520027,
+0x3e55a4e6, 0x3e595303, 0x3e5d0a8a, 0x3e60cb7c, 0x3e6495e0, 0x3e6869bf,
+0x3e6c4720, 0x3e702e08, 0x3e741e7f, 0x3e78188c, 0x3e7c1c34, 0x3e8014c0,
+0x3e822039, 0x3e84308b, 0x3e8645b8, 0x3e885fc3, 0x3e8a7eb0, 0x3e8ca281,
+0x3e8ecb3a, 0x3e90f8df, 0x3e932b72, 0x3e9562f6, 0x3e979f6f, 0x3e99e0e0,
+0x3e9c274e, 0x3e9e72b8, 0x3ea0c322, 0x3ea31892, 0x3ea57308, 0x3ea7d28a,
+0x3eaa3718, 0x3eaca0b7, 0x3eaf0f69, 0x3eb18332, 0x3eb3fc16, 0x3eb67a15,
+0x3eb8fd34, 0x3ebb8576, 0x3ebe12de, 0x3ec0a56e, 0x3ec33d2a, 0x3ec5da14,
+0x3ec87c30, 0x3ecb2380, 0x3ecdd008, 0x3ed081ca, 0x3ed338c9, 0x3ed5f508,
+0x3ed8b68a, 0x3edb7d52, 0x3ede4962, 0x3ee11abe, 0x3ee3f168, 0x3ee6cd64,
+0x3ee9aeb6, 0x3eec955d, 0x3eef815d, 0x3ef272ba, 0x3ef56976, 0x3ef86594,
+0x3efb6717, 0x3efe6e02, 0x3f00bd2b, 0x3f02460c, 0x3f03d1a5, 0x3f055ff8,
+0x3f06f105, 0x3f0884ce, 0x3f0a1b54, 0x3f0bb499, 0x3f0d509f, 0x3f0eef65,
+0x3f1090ef, 0x3f12353c, 0x3f13dc50, 0x3f15862a, 0x3f1732cc, 0x3f18e237,
+0x3f1a946d, 0x3f1c4970, 0x3f1e013f, 0x3f1fbbde, 0x3f21794c, 0x3f23398c,
+0x3f24fca0, 0x3f26c286, 0x3f288b42, 0x3f2a56d3, 0x3f2c253d, 0x3f2df680,
+0x3f2fca9d, 0x3f31a195, 0x3f337b6a, 0x3f35581e, 0x3f3737b1, 0x3f391a24,
+0x3f3aff7a, 0x3f3ce7b2, 0x3f3ed2d0, 0x3f40c0d2, 0x3f42b1bc, 0x3f44a58e,
+0x3f469c49, 0x3f4895ee

[Pixman] [PATCH 01/10] pixman-cpu.c: Rename disabled to _pixman_disabled() and export it

2012-06-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

---
 pixman/pixman-cpu.c |   22 +++---
 pixman/pixman-private.h |2 ++
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index aa9036f..a0d2f8c 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -729,8 +729,8 @@ pixman_have_sse2 (void)
 #endif /* __amd64__ */
 #endif
 
-static pixman_bool_t
-disabled (const char *name)
+pixman_bool_t
+_pixman_disabled (const char *name)
 {
 const char *env;
 
@@ -767,44 +767,44 @@ _pixman_choose_implementation (void)
 
 imp = _pixman_implementation_create_general();
 
-if (!disabled (fast))
+if (!_pixman_disabled (fast))
imp = _pixman_implementation_create_fast_path (imp);
 
 #ifdef USE_X86_MMX
-if (!disabled (mmx)  pixman_have_mmx ())
+if (!_pixman_disabled (mmx)  pixman_have_mmx ())
imp = _pixman_implementation_create_mmx (imp);
 #endif
 
 #ifdef USE_SSE2
-if (!disabled (sse2)  pixman_have_sse2 ())
+if (!_pixman_disabled (sse2)  pixman_have_sse2 ())
imp = _pixman_implementation_create_sse2 (imp);
 #endif
 
 #ifdef USE_ARM_SIMD
-if (!disabled (arm-simd)  pixman_have_arm_simd ())
+if (!_pixman_disabled (arm-simd)  pixman_have_arm_simd ())
imp = _pixman_implementation_create_arm_simd (imp);
 #endif
 
 #ifdef USE_ARM_IWMMXT
-if (!disabled (arm-iwmmxt)  pixman_have_arm_iwmmxt ())
+if (!_pixman_disabled (arm-iwmmxt)  pixman_have_arm_iwmmxt ())
imp = _pixman_implementation_create_mmx (imp);
 #endif
 #ifdef USE_LOONGSON_MMI
-if (!disabled (loongson-mmi)  pixman_have_loongson_mmi ())
+if (!_pixman_disabled (loongson-mmi)  pixman_have_loongson_mmi ())
imp = _pixman_implementation_create_mmx (imp);
 #endif
 #ifdef USE_ARM_NEON
-if (!disabled (arm-neon)  pixman_have_arm_neon ())
+if (!_pixman_disabled (arm-neon)  pixman_have_arm_neon ())
imp = _pixman_implementation_create_arm_neon (imp);
 #endif
 
 #ifdef USE_MIPS_DSPR2
-if (!disabled (mips-dspr2)  pixman_have_mips_dspr2 ())
+if (!_pixman_disabled (mips-dspr2)  pixman_have_mips_dspr2 ())
imp = _pixman_implementation_create_mips_dspr2 (imp);
 #endif
 
 #ifdef USE_VMX
-if (!disabled (vmx)  pixman_have_vmx ())
+if (!_pixman_disabled (vmx)  pixman_have_vmx ())
imp = _pixman_implementation_create_vmx (imp);
 #endif
 
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 72e3b4f..89020c9 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -574,6 +574,8 @@ _pixman_implementation_create_vmx (pixman_implementation_t 
*fallback);
 pixman_implementation_t *
 _pixman_choose_implementation (void);
 
+pixman_bool_t
+_pixman_disabled (const char *name);
 
 
 /*
-- 
1.7.10.4

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 00/10] Cleanups to CPU detection

2012-06-29 Thread Søren Sandmann Pedersen
git://people.freedesktop.org/~sandmann/pixman

in the branch cpudetectfiles.

Hi,

The following patches contain some cleanups to the CPU detection in
general, and some improvements to the x86 specific parts in particular.

I was looking at making use of some of the newer x86 SIMD instruction
sets and realized that (a) we don't ever call cpuid on x86-64, we just
assume that MMX and SSE2 are present, and (b) pixman-cpu.c is a royal
mess.

The following patches split pixman-cpu.c into four different files:
pixman-arm.c, pixman-mips.c, pixman-ppc.c, and pixman-x86.c. All the
files are still compiled on all architectures, but they have #ifdefs
in them that make them no-ops on the ones that they are not specific
to. The remaining bits of pixman-cpu.c are moved into
pixman-implementation.c.

There are also some cleanups to the logic for all architectures. In
particular, all the have_<feature>() functions are gone and replaced
with a single function that detects all the features that the CPU
offers. This function is implemented by each #ifdef variation, and
then this is called from shared code.

The changes to x86 are the most involved. There is now a
pixman_cpuid() function that uses inline assembly on GCC and the
__cpuid intrinsic on MSVC. The assembly is written such that it will
work on both 32 and 64 bit; the main change required was to save %ebx
in %esi instead of on the stack.

There is also a have_cpuid() function that detects the presence of
cpuid. On MSVC, this simply returns TRUE, so an MSVC-compiled
pixman will no longer work on old 486s. I am very tempted to remove this
on GCC as well and just require cpuid to be present for pixman to work.

These two functions together make it possible to write the CPU
detection code in plain C, rather than the #ifdef ridden mess of
assembly it used to be.

I have tested the patches on ppc64, x86-64, x86-32, and on an ARM
Cortex A8 running Linux, but more testing would definitely be
appreciated. In particular if you use MSVC, MIPS, XO-1, or ARM on
Android/iPhone.


Thanks,
Soren

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 07/10] Simplify MIPS CPU detection

2012-06-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

There is no reason to have pixman_have_<feature>() functions when all
they do is call pixman_have_mips_feature().

Instead rename pixman_have_mips_feature() to have_feature() and call
it directly from _pixman_mips_get_implementations(). Also on
non-Linux, just make have_feature() return FALSE.
---
 pixman/pixman-mips.c |   44 +---
 1 file changed, 9 insertions(+), 35 deletions(-)

diff --git a/pixman/pixman-mips.c b/pixman/pixman-mips.c
index 9d3ee59..2b280c6 100644
--- a/pixman/pixman-mips.c
+++ b/pixman/pixman-mips.c
@@ -30,21 +30,18 @@
 #include string.h
 #include stdlib.h
 
-#if defined (__linux__) /* linux ELF */
-
 static pixman_bool_t
-pixman_have_mips_feature (const char *search_string)
+have_feature (const char *search_string)
 {
-const char *file_name = /proc/cpuinfo;
+#if defined (__linux__) /* linux ELF */
 /* Simple detection of MIPS features at runtime for Linux.
  * It is based on /proc/cpuinfo, which reveals hardware configuration
  * to user-space applications.  According to MIPS (early 2010), no similar
  * facility is universally available on the MIPS architectures, so it's up
  * to individual OSes to provide such.
  */
-
+const char *file_name = /proc/cpuinfo;
 char cpuinfo_line[256];
-
 FILE *f = NULL;
 
 if ((f = fopen (file_name, r)) == NULL)
@@ -60,51 +57,28 @@ pixman_have_mips_feature (const char *search_string)
 }
 
 fclose (f);
+#endif
 
-/* Did not find string in the proc file. */
+/* Did not find string in the proc file, or not Linux ELF. */
 return FALSE;
 }
 
-#if defined(USE_MIPS_DSPR2)
-pixman_bool_t
-pixman_have_mips_dspr2 (void)
-{
- /* Only currently available MIPS core that supports DSPr2 is 74K. */
-return pixman_have_mips_feature (MIPS 74K);
-}
 #endif
 
-#if defined(USE_LOONGSON_MMI)
-pixman_bool_t
-pixman_have_loongson_mmi (void)
-{
-/* I really don't know if some Loongson CPUs don't have MMI. */
-return pixman_have_mips_feature (Loongson);
-}
-#endif
-
-#else /* linux ELF */
-
-#define pixman_have_mips_dspr2() FALSE
-#define pixman_have_loongson_mmi() FALSE
-
-#endif /* linux ELF */
-
-#endif /* USE_MIPS_DSPR2 || USE_LOONGSON_MMI */
-
 pixman_implementation_t *
 _pixman_mips_get_implementations (pixman_implementation_t *imp)
 {
 #ifdef USE_LOONGSON_MMI
-if (!_pixman_disabled (loongson-mmi)  pixman_have_loongson_mmi ())
+/* I really don't know if some Loongson CPUs don't have MMI. */
+if (!_pixman_disabled (loongson-mmi)  have_feature (Loongson))
imp = _pixman_implementation_create_mmx (imp);
 #endif
 
 #ifdef USE_MIPS_DSPR2
-if (!_pixman_disabled (mips-dspr2)  pixman_have_mips_dspr2 ())
+/* Only currently available MIPS core that supports DSPr2 is 74K. */
+if (!_pixman_disabled (mips-dspr2)  have_feature (MIPS 74K))
imp = _pixman_implementation_create_mips_dspr2 (imp);
 #endif
 
 return imp;
 }
-
-- 
1.7.10.4

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 03/10] Move ARM specific CPU detection to a new file pixman-arm.c

2012-06-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

Similar to the x86 commit, this moves the ARM specific CPU detection
to its own file which exports a pixman_arm_get_implementations()
function that is supposed to be a noop on non-ARM.
---
 pixman/Makefile.sources |1 +
 pixman/pixman-arm.c |  295 +++
 pixman/pixman-cpu.c |  254 +---
 pixman/pixman-private.h |3 +
 4 files changed, 300 insertions(+), 253 deletions(-)
 create mode 100644 pixman/pixman-arm.c

diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index 4e0137a..7f2b75f 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -8,6 +8,7 @@ libpixman_sources = \
pixman-conical-gradient.c   \
pixman-cpu.c\
pixman-x86.c\
+   pixman-arm.c\
pixman-edge.c   \
pixman-edge-accessors.c \
pixman-fast-path.c  \
diff --git a/pixman/pixman-arm.c b/pixman/pixman-arm.c
new file mode 100644
index 000..6625d7f
--- /dev/null
+++ b/pixman/pixman-arm.c
@@ -0,0 +1,295 @@
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  SuSE makes no representations about the
+ * suitability of this software for any purpose.  It is provided as is
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include config.h
+#endif
+
+#include pixman-private.h
+
+#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT)
+
+#include string.h
+#include stdlib.h
+
+#if defined(USE_ARM_SIMD)  defined(_MSC_VER)
+/* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
+#include windows.h
+#endif
+
+#if defined(__APPLE__)
+#include TargetConditionals.h
+#endif
+
+#if defined(_MSC_VER)
+
+#if defined(USE_ARM_SIMD)
+extern int pixman_msvc_try_arm_simd_op ();
+
+pixman_bool_t
+pixman_have_arm_simd (void)
+{
+static pixman_bool_t initialized = FALSE;
+static pixman_bool_t have_arm_simd = FALSE;
+
+if (!initialized)
+{
+   __try {
+   pixman_msvc_try_arm_simd_op ();
+   have_arm_simd = TRUE;
+   } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) {
+   have_arm_simd = FALSE;
+   }
+   initialized = TRUE;
+}
+
+return have_arm_simd;
+}
+
+#endif /* USE_ARM_SIMD */
+
+#if defined(USE_ARM_NEON)
+extern int pixman_msvc_try_arm_neon_op ();
+
+pixman_bool_t
+pixman_have_arm_neon (void)
+{
+static pixman_bool_t initialized = FALSE;
+static pixman_bool_t have_arm_neon = FALSE;
+
+if (!initialized)
+{
+   __try
+   {
+   pixman_msvc_try_arm_neon_op ();
+   have_arm_neon = TRUE;
+   }
+   __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
+   {
+   have_arm_neon = FALSE;
+   }
+   initialized = TRUE;
+}
+
+return have_arm_neon;
+}
+
+#endif /* USE_ARM_NEON */
+
+#elif (defined (__APPLE__)  defined(TARGET_OS_IPHONE)) /* iOS 
(iPhone/iPad/iPod touch) */
+
+/* Detection of ARM NEON on iOS is fairly simple because iOS binaries
+ * contain separate executable images for each processor architecture.
+ * So all we have to do is detect the armv7 architecture build. The
+ * operating system automatically runs the armv7 binary for armv7 devices
+ * and the armv6 binary for armv6 devices.
+ */
+
+pixman_bool_t
+pixman_have_arm_simd (void)
+{
+#if defined(USE_ARM_SIMD)
+return TRUE;
+#else
+return FALSE;
+#endif
+}
+
+pixman_bool_t
+pixman_have_arm_neon (void)
+{
+#if defined(USE_ARM_NEON)  defined(__ARM_NEON__)
+/* This is an armv7 cpu build */
+return TRUE;
+#else
+/* This is an armv6 cpu build */
+return FALSE;
+#endif
+}
+
+pixman_bool_t
+pixman_have_arm_iwmmxt (void)
+{
+#if defined(USE_ARM_IWMMXT)
+return FALSE;
+#else
+return FALSE;
+#endif
+}
+
+#elif defined (__linux__) || defined(__ANDROID__) || defined(ANDROID) /* linux 
ELF or ANDROID */
+
+static pixman_bool_t

[Pixman] [PATCH 02/10] Move x86 specific CPU detection to a new file pixman-x86.c

2012-06-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

Extract the x86 specific parts of pixman-cpu.c and put them in their
own file called pixman-x86.c which exports one function
pixman_x86_get_implementations() that creates the MMX and SSE2
implementations. This file is supposed to be compiled on all
architectures, but pixman_x86_get_implementations() should be a noop
on non-x86.
---
 pixman/Makefile.sources |1 +
 pixman/pixman-cpu.c |  250 +
 pixman/pixman-private.h |6 +
 pixman/pixman-x86.c |  282 +++
 4 files changed, 291 insertions(+), 248 deletions(-)
 create mode 100644 pixman/pixman-x86.c

diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index 11f959d..4e0137a 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -7,6 +7,7 @@ libpixman_sources = \
pixman-combine64.c  \
pixman-conical-gradient.c   \
pixman-cpu.c\
+   pixman-x86.c\
pixman-edge.c   \
pixman-edge-accessors.c \
pixman-fast-path.c  \
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index a0d2f8c..0bfc90f 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -491,244 +491,6 @@ pixman_have_loongson_mmi (void)
 
 #endif /* USE_MIPS_DSPR2 || USE_LOONGSON_MMI */
 
-#if defined(USE_X86_MMX) || defined(USE_SSE2)
-/* The CPU detection code needs to be in a file not compiled with
- * -mmmx -msse, as gcc would generate CMOV instructions otherwise
- * that would lead to SIGILL instructions on old CPUs that don't have
- * it.
- */
-#if !defined(__amd64__)  !defined(__x86_64__)  !defined(_M_AMD64)
-
-#ifdef HAVE_GETISAX
-#include sys/auxv.h
-#endif
-
-typedef enum
-{
-NO_FEATURES = 0,
-MMX = 0x1,
-MMX_EXTENSIONS = 0x2,
-SSE = 0x6,
-SSE2 = 0x8,
-CMOV = 0x10
-} cpu_features_t;
-
-
-static unsigned int
-detect_cpu_features (void)
-{
-unsigned int features = 0;
-unsigned int result = 0;
-
-#ifdef HAVE_GETISAX
-if (getisax (result, 1))
-{
-   if (result  AV_386_CMOV)
-   features |= CMOV;
-   if (result  AV_386_MMX)
-   features |= MMX;
-   if (result  AV_386_AMD_MMX)
-   features |= MMX_EXTENSIONS;
-   if (result  AV_386_SSE)
-   features |= SSE;
-   if (result  AV_386_SSE2)
-   features |= SSE2;
-}
-#else
-char vendor[13];
-#ifdef _MSC_VER
-int vendor0 = 0, vendor1, vendor2;
-#endif
-vendor[0] = 0;
-vendor[12] = 0;
-
-#ifdef __GNUC__
-/* see p. 118 of amd64 instruction set manual Vol3 */
-/* We need to be careful about the handling of %ebx and
- * %esp here. We can't declare either one as clobbered
- * since they are special registers (%ebx is the PIC
- * register holding an offset to global data, %esp the
- * stack pointer), so we need to make sure they have their
- * original values when we access the output operands.
- */
-__asm__ (
-pushf\n
-pop %%eax\n
-mov %%eax, %%ecx\n
-xor $0x0020, %%eax\n
-push %%eax\n
-popf\n
-pushf\n
-pop %%eax\n
-mov $0x0, %%edx\n
-xor %%ecx, %%eax\n
-jz 1f\n
-
-mov $0x, %%eax\n
-push %%ebx\n
-cpuid\n
-mov %%ebx, %%eax\n
-pop %%ebx\n
-mov %%eax, %1\n
-mov %%edx, %2\n
-mov %%ecx, %3\n
-mov $0x0001, %%eax\n
-push %%ebx\n
-cpuid\n
-pop %%ebx\n
-1:\n
-mov %%edx, %0\n
-   : =r (result),
-=m (vendor[0]),
-=m (vendor[4]),
-=m (vendor[8])
-   :
-   : %eax, %ecx, %edx
-);
-
-#elif defined (_MSC_VER)
-
-_asm {
-   pushfd
-   pop eax
-   mov ecx, eax
-   xor eax, 0020h
-   push eax
-   popfd
-   pushfd
-   pop eax
-   mov edx, 0
-   xor eax, ecx
-   jz nocpuid
-
-   mov eax, 0
-   push ebx
-   cpuid
-   mov eax, ebx
-   pop ebx
-   mov vendor0, eax
-   mov vendor1, edx
-   mov vendor2, ecx
-   mov eax, 1
-   push ebx
-   cpuid
-   pop ebx
-nocpuid:
-   mov result, edx
-}
-memmove (vendor + 0, vendor0, 4);
-memmove (vendor + 4, vendor1, 4);
-memmove (vendor + 8, vendor2, 4);
-
-#else
-#   error unsupported compiler
-#endif
-
-features = 0;
-if (result)
-{
-   /* result now contains the standard feature bits */
-   if (result  (1  15))
-   features |= CMOV;
-   if (result  (1  23))
-   features |= MMX;
-   if (result  (1  25))
-   features |= SSE;
-   if (result  (1  26))
-   features |= SSE2;
-   if ((features  MMX)  !(features  SSE) 
-   (strcmp (vendor, AuthenticAMD) == 0 ||
-strcmp (vendor, Geode by NSC) == 0

[Pixman] [PATCH 09/10] Simplifications to ARM CPU detection

2012-06-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

Organize pixman-arm.c such that each operating system/compiler exports
a detect_cpu_features() function that returns a bitmask with the
various features that we are interested in. A new function
have_feature() then calls this function, caches the result, and returns
whether the given feature is available.

The result is that all the pixman_have_arm_<feature>() functions become
redundant and can be deleted.
---
 pixman/pixman-arm.c |  244 ++-
 1 file changed, 87 insertions(+), 157 deletions(-)

diff --git a/pixman/pixman-arm.c b/pixman/pixman-arm.c
index 6625d7f..23374e4 100644
--- a/pixman/pixman-arm.c
+++ b/pixman/pixman-arm.c
@@ -25,132 +25,83 @@
 
 #include pixman-private.h
 
+typedef enum
+{
+ARM_V7 = (1  0),
+ARM_V6 = (1  1),
+ARM_VFP= (1  2),
+ARM_NEON   = (1  3),
+ARM_IWMMXT = (1  4)
+} arm_cpu_features_t;
+
 #if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT)
 
-#include string.h
-#include stdlib.h
+#if defined(_MSC_VER)
 
-#if defined(USE_ARM_SIMD)  defined(_MSC_VER)
 /* Needed for EXCEPTION_ILLEGAL_INSTRUCTION */
 #include windows.h
-#endif
 
-#if defined(__APPLE__)
-#include TargetConditionals.h
-#endif
-
-#if defined(_MSC_VER)
-
-#if defined(USE_ARM_SIMD)
+extern int pixman_msvc_try_arm_neon_op ();
 extern int pixman_msvc_try_arm_simd_op ();
 
-pixman_bool_t
-pixman_have_arm_simd (void)
+static arm_cpu_features_t
+detect_cpu_features (void)
 {
-static pixman_bool_t initialized = FALSE;
-static pixman_bool_t have_arm_simd = FALSE;
+arm_cpu_features_t features = 0;
 
-if (!initialized)
+__try
+{
+   pixman_msvc_try_arm_simd_op ();
+   features |= ARM_V6;
+}
+__except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
 {
-   __try {
-   pixman_msvc_try_arm_simd_op ();
-   have_arm_simd = TRUE;
-   } __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) {
-   have_arm_simd = FALSE;
-   }
-   initialized = TRUE;
 }
 
-return have_arm_simd;
-}
-
-#endif /* USE_ARM_SIMD */
-
-#if defined(USE_ARM_NEON)
-extern int pixman_msvc_try_arm_neon_op ();
-
-pixman_bool_t
-pixman_have_arm_neon (void)
-{
-static pixman_bool_t initialized = FALSE;
-static pixman_bool_t have_arm_neon = FALSE;
-
-if (!initialized)
+__try
+{
+   pixman_msvc_try_arm_neon_op ();
+   features |= ARM_NEON;
+}
+__except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
 {
-   __try
-   {
-   pixman_msvc_try_arm_neon_op ();
-   have_arm_neon = TRUE;
-   }
-   __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
-   {
-   have_arm_neon = FALSE;
-   }
-   initialized = TRUE;
 }
 
-return have_arm_neon;
+return features;
 }
 
-#endif /* USE_ARM_NEON */
-
-#elif (defined (__APPLE__)  defined(TARGET_OS_IPHONE)) /* iOS 
(iPhone/iPad/iPod touch) */
-
-/* Detection of ARM NEON on iOS is fairly simple because iOS binaries
- * contain separate executable images for each processor architecture.
- * So all we have to do is detect the armv7 architecture build. The
- * operating system automatically runs the armv7 binary for armv7 devices
- * and the armv6 binary for armv6 devices.
- */
+#elif defined(__APPLE__)  defined(TARGET_OS_IPHONE) /* iOS */
 
-pixman_bool_t
-pixman_have_arm_simd (void)
-{
-#if defined(USE_ARM_SIMD)
-return TRUE;
-#else
-return FALSE;
-#endif
-}
+#include TargetConditionals.h
 
-pixman_bool_t
-pixman_have_arm_neon (void)
+static arm_cpu_features_t
+detect_cpu_features (void)
 {
-#if defined(USE_ARM_NEON)  defined(__ARM_NEON__)
-/* This is an armv7 cpu build */
-return TRUE;
-#else
-/* This is an armv6 cpu build */
-return FALSE;
+arm_cpu_features_t features = 0;
+
+features |= ARM_V6;
+
+/* Detection of ARM NEON on iOS is fairly simple because iOS binaries
+ * contain separate executable images for each processor architecture.
+ * So all we have to do is detect the armv7 architecture build. The
+ * operating system automatically runs the armv7 binary for armv7 devices
+ * and the armv6 binary for armv6 devices.
+ */
+#if defined(__ARM_NEON__)
+features |= ARM_NEON;
 #endif
-}
 
-pixman_bool_t
-pixman_have_arm_iwmmxt (void)
-{
-#if defined(USE_ARM_IWMMXT)
-return FALSE;
-#else
-return FALSE;
-#endif
+return features;
 }
 
-#elif defined (__linux__) || defined(__ANDROID__) || defined(ANDROID) /* linux 
ELF or ANDROID */
-
-static pixman_bool_t arm_has_v7 = FALSE;
-static pixman_bool_t arm_has_v6 = FALSE;
-static pixman_bool_t arm_has_vfp = FALSE;
-static pixman_bool_t arm_has_neon = FALSE;
-static pixman_bool_t arm_has_iwmmxt = FALSE;
-static pixman_bool_t arm_tests_initialized = FALSE;
-
-#if defined(__ANDROID__) || defined(ANDROID) /* Android device

[Pixman] [PATCH 05/10] Move MIPS specific CPU detection to its own file, pixman-mips.c

2012-06-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

---
 pixman/Makefile.sources |1 +
 pixman/pixman-cpu.c |   77 +
 pixman/pixman-mips.c|  110 +++
 pixman/pixman-private.h |3 ++
 4 files changed, 115 insertions(+), 76 deletions(-)
 create mode 100644 pixman/pixman-mips.c

diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index 414ac02..73758ff 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -8,6 +8,7 @@ libpixman_sources = \
pixman-conical-gradient.c   \
pixman-cpu.c\
pixman-x86.c\
+   pixman-mips.c   \
pixman-arm.c\
pixman-ppc.c\
pixman-edge.c   \
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index 914f116..5cef480 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -22,76 +22,10 @@
 #ifdef HAVE_CONFIG_H
 #include config.h
 #endif
-
-#include string.h
 #include stdlib.h
 
 #include pixman-private.h
 
-#if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI)
-
-#if defined (__linux__) /* linux ELF */
-
-static pixman_bool_t
-pixman_have_mips_feature (const char *search_string)
-{
-const char *file_name = /proc/cpuinfo;
-/* Simple detection of MIPS features at runtime for Linux.
- * It is based on /proc/cpuinfo, which reveals hardware configuration
- * to user-space applications.  According to MIPS (early 2010), no similar
- * facility is universally available on the MIPS architectures, so it's up
- * to individual OSes to provide such.
- */
-
-char cpuinfo_line[256];
-
-FILE *f = NULL;
-
-if ((f = fopen (file_name, r)) == NULL)
-return FALSE;
-
-while (fgets (cpuinfo_line, sizeof (cpuinfo_line), f) != NULL)
-{
-if (strstr (cpuinfo_line, search_string) != NULL)
-{
-fclose (f);
-return TRUE;
-}
-}
-
-fclose (f);
-
-/* Did not find string in the proc file. */
-return FALSE;
-}
-
-#if defined(USE_MIPS_DSPR2)
-pixman_bool_t
-pixman_have_mips_dspr2 (void)
-{
- /* Only currently available MIPS core that supports DSPr2 is 74K. */
-return pixman_have_mips_feature (MIPS 74K);
-}
-#endif
-
-#if defined(USE_LOONGSON_MMI)
-pixman_bool_t
-pixman_have_loongson_mmi (void)
-{
-/* I really don't know if some Loongson CPUs don't have MMI. */
-return pixman_have_mips_feature (Loongson);
-}
-#endif
-
-#else /* linux ELF */
-
-#define pixman_have_mips_dspr2() FALSE
-#define pixman_have_loongson_mmi() FALSE
-
-#endif /* linux ELF */
-
-#endif /* USE_MIPS_DSPR2 || USE_LOONGSON_MMI */
-
 pixman_bool_t
 _pixman_disabled (const char *name)
 {
@@ -136,16 +70,7 @@ _pixman_choose_implementation (void)
 imp = _pixman_x86_get_implementations (imp);
 imp = _pixman_arm_get_implementations (imp);
 imp = _pixman_ppc_get_implementations (imp);
-
-#ifdef USE_LOONGSON_MMI
-if (!_pixman_disabled (loongson-mmi)  pixman_have_loongson_mmi ())
-   imp = _pixman_implementation_create_mmx (imp);
-#endif
-
-#ifdef USE_MIPS_DSPR2
-if (!_pixman_disabled (mips-dspr2)  pixman_have_mips_dspr2 ())
-   imp = _pixman_implementation_create_mips_dspr2 (imp);
-#endif
+imp = _pixman_mips_get_implementations (imp);
 
 imp = _pixman_implementation_create_noop (imp);
 
diff --git a/pixman/pixman-mips.c b/pixman/pixman-mips.c
new file mode 100644
index 000..9d3ee59
--- /dev/null
+++ b/pixman/pixman-mips.c
@@ -0,0 +1,110 @@
+/*
+ * Copyright © 2000 SuSE, Inc.
+ * Copyright © 2007 Red Hat, Inc.
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of SuSE not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  SuSE makes no representations about the
+ * suitability of this software for any purpose.  It is provided as is
+ * without express or implied warranty.
+ *
+ * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
+ * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include config.h
+#endif
+
+#include pixman-private.h
+
+#if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI)
+
+#include string.h
+#include stdlib.h
+
+#if defined

[Pixman] [PATCH 06/10] Move the remaining bits of pixman-cpu into pixman-implementation.c

2012-06-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

---
 pixman/Makefile.sources|1 -
 pixman/pixman-cpu.c|   79 
 pixman/pixman-implementation.c |   51 ++
 3 files changed, 51 insertions(+), 80 deletions(-)
 delete mode 100644 pixman/pixman-cpu.c

diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index 73758ff..6472994 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -6,7 +6,6 @@ libpixman_sources = \
pixman-combine32.c  \
pixman-combine64.c  \
pixman-conical-gradient.c   \
-   pixman-cpu.c\
pixman-x86.c\
pixman-mips.c   \
pixman-arm.c\
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
deleted file mode 100644
index 5cef480..000
--- a/pixman/pixman-cpu.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright © 2000 SuSE, Inc.
- * Copyright © 2007 Red Hat, Inc.
- *
- * Permission to use, copy, modify, distribute, and sell this software and its
- * documentation for any purpose is hereby granted without fee, provided that
- * the above copyright notice appear in all copies and that both that
- * copyright notice and this permission notice appear in supporting
- * documentation, and that the name of SuSE not be used in advertising or
- * publicity pertaining to distribution of the software without specific,
- * written prior permission.  SuSE makes no representations about the
- * suitability of this software for any purpose.  It is provided as is
- * without express or implied warranty.
- *
- * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
- * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-#ifdef HAVE_CONFIG_H
-#include config.h
-#endif
-#include stdlib.h
-
-#include pixman-private.h
-
-pixman_bool_t
-_pixman_disabled (const char *name)
-{
-const char *env;
-
-if ((env = getenv (PIXMAN_DISABLE)))
-{
-   do
-   {
-   const char *end;
-   int len;
-
-   if ((end = strchr (env, ' ')))
-   len = end - env;
-   else
-   len = strlen (env);
-
-   if (strlen (name) == len  strncmp (name, env, len) == 0)
-   {
-   printf (pixman: Disabled %s implementation\n, name);
-   return TRUE;
-   }
-
-   env += len;
-   }
-   while (*env++);
-}
-
-return FALSE;
-}
-
-pixman_implementation_t *
-_pixman_choose_implementation (void)
-{
-pixman_implementation_t *imp;
-
-imp = _pixman_implementation_create_general();
-
-if (!_pixman_disabled (fast))
-   imp = _pixman_implementation_create_fast_path (imp);
-
-imp = _pixman_x86_get_implementations (imp);
-imp = _pixman_arm_get_implementations (imp);
-imp = _pixman_ppc_get_implementations (imp);
-imp = _pixman_mips_get_implementations (imp);
-
-imp = _pixman_implementation_create_noop (imp);
-
-return imp;
-}
-
diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
index c769ab8..77d0906 100644
--- a/pixman/pixman-implementation.c
+++ b/pixman/pixman-implementation.c
@@ -223,3 +223,54 @@ _pixman_implementation_dest_iter_init 
(pixman_implementation_t *imp,
 
 (*imp-dest_iter_init) (imp, iter);
 }
+
+pixman_bool_t
+_pixman_disabled (const char *name)
+{
+const char *env;
+
+if ((env = getenv (PIXMAN_DISABLE)))
+{
+   do
+   {
+   const char *end;
+   int len;
+
+   if ((end = strchr (env, ' ')))
+   len = end - env;
+   else
+   len = strlen (env);
+
+   if (strlen (name) == len  strncmp (name, env, len) == 0)
+   {
+   printf (pixman: Disabled %s implementation\n, name);
+   return TRUE;
+   }
+
+   env += len;
+   }
+   while (*env++);
+}
+
+return FALSE;
+}
+
+pixman_implementation_t *
+_pixman_choose_implementation (void)
+{
+pixman_implementation_t *imp;
+
+imp = _pixman_implementation_create_general();
+
+if (!_pixman_disabled (fast))
+   imp = _pixman_implementation_create_fast_path (imp);
+
+imp = _pixman_x86_get_implementations (imp);
+imp = _pixman_arm_get_implementations (imp);
+imp = _pixman_ppc_get_implementations (imp);
+imp = _pixman_mips_get_implementations (imp);
+
+imp = _pixman_implementation_create_noop (imp);
+
+return imp;
+}
-- 
1.7.10.4

___
Pixman mailing list
Pixman

[Pixman] [PATCH 04/10] Move PowerPC specific CPU detection to its own file pixman-ppc.c

2012-06-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

---
 pixman/Makefile.sources |1 +
 pixman/pixman-cpu.c |  165 +---
 pixman/pixman-ppc.c |  192 +++
 pixman/pixman-private.h |3 +
 4 files changed, 197 insertions(+), 164 deletions(-)
 create mode 100644 pixman/pixman-ppc.c

diff --git a/pixman/Makefile.sources b/pixman/Makefile.sources
index 7f2b75f..414ac02 100644
--- a/pixman/Makefile.sources
+++ b/pixman/Makefile.sources
@@ -9,6 +9,7 @@ libpixman_sources = \
pixman-cpu.c\
pixman-x86.c\
pixman-arm.c\
+   pixman-ppc.c\
pixman-edge.c   \
pixman-edge-accessors.c \
pixman-fast-path.c  \
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index 319d71f..914f116 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -26,167 +26,8 @@
 #include string.h
 #include stdlib.h
 
-#if defined(__APPLE__)
-#include TargetConditionals.h
-#endif
-
 #include pixman-private.h
 
-#ifdef USE_VMX
-
-/* The CPU detection code needs to be in a file not compiled with
- * -maltivec -mabi=altivec, as gcc would try to save vector register
- * across function calls causing SIGILL on cpus without Altivec/vmx.
- */
-static pixman_bool_t initialized = FALSE;
-static volatile pixman_bool_t have_vmx = TRUE;
-
-#ifdef __APPLE__
-#include sys/sysctl.h
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
-if (!initialized)
-{
-   size_t length = sizeof(have_vmx);
-   int error =
-   sysctlbyname (hw.optional.altivec, have_vmx, length, NULL, 0);
-
-   if (error)
-   have_vmx = FALSE;
-
-   initialized = TRUE;
-}
-return have_vmx;
-}
-
-#elif defined (__OpenBSD__)
-#include sys/param.h
-#include sys/sysctl.h
-#include machine/cpu.h
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
-if (!initialized)
-{
-   int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
-   size_t length = sizeof(have_vmx);
-   int error =
-   sysctl (mib, 2, have_vmx, length, NULL, 0);
-
-   if (error != 0)
-   have_vmx = FALSE;
-
-   initialized = TRUE;
-}
-return have_vmx;
-}
-
-#elif defined (__linux__)
-#include sys/types.h
-#include sys/stat.h
-#include fcntl.h
-#include unistd.h
-#include stdio.h
-#include linux/auxvec.h
-#include asm/cputable.h
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
-if (!initialized)
-{
-   char fname[64];
-   unsigned long buf[64];
-   ssize_t count = 0;
-   pid_t pid;
-   int fd, i;
-
-   pid = getpid ();
-   snprintf (fname, sizeof(fname) - 1, /proc/%d/auxv, pid);
-
-   fd = open (fname, O_RDONLY);
-   if (fd = 0)
-   {
-   for (i = 0; i = (count / sizeof(unsigned long)); i += 2)
-   {
-   /* Read more if buf is empty... */
-   if (i == (count / sizeof(unsigned long)))
-   {
-   count = read (fd, buf, sizeof(buf));
-   if (count = 0)
-   break;
-   i = 0;
-   }
-
-   if (buf[i] == AT_HWCAP)
-   {
-   have_vmx = !!(buf[i + 1]  PPC_FEATURE_HAS_ALTIVEC);
-   initialized = TRUE;
-   break;
-   }
-   else if (buf[i] == AT_NULL)
-   {
-   break;
-   }
-   }
-   close (fd);
-   }
-}
-if (!initialized)
-{
-   /* Something went wrong. Assume 'no' rather than playing
-  fragile tricks with catching SIGILL. */
-   have_vmx = FALSE;
-   initialized = TRUE;
-}
-
-return have_vmx;
-}
-
-#else /* !__APPLE__  !__OpenBSD__  !__linux__ */
-#include signal.h
-#include setjmp.h
-
-static jmp_buf jump_env;
-
-static void
-vmx_test (intsig,
- siginfo_t *si,
- void * unused)
-{
-longjmp (jump_env, 1);
-}
-
-static pixman_bool_t
-pixman_have_vmx (void)
-{
-struct sigaction sa, osa;
-int jmp_result;
-
-if (!initialized)
-{
-   sa.sa_flags = SA_SIGINFO;
-   sigemptyset (sa.sa_mask);
-   sa.sa_sigaction = vmx_test;
-   sigaction (SIGILL, sa, osa);
-   jmp_result = setjmp (jump_env);
-   if (jmp_result == 0)
-   {
-   asm volatile ( vor 0, 0, 0 );
-   }
-   sigaction (SIGILL, osa, NULL);
-   have_vmx = (jmp_result == 0);
-   initialized = TRUE;
-}
-return have_vmx;
-}
-
-#endif /* __APPLE__ */
-#endif /* USE_VMX */
-
 #if defined(USE_MIPS_DSPR2) || defined(USE_LOONGSON_MMI)
 
 #if defined (__linux__) /* linux ELF */
@@ -294,6 +135,7 @@ _pixman_choose_implementation (void)
 
 imp = _pixman_x86_get_implementations (imp);
 imp = _pixman_arm_get_implementations (imp);
+imp

[Pixman] [PATCH 10/10] Simplify CPU detection on PPC.

2012-06-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

Get rid of the initialized and have_vmx static variables in
pixman-ppc.c. There is no point to them since CPU detection only
happens once per process.

On Linux, just read /proc/self/auxv instead of generating the filename
with getpid() and don't bother with the stack buffer. Instead just
read the aux entries one by one.
---
 pixman/pixman-ppc.c |  113 +--
 1 file changed, 38 insertions(+), 75 deletions(-)

diff --git a/pixman/pixman-ppc.c b/pixman/pixman-ppc.c
index 786f204..f1bea1e 100644
--- a/pixman/pixman-ppc.c
+++ b/pixman/pixman-ppc.c
@@ -31,26 +31,20 @@
  * -maltivec -mabi=altivec, as gcc would try to save vector register
  * across function calls causing SIGILL on cpus without Altivec/vmx.
  */
-static pixman_bool_t initialized = FALSE;
-static volatile pixman_bool_t have_vmx = TRUE;
-
 #ifdef __APPLE__
 #include sys/sysctl.h
 
 static pixman_bool_t
 pixman_have_vmx (void)
 {
-if (!initialized)
-{
-   size_t length = sizeof(have_vmx);
-   int error =
-   sysctlbyname (hw.optional.altivec, have_vmx, length, NULL, 0);
+size_t length = sizeof(have_vmx);
+int error, have_mmx;
 
-   if (error)
-   have_vmx = FALSE;
+sysctlbyname (hw.optional.altivec, have_vmx, length, NULL, 0);
+
+if (error)
+   return FALSE;
 
-   initialized = TRUE;
-}
 return have_vmx;
 }
 
@@ -62,22 +56,20 @@ pixman_have_vmx (void)
 static pixman_bool_t
 pixman_have_vmx (void)
 {
-if (!initialized)
-{
-   int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
-   size_t length = sizeof(have_vmx);
-   int error =
-   sysctl (mib, 2, have_vmx, length, NULL, 0);
+int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
+size_t length = sizeof(have_vmx);
+int error, have_vmx;
 
-   if (error != 0)
-   have_vmx = FALSE;
+error = sysctl (mib, 2, have_vmx, length, NULL, 0);
+
+if (error != 0)
+   return FALSE;
 
-   initialized = TRUE;
-}
 return have_vmx;
 }
 
 #elif defined (__linux__)
+
 #include sys/types.h
 #include sys/stat.h
 #include fcntl.h
@@ -89,51 +81,27 @@ pixman_have_vmx (void)
 static pixman_bool_t
 pixman_have_vmx (void)
 {
-if (!initialized)
+int have_vmx = FALSE;
+int fd;
+struct
 {
-   char fname[64];
-   unsigned long buf[64];
-   ssize_t count = 0;
-   pid_t pid;
-   int fd, i;
+   unsigned long type;
+   unsigned long value;
+} aux;
 
-   pid = getpid ();
-   snprintf (fname, sizeof(fname) - 1, /proc/%d/auxv, pid);
-
-   fd = open (fname, O_RDONLY);
-   if (fd = 0)
+fd = open (/proc/self/auxv, O_RDONLY);
+if (fd = 0)
+{
+   while (read (fd, aux, sizeof (aux)) == sizeof (aux))
{
-   for (i = 0; i = (count / sizeof(unsigned long)); i += 2)
+   if (aux.type == AT_HWCAP  (aux.value  PPC_FEATURE_HAS_ALTIVEC))
{
-   /* Read more if buf is empty... */
-   if (i == (count / sizeof(unsigned long)))
-   {
-   count = read (fd, buf, sizeof(buf));
-   if (count = 0)
-   break;
-   i = 0;
-   }
-
-   if (buf[i] == AT_HWCAP)
-   {
-   have_vmx = !!(buf[i + 1]  PPC_FEATURE_HAS_ALTIVEC);
-   initialized = TRUE;
-   break;
-   }
-   else if (buf[i] == AT_NULL)
-   {
-   break;
-   }
+   have_vmx = TRUE;
+   break;
}
-   close (fd);
}
-}
-if (!initialized)
-{
-   /* Something went wrong. Assume 'no' rather than playing
-  fragile tricks with catching SIGILL. */
-   have_vmx = FALSE;
-   initialized = TRUE;
+
+   close (fd);
 }
 
 return have_vmx;
@@ -159,22 +127,17 @@ pixman_have_vmx (void)
 struct sigaction sa, osa;
 int jmp_result;
 
-if (!initialized)
+sa.sa_flags = SA_SIGINFO;
+sigemptyset (sa.sa_mask);
+sa.sa_sigaction = vmx_test;
+sigaction (SIGILL, sa, osa);
+jmp_result = setjmp (jump_env);
+if (jmp_result == 0)
 {
-   sa.sa_flags = SA_SIGINFO;
-   sigemptyset (sa.sa_mask);
-   sa.sa_sigaction = vmx_test;
-   sigaction (SIGILL, sa, osa);
-   jmp_result = setjmp (jump_env);
-   if (jmp_result == 0)
-   {
-   asm volatile ( vor 0, 0, 0 );
-   }
-   sigaction (SIGILL, osa, NULL);
-   have_vmx = (jmp_result == 0);
-   initialized = TRUE;
+   asm volatile ( vor 0, 0, 0 );
 }
-return have_vmx;
+sigaction (SIGILL, osa, NULL);
+return (jmp_result == 0);
 }
 
 #endif /* __APPLE__ */
-- 
1.7.10.4

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 08/10] Cleanups and simplifications in x86 CPU feature detection

2012-06-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

A new function pixman_cpuid() is added that runs the cpuid instruction
and returns the results.

On GCC this function uses inline assembly that is written such that it
will work on both 32 and 64 bit. Compared to the old code, the only
difference is %ebx is saved in %esi instead of on the stack. Saving 32
bit registers on a 64 bit stack is difficult or impossible because in
64 bit mode, the push and pop instructions work on 64 bit registers.

On MSVC, the function calls the __cpuid intrinsic.

There is also a new function called have_cpuid() which detects whether
cpuid is available. On x86-64 and MSVC, it simply returns TRUE; on
x86-32 bit, it checks whether the 22nd bit of eflags can be
modified. On MSVC this does have the consequence that pixman will no
longer work on CPUs without cpuid (i.e., older than 486 and some 486
models).

These two functions together make it possible to write a generic
detect_cpu_features() in plain C. This function is then used in a new
have_feature() function that checks whether a specific set of feature
bits is available.

Aside from the cleanups and simplifications, the main benefit from
this patch is that pixman now can do feature detection on x86-64, so
that newer instruction sets such as SSSE3 and SSE4.1 can be used. (And
apparently the assumption that x86-64 CPUs always have MMX and SSE2 is
no longer correct: Knight's Corner is x86-64, but doesn't have them).
---
 pixman/pixman-x86.c |  311 +--
 1 file changed, 129 insertions(+), 182 deletions(-)

diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
index 52ad3df..84590d2 100644
--- a/pixman/pixman-x86.c
+++ b/pixman/pixman-x86.c
@@ -32,30 +32,25 @@
  * that would lead to SIGILL instructions on old CPUs that don't have
  * it.
  */
-#if !defined(__amd64__)  !defined(__x86_64__)  !defined(_M_AMD64)
-
-#ifdef HAVE_GETISAX
-#include sys/auxv.h
-#endif
 
 typedef enum
 {
-NO_FEATURES = 0,
-MMX = 0x1,
-MMX_EXTENSIONS = 0x2,
-SSE = 0x6,
-SSE2 = 0x8,
-CMOV = 0x10
+X86_MMX= (1  0),
+X86_MMX_EXTENSIONS = (1  1),
+X86_SSE= (1  2) | X86_MMX_EXTENSIONS,
+X86_SSE2   = (1  3),
+X86_CMOV   = (1  4)
 } cpu_features_t;
 
+#ifdef HAVE_GETISAX
 
-static unsigned int
+#include sys/auxv.h
+
+static cpu_features_t
 detect_cpu_features (void)
 {
-unsigned int features = 0;
-unsigned int result = 0;
-
-#ifdef HAVE_GETISAX
+cpu_features_t features;
+
 if (getisax (result, 1))
 {
if (result  AV_386_CMOV)
@@ -69,15 +64,47 @@ detect_cpu_features (void)
if (result  AV_386_SSE2)
features |= SSE2;
 }
+
+return features;
+}
+
+#else
+
+static pixman_bool_t
+have_cpuid (void)
+{
+#if defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64) || defined 
(_MSC_VER)
+
+return TRUE;
+
+#elif defined (__GNUC__)
+uint32_t result;
+
+__asm__ volatile (
+pushf\n\t
+pop %%eax\n\t
+mov %%eax, %%ecx \n\t
+xor $0x0020, %%eax   \n\t
+push %%eax   \n\t
+popf \n\t
+pushf\n\t
+pop %%eax\n\t
+xor %%ecx, %%eax \n\t
+   mov %%eax, %0 \n\t
+   : =r (result)
+   :
+   : %eax, %ecx);
+
+return !!result;
+
 #else
-char vendor[13];
-#ifdef _MSC_VER
-int vendor0 = 0, vendor1, vendor2;
+#error Unknown compiler
 #endif
-vendor[0] = 0;
-vendor[12] = 0;
-
-#ifdef __GNUC__
+}
+
+static void
+pixman_cpuid (uint32_t feature, uint32_t *a, uint32_t *b, uint32_t *c, 
uint32_t *d)
+{
 /* see p. 118 of amd64 instruction set manual Vol3 */
 /* We need to be careful about the handling of %ebx and
  * %esp here. We can't declare either one as clobbered
@@ -86,195 +113,115 @@ detect_cpu_features (void)
  * stack pointer), so we need to make sure they have their
  * original values when we access the output operands.
  */
-__asm__ (
-pushf\n
-pop %%eax\n
-mov %%eax, %%ecx\n
-xor $0x0020, %%eax\n
-push %%eax\n
-popf\n
-pushf\n
-pop %%eax\n
-mov $0x0, %%edx\n
-xor %%ecx, %%eax\n
-jz 1f\n
-   
-mov $0x, %%eax\n
-push %%ebx\n
-cpuid\n
-mov %%ebx, %%eax\n
-pop %%ebx\n
-mov %%eax, %1\n
-mov %%edx, %2\n
-mov %%ecx, %3\n
-mov $0x0001, %%eax\n
-push %%ebx\n
-cpuid\n
-pop %%ebx\n
-1:\n
-mov %%edx, %0\n
-   : =r (result),
- =m (vendor[0]),
- =m (vendor[4]),
- =m (vendor[8])
-   :
-   : %eax, %ecx, %edx
-);
-
+#if defined

[Pixman] NOOP implementation

2011-05-03 Thread Søren Sandmann Pedersen
The following patches add a noop implementation, which is used as
topmost in the implementation hierarchy. It is supposed to contain
iterators and compositing routines that don't do anything. For
example, there is a compositing fast path for the DST operator.

This is useful because it allows more CPU specific iterators to be
added without worrying about them being selected ahead of noop
iterators.


Soren

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 1/6] Add a noop implementation.

2011-05-03 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

This new implementation is ahead of all other implementations in the
fallback chain and is supposed to contain operations that don't
require any work. For example, it might contain a fast path for the
DST operator that doesn't actually do anything.
---
 pixman/Makefile.am  |1 +
 pixman/pixman-cpu.c |2 ++
 pixman/pixman-noop.c|   45 +
 pixman/pixman-private.h |3 +++
 4 files changed, 51 insertions(+), 0 deletions(-)
 create mode 100644 pixman/pixman-noop.c

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index be08266..1e20bb0 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -20,6 +20,7 @@ libpixman_1_la_SOURCES =  \
pixman-combine64.h  \
pixman-general.c\
pixman.c\
+   pixman-noop.c   \
pixman-fast-path.c  \
pixman-fast-path.h  \
pixman-solid-fill.c \
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index 0e14ecb..973ed54 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -606,6 +606,8 @@ _pixman_choose_implementation (void)
imp = _pixman_implementation_create_vmx (imp);
 #endif
 
+imp = _pixman_implementation_create_noop (imp);
+
 return imp;
 }
 
diff --git a/pixman/pixman-noop.c b/pixman/pixman-noop.c
new file mode 100644
index 000..50bbfb0
--- /dev/null
+++ b/pixman/pixman-noop.c
@@ -0,0 +1,45 @@
+/* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
+/*
+ * Copyright © 2011 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the Software),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifdef HAVE_CONFIG_H
+#include config.h
+#endif
+#include string.h
+#include stdlib.h
+#include pixman-private.h
+#include pixman-combine32.h
+#include pixman-fast-path.h
+
+static const pixman_fast_path_t noop_fast_paths[] =
+{
+{ PIXMAN_OP_NONE },
+};
+
+pixman_implementation_t *
+_pixman_implementation_create_noop (pixman_implementation_t *fallback)
+{
+pixman_implementation_t *imp =
+   _pixman_implementation_create (fallback, noop_fast_paths);
+
+return imp;
+}
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 60060a9..2996907 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -523,6 +523,9 @@ _pixman_implementation_create_general (void);
 pixman_implementation_t *
 _pixman_implementation_create_fast_path (pixman_implementation_t *fallback);
 
+pixman_implementation_t *
+_pixman_implementation_create_noop (pixman_implementation_t *fallback);
+
 #ifdef USE_MMX
 pixman_implementation_t *
 _pixman_implementation_create_mmx (pixman_implementation_t *fallback);
-- 
1.7.4

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 3/6] Move noop dest fetching to noop implementation

2011-05-03 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

It will at some point become useful to have CPU specific destination
iterators. However, a problem with that is that such iterators should
not be used if we can composite directly in the destination image.

By moving the noop destination iterator to the noop implementation, we
can ensure that it will be chosen before any CPU specific iterator.
---
 pixman/pixman-bits-image.c |   31 +--
 pixman/pixman-noop.c   |   32 
 2 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index 835ecfb..4e9ed14 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -1462,43 +1462,22 @@ dest_write_back_wide (pixman_iter_t *iter)
 iter-y++;
 }
 
-static void
-dest_write_back_direct (pixman_iter_t *iter)
-{
-iter-buffer += iter-image-bits.rowstride;
-}
-
 void
 _pixman_bits_image_dest_iter_init (pixman_image_t *image, pixman_iter_t *iter)
 {
 if (iter-flags  ITER_NARROW)
 {
-   if (((image-common.flags 
- (FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) ==
-(FAST_PATH_NO_ALPHA_MAP | FAST_PATH_NO_ACCESSORS)) 
-   (image-bits.format == PIXMAN_a8r8g8b8  ||
-(image-bits.format == PIXMAN_x8r8g8b8 
- (iter-flags  ITER_LOCALIZED_ALPHA
+   if ((iter-flags  (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
+   (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
{
-   iter-buffer = image-bits.bits + iter-y * image-bits.rowstride + 
iter-x;
-
iter-get_scanline = _pixman_iter_get_scanline_noop;
-   iter-write_back = dest_write_back_direct;
}
else
{
-   if ((iter-flags  (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
-   (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
-   {
-   iter-get_scanline = _pixman_iter_get_scanline_noop;
-   }
-   else
-   {
-   iter-get_scanline = dest_get_scanline_narrow;
-   }
-
-   iter-write_back = dest_write_back_narrow;
+   iter-get_scanline = dest_get_scanline_narrow;
}
+   
+   iter-write_back = dest_write_back_narrow;
 }
 else
 {
diff --git a/pixman/pixman-noop.c b/pixman/pixman-noop.c
index 673a02a..d753843 100644
--- a/pixman/pixman-noop.c
+++ b/pixman/pixman-noop.c
@@ -48,6 +48,36 @@ noop_composite (pixman_implementation_t *imp,
 return;
 }
 
+static void
+dest_write_back_direct (pixman_iter_t *iter)
+{
+iter-buffer += iter-image-bits.rowstride;
+}
+
+static void
+noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+{
+pixman_image_t *image = iter-image;
+uint32_t image_flags = image-common.flags;
+uint32_t iter_flags = iter-flags;
+   
+if ((image_flags  FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS   

+   (iter_flags  ITER_NARROW) == ITER_NARROW   

+   ((image-common.extended_format_code == PIXMAN_a8r8g8b8)||
+(image-common.extended_format_code == PIXMAN_x8r8g8b8 
+ (iter_flags  (ITER_LOCALIZED_ALPHA)
+{
+   iter-buffer = image-bits.bits + iter-y * image-bits.rowstride + 
iter-x;
+   
+   iter-get_scanline = _pixman_iter_get_scanline_noop;
+   iter-write_back = dest_write_back_direct;
+}
+else
+{
+   (* imp-delegate-dest_iter_init) (imp-delegate, iter);
+}
+}
+
 static const pixman_fast_path_t noop_fast_paths[] =
 {
 { PIXMAN_OP_DST, PIXMAN_any, 0, PIXMAN_any, 0, PIXMAN_any, 0, 
noop_composite },
@@ -60,5 +90,7 @@ _pixman_implementation_create_noop (pixman_implementation_t 
*fallback)
 pixman_implementation_t *imp =
_pixman_implementation_create (fallback, noop_fast_paths);
 
+imp-dest_iter_init = noop_dest_iter_init;
+
 return imp;
 }
-- 
1.7.4

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 5/6] Move NULL iterator into pixman-noop.c

2011-05-03 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

Iterating a NULL image returns NULL for all scanlines. This may as
well be done in the noop iterator.
---
 pixman/pixman-implementation.c |   12 +---
 pixman/pixman-noop.c   |   24 
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
index f1d3f99..2706ceb 100644
--- a/pixman/pixman-implementation.c
+++ b/pixman/pixman-implementation.c
@@ -241,12 +241,6 @@ _pixman_implementation_fill (pixman_implementation_t *imp,
 return (*imp-fill) (imp, bits, stride, bpp, x, y, width, height, xor);
 }
 
-static uint32_t *
-get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
-{
-return NULL;
-}
-
 void
 _pixman_implementation_src_iter_init (pixman_implementation_t  *imp,
  pixman_iter_t *iter,
@@ -266,11 +260,7 @@ _pixman_implementation_src_iter_init 
(pixman_implementation_t  *imp,
 iter-height = height;
 iter-flags = flags;
 
-if (!image)
-{
-   iter-get_scanline = get_scanline_null;
-}
-else if ((flags  (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
+if ((flags  (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB)) ==
 (ITER_IGNORE_ALPHA | ITER_IGNORE_RGB))
 {
iter-get_scanline = _pixman_iter_get_scanline_noop;
diff --git a/pixman/pixman-noop.c b/pixman/pixman-noop.c
index 5dc528d..75ecf0b 100644
--- a/pixman/pixman-noop.c
+++ b/pixman/pixman-noop.c
@@ -64,22 +64,30 @@ noop_get_scanline (pixman_iter_t *iter, const uint32_t 
*mask)
 return result;
 }
 
+static uint32_t *
+get_scanline_null (pixman_iter_t *iter, const uint32_t *mask)
+{
+return NULL;
+}
+
 static void
 noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 {
 pixman_image_t *image = iter-image;
-uint32_t iter_flags = iter-flags;
-uint32_t image_flags = image-common.flags;
 
 #define FLAGS  \
 (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
 
-if ((iter_flags  ITER_NARROW) 
-   (image_flags  FLAGS) == FLAGS  
-   iter-x = 0  iter-y = 0
-   iter-x + iter-width = image-bits.width  
-   iter-y + iter-height = image-bits.height
-   image-common.extended_format_code == PIXMAN_a8r8g8b8)
+if (!image)
+{
+   iter-get_scanline = get_scanline_null;
+}
+else if ((iter-flags  ITER_NARROW)   
+(image-common.flags  FLAGS) == FLAGS 
+iter-x = 0  iter-y = 0   
+iter-x + iter-width = image-bits.width 
+iter-y + iter-height = image-bits.height   
+image-common.extended_format_code == PIXMAN_a8r8g8b8)
 {
iter-buffer =
image-bits.bits + iter-y * image-bits.rowstride + iter-x;
-- 
1.7.4

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 4/6] Add a noop src iterator

2011-05-03 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

When the image is a8r8g8b8 and not transformed, and the fetched
rectangle is within the image bounds, scanlines can be fetched by
simply returning a pointer instead of copying the bits.
---
 pixman/pixman-noop.c |   39 +++
 1 files changed, 39 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-noop.c b/pixman/pixman-noop.c
index d753843..5dc528d 100644
--- a/pixman/pixman-noop.c
+++ b/pixman/pixman-noop.c
@@ -54,6 +54,44 @@ dest_write_back_direct (pixman_iter_t *iter)
 iter->buffer += iter->image->bits.rowstride;
 }
 
+static uint32_t *
+noop_get_scanline (pixman_iter_t *iter, const uint32_t *mask)
+{
+uint32_t *result = iter->buffer;
+
+iter->buffer += iter->image->bits.rowstride;
+
+return result;
+}
+
+static void
+noop_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
+{
+pixman_image_t *image = iter->image;
+uint32_t iter_flags = iter->flags;
+uint32_t image_flags = image->common.flags;
+
+#define FLAGS  \
+(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM)
+
+if ((iter_flags & ITER_NARROW) &&
+   (image_flags & FLAGS) == FLAGS  &&
+   iter->x >= 0 && iter->y >= 0 &&
+   iter->x + iter->width <= image->bits.width  &&
+   iter->y + iter->height <= image->bits.height &&
+   image->common.extended_format_code == PIXMAN_a8r8g8b8)
+{
+   iter->buffer =
+   image->bits.bits + iter->y * image->bits.rowstride + iter->x;
+
+   iter->get_scanline = noop_get_scanline;
+}
+else
+{
+   (* imp->delegate->src_iter_init) (imp->delegate, iter);
+}
+}
+}
+
 static void
 noop_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
 {
@@ -90,6 +128,7 @@ _pixman_implementation_create_noop (pixman_implementation_t 
*fallback)
 pixman_implementation_t *imp =
_pixman_implementation_create (fallback, noop_fast_paths);
 
+imp->src_iter_init = noop_src_iter_init;
 imp->dest_iter_init = noop_dest_iter_init;
 
 return imp;
-- 
1.7.4

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] Fix

2011-03-31 Thread Søren Sandmann Pedersen
I forgot to CC pixman@lists.freedesktop.org on the following
patch. The patch is necessary to make trapezoid rendering directly to
X windows work and also makes the pixman_composite_trapezoids() API
more similar to pixman_image_composite(). See this thread:

http://lists.x.org/archives/xorg-devel/2011-March/021056.html

for context.


Soren

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


Re: [Pixman] [PATCH] Add forgotten _mm_empty() calls in the SSE2 fetchers.

2011-02-18 Thread Søren Sandmann Pedersen
Here is a patch series that removes all use of MMX from
pixman-sse2.c. This avoids all the emms issues and is likely also a
speedup on Windows x64, where MMX intrinsics are not supported and
therefore had to be emulated.

 b/configure.ac|2 
 b/pixman/pixman-sse2.c| 1601 +-
 pixman/pixman-x64-mmx-emulation.h |  263 --
 3 files changed, 572 insertions(+), 1294 deletions(-)

Søren

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 4/8] sse2: Don't compile pixman-sse2.c with -mmmx anymore

2011-02-18 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

It's not necessary now that the file doesn't use MMX instructions.
---
 configure.ac |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/configure.ac b/configure.ac
index 5242799..8d96647 100644
--- a/configure.ac
+++ b/configure.ac
@@ -326,7 +326,7 @@ if test "x$SSE2_CFLAGS" = "x" ; then
  SSE2_CFLAGS="-xarch=sse2"
   fi
else
-  SSE2_CFLAGS="-mmmx -msse2 -Winline"
+  SSE2_CFLAGS="-msse2 -Winline"
fi
 fi
 
-- 
1.7.3.1

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 6/8] sse2: Delete obsolete or redundant comments

2011-02-18 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

---
 pixman/pixman-sse2.c |  137 --
 1 files changed, 0 insertions(+), 137 deletions(-)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 0753b6d..286dea8 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -46,12 +46,6 @@
 #   include pixman-x64-mmx-emulation.h
 #endif
 
-#ifdef USE_SSE2
-
-/* 
- * Locals
- */
-
 static __m128i mask_0080;
 static __m128i mask_00ff;
 static __m128i mask_0101;
@@ -69,9 +63,6 @@ static __m128i mask_blue;
 static __m128i mask_565_fix_rb;
 static __m128i mask_565_fix_g;
 
-/* --
- * SSE2 Inlines
- */
 static force_inline __m128i
 unpack_32_1x128 (uint32_t data)
 {
@@ -389,10 +380,6 @@ save_128_unaligned (__m128i* dst,
 _mm_storeu_si128 (dst, data);
 }
 
-/* --
- * MMX inlines
- */
-
 static force_inline __m128i
 load_32_1x128 (uint32_t data)
 {
@@ -486,9 +473,6 @@ expand565_16_1x128 (uint16_t pixel)
 return _mm_unpacklo_epi8 (m, _mm_setzero_si128 ());
 }
 
-/* 
- * Compose Core transformations
- */
 static force_inline uint32_t
 core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
 {
@@ -2365,9 +2349,6 @@ sse2_combine_add_ca (pixman_implementation_t *imp,
 }
 }
 
-/* ---
- * fb_compose_setup_sSE2
- */
 static force_inline __m128i
 create_mask_16_128 (uint16_t mask)
 {
@@ -2387,10 +2368,6 @@ create_mask_2x32_128 (uint32_t mask0,
 }
 #endif
 
-/* ---
- * composite_over_n_
- */
-
 static void
 sse2_composite_over_n_ (pixman_implementation_t *imp,
 pixman_op_t  op,
@@ -2470,9 +2447,6 @@ sse2_composite_over_n_ (pixman_implementation_t *imp,
 }
 }
 
-/* -
- * composite_over_n_0565
- */
 static void
 sse2_composite_over_n_0565 (pixman_implementation_t *imp,
 pixman_op_t  op,
@@ -2558,9 +2532,6 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp,
 
 }
 
-/* --
- * composite_add_n___ca
- */
 static void
 sse2_composite_add_n___ca (pixman_implementation_t *imp,
   pixman_op_t  op,
@@ -2684,10 +2655,6 @@ sse2_composite_add_n___ca 
(pixman_implementation_t *imp,
 
 }
 
-/* ---
- * composite_over_n___ca
- */
-
 static void
 sse2_composite_over_n___ca (pixman_implementation_t *imp,
 pixman_op_t  op,
@@ -2811,10 +2778,6 @@ sse2_composite_over_n___ca 
(pixman_implementation_t *imp,
 
 }
 
-/*-
- * composite_over__n_
- */
-
 static void
 sse2_composite_over__n_ (pixman_implementation_t *imp,
  pixman_op_t  op,
@@ -2929,10 +2892,6 @@ sse2_composite_over__n_ (pixman_implementation_t 
*imp,
 
 }
 
-/*-
- * composite_over__n_
- */
-
 static void
 sse2_composite_src_x888_ (pixman_implementation_t *imp,
  pixman_op_t  op,
@@ -3001,9 +2960,6 @@ sse2_composite_src_x888_ (pixman_implementation_t 
*imp,
 
 }
 
-/* -
- * composite_over_x888_n_
- */
 static void
 sse2_composite_over_x888_n_ (pixman_implementation_t *imp,
  pixman_op_t  op,
@@ -3105,9 +3061,6 @@ sse2_composite_over_x888_n_ (pixman_implementation_t 
*imp,
 
 }
 
-/* 
- * composite_over__
- */
 static void
 sse2_composite_over__ (pixman_implementation_t *imp,
pixman_op_t  op,
@@ -3144,9 +3097,6 @@ sse2_composite_over__ (pixman_implementation_t 
*imp,
 }
 }
 
-/* --
- * composite_over__0565
- */
 static force_inline uint16_t
 composite_over__0565pixel (uint32_t src, uint16_t dst)
 {
@@ -3188,15 +3138,6 @@ sse2_composite_over__0565 (pixman_implementation_t 
*imp,
 PIXMAN_IMAGE_GET_LINE (
src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
-#if 0
-/* FIXME
- *
- * I copy the code from MMX one and keep the fixme.
- * If it's a problem there, probably is a problem here.
- */
-assert

[Pixman] [PATCH 8/8] sse2: Minor coding style cleanups.

2011-02-18 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

Also make pixman_fill_sse2() static.
---
 pixman/pixman-sse2.c |   18 --
 1 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 0509613..88287b4 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2587,7 +2587,8 @@ sse2_composite_add_n___ca 
(pixman_implementation_t *imp,
mmx_dest = unpack_32_1x128 (d);
 
*pd = pack_1x128_32 (
-   _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), 
mmx_dest));
+   _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
+  mmx_dest));
}
 
pd++;
@@ -2635,7 +2636,8 @@ sse2_composite_add_n___ca 
(pixman_implementation_t *imp,
mmx_dest = unpack_32_1x128 (d);
 
*pd = pack_1x128_32 (
-   _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src), 
mmx_dest));
+   _mm_adds_epu8 (pix_multiply_1x128 (mmx_mask, mmx_src),
+  mmx_dest));
}
 
pd++;
@@ -,7 +3335,7 @@ sse2_composite_over_n_8_ (pixman_implementation_t 
*imp,
 
 }
 
-pixman_bool_t
+static pixman_bool_t
 pixman_fill_sse2 (uint32_t *bits,
   int   stride,
   int   bpp,
@@ -4886,7 +4888,8 @@ sse2_composite_over_x888_8_ (pixman_implementation_t 
*imp,
 while (w >= 4)
 {
 m = *(uint32_t*) mask;
-xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), 
mask_ff000000);
+xmm_src = _mm_or_si128 (
+   load_128_unaligned ((__m128i*)src), mask_ff000000);
 
 if (m == 0xffffffff)
 {
@@ -4902,9 +4905,12 @@ sse2_composite_over_x888_8_ (pixman_implementation_t 
*imp,
 unpack_128_2x128 (xmm_mask, xmm_mask_lo, xmm_mask_hi);
 unpack_128_2x128 (xmm_dst, xmm_dst_lo, xmm_dst_hi);
 
-expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, 
xmm_mask_lo, xmm_mask_hi);
+expand_alpha_rev_2x128 (
+   xmm_mask_lo, xmm_mask_hi, xmm_mask_lo, xmm_mask_hi);
 
-in_over_2x128 (xmm_src_lo, xmm_src_hi, mask_00ff, 
mask_00ff, xmm_mask_lo, xmm_mask_hi, xmm_dst_lo, xmm_dst_hi);
+in_over_2x128 (xmm_src_lo, xmm_src_hi,
+  mask_00ff, mask_00ff, xmm_mask_lo, 
xmm_mask_hi,
+  xmm_dst_lo, xmm_dst_hi);
 
 save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, 
xmm_dst_hi));
 }
-- 
1.7.3.1

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 2/6] Add a test program for pixman_composite_trapezoids().

2011-02-11 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

A CRC32 based test program to check that pixman_composite_trapezoids()
actually works.
---
 test/Makefile.am|5 +
 test/composite-traps-test.c |  253 +++
 2 files changed, 258 insertions(+), 0 deletions(-)
 create mode 100644 test/composite-traps-test.c

diff --git a/test/Makefile.am b/test/Makefile.am
index 8d8471d..0b7d05c 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -17,6 +17,7 @@ TESTPROGRAMS =\
gradient-crash-test \
alphamap\
stress-test \
+   composite-traps-test\
blitters-test   \
scaling-test\
affine-test \
@@ -52,6 +53,10 @@ blitters_test_LDADD = $(TEST_LDADD)
 blitters_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
 blitters_test_SOURCES = blitters-test.c utils.c utils.h
 
+composite_traps_test_LDADD = $(TEST_LDADD)
+composite_traps_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
+composite_traps_test_SOURCES = composite-traps-test.c utils.c utils.h
+
 scaling_test_LDADD = $(TEST_LDADD)
 scaling_test_LDFLAGS = @TESTPROGS_EXTRA_LDFLAGS@
 scaling_test_SOURCES = scaling-test.c utils.c utils.h
diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
new file mode 100644
index 000..8f32778
--- /dev/null
+++ b/test/composite-traps-test.c
@@ -0,0 +1,253 @@
+/* Based loosely on scaling-test */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define MAX_SRC_WIDTH  48
+#define MAX_SRC_HEIGHT 48
+#define MAX_DST_WIDTH  48
+#define MAX_DST_HEIGHT 48
+#define MAX_STRIDE 4
+
+static pixman_format_code_t formats[] =
+{
+PIXMAN_a8r8g8b8, PIXMAN_a8, PIXMAN_r5g6b5, PIXMAN_a1, PIXMAN_a4
+};
+
+static pixman_format_code_t mask_formats[] =
+{
+PIXMAN_a1, PIXMAN_a4, PIXMAN_a8,
+};
+
+static pixman_op_t operators[] =
+{
+PIXMAN_OP_OVER, PIXMAN_OP_ADD, PIXMAN_OP_SRC, PIXMAN_OP_IN
+};
+
+#define RANDOM_ELT(array)  \
+((array)[lcg_rand_n(ARRAY_LENGTH((array)))])
+
+static void
+destroy_bits (pixman_image_t *image, void *data)
+{
+fence_free (data);
+}
+
+static pixman_fixed_t
+random_fixed (int n)
+{
+return lcg_rand_N (n << 16);
+}
+
+/*
+ * Composite operation with pseudorandom images
+ */
+uint32_t
+test_composite (int  testnum,
+   int  verbose)
+{
+inti;
+pixman_image_t *   src_img;
+pixman_image_t *   dst_img;
+pixman_region16_t  clip;
+intdst_width, dst_height;
+intdst_stride;
+intdst_x, dst_y;
+intdst_bpp;
+pixman_op_top;
+uint32_t * dst_bits;
+uint32_t   crc32;
+pixman_format_code_t mask_format, dst_format;
+pixman_trapezoid_t *traps;
+int src_x, src_y;
+int n_traps;
+
+static pixman_color_t colors[] =
+{
+   { 0x, 0x, 0x, 0x },
+   { 0x, 0x, 0x, 0x },
+   { 0xabcd, 0xabcd, 0x, 0xabcd },
+   { 0x, 0x, 0x, 0x },
+   { 0x0101, 0x0101, 0x0101, 0x0101 },
+   { 0x, 0x, 0x, 0x },
+};
+
+FLOAT_REGS_CORRUPTION_DETECTOR_START ();
+
+lcg_srand (testnum);
+
+op = RANDOM_ELT (operators);
+mask_format = RANDOM_ELT (mask_formats);
+
+/* Create source image */
+
+if (lcg_rand_n (4) == 0)
+{
+   src_img = pixman_image_create_solid_fill (
+   &(colors[lcg_rand_n (ARRAY_LENGTH (colors))]));
+
+   src_x = 10;
+   src_y = 234;
+}
+else
+{
+   pixman_format_code_t src_format = RANDOM_ELT(formats);
+   int src_bpp = (PIXMAN_FORMAT_BPP (src_format) + 7) / 8;
+   int src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
+   int src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+   int src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * 
src_bpp;
+   uint32_t *bits;
+
+   src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
+   src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
+
+   src_stride = (src_stride + 3) & ~3;
+   
+   bits = (uint32_t *)make_random_bytes (src_stride * src_height);
+
+   src_img = pixman_image_create_bits (
+   src_format, src_width, src_height, bits, src_stride);
+
+   pixman_image_set_destroy_function (src_img, destroy_bits, bits);
+
+   if (lcg_rand_n (8) == 0)
+   {
+   pixman_box16_t clip_boxes[2];
+   intn = lcg_rand_n (2) + 1;
+   
+   for (i = 0; i < n; i++)
+   {
+   clip_boxes[i].x1 = lcg_rand_n (src_width);
+   clip_boxes[i].y1 = lcg_rand_n (src_height);
+   clip_boxes[i].x2 =
+   clip_boxes[i].x1 + lcg_rand_n (src_width - 
clip_boxes[i].x1);
+   clip_boxes[i].y2 =
+   clip_boxes[i].y1

[Pixman] [PATCH 3/6] Add support for triangles to pixman.

2011-02-11 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

The Render X extension can draw triangles as well as trapezoids, but
the implementation has always converted them to trapezoids. This patch
moves the X server's triangle conversion code into pixman, where we
can reuse the pixman_composite_trapezoids() code.
---
 pixman/pixman-trap.c |  136 ++
 pixman/pixman.h  |   15 ++
 2 files changed, 151 insertions(+), 0 deletions(-)

diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c
index ecec5d4..2675773 100644
--- a/pixman/pixman-trap.c
+++ b/pixman/pixman-trap.c
@@ -1,4 +1,5 @@
 /*
+ * Copyright © 2002 Keith Packard, member of The XFree86 Project, Inc.
  * Copyright © 2004 Keith Packard
  *
  * Permission to use, copy, modify, distribute, and sell this software and its
@@ -25,6 +26,7 @@
 #endif
 
 #include <stdio.h>
+#include <stdlib.h>
 #include "pixman-private.h"
 
 /*
@@ -471,3 +473,137 @@ pixman_composite_trapezoids (pixman_op_t  op,
 
 pixman_image_unref (tmp);
 }
+
+static int
+greater_y (const pixman_point_fixed_t *a, const pixman_point_fixed_t *b)
+{
+if (a->y == b->y)
+   return a->x > b->x;
+return a->y > b->y;
+}
+
+/*
+ * Note that the definition of this function is a bit odd because
+ * of the X coordinate space (y increasing downwards).
+ */
+static int
+clockwise (const pixman_point_fixed_t *ref,
+  const pixman_point_fixed_t *a,
+  const pixman_point_fixed_t *b)
+{
+pixman_point_fixed_t   ad, bd;
+
+ad.x = a->x - ref->x;
+ad.y = a->y - ref->y;
+bd.x = b->x - ref->x;
+bd.y = b->y - ref->y;
+
+return ((pixman_fixed_32_32_t) bd.y * ad.x -
+   (pixman_fixed_32_32_t) ad.y * bd.x) < 0;
+}
+
+static void
+triangle_to_trapezoids (const pixman_triangle_t *tri, pixman_trapezoid_t 
*traps)
+{
+const pixman_point_fixed_t *top, *left, *right, *tmp;
+
+top = &tri->p1;
+left = &tri->p2;
+right = &tri->p3;
+
+if (greater_y (top, left))
+{
+   tmp = left;
+   left = top;
+   top = tmp;
+}
+
+if (greater_y (top, right))
+{
+   tmp = right;
+   right = top;
+   top = tmp;
+}
+
+if (clockwise (top, right, left))
+{
+   tmp = right;
+   right = left;
+   left = tmp;
+}
+
+/*
+ * Two cases:
+ *
+ * +   +
+ */ \ / \
+ *   /   \   /   \
+ *  / + + \
+ *  /--   --\
+ * /   --   --   \
+ */ ---   --- \
+ *  +-- --+
+ */
+
+traps->top = top->y;
+traps->left.p1 = *top;
+traps->left.p2 = *left;
+traps->right.p1 = *top;
+traps->right.p2 = *right;
+
+if (right->y < left->y)
+   traps->bottom = right->y;
+else
+   traps->bottom = left->y;
+
+traps++;
+
+*traps = *(traps - 1);
+
+if (right->y < left->y)
+{
+   traps->top = right->y;
+   traps->bottom = left->y;
+   traps->right.p1 = *right;
+   traps->right.p2 = *left;
+}
+else
+{
+   traps->top = left->y;
+   traps->bottom = right->y;
+   traps->left.p1 = *left;
+   traps->left.p2 = *right;
+}
+}
+
+PIXMAN_EXPORT void
+pixman_composite_triangles (pixman_op_top,
+   pixman_image_t *src,
+   pixman_image_t *dst,
+   pixman_format_code_tmask_format,
+   int x_src,
+   int y_src,
+   int x_dst,
+   int y_dst,
+   int n_tris,
+   const pixman_triangle_t *   tris)
+{
+pixman_trapezoid_t *trapezoids;
+int i;
+
+if (n_tris <= 0)
+   return;
+
+trapezoids = malloc (2 * n_tris * sizeof (pixman_trapezoid_t));
+if (!trapezoids)
+   return;
+
+for (i = 0; i < n_tris; ++i)
+   triangle_to_trapezoids (&(tris[i]), trapezoids + 2 * i);
+
+pixman_composite_trapezoids (op, src, dst, mask_format,
+x_src, y_src, x_dst, y_dst,
+n_tris * 2, trapezoids);
+   
+free (trapezoids);
+}
diff --git a/pixman/pixman.h b/pixman/pixman.h
index 52ab8a5..7d28e78 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -868,6 +868,7 @@ typedef struct pixman_edge pixman_edge_t;
 typedef struct pixman_trapezoid pixman_trapezoid_t;
 typedef struct pixman_trap pixman_trap_t;
 typedef struct pixman_span_fix pixman_span_fix_t;
+typedef struct pixman_triangle pixman_triangle_t;
 
 /*
  * An edge structure.  This represents a single polygon edge
@@ -895,6 +896,10 @@ struct pixman_trapezoid
 pixman_line_fixed_tleft, right;
 };
 
+struct

[Pixman] [PATCH 5/6] Optimize adding opaque trapezoids onto a8 destination.

2011-02-11 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

When the source is opaque and the destination is alpha only, we can
avoid the temporary mask and just add the trapezoids directly.
---
 pixman/pixman-trap.c |  133 -
 1 files changed, 76 insertions(+), 57 deletions(-)

diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c
index 2675773..adf822c 100644
--- a/pixman/pixman-trap.c
+++ b/pixman/pixman-trap.c
@@ -399,10 +399,7 @@ pixman_composite_trapezoids (pixman_op_t   op,
 intn_traps,
 const pixman_trapezoid_t * traps)
 {
-pixman_image_t *tmp;
-pixman_box32_t box;
 int i;
-int x_rel, y_rel;
 
 if (n_traps = 0)
return;
@@ -410,68 +407,90 @@ pixman_composite_trapezoids (pixman_op_t  op,
 _pixman_image_validate (src);
 _pixman_image_validate (dst);
 
-box.x1 = INT32_MAX;
-box.y1 = INT32_MAX;
-box.x2 = INT32_MIN;
-box.y2 = INT32_MIN;
-
-for (i = 0; i  n_traps; ++i)
+if (op == PIXMAN_OP_ADD &&
+   (src->common.flags & FAST_PATH_IS_OPAQUE)   &&
+   (mask_format == dst->common.extended_format_code)   &&
+   !(dst->common.have_clip_region))
 {
-   const pixman_trapezoid_t *trap = (traps[i]);
-   int y1, y2;
-
-   if (!pixman_trapezoid_valid (trap))
-   continue;
+   for (i = 0; i  n_traps; ++i)
+   {
+   const pixman_trapezoid_t *trap = (traps[i]);
+   
+   if (!pixman_trapezoid_valid (trap))
+   continue;
+   
+   pixman_rasterize_trapezoid (dst, trap, 0, 0);
+   }
+}
+else
+{
+   pixman_image_t *tmp;
+   pixman_box32_t box;
+   int x_rel, y_rel;

-   y1 = pixman_fixed_to_int (trap-top);
-   if (y1  box.y1)
-   box.y1 = y1;
+   box.x1 = INT32_MAX;
+   box.y1 = INT32_MAX;
+   box.x2 = INT32_MIN;
+   box.y2 = INT32_MIN;

-   y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap-bottom));
-   if (y2  box.y2)
-   box.y2 = y2;
-
+   for (i = 0; i  n_traps; ++i)
+   {
+   const pixman_trapezoid_t *trap = (traps[i]);
+   int y1, y2;
+   
+   if (!pixman_trapezoid_valid (trap))
+   continue;
+   
+   y1 = pixman_fixed_to_int (trap-top);
+   if (y1  box.y1)
+   box.y1 = y1;
+   
+   y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap-bottom));
+   if (y2  box.y2)
+   box.y2 = y2;
+   
 #define EXTEND_MIN(x)  \
-   if (pixman_fixed_to_int ((x))  box.x1) \
-   box.x1 = pixman_fixed_to_int ((x));
+   if (pixman_fixed_to_int ((x))  box.x1) \
+   box.x1 = pixman_fixed_to_int ((x));
 #define EXTEND_MAX(x)  \
-   if (pixman_fixed_to_int (pixman_fixed_ceil ((x)))  box.x2) \
-   box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x)));
-
+   if (pixman_fixed_to_int (pixman_fixed_ceil ((x)))  box.x2) \
+   box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x)));
+   
 #define EXTEND(x)  \
-   EXTEND_MIN(x);  \
-   EXTEND_MAX(x);
-
-   EXTEND(trap-left.p1.x);
-   EXTEND(trap-left.p2.x);
-   EXTEND(trap-right.p1.x);
-   EXTEND(trap-right.p2.x);
-}
-
-if (box.x1 = box.x2 || box.y1 = box.y2)
-   return;
-
-tmp = pixman_image_create_bits (
-   mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1);
+   EXTEND_MIN(x);  \
+   EXTEND_MAX(x);
+   
+   EXTEND(trap-left.p1.x);
+   EXTEND(trap-left.p2.x);
+   EXTEND(trap-right.p1.x);
+   EXTEND(trap-right.p2.x);
+   }

-for (i = 0; i  n_traps; ++i)
-{
-   const pixman_trapezoid_t *trap = (traps[i]);
-
-   if (!pixman_trapezoid_valid (trap))
-   continue;
-
-   pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1);
+   if (box.x1 = box.x2 || box.y1 = box.y2)
+   return;
+   
+   tmp = pixman_image_create_bits (
+   mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1);
+   
+   for (i = 0; i  n_traps; ++i)
+   {
+   const pixman_trapezoid_t *trap = (traps[i]);
+   
+   if (!pixman_trapezoid_valid (trap))
+   continue;
+   
+   pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1);
+   }
+   
+   x_rel = box.x1 + x_src - x_dst;
+   y_rel = box.y1 + y_src - y_dst;
+   
+   pixman_image_composite (op, src, tmp, dst,
+   x_rel, y_rel

[Pixman] [PATCH 6/6] Add new public function pixman_add_triangles()

2011-02-11 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

This allows some more code to be deleted from the X server. The
implementation consists of converting to trapezoids, and is shared
with pixman_composite_triangles().
---
 pixman/pixman-trap.c |   61 -
 pixman/pixman.h  |7 +-
 2 files changed, 51 insertions(+), 17 deletions(-)

diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c
index adf822c..2957a2b 100644
--- a/pixman/pixman-trap.c
+++ b/pixman/pixman-trap.c
@@ -595,6 +595,25 @@ triangle_to_trapezoids (const pixman_triangle_t *tri, 
pixman_trapezoid_t *traps)
 }
 }
 
+static pixman_trapezoid_t *
+convert_triangles (int n_tris, const pixman_triangle_t *tris)
+{
+pixman_trapezoid_t *traps;
+int i;
+
+if (n_tris <= 0)
+   return NULL;
+
+traps = pixman_malloc_ab (n_tris, 2 * sizeof (pixman_trapezoid_t));
+if (!traps)
+   return NULL;
+
+for (i = 0; i < n_tris; ++i)
+   triangle_to_trapezoids (&(tris[i]), traps + 2 * i);
+
+return traps;
+}
+
 PIXMAN_EXPORT void
 pixman_composite_triangles (pixman_op_top,
pixman_image_t *src,
@@ -607,22 +626,32 @@ pixman_composite_triangles (pixman_op_t   
op,
int n_tris,
const pixman_triangle_t *   tris)
 {
-pixman_trapezoid_t *trapezoids;
-int i;
+pixman_trapezoid_t *traps;
 
-if (n_tris = 0)
-   return;
-
-trapezoids = malloc (2 * n_tris * sizeof (pixman_trapezoid_t));
-if (!trapezoids)
-   return;
+if ((traps = convert_triangles (n_tris, tris)))
+{
+   pixman_composite_trapezoids (op, src, dst, mask_format,
+x_src, y_src, x_dst, y_dst,
+n_tris * 2, traps);
+   
+   free (traps);
+}
+}
 
-for (i = 0; i  n_tris; ++i)
-   triangle_to_trapezoids ((tris[i]), trapezoids + 2 * i);
-
-pixman_composite_trapezoids (op, src, dst, mask_format,
-x_src, y_src, x_dst, y_dst,
-n_tris * 2, trapezoids);
-   
-free (trapezoids);
+PIXMAN_EXPORT void
+pixman_add_triangles (pixman_image_t  *image,
+ int32_t  x_off,
+ int32_t  y_off,
+ int  n_tris,
+ const pixman_triangle_t *tris)
+{
+pixman_trapezoid_t *traps;
+
+if ((traps = convert_triangles (n_tris, tris)))
+{
+   pixman_add_trapezoids (image, x_off, y_off,
+  n_tris * 2, traps);
+
+   free (traps);
+}
 }
diff --git a/pixman/pixman.h b/pixman/pixman.h
index 7d28e78..1305bc1 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -975,7 +975,12 @@ void  pixman_composite_triangles (pixman_op_t  
   op,
  int  y_dst,
  int  n_tris,
  const pixman_triangle_t *tris);
-
+void pixman_add_triangles   (pixman_image_t  *image,
+ int32_t  x_off,
+ int32_t  y_off,
+ int  n_tris,
+ const pixman_triangle_t *tris);
+
 PIXMAN_END_DECLS
 
 #endif /* PIXMAN_H__ */
-- 
1.7.3.1

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 2/3] Move miTriangles to fb as fbTriangles().

2011-02-11 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

The fb version simply calls the new pixman_composite_triangles(). This
allows us to get rid of miCreateAlphaPicture().

Signed-off-by: Søren Sandmann s...@redhat.com
---
 fb/fbpict.c |1 +
 fb/fbpict.h |   10 +
 fb/fbtrap.c |  109 +++
 render/mipict.c |2 +-
 render/mipict.h |   17 -
 render/mitrap.c |   49 -
 render/mitri.c  |   59 --
 7 files changed, 90 insertions(+), 157 deletions(-)

diff --git a/fb/fbpict.c b/fb/fbpict.c
index 6e66db8..312f3df 100644
--- a/fb/fbpict.c
+++ b/fb/fbpict.c
@@ -367,6 +367,7 @@ fbPictureInit (ScreenPtr pScreen, PictFormatPtr formats, 
int nformats)
 ps->Trapezoids = fbTrapezoids;
 ps->AddTraps = fbAddTraps;
 ps->AddTriangles = fbAddTriangles;
+ps->Triangles = fbTriangles;
 
 return TRUE;
 }
diff --git a/fb/fbpict.h b/fb/fbpict.h
index 03d2665..b880ebb 100644
--- a/fb/fbpict.h
+++ b/fb/fbpict.h
@@ -75,4 +75,14 @@ fbTrapezoids (CARD8  op,
  int   ntrap,
  xTrapezoid*traps);
 
+extern _X_EXPORT void
+fbTriangles (CARD8 op,
+PicturePtr pSrc,
+PicturePtr pDst,
+PictFormatPtr  maskFormat,
+INT16  xSrc,
+INT16  ySrc,
+intntris,
+xTriangle *tris);
+
 #endif /* _FBPICT_H_ */
diff --git a/fb/fbtrap.c b/fb/fbtrap.c
index 687de55..3b197b4 100644
--- a/fb/fbtrap.c
+++ b/fb/fbtrap.c
@@ -157,51 +157,56 @@ fbAddTriangles (PicturePtr  pPicture,
 }
 }
 
+typedef void (* CompositeShapesFunc) (pixman_op_t op,
+ pixman_image_t *src,
+ pixman_image_t *dst,
+ pixman_format_code_t mask_format,
+ int x_src, int y_src,
+ int x_dst, int y_dst,
+ int n_shapes, const uint8_t *shapes);
 
-void
-fbTrapezoids (CARD8op,
- PicturePtrpSrc,
- PicturePtrpDst,
- PictFormatPtr maskFormat,
- INT16 xSrc,
- INT16 ySrc,
- int   ntrap,
- xTrapezoid*traps)
+static void
+fbShapes (CompositeShapesFunc  composite,
+ pixman_op_t   op,
+ PicturePtrpSrc,
+ PicturePtrpDst,
+ PictFormatPtr maskFormat,
+ int16_t   xSrc,
+ int16_t   ySrc,
+ int16_t   xDst,
+ int16_t   yDst,
+ int   nshapes,
+ int   shape_size,
+ const uint8_t *   shapes)
 {
 pixman_image_t *src, *dst;
 int src_xoff, src_yoff;
 int dst_xoff, dst_yoff;
 
-if (ntrap == 0)
-   return;
-
 src = image_from_pict (pSrc, FALSE, &src_xoff, &src_yoff);
 dst = image_from_pict (pDst, TRUE, &dst_xoff, &dst_yoff);
 
 if (src && dst)
 {
pixman_format_code_t format;
-   int x_dst, y_dst;
-   int i;
 
-   x_dst = traps[0].left.p1.x  16;
-   y_dst = traps[0].left.p1.y  16;
-   
if (!maskFormat)
{
+   int i;
+
if (pDst-polyEdge == PolyEdgeSharp)
format = PIXMAN_a1;
else
format = PIXMAN_a8;
 
-   for (i = 0; i  ntrap; ++i)
+   for (i = 0; i  nshapes; ++i)
{
-   pixman_composite_trapezoids (op, src, dst, format,
-xSrc + src_xoff,
-ySrc + src_yoff,
-x_dst + dst_xoff,
-y_dst + dst_yoff,
-1, (pixman_trapezoid_t *)traps++);
+   composite (op, src, dst, format,
+  xSrc + src_xoff,
+  ySrc + src_yoff,
+  xDst + dst_xoff,
+  yDst + dst_yoff,
+  1, shapes + i * shape_size);
}
}
else
@@ -221,16 +226,58 @@ fbTrapezoids (CARD8   op,
format = PIXMAN_a8;
break;
}
-
-   pixman_composite_trapezoids (op, src, dst, format,
-xSrc + src_xoff,
-ySrc + src_yoff,
-x_dst + dst_xoff,
-y_dst + dst_yoff,
-ntrap, (pixman_trapezoid_t *)traps);
+   
+   composite (op, src, dst, format,
+  xSrc + src_xoff,
+  ySrc + src_yoff

[Pixman] [PATCH 3/3] Implement fbAddTriangles() in terms of pixman_add_triangles().

2011-02-11 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

This allows the remaining triangle-to-trap conversion code to be
deleted.

Signed-off-by: Søren Sandmann s...@redhat.com
---
 fb/fbtrap.c |   91 ++-
 1 files changed, 9 insertions(+), 82 deletions(-)

diff --git a/fb/fbtrap.c b/fb/fbtrap.c
index 3b197b4..2554fcc 100644
--- a/fb/fbtrap.c
+++ b/fb/fbtrap.c
@@ -65,32 +65,6 @@ fbRasterizeTrapezoid (PicturePtrpPicture,
 free_pixman_pict (pPicture, image);
 }
 
-static int
-_GreaterY (xPointFixed *a, xPointFixed *b)
-{
-if (a-y == b-y)
-   return a-x  b-x;
-return a-y  b-y;
-}
-
-/*
- * Note that the definition of this function is a bit odd because
- * of the X coordinate space (y increasing downwards).
- */
-static int
-_Clockwise (xPointFixed *ref, xPointFixed *a, xPointFixed *b)
-{
-xPointFixedad, bd;
-
-ad.x = a-x - ref-x;
-ad.y = a-y - ref-y;
-bd.x = b-x - ref-x;
-bd.y = b-y - ref-y;
-
-return ((xFixed_32_32) bd.y * ad.x - (xFixed_32_32) ad.y * bd.x)  0;
-}
-
-/* FIXME -- this could be made more efficient */
 void
 fbAddTriangles (PicturePtr  pPicture,
INT16   x_off,
@@ -98,63 +72,16 @@ fbAddTriangles (PicturePtr  pPicture,
int ntri,
xTriangle *tris)
 {
-xPointFixed  *top, *left, *right, *tmp;
-xTrapezoid trap;
+int image_xoff, image_yoff;
+pixman_image_t *image =
+   image_from_pict (pPicture, FALSE, &image_xoff, &image_yoff);
 
-for (; ntri; ntri--, tris++)
-{
-   top = tris-p1;
-   left = tris-p2;
-   right = tris-p3;
-   if (_GreaterY (top, left)) {
-   tmp = left; left = top; top = tmp;
-   }
-   if (_GreaterY (top, right)) {
-   tmp = right; right = top; top = tmp;
-   }
-   if (_Clockwise (top, right, left)) {
-   tmp = right; right = left; left = tmp;
-   }
-   
-   /*
-* Two cases:
-*
-*  +   +
-* / \ / \
-*/   \   /   \
-*   / + + \
-*  /--   --\
-* /   --   --   \
-*/ ---   --- \
-*   +-- --+
-*/
-   
-   trap.top = top-y;
-   trap.left.p1 = *top;
-   trap.left.p2 = *left;
-   trap.right.p1 = *top;
-   trap.right.p2 = *right;
-   if (right-y  left-y)
-   trap.bottom = right-y;
-   else
-   trap.bottom = left-y;
-   fbRasterizeTrapezoid (pPicture, trap, x_off, y_off);
-   if (right-y  left-y)
-   {
-   trap.top = right-y;
-   trap.bottom = left-y;
-   trap.right.p1 = *right;
-   trap.right.p2 = *left;
-   }
-   else
-   {
-   trap.top = left-y;
-   trap.bottom = right-y;
-   trap.left.p1 = *left;
-   trap.left.p2 = *right;
-   }
-   fbRasterizeTrapezoid (pPicture, trap, x_off, y_off);
-}
+if (!image)
+   return;
+
+pixman_add_triangles (image, x_off, y_off, ntri, (pixman_triangle_t 
*)tris);
+
+free_pixman_pict (pPicture, image);
 }
 
 typedef void (* CompositeShapesFunc) (pixman_op_t op,
-- 
1.7.3.1

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH 1/3] Add pixman_composite_trapezoids().

2011-01-12 Thread Søren Sandmann Pedersen
This function is an implementation of the X server request
Trapezoids. That request is what the X backend of cairo is using all
the time; by moving it into pixman we can hopefully make it faster.
---
 pixman/pixman-trap.c |   87 ++
 pixman/pixman.h  |   12 ++-
 2 files changed, 98 insertions(+), 1 deletions(-)

diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c
index 8353992..a924326 100644
--- a/pixman/pixman-trap.c
+++ b/pixman/pixman-trap.c
@@ -390,3 +390,90 @@ pixman_rasterize_trapezoid (pixman_image_t *  
image,
pixman_rasterize_edges (image, l, r, t, b);
 }
 }
+
+PIXMAN_EXPORT void
+pixman_composite_trapezoids (pixman_op_t   op,
+pixman_image_t *   src,
+pixman_image_t *   dst,
+pixman_format_code_t   mask_format,
+intx_src,
+inty_src,
+intx_dst,
+inty_dst,
+intn_traps,
+pixman_trapezoid_t *   traps)
+{
+pixman_image_t *tmp;
+pixman_box32_t box;
+int i;
+int x_rel, y_rel;
+
+if (n_traps <= 0)
+   return;
+
+_pixman_image_validate (src);
+_pixman_image_validate (dst);
+
+box.x1 = INT32_MAX;
+box.y1 = INT32_MAX;
+box.x2 = INT32_MIN;
+box.y2 = INT32_MIN;
+
+for (i = 0; i < n_traps; ++i)
+{
+   pixman_trapezoid_t *trap = &(traps[i]);
+   int y1, y2;
+
+   if (!pixman_trapezoid_valid (trap))
+   continue;
+   
+   y1 = pixman_fixed_to_int (trap->top);
+   if (y1 < box.y1)
+   box.y1 = y1;
+   
+   y2 = pixman_fixed_to_int (pixman_fixed_ceil (trap->bottom));
+   if (y2 > box.y2)
+   box.y2 = y2;
+
+#define EXTEND_MIN(x)  \
+   if (pixman_fixed_to_int ((x)) < box.x1) \
+   box.x1 = pixman_fixed_to_int ((x));
+#define EXTEND_MAX(x)  \
+   if (pixman_fixed_to_int (pixman_fixed_ceil ((x))) > box.x2) \
+   box.x2 = pixman_fixed_to_int (pixman_fixed_ceil ((x)));
+
+#define EXTEND(x)  \
+   EXTEND_MIN(x);  \
+   EXTEND_MAX(x);
+
+   EXTEND(trap->left.p1.x);
+   EXTEND(trap->left.p2.x);
+   EXTEND(trap->right.p1.x);
+   EXTEND(trap->right.p2.x);
+}
+
+if (box.x1 >= box.x2 || box.y1 >= box.y2)
+   return;
+
+tmp = pixman_image_create_bits (
+   mask_format, box.x2 - box.x1, box.y2 - box.y1, NULL, -1);
+   
+for (i = 0; i < n_traps; ++i)
+{
+   pixman_trapezoid_t *trap = &(traps[i]);
+
+   if (!pixman_trapezoid_valid (trap))
+   continue;
+
+   pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1);
+}
+
+x_rel = box.x1 + x_src - x_dst;
+y_rel = box.y1 + y_src - y_dst;
+
+pixman_image_composite (op, src, tmp, dst,
+   x_rel, y_rel, 0, 0, box.x1, box.y1,
+   box.x2 - box.x1, box.y2 - box.y1);
+
+pixman_image_unref (tmp);
+}
diff --git a/pixman/pixman.h b/pixman/pixman.h
index b95d0e9..c2f7da3 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -950,7 +950,17 @@ void   pixman_rasterize_trapezoid  (pixman_image_t 
   *image,
const pixman_trapezoid_t  *trap,
int x_off,
int y_off);
-
+void  pixman_composite_trapezoids (pixman_op_t op,
+  pixman_image_t *src,
+  pixman_image_t *dst,
+  pixman_format_code_t
mask_format,
+  int x_src,
+  int y_src,
+  int x_dst,
+  int y_dst,
+  int n_traps,
+  pixman_trapezoid_t *traps);
+
 PIXMAN_END_DECLS
 
 #endif /* PIXMAN_H__ */
-- 
1.7.3.1

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH] Only try to compute the FAST_SAMPLES_COVER_CLIP for bits images

2010-08-16 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

It doesn't make sense in other cases, and the computation would make
use of image->bits.{width,height} which lead to uninitialized memory
accesses when the image wasn't of type BITS.
---
 pixman/pixman.c |   17 ++---
 1 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/pixman/pixman.c b/pixman/pixman.c
index 55c5981..ddd4935 100644
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -787,14 +787,17 @@ analyze_extent (pixman_image_t *image, int x, int y,
 if (!compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, 
height))
return FALSE;
 
-/* Check whether the non-expanded, transformed extent is entirely within
- * the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is.
- */
-ex = *extents;
-if (compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, 
height))
+if (image->type == BITS)
 {
-   if (ex.x1 >= 0 && ex.y1 >= 0 && ex.x2 <= image->bits.width && ex.y2 <= 
image->bits.height)
-   *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
+   /* Check whether the non-expanded, transformed extent is entirely within
+* the source image, and set the FAST_PATH_SAMPLES_COVER_CLIP if it is.
+*/
+   ex = *extents;
+   if (compute_sample_extents (transform, &ex, x, y, x_off, y_off, width, 
height))
+   {
+   if (ex.x1 >= 0 && ex.y1 >= 0 && ex.x2 <= image->bits.width && ex.y2 
<= image->bits.height)
+   *flags |= FAST_PATH_SAMPLES_COVER_CLIP;
+   }
 }
 
 return TRUE;
-- 
1.7.1.1

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


Re: [Pixman] [PATCH] ARM: NEON: don't hit general path for r5g6b5 OVER r5g6b5 operation

2010-08-09 Thread Søren Sandmann Pedersen
> > Would it be possible instead to add a new flag OPAQUE_SAMPLES
> > that would be set whenever the image format is opaque, and then
> > use it along with SAMPLES_COVER_CLIP to add the OPAQUE flag before
> > strength reducing the operator?
> >
> > That would help all the backends, including the general one, and
> > all the opaque image formats.
>
> OK, I'll try to see what can be done. I'm a bit worried about the
> applications using pixman in such a way that the pixels outside of
> the source image are also fetched and whether this will be handled
> efficiently with the new flag.

I think the clip analysis is finally to the point that this can be
done without introducing new bugs. See the following patch, and please
let me know if it doesn't work for the r5g6b5 OVER r5g6b5 case.


Thanks,
Soren



___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH] Introduce new FAST_PATH_SAMPLES_OPAQUE flag

2010-08-09 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

This flag is set whenever the pixels of a bits image don't have an
alpha channel. Together with FAST_PATH_SAMPLES_COVER_CLIP it implies
that the image effectively is opaque, so we can do operator reductions
such as OVER->SRC.
---
 pixman/pixman-image.c   |   10 ++
 pixman/pixman-private.h |1 +
 pixman/pixman.c |   11 +++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 0b8bb3c..0e3601f 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -406,12 +406,14 @@ compute_image_info (pixman_image_t *image)
}
}
 
-   if (image->common.repeat != PIXMAN_REPEAT_NONE &&
-   !PIXMAN_FORMAT_A (image->bits.format) &&
+   if (!PIXMAN_FORMAT_A (image->bits.format) &&
PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_GRAY &&
PIXMAN_FORMAT_TYPE (image->bits.format) != PIXMAN_TYPE_COLOR)
{
-   flags |= FAST_PATH_IS_OPAQUE;
+   flags |= FAST_PATH_SAMPLES_OPAQUE;
+
+   if (image->common.repeat != PIXMAN_REPEAT_NONE)
+   flags |= FAST_PATH_IS_OPAQUE;
}
 
if (source_image_needs_out_of_bounds_workaround (&image->bits))
@@ -459,7 +461,7 @@ compute_image_info (pixman_image_t *image)
image->common.filter == PIXMAN_FILTER_CONVOLUTION   ||
image->common.component_alpha)
 {
-   flags &= ~FAST_PATH_IS_OPAQUE;
+   flags &= ~(FAST_PATH_IS_OPAQUE | FAST_PATH_SAMPLES_OPAQUE);
 }
 
 image->common.flags = flags;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index c4e6bb8..dedea0b 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -571,6 +571,7 @@ _pixman_choose_implementation (void);
 #define FAST_PATH_BILINEAR_FILTER  (1 << 20)
 #define FAST_PATH_NO_NORMAL_REPEAT (1 << 21)
 #define FAST_PATH_HAS_TRANSFORM (1 << 22)
+#define FAST_PATH_SAMPLES_OPAQUE   (1 << 23)
 
 #define FAST_PATH_PAD_REPEAT   \
 (FAST_PATH_NO_NONE_REPEAT  |   \
diff --git a/pixman/pixman.c b/pixman/pixman.c
index e79e135..55c5981 100644
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -884,6 +884,17 @@ do_composite (pixman_op_t op,
 if (!analyze_extent (mask, dest_x - mask_x, dest_y - mask_y, extents, 
&mask_flags))
goto out;
 
+/* If the clip is within the source samples, and the samples are opaque,
+ * then the source is effectively opaque.
+ */
+#define BOTH (FAST_PATH_SAMPLES_OPAQUE | FAST_PATH_SAMPLES_COVER_CLIP)
+
+if ((src_flags & BOTH) == BOTH)
+   src_flags |= FAST_PATH_IS_OPAQUE;
+
+if ((mask_flags & BOTH) == BOTH)
+   mask_flags |= FAST_PATH_IS_OPAQUE;
+
 /*
  * Check if we can replace our operator by a simpler one
  * if the src or dest are opaque. The output operator should be
-- 
1.7.1.1

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


Re: [Pixman] FAST_PATH_SAMPLES_COVER_CLIP flag fast_composite_scaled_nearest_*

2010-07-29 Thread Søren Sandmann Pedersen
Siarhei Siamashka siarhei.siamas...@gmail.com writes:

> Overall looks like a good fix, a few comments below.

Thanks for the comments. I'll send a new patch with a long commit log
as a follow-up to this message (provided I can make it work with
git-send-email), but I'll reply to some specifics below.

The main difference in the new patch is that the 16BIT_SAFE flag is
gone entirely, and instead pixman will simply bail out in that case.

> > +if (!transform)
> > +{
> > +box->x1 = pixman_fixed_to_int (pixman_int_to_fixed (box->x1) + 
> > pixman_fixed_1 / 2 + x_off);
> > +box->y1 = pixman_fixed_to_int (pixman_int_to_fixed (box->y1) + 
> > pixman_fixed_1 / 2 + y_off);
> > +box->x2 = pixman_fixed_to_int (pixman_int_to_fixed (box->x2) - 
> > pixman_fixed_1 / 2 + x_off) + width + 1;
> > +box->y2 = pixman_fixed_to_int (pixman_int_to_fixed (box->y2) - 
> > pixman_fixed_1 / 2 + y_off) + height + 1;
> > +return TRUE;

> That's an interesting case. If I understand it correctly, without any 
> transform
> at all, both NEAREST and BILINEAR filters should introduce no changes in the
> bounds. This is fine for NEAREST, but not for BILINEAR filter which gets the
> bounds expanded by 1.
>
> The bilinear filter is a bit special, because the sampling of extra rightmost
> pixels may technically happen according to formulas, but they get multiplied 
> by
> zero anyway, so make no difference and are ignored by non-transformed fast
> paths.

The non-transformed fast paths certainly make this assumption, but the
general path will actually read these pixels, so we still have to
account for them. If we start seeing a lot of cases where images have
a bilinear filter, an identity transform, and we end up hitting the
general case, we could look into it, but I don't think this is a very
common case.

> > +v[0].vector[0] = pixman_int_to_fixed (box->x1) + pixman_fixed_1 / 2;
> > +v[0].vector[1] = pixman_int_to_fixed (box->y1) + pixman_fixed_1 / 2;
> > +v[0].vector[2] = pixman_int_to_fixed (1);
> > +
> > +v[1].vector[0] = pixman_int_to_fixed (box->x2) - pixman_fixed_1 / 2;
> > +v[1].vector[1] = pixman_int_to_fixed (box->y1) + pixman_fixed_1 / 2;
> > +v[1].vector[2] = pixman_int_to_fixed (1);
> > +
> > +v[2].vector[0] = pixman_int_to_fixed (box->x2) - pixman_fixed_1 / 2;
> > +v[2].vector[1] = pixman_int_to_fixed (box->y2) - pixman_fixed_1 / 2;
> > +v[2].vector[2] = pixman_int_to_fixed (1);
> > +
> > +v[3].vector[0] = pixman_int_to_fixed (box->x1) + pixman_fixed_1 / 2;
> > +v[3].vector[1] = pixman_int_to_fixed (box->y2) - pixman_fixed_1 / 2;
> > +v[3].vector[2] = pixman_int_to_fixed (1);
> > +
> > +for (i = 0; i < 4; ++i)
> > +{
> > +   if (!pixman_transform_point (transform, &v[i]))
> > +   return FALSE;

> Still what about the subtle differences between pixman_transform_point() and
> pixman_transform_point_3d()? They are not exactly the same. Transformed
> fetchers and fast path functions are all using pixman_transform_point_3d().

It's true that they are not exactly the same. In the new patch, I have
introduced a bit of slack in the computation of the source area (8 *
pixman_fixed_e). This is hopefully enough to account for any rounding
differences. The common special case where a NEAREST image is being
scaled so that the source area exactly matches the image will still
work because all we need in that case is for the computed bounds to be
within [0, 0.5] and 8 * pixman_fixed_e is not even close to that.

It does mean of course that if you scale an image very slightly down,
the new code might decide to not set the FAST_PATH_SAMPLES_COVER_CLIP
flag, but I'm not that concerned about this.

I'd like to avoid relying on the two transformation functions being
exactly the same because I think it should be considered legitimate to
write compositing functions that transform in a different way if
that's more efficient, even if it means slightly different results.

> Another (minor) issue is that pixman_transform_point() has division 
> operations,
> which may be not very good for performance.

We can't really avoid the divisions if the flags are to be set
correctly when the transformation is projective. I realize we don't
actually make use of the flags in that case, but it's still unpleasant
to rely on the knowledge that there aren't any projective fast paths,
in a place that should only have knowledge about a particular image.

So basically, I think the flags should always be computed correctly,
even if we know that an incorrect computation won't have any ill
effects.

> > +
> > +   x1 = pixman_fixed_to_int (v[i].vector[0] + x_off);
> > +   y1 = pixman_fixed_to_int (v[i].vector[1] + y_off);
> > +   x2 = x1 + width + 1;
> > +   y2 = y1 + height + 1;

> A minor performance improvement is possible. Addition of (width + 1) and
> (height + 1) to x2/y2 is done inside of the loop on each iteration here, 8
> times total. If done after the loop just before returning, it would be 2
> additions only.

The new patch is quite different, but it does 

[Pixman] [PATCH 1/2] Extend scaling-crash-test in various ways

2010-07-29 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

This extends scaling-crash-test to test some more things:

- All combinations of NEAREST/BILINEAR/CONVOLUTION filters and
  NORMAL/PAD/REFLECT repeat modes.

- Tests various scale factors very close to 1/7th such that the source
  area is very close to edge of the source image.

- The same things, only with scale factors very close to 1/32767th.

- Enables the commented-out tests for accessing memory outside the
  source buffer.

Also there is now a border around the source buffer which has a
different color than the source buffer itself so that if we sample
outside, it will show up.

Finally, the test now allows the destination buffer to not be changed
at all. This allows pixman to simply bail out in cases where the
transformation is too strange.
---
 test/scaling-crash-test.c |  193 -
 1 files changed, 139 insertions(+), 54 deletions(-)

diff --git a/test/scaling-crash-test.c b/test/scaling-crash-test.c
index 4ab01e3..7a94115 100644
--- a/test/scaling-crash-test.c
+++ b/test/scaling-crash-test.c
@@ -8,117 +8,202 @@
  * We have a source image filled with solid color, set NORMAL or PAD repeat,
  * and some transform which results in nearest neighbour scaling.
  *
- * The expected result is the destination image filled with this solid
- * color.
+ * The expected result is either that the destination image filled with this 
solid
+ * color or, if the transformation is such that we can't composite anything at
+ * all, that nothing has changed in the destination.
+ *
+ * The surrounding memory of the source image is a different solid color so 
that
+ * we are sure to get failures if we access it.
  */
 static int
-do_test (int32_t   dst_size,
-int32_tsrc_size,
-int32_tsrc_offs,
-int32_tscale_factor,
-pixman_repeat_trepeat)
+run_test (int32_t  dst_width,
+ int32_t   dst_height,
+ int32_t   src_width,
+ int32_t   src_height,
+ int32_t   src_x,
+ int32_t   src_y,
+ int32_t   scale_x,
+ int32_t   scale_y,
+ pixman_filter_t   filter,
+ pixman_repeat_t   repeat)
 {
-int i;
 pixman_image_t *   src_img;
 pixman_image_t *   dst_img;
 pixman_transform_t transform;
 uint32_t * srcbuf;
 uint32_t * dstbuf;
+pixman_box32_t box = { 0, 0, src_width, src_height };
+pixman_color_t color_cc = { 0xcccc, 0xcccc, 0xcccc, 0xcccc };
+int result;
+int i;
 
-srcbuf = (uint32_t *)malloc (src_size * 4);
-dstbuf = (uint32_t *)malloc (dst_size * 4);
+static const pixman_fixed_t kernel[] =
+{
+#define D(f)   (pixman_double_to_fixed (f) + 0x0001)
+
+   pixman_int_to_fixed (5),
+   pixman_int_to_fixed (5),
+   D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0),
+   D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0),
+   D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0),
+   D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0),
+   D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0), D(1/25.0)
+};
+
+result = 0;
 
-/* horizontal test */
-memset (srcbuf, 0xCC, src_size * 4);
-memset (dstbuf, 0x33, dst_size * 4);
+srcbuf = (uint32_t *)malloc ((src_width + 10) * (src_height + 10) * 4);
+dstbuf = (uint32_t *)malloc (dst_width * dst_height * 4);
+
+memset (srcbuf, 0x88, src_width * src_height * 4);
+memset (dstbuf, 0x33, dst_width * dst_height * 4);
 
 src_img = pixman_image_create_bits (
-PIXMAN_a8r8g8b8, src_size, 1, srcbuf, src_size * 4);
+PIXMAN_a8r8g8b8, src_width, src_height,
+   srcbuf + (src_width + 10) * 5 + 5, (src_width + 10) * 4);
+
+pixman_image_fill_boxes (PIXMAN_OP_SRC, src_img, &color_cc, 1, &box);
+
 dst_img = pixman_image_create_bits (
-PIXMAN_a8r8g8b8, dst_size, 1, dstbuf, dst_size * 4);
+PIXMAN_a8r8g8b8, dst_width, dst_height, dstbuf, dst_width * 4);
 
-pixman_transform_init_scale (&transform, scale_factor, 65536);
+pixman_transform_init_scale (&transform, scale_x, scale_y);
 pixman_image_set_transform (src_img, &transform);
 pixman_image_set_repeat (src_img, repeat);
-pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0);
+if (filter == PIXMAN_FILTER_CONVOLUTION)
+   pixman_image_set_filter (src_img, filter, kernel, 27);
+else
+   pixman_image_set_filter (src_img, filter, NULL, 0);
 
 pixman_image_composite (PIXMAN_OP_SRC, src_img, NULL, dst_img,
-src_offs, 0, 0, 0, 0, 0, dst_size, 1);
+src_x, src_y, 0, 0, 0, 0, dst_width, dst_height);
 
 pixman_image_unref (src_img);
 pixman_image_unref (dst_img);
 
-for (i = 0; i < dst_size; i++)
+for (i = 0; i < dst_width

[Pixman] [PATCH] Cache the implementation along with the fast paths.

2010-07-10 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

When calling a fast path, we need to pass the corresponding
implementation since it might contain information necessary to run the
fast path.
---
 pixman/pixman.c |   26 --
 1 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/pixman/pixman.c b/pixman/pixman.c
index b76143f..4dfd3ae 100644
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -563,7 +563,11 @@ compute_src_extents_flags (pixman_image_t *image,
 
 typedef struct
 {
-pixman_fast_path_t cache [N_CACHED_FAST_PATHS];
+struct
+{
+   pixman_implementation_t *   imp;
+   pixman_fast_path_t  fast_path;
+} cache [N_CACHED_FAST_PATHS];
 } cache_t;
 
 PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
@@ -667,7 +671,7 @@ do_composite (pixman_implementation_t *imp,
 
 for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
 {
-   info = &(cache->cache[i]);
+   info = &(cache->cache[i].fast_path);
 
/* Note that we check for equality here, not whether
 * the cached fast path matches. This is to prevent
@@ -683,6 +687,7 @@ do_composite (pixman_implementation_t *imp,
info->dest_flags == dest_flags &&
info->func)
{
+   imp = cache->cache[i].imp;
goto found;
}
 }
@@ -745,14 +750,15 @@ found:
while (i--)
cache->cache[i + 1] = cache->cache[i];
 
-   cache->cache[0].op = op;
-   cache->cache[0].src_format = src_format;
-   cache->cache[0].src_flags = src_flags;
-   cache->cache[0].mask_format = mask_format;
-   cache->cache[0].mask_flags = mask_flags;
-   cache->cache[0].dest_format = dest_format;
-   cache->cache[0].dest_flags = dest_flags;
-   cache->cache[0].func = func;
+   cache->cache[0].imp = imp;
+   cache->cache[0].fast_path.op = op;
+   cache->cache[0].fast_path.src_format = src_format;
+   cache->cache[0].fast_path.src_flags = src_flags;
+   cache->cache[0].fast_path.mask_format = mask_format;
+   cache->cache[0].fast_path.mask_flags = mask_flags;
+   cache->cache[0].fast_path.dest_format = dest_format;
+   cache->cache[0].fast_path.dest_flags = dest_flags;
+   cache->cache[0].fast_path.func = func;
 }
 
 out:
-- 
1.6.0.6

___
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman


[Pixman] [PATCH] Split the fast path caching into its own force_inline function

2010-07-10 Thread Søren Sandmann Pedersen
From: Søren Sandmann Pedersen s...@redhat.com

The do_composite() function is a lot more readable this way.
---
 pixman/pixman.c |  200 +-
 1 files changed, 107 insertions(+), 93 deletions(-)

diff --git a/pixman/pixman.c b/pixman/pixman.c
index 4dfd3ae..80a766a 100644
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -572,9 +572,105 @@ typedef struct
 
 PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
 
+static force_inline void
+lookup_composite_function (pixman_op_t op,
+  pixman_format_code_t src_format,
+  uint32_t src_flags,
+  pixman_format_code_t mask_format,
+  uint32_t mask_flags,
+  pixman_format_code_t dest_format,
+  uint32_t dest_flags,
+  pixman_implementation_t**out_imp,
+  pixman_composite_func_t *out_func)
+{
+pixman_implementation_t *imp;
+cache_t *cache;
+int i;
+
+/* Check cache for fast paths */
+cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
+
+for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
+{
+   const pixman_fast_path_t *info = &(cache->cache[i].fast_path);
+
+   /* Note that we check for equality here, not whether
+* the cached fast path matches. This is to prevent
+* us from selecting an overly general fast path
+* when a more specific one would work.
+*/
+   if (info->op == op &&
+   info->src_format == src_format &&
+   info->mask_format == mask_format &&
+   info->dest_format == dest_format &&
+   info->src_flags == src_flags &&
+   info->mask_flags == mask_flags &&
+   info->dest_flags == dest_flags &&
+   info->func)
+   {
+   *out_imp = cache->cache[i].imp;
+   *out_func = cache->cache[i].fast_path.func;
+
+   goto update_cache;
+   }
+}
+
+for (imp = get_implementation (); imp != NULL; imp = imp->delegate)
+{
+   const pixman_fast_path_t *info = imp->fast_paths;
+
+   while (info->op != PIXMAN_OP_NONE)
+   {
+   if ((info->op == op || info->op == PIXMAN_OP_any) &&
+   /* Formats */
+   ((info->src_format == src_format) ||
+(info->src_format == PIXMAN_any)) &&
+   ((info->mask_format == mask_format) ||
+(info->mask_format == PIXMAN_any)) &&
+   ((info->dest_format == dest_format) ||
+(info->dest_format == PIXMAN_any)) &&
+   /* Flags */
+   (info->src_flags & src_flags) == info->src_flags &&
+   (info->mask_flags & mask_flags) == info->mask_flags &&
+   (info->dest_flags & dest_flags) == info->dest_flags)
+   {
+   *out_imp = imp;
+   *out_func = info->func;
+
+   /* Set i to the last spot in the cache so that the
+* move-to-front code below will work
+*/
+   i = N_CACHED_FAST_PATHS - 1;
+
+   goto update_cache;
+   }
+
+   ++info;
+   }
+}
+return;
+
+update_cache:
+if (i)
+{
+   while (i--)
+   cache->cache[i + 1] = cache->cache[i];
+
+   cache->cache[0].imp = *out_imp;
+   cache->cache[0].fast_path.op = op;
+   cache->cache[0].fast_path.src_format = src_format;
+   cache->cache[0].fast_path.src_flags = src_flags;
+   cache->cache[0].fast_path.mask_format = mask_format;
+   cache->cache[0].fast_path.mask_flags = mask_flags;
+   cache->cache[0].fast_path.dest_format = dest_format;
+   cache->cache[0].fast_path.dest_flags = dest_flags;
+   cache->cache[0].fast_path.func = *out_func;
+}
+}
+
+
 static void
-do_composite (pixman_implementation_t *imp,
- pixman_op_t  op,
+do_composite (pixman_op_t op,
  pixman_image_t  *src,
  pixman_image_t  *mask,
  pixman_image_t  *dest,
@@ -598,9 +694,8 @@ do_composite (pixman_implementation_t *imp,
 uint32_t *dest_bits;
 int dest_dx, dest_dy;
 pixman_bool_t need_workaround;
-const pixman_fast_path_t *info;
-cache_t *cache;
-int i;
+pixman_implementation_t *imp;
+pixman_composite_func_t func;
 
 src_format = src->common.extended_format_code;
 src_flags = src->common.flags;
@@ -666,71 +761,12 @@ do_composite (pixman_implementation_t *imp,
 if (op == PIXMAN_OP_DST)
return;
 
-/* Check cache for fast paths */
-cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
-
-for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
-{
-   info = &(cache->cache[i].fast_path