[Pixman] [RFC] mmx: add and use expand_4xpacked565

Matt Turner Thu, 17 May 2012 13:34:35 -0700

For a pixel whose red component has one of the values below, the result of
the new code is off by one compared to expand565. Each line reads:
R5 input -> new result (expand565 result).


0x03 -> 0x19 (0x18)
0x07 -> 0x3A (0x39)
0x18 -> 0xC5 (0xC6)
0x1C -> 0xE6 (0xE7)

(Same for blue, and green has many more cases)

The new expand_4xpacked565 uses
R8 = ( R5 * 527 + 23 ) >> 6;
G8 = ( G6 * 259 + 33 ) >> 6;
B8 = ( B5 * 527 + 23 ) >> 6;

I don't suppose there's a way to tweak this so that it produces the same
results we get from expand565, is there?

---
 pixman/pixman-mmx.c |   58 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 52 insertions(+), 6 deletions(-)

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 01a2bc9..74a2ad8 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -185,6 +185,13 @@ typedef struct
     mmxdatafield mmx_565_b;
     mmxdatafield mmx_packed_565_rb;
     mmxdatafield mmx_packed_565_g;
+    mmxdatafield mmx_expand_565_r;
+    mmxdatafield mmx_expand_565_g;
+    mmxdatafield mmx_expand_565_b;
+    mmxdatafield mmx_mul_adjust_565_rb;
+    mmxdatafield mmx_mul_adjust_565_g;
+    mmxdatafield mmx_add_adjust_565_rb;
+    mmxdatafield mmx_add_adjust_565_g;
 #ifndef USE_LOONGSON_MMI
     mmxdatafield mmx_mask_0;
     mmxdatafield mmx_mask_1;
@@ -216,6 +223,13 @@ static const mmx_data_t c =
     MMXDATA_INIT (.mmx_565_b,                    0x00000000000000f8),
     MMXDATA_INIT (.mmx_packed_565_rb,            0x00f800f800f800f8),
     MMXDATA_INIT (.mmx_packed_565_g,             0x0000fc000000fc00),
+    MMXDATA_INIT (.mmx_expand_565_r,             0xf800f800f800f800),
+    MMXDATA_INIT (.mmx_expand_565_g,             0x07e007e007e007e0),
+    MMXDATA_INIT (.mmx_expand_565_b,             0x001f001f001f001f),
+    MMXDATA_INIT (.mmx_mul_adjust_565_rb,        0x020f020f020f020f),
+    MMXDATA_INIT (.mmx_mul_adjust_565_g,         0x0103010301030103),
+    MMXDATA_INIT (.mmx_add_adjust_565_rb,        0x0017001700170017),
+    MMXDATA_INIT (.mmx_add_adjust_565_g,         0x0021002100210021),
 #ifndef USE_LOONGSON_MMI
     MMXDATA_INIT (.mmx_mask_0,                   0xffffffffffff0000),
     MMXDATA_INIT (.mmx_mask_1,                   0xffffffff0000ffff),
@@ -518,6 +532,40 @@ expand565 (__m64 pixel, int pos)
     return _mm_srli_pi16 (pixel, 8);
 }
 
+void
+expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1)
+{
+       __m64 r = _mm_and_si64 (vin, MC (expand_565_r));
+       __m64 g = _mm_and_si64 (vin, MC (expand_565_g));
+       __m64 b = _mm_and_si64 (vin, MC (expand_565_b));
+
+       r = shift (r, -8 - 3);
+       g = shift (g, -3 - 2);
+
+       r = _mm_mullo_pi16 (r, MC (mul_adjust_565_rb));
+       g = _mm_mullo_pi16 (g, MC (mul_adjust_565_g));
+       b = _mm_mullo_pi16 (b, MC (mul_adjust_565_rb));
+
+       r = _mm_adds_pu16 (r, MC (add_adjust_565_rb));
+       g = _mm_adds_pu16 (g, MC (add_adjust_565_g));
+       b = _mm_adds_pu16 (b, MC (add_adjust_565_rb));
+
+       r = _mm_srli_pi16 (r, 6);
+       g = _mm_srli_pi16 (g, 6);
+       b = _mm_srli_pi16 (b, 6);
+
+       r = _mm_packs_pu16 (r, _mm_setzero_si64 ());
+       g = _mm_packs_pu16 (g, _mm_setzero_si64 ());
+       b = _mm_packs_pu16 (b, _mm_setzero_si64 ());
+
+       __m64 t0 = _mm_unpacklo_pi8 (b, g);
+       __m64 t1 = _mm_unpacklo_pi8 (r, _mm_cmpeq_pi32 (_mm_setzero_si64 (),
+                                                       _mm_setzero_si64 ()));
+
+       *vout0 = _mm_unpacklo_pi16 (t0, t1);
+       *vout1 = _mm_unpackhi_pi16 (t0, t1);
+}
+
 static force_inline __m64
 expand8888 (__m64 in, int pos)
 {
@@ -3341,14 +3389,12 @@ mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
     while (w >= 4)
     {
        __m64 vsrc = ldq_u ((__m64 *)src);
+       __m64 mm0, mm1;
 
-       __m64 mm0 = expand565 (vsrc, 0);
-       __m64 mm1 = expand565 (vsrc, 1);
-       __m64 mm2 = expand565 (vsrc, 2);
-       __m64 mm3 = expand565 (vsrc, 3);
+       expand_4xpacked565 (vsrc, &mm0, &mm1);
 
-       *(__m64 *)(dst + 0) = _mm_or_si64 (pack8888 (mm0, mm1), MC (ff000000));
-       *(__m64 *)(dst + 2) = _mm_or_si64 (pack8888 (mm2, mm3), MC (ff000000));
+       *(__m64 *)(dst + 0) = mm0;
+       *(__m64 *)(dst + 2) = mm1;
 
        dst += 4;
        src += 4;
-- 
1.7.3.4

_______________________________________________
Pixman mailing list
Pixman@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/pixman

[Pixman] [RFC] mmx: add and use expand_4xpacked565

Reply via email to