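For a pixel in which only the red component is set to one of the values below, the result is off by one (each entry reads: input -> result of the new code, with the result from expand565 in parentheses).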
0x03 -> 0x19 (0x18) 0x07 -> 0x3A (0x39) 0x18 -> 0xC5 (0xC6) 0x1C -> 0xE6 (0xE7) (Same for blue, and green has many more cases) It uses R8 = ( R5 * 527 + 23 ) >> 6; G8 = ( G6 * 259 + 33 ) >> 6; B8 = ( B5 * 527 + 23 ) >> 6; I don't guess there's a way to tweak this to produce the same results we get from expand565, is there? --- pixman/pixman-mmx.c | 58 +++++++++++++++++++++++++++++++++++++++++++++----- 1 files changed, 52 insertions(+), 6 deletions(-) diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c index 01a2bc9..74a2ad8 100644 --- a/pixman/pixman-mmx.c +++ b/pixman/pixman-mmx.c @@ -185,6 +185,13 @@ typedef struct mmxdatafield mmx_565_b; mmxdatafield mmx_packed_565_rb; mmxdatafield mmx_packed_565_g; + mmxdatafield mmx_expand_565_r; + mmxdatafield mmx_expand_565_g; + mmxdatafield mmx_expand_565_b; + mmxdatafield mmx_mul_adjust_565_rb; + mmxdatafield mmx_mul_adjust_565_g; + mmxdatafield mmx_add_adjust_565_rb; + mmxdatafield mmx_add_adjust_565_g; #ifndef USE_LOONGSON_MMI mmxdatafield mmx_mask_0; mmxdatafield mmx_mask_1; @@ -216,6 +223,13 @@ static const mmx_data_t c = MMXDATA_INIT (.mmx_565_b, 0x00000000000000f8), MMXDATA_INIT (.mmx_packed_565_rb, 0x00f800f800f800f8), MMXDATA_INIT (.mmx_packed_565_g, 0x0000fc000000fc00), + MMXDATA_INIT (.mmx_expand_565_r, 0xf800f800f800f800), + MMXDATA_INIT (.mmx_expand_565_g, 0x07e007e007e007e0), + MMXDATA_INIT (.mmx_expand_565_b, 0x001f001f001f001f), + MMXDATA_INIT (.mmx_mul_adjust_565_rb, 0x020f020f020f020f), + MMXDATA_INIT (.mmx_mul_adjust_565_g, 0x0103010301030103), + MMXDATA_INIT (.mmx_add_adjust_565_rb, 0x0017001700170017), + MMXDATA_INIT (.mmx_add_adjust_565_g, 0x0021002100210021), #ifndef USE_LOONGSON_MMI MMXDATA_INIT (.mmx_mask_0, 0xffffffffffff0000), MMXDATA_INIT (.mmx_mask_1, 0xffffffff0000ffff), @@ -518,6 +532,40 @@ expand565 (__m64 pixel, int pos) return _mm_srli_pi16 (pixel, 8); } +void +expand_4xpacked565 (__m64 vin, __m64 *vout0, __m64 *vout1) +{ + __m64 r = _mm_and_si64 (vin, MC (expand_565_r)); + __m64 g = 
_mm_and_si64 (vin, MC (expand_565_g)); + __m64 b = _mm_and_si64 (vin, MC (expand_565_b)); + + r = shift (r, -8 - 3); + g = shift (g, -3 - 2); + + r = _mm_mullo_pi16 (r, MC (mul_adjust_565_rb)); + g = _mm_mullo_pi16 (g, MC (mul_adjust_565_g)); + b = _mm_mullo_pi16 (b, MC (mul_adjust_565_rb)); + + r = _mm_adds_pu16 (r, MC (add_adjust_565_rb)); + g = _mm_adds_pu16 (g, MC (add_adjust_565_g)); + b = _mm_adds_pu16 (b, MC (add_adjust_565_rb)); + + r = _mm_srli_pi16 (r, 6); + g = _mm_srli_pi16 (g, 6); + b = _mm_srli_pi16 (b, 6); + + r = _mm_packs_pu16 (r, _mm_setzero_si64 ()); + g = _mm_packs_pu16 (g, _mm_setzero_si64 ()); + b = _mm_packs_pu16 (b, _mm_setzero_si64 ()); + + __m64 t0 = _mm_unpacklo_pi8 (b, g); + __m64 t1 = _mm_unpacklo_pi8 (r, _mm_cmpeq_pi32 (_mm_setzero_si64 (), + _mm_setzero_si64 ())); + + *vout0 = _mm_unpacklo_pi16 (t0, t1); + *vout1 = _mm_unpackhi_pi16 (t0, t1); +} + static force_inline __m64 expand8888 (__m64 in, int pos) { @@ -3341,14 +3389,12 @@ mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) while (w >= 4) { __m64 vsrc = ldq_u ((__m64 *)src); + __m64 mm0, mm1; - __m64 mm0 = expand565 (vsrc, 0); - __m64 mm1 = expand565 (vsrc, 1); - __m64 mm2 = expand565 (vsrc, 2); - __m64 mm3 = expand565 (vsrc, 3); + expand_4xpacked565 (vsrc, &mm0, &mm1); - *(__m64 *)(dst + 0) = _mm_or_si64 (pack8888 (mm0, mm1), MC (ff000000)); - *(__m64 *)(dst + 2) = _mm_or_si64 (pack8888 (mm2, mm3), MC (ff000000)); + *(__m64 *)(dst + 0) = mm0; + *(__m64 *)(dst + 2) = mm1; dst += 4; src += 4; -- 1.7.3.4 _______________________________________________ Pixman mailing list Pixman@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/pixman