On Sunday 27 November 2016, Marc Glisse wrote: > On Sat, 26 Nov 2016, Allan Sandfeld Jensen wrote: > > Use the recently introduced unaligned variant of __m128i and add a > > similar __m64 and use those to make it clear these two intrinsics > > require neither 128- bit nor 64-bit alignment. > > Thanks for doing this. You'll want Uros or Kirill to review your patch. > There are probably several more places that could do with an unaligned > fix, but we don't have to find them all at once. > First I found it strange to use __m64, but then it actually seems like a > good call to use a type that is not just aligned(1) but also may_alias. > > + *(__m64_u *)__P = __m64(((__v2di)__B)[0]); > > gcc complains about this syntax for me, it wants parentheses around > __m64... Did it pass the testsuite for you?
Fixed, it now matches the move just below.
Index: gcc/config/i386/emmintrin.h =================================================================== --- gcc/config/i386/emmintrin.h (revision 242892) +++ gcc/config/i386/emmintrin.h (working copy) @@ -703,9 +703,9 @@ } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_loadl_epi64 (__m128i const *__P) +_mm_loadl_epi64 (__m128i_u const *__P) { - return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P); + return _mm_set_epi64 ((__m64)0LL, *(__m64_u *)__P); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) @@ -721,9 +721,9 @@ } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_storel_epi64 (__m128i *__P, __m128i __B) +_mm_storel_epi64 (__m128i_u *__P, __m128i __B) { - *(long long *)__P = ((__v2di)__B)[0]; + *(__m64_u *)__P = (__m64) ((__v2di)__B)[0]; } extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) Index: gcc/config/i386/mmintrin.h =================================================================== --- gcc/config/i386/mmintrin.h (revision 242892) +++ gcc/config/i386/mmintrin.h (working copy) @@ -37,6 +37,9 @@ vector types, and their scalar components. */ typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__)); +/* Unaligned version of the same type */ +typedef int __m64_u __attribute__ ((__vector_size__ (8), __may_alias__, __aligned__ (1))); + /* Internal data types for implementing the intrinsics. */ typedef int __v2si __attribute__ ((__vector_size__ (8))); typedef short __v4hi __attribute__ ((__vector_size__ (8)));