Module Name:	src
Committed By:	riastradh
Date:		Sat Jul 25 22:45:10 UTC 2020
Modified Files:
	src/sys/crypto/aes/arch/x86: immintrin.h

Log Message:
Add some Intel intrinsics for ChaCha.

_mm_load1_ps
_mm_loadu_si128
_mm_movelh_ps
_mm_slli_epi32
_mm_storeu_si128
_mm_unpackhi_epi32
_mm_unpacklo_epi32

To generate a diff of this commit:
cvs rdiff -u -r1.4 -r1.5 src/sys/crypto/aes/arch/x86/immintrin.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files: Index: src/sys/crypto/aes/arch/x86/immintrin.h diff -u src/sys/crypto/aes/arch/x86/immintrin.h:1.4 src/sys/crypto/aes/arch/x86/immintrin.h:1.5 --- src/sys/crypto/aes/arch/x86/immintrin.h:1.4 Sat Jul 25 22:44:32 2020 +++ src/sys/crypto/aes/arch/x86/immintrin.h Sat Jul 25 22:45:10 2020 @@ -1,4 +1,4 @@ -/* $NetBSD: immintrin.h,v 1.4 2020/07/25 22:44:32 riastradh Exp $ */ +/* $NetBSD: immintrin.h,v 1.5 2020/07/25 22:45:10 riastradh Exp $ */ /*- * Copyright (c) 2020 The NetBSD Foundation, Inc. @@ -103,6 +103,20 @@ _mm_add_epi32(__m128i __a, __m128i __b) #endif _INTRINSATTR +static __inline __m128 +_mm_load1_ps(const float *__p) +{ + return __extension__ (__m128)(__v4sf) { *__p, *__p, *__p, *__p }; +} + +_INTRINSATTR +static __inline __m128i +_mm_loadu_si128(const __m128i_u *__p) +{ + return ((const struct { __m128i_u __v; } _PACKALIAS *)__p)->__v; +} + +_INTRINSATTR static __inline __m128i _mm_loadu_si32(const void *__p) { @@ -132,8 +146,18 @@ _mm_movehl_ps(__m128 __v0, __m128 __v1) #if defined(__GNUC__) && !defined(__clang__) return (__m128)__builtin_ia32_movhlps((__v4sf)__v0, (__v4sf)__v1); #elif defined(__clang__) - return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, - 6, 7, 2, 3); + return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, 6,7,2,3); +#endif +} + +_INTRINSATTR +static __inline __m128 +_mm_movelh_ps(__m128 __v0, __m128 __v1) +{ +#if defined(__GNUC__) && !defined(__clang__) + return (__m128)__builtin_ia32_movlhps((__v4sf)__v0, (__v4sf)__v1); +#elif defined(__clang__) + return __builtin_shufflevector((__v4sf)__v0, (__v4sf)__v1, 0,1,4,5); #endif } @@ -205,6 +229,13 @@ _mm_shuffle_epi8(__m128i __vtbl, __m128i _INTRINSATTR static __inline __m128i +_mm_slli_epi32(__m128i __v, uint8_t __bits) +{ + return (__m128i)__builtin_ia32_pslldi128((__v4si)__v, (int)__bits); +} + +_INTRINSATTR +static __inline __m128i _mm_slli_epi64(__m128i __v, uint8_t __bits) { return (__m128i)__builtin_ia32_psllqi128((__v2di)__v, (int)__bits); @@ -245,6 
+276,13 @@ _mm_srli_epi64(__m128i __v, uint8_t __bi _INTRINSATTR static __inline void +_mm_storeu_si128(__m128i_u *__p, __m128i __v) +{ + ((struct { __m128i_u __v; } _PACKALIAS *)__p)->__v = __v; +} + +_INTRINSATTR +static __inline void _mm_storeu_si32(void *__p, __m128i __v) { ((struct { int32_t __v; } _PACKALIAS *)__p)->__v = ((__v4si)__v)[0]; @@ -273,6 +311,32 @@ _mm_sub_epi64(__m128i __x, __m128i __y) _INTRINSATTR static __inline __m128i +_mm_unpackhi_epi32(__m128i __lo, __m128i __hi) +{ +#if defined(__GNUC__) && !defined(__clang__) + return (__m128i)__builtin_ia32_punpckhdq128((__v4si)__lo, + (__v4si)__hi); +#elif defined(__clang__) + return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi, + 2,6,3,7); +#endif +} + +_INTRINSATTR +static __inline __m128i +_mm_unpacklo_epi32(__m128i __lo, __m128i __hi) +{ +#if defined(__GNUC__) && !defined(__clang__) + return (__m128i)__builtin_ia32_punpckldq128((__v4si)__lo, + (__v4si)__hi); +#elif defined(__clang__) + return (__m128i)__builtin_shufflevector((__v4si)__lo, (__v4si)__hi, + 0,4,1,5); +#endif +} + +_INTRINSATTR +static __inline __m128i _mm_unpacklo_epi64(__m128i __lo, __m128i __hi) { #if defined(__GNUC__) && !defined(__clang__)