commit: b2ffd5b5e35dea8fd3c9dc8ab89bc20133024cdc Author: Stephan Hartmann <sultan <AT> gentoo <DOT> org> AuthorDate: Mon Oct 4 20:57:33 2021 +0000 Commit: Stephan Hartmann <sultan <AT> gentoo <DOT> org> CommitDate: Mon Oct 4 20:58:26 2021 +0000 URL: https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=b2ffd5b5
www-client/chromium: fix building without AVX2 Closes: https://bugs.gentoo.org/815487 Package-Manager: Portage-3.0.20, Repoman-3.0.3 Signed-off-by: Stephan Hartmann <sultan <AT> gentoo.org> www-client/chromium/chromium-95.0.4638.32.ebuild | 3 + www-client/chromium/chromium-96.0.4655.0.ebuild | 3 + .../chromium/files/chromium-95-eigen-avx-1.patch | 229 +++++++++++++++++++++ .../chromium/files/chromium-95-eigen-avx-2.patch | 30 +++ .../chromium/files/chromium-95-eigen-avx-3.patch | 44 ++++ 5 files changed, 309 insertions(+) diff --git a/www-client/chromium/chromium-95.0.4638.32.ebuild b/www-client/chromium/chromium-95.0.4638.32.ebuild index 15e8a266530..96e6f24a0b1 100644 --- a/www-client/chromium/chromium-95.0.4638.32.ebuild +++ b/www-client/chromium/chromium-95.0.4638.32.ebuild @@ -234,6 +234,9 @@ src_prepare() { "${FILESDIR}/chromium-93-EnumTable-crash.patch" "${FILESDIR}/chromium-93-InkDropHost-crash.patch" "${FILESDIR}/chromium-95-maldoca-zlib.patch" + "${FILESDIR}/chromium-95-eigen-avx-1.patch" + "${FILESDIR}/chromium-95-eigen-avx-2.patch" + "${FILESDIR}/chromium-95-eigen-avx-3.patch" "${FILESDIR}/chromium-use-oauth2-client-switches-as-default.patch" "${FILESDIR}/chromium-shim_headers.patch" ) diff --git a/www-client/chromium/chromium-96.0.4655.0.ebuild b/www-client/chromium/chromium-96.0.4655.0.ebuild index 15e18722e20..d53a4e5b63b 100644 --- a/www-client/chromium/chromium-96.0.4655.0.ebuild +++ b/www-client/chromium/chromium-96.0.4655.0.ebuild @@ -233,6 +233,9 @@ src_prepare() { "${WORKDIR}/patches" "${FILESDIR}/chromium-93-EnumTable-crash.patch" "${FILESDIR}/chromium-93-InkDropHost-crash.patch" + "${FILESDIR}/chromium-95-eigen-avx-1.patch" + "${FILESDIR}/chromium-95-eigen-avx-2.patch" + "${FILESDIR}/chromium-95-eigen-avx-3.patch" "${FILESDIR}/chromium-use-oauth2-client-switches-as-default.patch" "${FILESDIR}/chromium-shim_headers.patch" ) diff --git a/www-client/chromium/files/chromium-95-eigen-avx-1.patch b/www-client/chromium/files/chromium-95-eigen-avx-1.patch new file mode 100644 index 00000000000..21d520cc4d4 --- /dev/null +++ b/www-client/chromium/files/chromium-95-eigen-avx-1.patch @@ -0,0 +1,229 @@ +From 3d4ba855e014987cad86d62a8dff533492255695 Mon Sep 17 00:00:00 2001 +From: Antonio Sanchez <canton...@google.com> +Date: Wed, 1 Sep 2021 14:11:21 -0700 +Subject: [PATCH] Fix AVX integer packet issues. + +Most are instances of AVX2 functions not protected by +`EIGEN_VECTORIZE_AVX2`. There was also a missing semi-colon +for AVX512. +--- + Eigen/src/Core/arch/AVX/PacketMath.h | 83 ++++++++++++++++++------- + Eigen/src/Core/arch/AVX512/PacketMath.h | 6 +- + 2 files changed, 63 insertions(+), 26 deletions(-) + +diff --git a/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h b/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h +index dc1a1d6b0..247ee4efd 100644 +--- a/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h ++++ b/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h +@@ -262,10 +262,6 @@ template<> EIGEN_STRONG_INLINE Packet4d peven_mask(const Packet4d& /*a*/) { retu + template<> EIGEN_STRONG_INLINE Packet8f pload1<Packet8f>(const float* from) { return _mm256_broadcast_ss(from); } + template<> EIGEN_STRONG_INLINE Packet4d pload1<Packet4d>(const double* from) { return _mm256_broadcast_sd(from); } + +-template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return _mm256_add_ps(_mm256_set1_ps(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); } +-template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return _mm256_add_pd(_mm256_set1_pd(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); } +-template<> EIGEN_STRONG_INLINE Packet8i plset<Packet8i>(const int& a) { return _mm256_add_epi32(_mm256_set1_epi32(a), _mm256_set_epi32(7,6,5,4,3,2,1,0)); } +- + template<> EIGEN_STRONG_INLINE Packet8f padd<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_add_ps(a,b); } + template<> EIGEN_STRONG_INLINE Packet4d padd<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_add_pd(a,b); } + template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const Packet8i& b) { +@@ -278,6 +274,10 @@ template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const + #endif + } + ++template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return padd(pset1(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); } ++template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return padd(pset1(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); } ++template<> EIGEN_STRONG_INLINE Packet8i plset<Packet8i>(const int& a) { return padd(pset1(a), _mm256_set_epi32(7,6,5,4,3,2,1,0)); } ++ + template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); } + template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); } + template<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const Packet8i& b) { +@@ -300,7 +300,7 @@ template<> EIGEN_STRONG_INLINE Packet4d pnegate(const Packet4d& a) + } + template<> EIGEN_STRONG_INLINE Packet8i pnegate(const Packet8i& a) + { +- return _mm256_sub_epi32(_mm256_set1_epi32(0), a); ++ return psub(pzero(a), a); + } + + template<> EIGEN_STRONG_INLINE Packet8f pconj(const Packet8f& a) { return a; } +@@ -419,7 +419,13 @@ template<> EIGEN_STRONG_INLINE Packet4d pmin<Packet4d>(const Packet4d& a, const + #endif + } + template<> EIGEN_STRONG_INLINE Packet8i pmin<Packet8i>(const Packet8i& a, const Packet8i& b) { ++#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_min_epi32(a, b); ++#else ++ __m128i lo = _mm_min_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0)); ++ __m128i hi = _mm_min_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1)); ++ return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); ++#endif + } + + template<> EIGEN_STRONG_INLINE Packet8f pmax<Packet8f>(const Packet8f& a, const Packet8f& b) { +@@ -445,7 +451,13 @@ template<> EIGEN_STRONG_INLINE Packet4d pmax<Packet4d>(const Packet4d& a, const + #endif + } + template<> EIGEN_STRONG_INLINE Packet8i pmax<Packet8i>(const Packet8i& a, const Packet8i& b) { ++#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_max_epi32(a, b); ++#else ++ __m128i lo = _mm_max_epi32(_mm256_extractf128_si256(a, 0), _mm256_extractf128_si256(b, 0)); ++ __m128i hi = _mm_max_epi32(_mm256_extractf128_si256(a, 1), _mm256_extractf128_si256(b, 1)); ++ return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); ++#endif + } + + // Add specializations for min/max with prescribed NaN progation. +@@ -641,17 +653,25 @@ template<> EIGEN_STRONG_INLINE Packet8f ploaddup<Packet8f>(const float* from) + // then we can perform a consistent permutation on the global register to get everything in shape: + return _mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2)); + } +-// Loads 2 doubles from memory a returns the packet {a0, a0 a1, a1} ++// Loads 2 doubles from memory a returns the packet {a0, a0, a1, a1} + template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from) + { + Packet4d tmp = _mm256_broadcast_pd((const __m128d*)(const void*)from); + return _mm256_permute_pd(tmp, 3<<2); + } +-// Loads 4 integers from memory a returns the packet {a0, a0 a1, a1, a2, a2, a3, a3} ++// Loads 4 integers from memory a returns the packet {a0, a0, a1, a1, a2, a2, a3, a3} + template<> EIGEN_STRONG_INLINE Packet8i ploaddup<Packet8i>(const int* from) + { +- Packet8i a = _mm256_castsi128_si256(pload<Packet4i>(from)); ++#ifdef EIGEN_VECTORIZE_AVX2 ++ const Packet8i a = _mm256_castsi128_si256(pload<Packet4i>(from)); + return _mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 0, 1, 1, 2, 2, 3, 3)); ++#else ++ __m256 tmp = _mm256_broadcast_ps((const __m128*)(const void*)from); ++ // mimic an "inplace" permutation of the lower 128bits using a blend ++ tmp = _mm256_blend_ps(tmp,_mm256_castps128_ps256(_mm_permute_ps( _mm256_castps256_ps128(tmp), _MM_SHUFFLE(1,0,1,0))), 15); ++ // then we can perform a consistent permutation on the global register to get everything in shape: ++ return _mm256_castps_si256(_mm256_permute_ps(tmp, _MM_SHUFFLE(3,3,2,2))); ++#endif + } + + // Loads 2 floats from memory a returns the packet {a0, a0 a0, a0, a1, a1, a1, a1} +@@ -662,7 +682,7 @@ template<> EIGEN_STRONG_INLINE Packet8f ploadquad<Packet8f>(const float* from) + } + template<> EIGEN_STRONG_INLINE Packet8i ploadquad<Packet8i>(const int* from) + { +- return _mm256_inserti128_si256(_mm256_set1_epi32(*from), _mm_set1_epi32(*(from+1)), 1); ++ return _mm256_insertf128_si256(_mm256_set1_epi32(*from), _mm_set1_epi32(*(from+1)), 1); + } + + template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet8f& from) { EIGEN_DEBUG_ALIGNED_STORE _mm256_store_ps(to, from); } +@@ -723,13 +743,13 @@ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet4d>(double* to, + } + template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet8i>(int* to, const Packet8i& from, Index stride) + { +- __m128i low = _mm256_extracti128_si256(from, 0); ++ __m128i low = _mm256_extractf128_si256(from, 0); + to[stride*0] = _mm_extract_epi32(low, 0); + to[stride*1] = _mm_extract_epi32(low, 1); + to[stride*2] = _mm_extract_epi32(low, 2); + to[stride*3] = _mm_extract_epi32(low, 3); + +- __m128i high = _mm256_extracti128_si256(from, 1); ++ __m128i high = _mm256_extractf128_si256(from, 1); + to[stride*4] = _mm_extract_epi32(high, 0); + to[stride*5] = _mm_extract_epi32(high, 1); + to[stride*6] = _mm_extract_epi32(high, 2); +@@ -803,7 +823,13 @@ template<> EIGEN_STRONG_INLINE Packet4d pabs(const Packet4d& a) + } + template<> EIGEN_STRONG_INLINE Packet8i pabs(const Packet8i& a) + { ++#ifdef EIGEN_VECTORIZE_AVX2 + return _mm256_abs_epi32(a); ++#else ++ __m128i lo = _mm_abs_epi32(_mm256_extractf128_si256(a, 0)); ++ __m128i hi = _mm_abs_epi32(_mm256_extractf128_si256(a, 1)); ++ return _mm256_insertf128_si256(_mm256_castsi128_si256(lo), (hi), 1); ++#endif + } + + template<> EIGEN_STRONG_INLINE Packet8f pfrexp<Packet8f>(const Packet8f& a, Packet8f& exponent) { +@@ -989,16 +1015,27 @@ ptranspose(PacketBlock<Packet8f,4>& kernel) { + #define MM256_SHUFFLE_EPI32(A, B, M) \ + _mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B), M)) + ++#ifdef EIGEN_VECTORIZE_AVX2 ++#define MM256_UNPACKLO_EPI32(A, B) \ ++ _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B))) ++#define MM256_UNPACKHI_EPI32(A, B) \ ++ _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B))) ++#else ++#define MM256_UNPACKLO_EPI32(A, B) _mm256_unpacklo_ps(A, B) ++#define MM256_UNPACKHI_EPI32(A, B) _mm256_unpackhi_ps(A, B) ++#endif ++ ++ + EIGEN_DEVICE_FUNC inline void + ptranspose(PacketBlock<Packet8i,8>& kernel) { +- __m256i T0 = _mm256_unpacklo_epi32(kernel.packet[0], kernel.packet[1]); +- __m256i T1 = _mm256_unpackhi_epi32(kernel.packet[0], kernel.packet[1]); +- __m256i T2 = _mm256_unpacklo_epi32(kernel.packet[2], kernel.packet[3]); +- __m256i T3 = _mm256_unpackhi_epi32(kernel.packet[2], kernel.packet[3]); +- __m256i T4 = _mm256_unpacklo_epi32(kernel.packet[4], kernel.packet[5]); +- __m256i T5 = _mm256_unpackhi_epi32(kernel.packet[4], kernel.packet[5]); +- __m256i T6 = _mm256_unpacklo_epi32(kernel.packet[6], kernel.packet[7]); +- __m256i T7 = _mm256_unpackhi_epi32(kernel.packet[6], kernel.packet[7]); ++ __m256i T0 = MM256_UNPACKLO_EPI32(kernel.packet[0], kernel.packet[1]); ++ __m256i T1 = MM256_UNPACKHI_EPI32(kernel.packet[0], kernel.packet[1]); ++ __m256i T2 = MM256_UNPACKLO_EPI32(kernel.packet[2], kernel.packet[3]); ++ __m256i T3 = MM256_UNPACKHI_EPI32(kernel.packet[2], kernel.packet[3]); ++ __m256i T4 = MM256_UNPACKLO_EPI32(kernel.packet[4], kernel.packet[5]); ++ __m256i T5 = MM256_UNPACKHI_EPI32(kernel.packet[4], kernel.packet[5]); ++ __m256i T6 = MM256_UNPACKLO_EPI32(kernel.packet[6], kernel.packet[7]); ++ __m256i T7 = MM256_UNPACKHI_EPI32(kernel.packet[6], kernel.packet[7]); + __m256i S0 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(1,0,1,0)); + __m256i S1 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(3,2,3,2)); + __m256i S2 = MM256_SHUFFLE_EPI32(T1,T3,_MM_SHUFFLE(1,0,1,0)); +@@ -1019,10 +1056,10 @@ ptranspose(PacketBlock<Packet8i,8>& kernel) { + + EIGEN_DEVICE_FUNC inline void + ptranspose(PacketBlock<Packet8i,4>& kernel) { +- __m256i T0 = _mm256_unpacklo_epi32(kernel.packet[0], kernel.packet[1]); +- __m256i T1 = _mm256_unpackhi_epi32(kernel.packet[0], kernel.packet[1]); +- __m256i T2 = _mm256_unpacklo_epi32(kernel.packet[2], kernel.packet[3]); +- __m256i T3 = _mm256_unpackhi_epi32(kernel.packet[2], kernel.packet[3]); ++ __m256i T0 = MM256_UNPACKLO_EPI32(kernel.packet[0], kernel.packet[1]); ++ __m256i T1 = MM256_UNPACKHI_EPI32(kernel.packet[0], kernel.packet[1]); ++ __m256i T2 = MM256_UNPACKLO_EPI32(kernel.packet[2], kernel.packet[3]); ++ __m256i T3 = MM256_UNPACKHI_EPI32(kernel.packet[2], kernel.packet[3]); + + __m256i S0 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(1,0,1,0)); + __m256i S1 = MM256_SHUFFLE_EPI32(T0,T2,_MM_SHUFFLE(3,2,3,2)); +diff --git a/third_party/eigen3/src/Eigen/src/Core/arch/AVX512/PacketMath.h b/third_party/eigen3/src/Eigen/src/Core/arch/AVX512/PacketMath.h +index 6ce15c677..0810f66ee 100644 +--- a/third_party/eigen3/src/Eigen/src/Core/arch/AVX512/PacketMath.h ++++ b/third_party/eigen3/src/Eigen/src/Core/arch/AVX512/PacketMath.h +@@ -1028,7 +1028,7 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, cons + + // AVX512F does not define _mm512_extracti32x8_epi32 to extract _m256i from _m512i + #define EIGEN_EXTRACT_8i_FROM_16i(INPUT, OUTPUT) \ +- __m256i OUTPUT##_0 = _mm512_extracti32x8_epi32(INPUT, 0) \ ++ __m256i OUTPUT##_0 = _mm512_extracti32x8_epi32(INPUT, 0); \ + __m256i OUTPUT##_1 = _mm512_extracti32x8_epi32(INPUT, 1) + #else + #define EIGEN_EXTRACT_8f_FROM_16f(INPUT, OUTPUT) \ +@@ -1037,7 +1037,7 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, cons + _mm512_extractf32x4_ps(INPUT, 1), 1); \ + __m256 OUTPUT##_1 = _mm256_insertf128_ps( \ + _mm256_castps128_ps256(_mm512_extractf32x4_ps(INPUT, 2)), \ +- _mm512_extractf32x4_ps(INPUT, 3), 1); ++ _mm512_extractf32x4_ps(INPUT, 3), 1) + + #define EIGEN_EXTRACT_8i_FROM_16i(INPUT, OUTPUT) \ + __m256i OUTPUT##_0 = _mm256_insertf128_si256( \ +@@ -1045,7 +1045,7 @@ template<> EIGEN_STRONG_INLINE Packet8d pldexp<Packet8d>(const Packet8d& a, cons + _mm512_extracti32x4_epi32(INPUT, 1), 1); \ + __m256i OUTPUT##_1 = _mm256_insertf128_si256( \ + _mm256_castsi128_si256(_mm512_extracti32x4_epi32(INPUT, 2)), \ +- _mm512_extracti32x4_epi32(INPUT, 3), 1); ++ _mm512_extracti32x4_epi32(INPUT, 3), 1) + #endif + + #ifdef EIGEN_VECTORIZE_AVX512DQ +-- +GitLab + diff --git a/www-client/chromium/files/chromium-95-eigen-avx-2.patch b/www-client/chromium/files/chromium-95-eigen-avx-2.patch new file mode 100644 index 00000000000..1cb8007b6a6 --- /dev/null +++ b/www-client/chromium/files/chromium-95-eigen-avx-2.patch @@ -0,0 +1,30 @@ +From def145547fc6abd14236e103b9443a36064f664f Mon Sep 17 00:00:00 2001 +From: Antonio Sanchez <canton...@google.com> +Date: Thu, 2 Sep 2021 16:21:07 -0700 +Subject: [PATCH] Add missing packet types in pset1 call. + +Oops, introduced this when "fixing" integer packets. +--- + Eigen/src/Core/arch/AVX/PacketMath.h | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h b/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h +index 247ee4efd..8da9031dc 100644 +--- a/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h ++++ b/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h +@@ -274,9 +274,9 @@ template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const + #endif + } + +-template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return padd(pset1(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); } +-template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return padd(pset1(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); } +-template<> EIGEN_STRONG_INLINE Packet8i plset<Packet8i>(const int& a) { return padd(pset1(a), _mm256_set_epi32(7,6,5,4,3,2,1,0)); } ++template<> EIGEN_STRONG_INLINE Packet8f plset<Packet8f>(const float& a) { return padd(pset1<Packet8f>(a), _mm256_set_ps(7.0,6.0,5.0,4.0,3.0,2.0,1.0,0.0)); } ++template<> EIGEN_STRONG_INLINE Packet4d plset<Packet4d>(const double& a) { return padd(pset1<Packet4d>(a), _mm256_set_pd(3.0,2.0,1.0,0.0)); } ++template<> EIGEN_STRONG_INLINE Packet8i plset<Packet8i>(const int& a) { return padd(pset1<Packet8i>(a), _mm256_set_epi32(7,6,5,4,3,2,1,0)); } + + template<> EIGEN_STRONG_INLINE Packet8f psub<Packet8f>(const Packet8f& a, const Packet8f& b) { return _mm256_sub_ps(a,b); } + template<> EIGEN_STRONG_INLINE Packet4d psub<Packet4d>(const Packet4d& a, const Packet4d& b) { return _mm256_sub_pd(a,b); } +-- +GitLab + diff --git a/www-client/chromium/files/chromium-95-eigen-avx-3.patch b/www-client/chromium/files/chromium-95-eigen-avx-3.patch new file mode 100644 index 00000000000..44e9ef4e0ec --- /dev/null +++ b/www-client/chromium/files/chromium-95-eigen-avx-3.patch @@ -0,0 +1,44 @@ +From 7792b1e909a98703181aecb8810b4b654004c25d Mon Sep 17 00:00:00 2001 +From: Antonio Sanchez <canton...@google.com> +Date: Fri, 3 Sep 2021 10:41:35 -0700 +Subject: [PATCH] Fix AVX2 PacketMath.h. + +There were a couple typos ps -> epi32, and an unaligned load issue. +--- + Eigen/src/Core/arch/AVX/PacketMath.h | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h b/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h +index 8da9031dc..41cb7af9c 100644 +--- a/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h ++++ b/third_party/eigen3/src/Eigen/src/Core/arch/AVX/PacketMath.h +@@ -663,7 +663,7 @@ template<> EIGEN_STRONG_INLINE Packet4d ploaddup<Packet4d>(const double* from) + template<> EIGEN_STRONG_INLINE Packet8i ploaddup<Packet8i>(const int* from) + { + #ifdef EIGEN_VECTORIZE_AVX2 +- const Packet8i a = _mm256_castsi128_si256(pload<Packet4i>(from)); ++ const Packet8i a = _mm256_castsi128_si256(ploadu<Packet4i>(from)); + return _mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 0, 1, 1, 2, 2, 3, 3)); + #else + __m256 tmp = _mm256_broadcast_ps((const __m128*)(const void*)from); +@@ -1015,14 +1015,14 @@ ptranspose(PacketBlock<Packet8f,4>& kernel) { + #define MM256_SHUFFLE_EPI32(A, B, M) \ + _mm256_castps_si256(_mm256_shuffle_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B), M)) + +-#ifdef EIGEN_VECTORIZE_AVX2 ++#ifndef EIGEN_VECTORIZE_AVX2 + #define MM256_UNPACKLO_EPI32(A, B) \ + _mm256_castps_si256(_mm256_unpacklo_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B))) + #define MM256_UNPACKHI_EPI32(A, B) \ + _mm256_castps_si256(_mm256_unpackhi_ps(_mm256_castsi256_ps(A), _mm256_castsi256_ps(B))) + #else +-#define MM256_UNPACKLO_EPI32(A, B) _mm256_unpacklo_ps(A, B) +-#define MM256_UNPACKHI_EPI32(A, B) _mm256_unpackhi_ps(A, B) ++#define MM256_UNPACKLO_EPI32(A, B) _mm256_unpacklo_epi32(A, B) ++#define MM256_UNPACKHI_EPI32(A, B) _mm256_unpackhi_epi32(A, B) + #endif + + +-- +GitLab +