The compiler (gcc version 4.5.0 20100312 (experimental) (GCC)) crashes when compiling the attached code with the following options:
g++ -std=gnu++0x -Wno-pmf-conversions -fno-deduce-init-list -g -Wall -Werror -Wno-unused -fno-lto -msse -msse2 -mfpmath=sse -march=native -mtune=native -fomit-frame-pointer -ggdb -shared-libgcc report.cpp ------------------8<----------------------- #include <xmmintrin.h> #include <cstdint> static const __v4sf g_VecMinusTwo{ -2.0f, -2.0f, -2.0f, -2.0f }; namespace simd { template <std::uint8_t M> __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4si pshufd(__v4si a) { return __builtin_ia32_pshufd(a, M); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf addps(__v4sf a, __v4sf b) { return __builtin_ia32_addps(a, b); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf subps(__v4sf a, __v4sf b) { return __builtin_ia32_subps(a, b); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf mulps(__v4sf a, __v4sf b) { return __builtin_ia32_mulps(a, b); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf rsqrtps(__v4sf a) { return __builtin_ia32_rsqrtps(a); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf andps(__v4sf a, __v4sf b) { return __builtin_ia32_andps(a, b); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf rsqrtss(__v4sf a) { return __builtin_ia32_rsqrtss(a); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf mulss(__v4sf a, __v4sf b) { return __builtin_ia32_mulss(a, b); } template <std::uint8_t M> __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf shufps(__v4sf a, __v4sf b) { return __builtin_ia32_shufps(a, b, M); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf andnps(__v4sf a, __v4sf b) { return __builtin_ia32_andnps(a, b); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf orps(__v4sf a, __v4sf b) { return __builtin_ia32_orps(a, b); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf 
rcpps(__v4sf a) { return __builtin_ia32_rcpps(a); } __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf rcpss(__v4sf a) { return __builtin_ia32_rcpss(a); } template <std::uint8_t N> __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4si replicate(__v4si v) { return pshufd<(N | (N << 2) | (N << 4) | (N << 6))>(v); } template <std::uint8_t N> __attribute__((__always_inline__, __nothrow__, __const__)) inline __v4sf replicate(__v4sf v) { return (__v4sf) replicate<N>((__v4si) v); } } static __attribute__((__always_inline__)) __v4sf my_asin(__v4sf x) { static const __v4si g_Mask{ 0x7fffffff, 0x00000000, 0x7fffffff, 0x7fffffff }; __v4sf t; __v4sf u; __v4sf v; __v4sf r; u = simd::replicate<0>(x); u = simd::andps(u, (__v4sf) g_Mask); t = simd::mulps(u, __v4sf{ -1.0f, 0.0f, -0.1535779990f, 0.0f }); t = simd::addps(t, __v4sf{ 0.0f, 0.0f, 0.2836182315f, 0.0f }); t = simd::mulps(t, u); t = simd::addps(t, __v4sf{ 1.0f, 0.0f, -0.9315200116f, -2.144008022f }); r = simd::rsqrtss(t); u = simd::shufps<0b11100100>(r, u); t = simd::mulps(t, u); t = simd::addps(t, __v4sf{ 0.0f, 0.0f, -0.4089766186f, 1.103007131f }); t = simd::mulps(t, u); u = simd::mulss(u, __v4sf{ -0.5f, 0.0f, 0.0f, 0.0f }); t = simd::addps(t, __v4sf{ -3.0f, 1.0f, 1.507171600f, 1.507095111f }); t = simd::mulss(t, u); r = simd::rcpps(t); v = simd::mulps(t, r); v = simd::addps(v, g_VecMinusTwo); v = simd::mulps(v, r); t = simd::replicate<3>(t); t = simd::mulps(t, v); t = simd::replicate<2>(t); t = simd::subps(t, v); t = simd::andps((__v4sf) g_Mask, t); u = simd::andnps((__v4sf) g_Mask, x); t = simd::orps(t, u); return t; } ------------------8<----------------------- $ g++ -std=gnu++0x -Wno-pmf-conversions -fno-deduce-init-list -g -Wall -Werror -Wno-unused -fno-lto -msse -msse2 -mfpmath=sse -march=native -mtune=native -fomit-frame-pointer -ggdb -shared-libgcc report.cpp repo.cpp: In function 'float __vector[4] my_asin(float __vector[4])': repo.cpp:95:43: internal compiler error: 
Segmentation fault Please submit a full bug report, with preprocessed source if appropriate. See <http://gcc.gnu.org/bugs.html> for instructions. -- Summary: ICE during compiling SSE code Product: gcc Version: 4.5.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c++ AssignedTo: unassigned at gcc dot gnu dot org ReportedBy: piotr dot wyderski at gmail dot com GCC build triplet: i686-pc-cygwin GCC host triplet: i686-pc-cygwin GCC target triplet: i686-pc-cygwin http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43375