The compiler (gcc version 4.5.0 20100312 (experimental) (GCC)) crashes when
compiling the attached code with the following options:

g++ -std=gnu++0x -Wno-pmf-conversions -fno-deduce-init-list -g -Wall -Werror
-Wno-unused -fno-lto -msse -msse2 -mfpmath=sse -march=native -mtune=native
-fomit-frame-pointer -ggdb -shared-libgcc report.cpp

------------------8<-----------------------

#include <xmmintrin.h>
#include <cstdint>

// Vector with -2.0f in all four lanes; my_asin adds it to the product t * r.
static const __v4sf g_VecMinusTwo{ -2.0f, -2.0f, -2.0f, -2.0f };

    // Thin, force-inlined wrappers around GCC's __builtin_ia32_* SSE/SSE2
    // builtins. Every wrapper is marked always_inline, nothrow and const and
    // simply forwards its arguments to the builtin of the same name.
    namespace simd {

        // PSHUFD wrapper: forwards `a` and the compile-time immediate M to
        // __builtin_ia32_pshufd.
        template <std::uint8_t M> __attribute__((__always_inline__,
__nothrow__, __const__)) inline __v4si pshufd(__v4si a) {

            return __builtin_ia32_pshufd(a, M);
        }

        // ADDPS wrapper: forwards (a, b) to __builtin_ia32_addps.
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf addps(__v4sf a, __v4sf b) {

            return __builtin_ia32_addps(a, b);
        }

        // SUBPS wrapper: forwards (a, b) to __builtin_ia32_subps.
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf subps(__v4sf a, __v4sf b) {

            return __builtin_ia32_subps(a, b);
        }

        // MULPS wrapper: forwards (a, b) to __builtin_ia32_mulps.
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf mulps(__v4sf a, __v4sf b) {

            return __builtin_ia32_mulps(a, b);
        }

        // RSQRTPS wrapper: forwards `a` to __builtin_ia32_rsqrtps.
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf rsqrtps(__v4sf a) {

            return __builtin_ia32_rsqrtps(a);
        }

        // ANDPS wrapper: forwards (a, b) to __builtin_ia32_andps.
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf andps(__v4sf a, __v4sf b) {

            return __builtin_ia32_andps(a, b);
        }

        // RSQRTSS wrapper: forwards `a` to __builtin_ia32_rsqrtss
        // (scalar form: operates on the lowest lane only).
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf rsqrtss(__v4sf a) {

            return __builtin_ia32_rsqrtss(a);
        }

        // MULSS wrapper: forwards (a, b) to __builtin_ia32_mulss
        // (scalar form: operates on the lowest lane only).
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf mulss(__v4sf a, __v4sf b) {

            return __builtin_ia32_mulss(a, b);
        }

        // SHUFPS wrapper: forwards (a, b) and the compile-time immediate M
        // to __builtin_ia32_shufps.
        template <std::uint8_t M> __attribute__((__always_inline__,
__nothrow__, __const__)) inline __v4sf shufps(__v4sf a, __v4sf b) {

            return __builtin_ia32_shufps(a, b, M);
        }

        // ANDNPS wrapper: forwards (a, b) to __builtin_ia32_andnps.
        // Note the operand order: the instruction computes (~a) & b.
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf andnps(__v4sf a, __v4sf b) {

            return __builtin_ia32_andnps(a, b);
        }

        // ORPS wrapper: forwards (a, b) to __builtin_ia32_orps.
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf orps(__v4sf a, __v4sf b) {

            return __builtin_ia32_orps(a, b);
        }

        // RCPPS wrapper: forwards `a` to __builtin_ia32_rcpps.
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf rcpps(__v4sf a) {

            return __builtin_ia32_rcpps(a);
        }

        // RCPSS wrapper: forwards `a` to __builtin_ia32_rcpss
        // (scalar form: operates on the lowest lane only).
        __attribute__((__always_inline__, __nothrow__, __const__)) inline
__v4sf rcpss(__v4sf a) {

            return __builtin_ia32_rcpss(a);
        }

        // Integer overload: copies N into all four 2-bit selector fields of
        // the PSHUFD immediate, so every result lane is taken from lane N of
        // `v`. N is expected to be in 0..3 (the only values used in this
        // file are 0, 2 and 3).
        template <std::uint8_t N> __attribute__((__always_inline__,
__nothrow__, __const__)) inline __v4si replicate(__v4si v) {

            return pshufd<(N | (N << 2) | (N << 4) | (N << 6))>(v);
        }

        // Float overload: reinterprets `v` as __v4si, reuses the integer
        // overload above, and casts the result back to __v4sf.
        template <std::uint8_t N> __attribute__((__always_inline__,
__nothrow__, __const__)) inline __v4sf replicate(__v4sf v) {

            return (__v4sf) replicate<N>((__v4si) v);
        }
    }

    // Evaluates a fixed sequence of SSE operations on lane 0 of `x` and
    // returns the resulting vector. Judging by the name this presumably
    // approximates arcsine, but the report does not state the intended math
    // (TODO confirm). This is the function in which GCC reports the ICE.
    //
    // NOTE(review): declared always_inline without the `inline` keyword;
    // GCC may diagnose that combination -- confirm whether it matters for
    // reproducing the crash.
    static __attribute__((__always_inline__)) __v4sf my_asin(__v4sf x) {

        // 0x7fffffff clears the IEEE-754 sign bit of a float lane; lane 1 of
        // the mask is all zeros, so and-ing with it zeroes that lane.
        static const __v4si g_Mask{ 0x7fffffff, 0x00000000, 0x7fffffff,
0x7fffffff };

        __v4sf t;
        __v4sf u;
        __v4sf v;
        __v4sf r;

        // u = { |x0|, 0, |x0|, |x0| }: broadcast lane 0, then apply the mask.
        u = simd::replicate<0>(x);
        u = simd::andps(u, (__v4sf) g_Mask);
        // Per-lane multiply/add chain with different coefficients in each
        // lane; after the third add, lane 0 of t holds 1 - x0*x0.
        t = simd::mulps(u, __v4sf{ -1.0f, 0.0f, -0.1535779990f, 0.0f });
        t = simd::addps(t, __v4sf{ 0.0f, 0.0f, 0.2836182315f, 0.0f });
        t = simd::mulps(t, u);
        t = simd::addps(t, __v4sf{ 1.0f, 0.0f, -0.9315200116f, -2.144008022f
});
        // Approximate reciprocal square root of lane 0 only.
        r = simd::rsqrtss(t);
        // Immediate 0b11100100 selects lanes 0,1 from r and lanes 2,3 from u.
        u = simd::shufps<0b11100100>(r, u);
        t = simd::mulps(t, u);
        t = simd::addps(t, __v4sf{ 0.0f, 0.0f, -0.4089766186f, 1.103007131f });
        t = simd::mulps(t, u);
        // Scalar ops below touch lane 0 only; upper lanes come from the
        // first operand unchanged.
        u = simd::mulss(u, __v4sf{ -0.5f, 0.0f, 0.0f, 0.0f });
        t = simd::addps(t, __v4sf{ -3.0f, 1.0f, 1.507171600f, 1.507095111f });
        t = simd::mulss(t, u);
        // r is the approximate reciprocal of t; v = r * (t*r - 2), which is
        // the negation of one Newton-Raphson refinement r * (2 - t*r).
        r = simd::rcpps(t);
        v = simd::mulps(t, r);
        v = simd::addps(v, g_VecMinusTwo);
        v = simd::mulps(v, r);
        // Broadcast lane 3 of t, scale by v, broadcast lane 2 of the
        // product, then subtract v.
        t = simd::replicate<3>(t);
        t = simd::mulps(t, v);
        t = simd::replicate<2>(t);
        t = simd::subps(t, v);
        // Strip the sign of t (and zero lane 1), then OR in (~mask & x):
        // the sign bits of x in lanes 0, 2, 3 and all of x's lane 1.
        t = simd::andps((__v4sf) g_Mask, t);
        u = simd::andnps((__v4sf) g_Mask, x);
        t = simd::orps(t, u);

        return t;
    }

------------------8<-----------------------

$ g++ -std=gnu++0x -Wno-pmf-conversions -fno-deduce-init-list -g -Wall -Werror
-Wno-unused -fno-lto -msse -msse2 -mfpmath=sse -march=native -mtune=native
-fomit-frame-pointer -ggdb -shared-libgcc report.cpp
repo.cpp: In function 'float __vector[4] my_asin(float __vector[4])':
repo.cpp:95:43: internal compiler error: Segmentation fault
Please submit a full bug report,
with preprocessed source if appropriate.
See <http://gcc.gnu.org/bugs.html> for instructions.


-- 
           Summary: ICE during compiling SSE code
           Product: gcc
           Version: 4.5.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
        AssignedTo: unassigned at gcc dot gnu dot org
        ReportedBy: piotr dot wyderski at gmail dot com
 GCC build triplet: i686-pc-cygwin
  GCC host triplet: i686-pc-cygwin
GCC target triplet: i686-pc-cygwin


http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43375

Reply via email to