Prevent spurious FP exceptions with _mm_cvt{,t}ps_pi32 for TARGET_MMX_WITH_SSE by clearing the upper 64 bits of the input XMM register.
2021-01-05 Uroš Bizjak <ubiz...@gmail.com> gcc/ PR target/98522 * config/i386/sse.md (sse_cvtps2pi): Redefine as define_insn_and_split. Clear the upper 64 bits of the input XMM register. (sse_cvttps2pi): Ditto. gcc/testsuite PR target/98522 * gcc.target/i386/pr98522.c: New test. Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}. Pushed to mainline, will be backported to gcc-10. Uros.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d84103807ff..c8e771fd697 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -5103,31 +5103,65 @@ (set_attr "type" "ssecvt") (set_attr "mode" "V4SF")]) -(define_insn "sse_cvtps2pi" +(define_insn_and_split "sse_cvtps2pi" [(set (match_operand:V2SI 0 "register_operand" "=y,Yv") (vec_select:V2SI - (unspec:V4SI [(match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")] + (unspec:V4SI [(match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")] UNSPEC_FIX_NOTRUNC) (parallel [(const_int 0) (const_int 1)])))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE" "@ cvtps2pi\t{%1, %0|%0, %q1} - %vcvtps2dq\t{%1, %0|%0, %1}" + #" + "TARGET_SSE2 && reload_completed + && SSE_REG_P (operands[0])" + [(const_int 0)] +{ + rtx op1 = lowpart_subreg (V2SFmode, operands[1], + GET_MODE (operands[1])); + rtx tmp = lowpart_subreg (V4SFmode, operands[0], + GET_MODE (operands[0])); + + op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode)); + emit_insn (gen_rtx_SET (tmp, op1)); + + rtx dest = lowpart_subreg (V4SImode, operands[0], + GET_MODE (operands[0])); + emit_insn (gen_sse2_fix_notruncv4sfv4si (dest, tmp)); + DONE; +} [(set_attr "isa" "*,sse2") (set_attr "mmx_isa" "native,*") (set_attr "type" "ssecvt") (set_attr "unit" "mmx,*") (set_attr "mode" "DI")]) -(define_insn "sse_cvttps2pi" +(define_insn_and_split "sse_cvttps2pi" [(set (match_operand:V2SI 0 "register_operand" "=y,Yv") (vec_select:V2SI - (fix:V4SI (match_operand:V4SF 1 "register_mmxmem_operand" "xm,YvBm")) + (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm,YvBm")) (parallel [(const_int 0) (const_int 1)])))] "(TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE" "@ cvttps2pi\t{%1, %0|%0, %q1} - %vcvttps2dq\t{%1, %0|%0, %1}" + #" + "TARGET_SSE2 && reload_completed + && SSE_REG_P (operands[0])" + [(const_int 0)] +{ + rtx op1 = lowpart_subreg (V2SFmode, operands[1], + GET_MODE (operands[1])); + rtx tmp = lowpart_subreg (V4SFmode, 
operands[0], + GET_MODE (operands[0])); + + op1 = gen_rtx_VEC_CONCAT (V4SFmode, op1, CONST0_RTX (V2SFmode)); + emit_insn (gen_rtx_SET (tmp, op1)); + + rtx dest = lowpart_subreg (V4SImode, operands[0], + GET_MODE (operands[0])); + emit_insn (gen_fix_truncv4sfv4si2 (dest, tmp)); + DONE; +} [(set_attr "isa" "*,sse2") (set_attr "mmx_isa" "native,*") (set_attr "type" "ssecvt") @@ -8026,7 +8060,7 @@ (define_insn "*vec_concatv4sf_0" [(set (match_operand:V4SF 0 "register_operand" "=v") (vec_concat:V4SF - (match_operand:V2SF 1 "nonimmediate_operand" "xm") + (match_operand:V2SF 1 "nonimmediate_operand" "vm") (match_operand:V2SF 2 "const0_operand" " C")))] "TARGET_SSE2" "%vmovq\t{%1, %0|%0, %1}" @@ -10457,7 +10491,7 @@ [(set (match_operand:VF2_512_256 0 "register_operand" "=v") (vec_merge:VF2_512_256 (vec_duplicate:VF2_512_256 - (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "xm")) + (match_operand:<ssescalarmode> 2 "nonimmediate_operand" "vm")) (match_operand:VF2_512_256 1 "const0_operand" "C") (const_int 1)))] "TARGET_AVX" diff --git a/gcc/testsuite/gcc.target/i386/pr98522.c b/gcc/testsuite/gcc.target/i386/pr98522.c new file mode 100644 index 00000000000..762f2eded50 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr98522.c @@ -0,0 +1,39 @@ +/* PR target/98522 */ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-options "-O2" } */ +/* { dg-require-effective-target fenv_exceptions } */ + +#include <emmintrin.h> +#include <fenv.h> + +__m64 +__attribute__((noinline)) +test_cvt (__m128 a) +{ + return _mm_cvt_ps2pi (a); +} + +__m64 +__attribute__((noinline)) +test_cvtt (__m128 a) +{ + return _mm_cvtt_ps2pi (a); +} + +int +main () +{ + __m128 x = (__m128)(__m128i){0x0000000000000000LL, 0x7fffffffffffffffLL}; + volatile __m64 y; + + feclearexcept (FE_INVALID); + + y = test_cvt(x); + y = test_cvtt (x); + + if (fetestexcept (FE_INVALID)) + __builtin_abort (); + + return 0; +} +