https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71559
Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |hjl.tools at gmail dot com,
                   |                            |jsm28 at gcc dot gnu.org

--- Comment #8 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
#define N 1024
float a[N], b[N];
int c[N];

void eq ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i] == b[i];
}

void ne ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i] != b[i];
}

void gt ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i] > b[i];
}

void ge ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i] >= b[i];
}

void lt ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i] < b[i];
}

void le ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = a[i] <= b[i];
}

void unle ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = !__builtin_isgreater (a[i], b[i]);
}

void unlt ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = !__builtin_isgreaterequal (a[i], b[i]);
}

void unge ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = !__builtin_isless (a[i], b[i]);
}

void ungt ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = !__builtin_islessequal (a[i], b[i]);
}

void uneq ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = !__builtin_islessgreater (a[i], b[i]);
}

void ordered ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = !__builtin_isunordered (a[i], b[i]);
}

void unordered ()
{
  int i;
  for (i = 0; i < N; i++)
    c[i] = __builtin_isunordered (a[i], b[i]);
}

shows the various codes in vcond. From C99 and other sources, all of the isgreater/isgreaterequal/isless/islessequal/islessgreater macros return false if any argument is NaN and don't raise exceptions (except for sNaN). isunordered returns true only if any argument is NaN and doesn't raise exceptions either. The matching of the above to RTX codes has been confirmed by compiling the above testcase.
Thus, IMNSHO the right values are:

             A > B   A < B   A = B   UNORD   SIGNAL   IMM
EQ             F       F       T       F       N      0
NE             T       T       F       T       N      4
GT             T       F       F       F       Y      0xe
GE             T       F       T       F       Y      0xd
LT             F       T       F       F       Y      1
LE             F       T       T       F       Y      2
UNLE           F       T       T       T       N      0x1a
UNLT           F       T       F       T       N      0x19
UNGE           T       F       T       T       N      0x15
UNGT           T       F       F       T       N      0x16
UNEQ           F       F       T       T       N      8
LTGT           T       T       F       F       N      0xc
ORDERED        T       T       T       F       N      7
UNORDERED      F       F       F       T       N      3

This is in sync with the 'D' stuff except for UN{LE,LT,GE,GT,EQ} where the AVX implementation uses the signalling instructions instead of non-signalling. Unless there is some bug in the generic code, I'd say if one gets UNLE for inverted isgreater, then in the above table one needs to replace all Ts for Fs and vice versa, but keep Y and N as is (because whether the insn raises an exception or not depends just on the arguments (and not even on their order), not on whether the result is inverted (nor on whether the arguments are swapped)). So I'd expect something like:

--- gcc/config/i386/i386.c.jj 2016-06-16 21:00:08.000000000 +0200
+++ gcc/config/i386/i386.c 2016-06-17 19:35:52.237836780 +0200
@@ -17628,7 +17628,7 @@ ix86_print_operand (FILE *file, rtx x, i
         case UNEQ:
           if (TARGET_AVX)
             {
-              fputs ("eq_us", file);
+              fputs ("eq_uq", file);
               break;
             }
         case EQ:
@@ -17637,7 +17637,7 @@ ix86_print_operand (FILE *file, rtx x, i
         case UNLT:
           if (TARGET_AVX)
             {
-              fputs ("nge", file);
+              fputs ("nge_uq", file);
               break;
             }
         case LT:
@@ -17646,7 +17646,7 @@ ix86_print_operand (FILE *file, rtx x, i
         case UNLE:
           if (TARGET_AVX)
             {
-              fputs ("ngt", file);
+              fputs ("ngt_uq", file);
               break;
             }
         case LE:
@@ -17671,7 +17671,10 @@ ix86_print_operand (FILE *file, rtx x, i
               break;
             }
         case UNGE:
-          fputs ("nlt", file);
+          if (TARGET_AVX)
+            fputs ("nlt_uq", file);
+          else
+            fputs ("nlt", file);
           break;
         case GT:
           if (TARGET_AVX)
@@ -17680,7 +17683,10 @@ ix86_print_operand (FILE *file, rtx x, i
               break;
             }
         case UNGT:
-          fputs ("nle", file);
+          if (TARGET_AVX)
+            fputs ("nle_uq", file);
+          else
+            fputs ("nle", file);
           break;
         case ORDERED:
           fputs ("ord", file);
@@ -23622,17 +23628,33 @@ ix86_fp_cmp_code_to_pcmp_immediate (enum
   switch (code)
     {
     case EQ:
-      return 0x08;
+      return 0x00;
     case NE:
       return 0x04;
     case GT:
-      return 0x16;
+      return 0x0e;
     case LE:
-      return 0x1a;
+      return 0x02;
     case GE:
-      return 0x15;
+      return 0x0d;
     case LT:
+      return 0x01;
+    case UNLE:
+      return 0x1a;
+    case UNLT:
       return 0x19;
+    case UNGE:
+      return 0x15;
+    case UNGT:
+      return 0x16;
+    case UNEQ:
+      return 0x08;
+    case LTGT:
+      return 0x0c;
+    case ORDERED:
+      return 0x07;
+    case UNORDERED:
+      return 0x03;
     default:
       gcc_unreachable ();
     }

The first 5 hunks tweak the TARGET_AVX stuff, the rest complete and fix up the AVX512F function. The patch is completely untested though (and I wonder if we have testcases for not raising exceptions when the arguments of isgreater etc. are qNaNs). I also wonder about the SSE2 stuff. CCing also Joseph, who knows the floating point stuff.