gcc/ChangeLog: PR target/117015 * config/s390/s390-protos.h (s390_expand_int_spaceship): New function. (s390_expand_fp_spaceship): New function. * config/s390/s390.cc (s390_expand_int_spaceship): New function. (s390_expand_fp_spaceship): New function. * config/s390/s390.md (spaceship<mode>4): New expander. * config/s390/vector.md (*vec_cmpv2di_lane0_<cc_tolower>): New insn. (*vec_cmpti_<cc_tolower>): New insn.
gcc/testsuite/ChangeLog: * gcc.target/s390/spaceship-fp-1.c: New test. * gcc.target/s390/spaceship-fp-2.c: New test. * gcc.target/s390/spaceship-fp-3.c: New test. * gcc.target/s390/spaceship-fp-4.c: New test. * gcc.target/s390/spaceship-int-1.c: New test. * gcc.target/s390/spaceship-int-2.c: New test. * gcc.target/s390/spaceship-int-3.c: New test. --- gcc/config/s390/s390-protos.h | 2 + gcc/config/s390/s390.cc | 161 ++++++++++++++++++ gcc/config/s390/s390.md | 21 +++ .../gcc.target/s390/spaceship-fp-1.c | 23 +++ .../gcc.target/s390/spaceship-fp-2.c | 23 +++ .../gcc.target/s390/spaceship-fp-3.c | 23 +++ .../gcc.target/s390/spaceship-fp-4.c | 53 ++++++ .../gcc.target/s390/spaceship-int-1.c | 30 ++++ .../gcc.target/s390/spaceship-int-2.c | 24 +++ .../gcc.target/s390/spaceship-int-3.c | 21 +++ 10 files changed, 381 insertions(+) create mode 100644 gcc/testsuite/gcc.target/s390/spaceship-fp-1.c create mode 100644 gcc/testsuite/gcc.target/s390/spaceship-fp-2.c create mode 100644 gcc/testsuite/gcc.target/s390/spaceship-fp-3.c create mode 100644 gcc/testsuite/gcc.target/s390/spaceship-fp-4.c create mode 100644 gcc/testsuite/gcc.target/s390/spaceship-int-1.c create mode 100644 gcc/testsuite/gcc.target/s390/spaceship-int-2.c create mode 100644 gcc/testsuite/gcc.target/s390/spaceship-int-3.c diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h index d760a7e20ff..6becad10def 100644 --- a/gcc/config/s390/s390-protos.h +++ b/gcc/config/s390/s390-protos.h @@ -128,6 +128,8 @@ extern void s390_expand_vcond (rtx, rtx, rtx, enum rtx_code, rtx, rtx); extern void s390_expand_vec_init (rtx, rtx); extern rtx s390_expand_merge_perm_const (machine_mode, bool); extern void s390_expand_merge (rtx, rtx, rtx, bool); +extern void s390_expand_int_spaceship (rtx, rtx, rtx, rtx); +extern void s390_expand_fp_spaceship (rtx, rtx, rtx, rtx); extern rtx s390_build_signbit_mask (machine_mode); extern rtx s390_return_addr_rtx (int, rtx); extern rtx s390_back_chain_rtx (void); diff 
--git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 737b176766a..2873cc1ddbb 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -8199,6 +8199,167 @@ s390_expand_atomic (machine_mode mode, enum rtx_code code, NULL_RTX, 1, OPTAB_DIRECT), 1); } +/* Expand integer op0 = op1 <=> op2, i.e., + op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : 1. + + Signedness is specified by op3. If op3 equals 1, then perform an unsigned + comparison, and if op3 equals -1, then perform a signed comparison. + + For integer comparisons we strive for a sequence like + CR[L] ; LHI ; LOCHIL ; LOCHIH + where the first three instructions fit into a group. */ + +void +s390_expand_int_spaceship (rtx op0, rtx op1, rtx op2, rtx op3) +{ + gcc_assert (op3 == const1_rtx || op3 == constm1_rtx); + + rtx cc, cond_lt, cond_gt; + machine_mode cc_mode; + machine_mode mode = GET_MODE (op1); + + /* Prior to VXE3, emulate a 128-bit comparison by breaking it up into three + comparisons. First test the high halves. If they are equal, then test + the low halves. Finally, test for equality. Depending on the results + make use of LOCs. */ + if (mode == TImode && !TARGET_VXE3) + { + gcc_assert (TARGET_VX); + op1 + = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op1, TImode, 0)); + op2 + = force_reg (V2DImode, simplify_gen_subreg (V2DImode, op2, TImode, 0)); + rtx lab = gen_label_rtx (); + rtx ccz = gen_rtx_REG (CCZmode, CC_REGNUM); + /* Compare high halves for equality. + VEC[L]G op1, op2 sets + CC1 if high(op1) < high(op2) + and + CC2 if high(op1) > high(op2). */ + machine_mode cc_mode = op3 == const1_rtx ? CCUmode : CCSmode; + rtx lane0 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); + emit_insn (gen_rtx_SET ( + gen_rtx_REG (cc_mode, CC_REGNUM), + gen_rtx_COMPARE (cc_mode, + gen_rtx_VEC_SELECT (DImode, op1, lane0), + gen_rtx_VEC_SELECT (DImode, op2, lane0)))); + s390_emit_jump (lab, gen_rtx_NE (CCZmode, ccz, const0_rtx)); + /* At this point we know that the high halves are equal. 
+ VCHLGS op2, op1 sets CC1 if low(op1) < low(op2) */ + emit_insn (gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_SET (gen_rtx_REG (CCVIHUmode, CC_REGNUM), + gen_rtx_COMPARE (CCVIHUmode, op2, op1)), + gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (V2DImode))))); + emit_label (lab); + emit_insn (gen_rtx_SET (op0, const1_rtx)); + emit_insn ( + gen_movsicc (op0, + gen_rtx_LTU (CCUmode, gen_rtx_REG (CCUmode, CC_REGNUM), + const0_rtx), + constm1_rtx, op0)); + /* Deal with the case where both halves are equal. */ + emit_insn (gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_SET (gen_rtx_REG (CCVEQmode, CC_REGNUM), + gen_rtx_COMPARE (CCVEQmode, op1, op2)), + gen_rtx_SET (gen_reg_rtx (V2DImode), + gen_rtx_EQ (V2DImode, op1, op2))))); + emit_insn (gen_movsicc (op0, gen_rtx_EQ (CCZmode, ccz, const0_rtx), + const0_rtx, op0)); + return; + } + + if (mode == QImode || mode == HImode) + { + rtx_code extend = op3 == const1_rtx ? ZERO_EXTEND : SIGN_EXTEND; + op1 = simplify_gen_unary (extend, SImode, op1, mode); + op1 = force_reg (SImode, op1); + op2 = simplify_gen_unary (extend, SImode, op2, mode); + op2 = force_reg (SImode, op2); + mode = SImode; + } + + if (op3 == const1_rtx) + { + cc_mode = CCUmode; + cc = gen_rtx_REG (cc_mode, CC_REGNUM); + cond_lt = gen_rtx_LTU (mode, cc, const0_rtx); + cond_gt = gen_rtx_GTU (mode, cc, const0_rtx); + } + else + { + cc_mode = CCSmode; + cc = gen_rtx_REG (cc_mode, CC_REGNUM); + cond_lt = gen_rtx_LT (mode, cc, const0_rtx); + cond_gt = gen_rtx_GT (mode, cc, const0_rtx); + } + + emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (cc_mode, op1, op2))); + emit_move_insn (op0, const0_rtx); + emit_insn (gen_movsicc (op0, cond_lt, constm1_rtx, op0)); + emit_insn (gen_movsicc (op0, cond_gt, const1_rtx, op0)); +} + +/* Expand floating-point op0 = op1 <=> op2, i.e., + op0 = op1 == op2 ? 0 : op1 < op2 ? -1 : op1 > op2 ? 1 : 2. + + If op3 equals const0_rtx, then we are interested in the compare only (see + test spaceship-fp-4.c). 
Otherwise, op3 is a CONST_INT different from + const1_rtx and constm1_rtx to which op0 is set for unordered operands. + + Emit a branch-only solution, i.e., let if-convert fold the branches into + LOCs if applicable. This has the benefit that the solution is also + applicable if we are only interested in the compare, i.e., if op3 equals + const0_rtx. + */ + +void +s390_expand_fp_spaceship (rtx op0, rtx op1, rtx op2, rtx op3) +{ + gcc_assert (op3 != const1_rtx && op3 != constm1_rtx); + + machine_mode mode = GET_MODE (op1); + machine_mode cc_mode = s390_select_ccmode (LTGT, op1, op2); + rtx cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + rtx cond_unordered = gen_rtx_UNORDERED (mode, cc_reg, const0_rtx); + rtx cond_eq = gen_rtx_EQ (mode, cc_reg, const0_rtx); + rtx cond_gt = gen_rtx_GT (mode, cc_reg, const0_rtx); + rtx_insn *insn; + rtx l_unordered = gen_label_rtx (); + rtx l_eq = gen_label_rtx (); + rtx l_gt = gen_label_rtx (); + rtx l_end = gen_label_rtx (); + + s390_emit_compare (VOIDmode, LTGT, op1, op2); + if (!flag_finite_math_only) + { + insn = s390_emit_jump (l_unordered, cond_unordered); + add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); + } + insn = s390_emit_jump (l_eq, cond_eq); + add_reg_br_prob_note (insn, profile_probability::unlikely ()); + insn = s390_emit_jump (l_gt, cond_gt); + add_reg_br_prob_note (insn, profile_probability::even ()); + emit_move_insn (op0, constm1_rtx); + emit_jump (l_end); + emit_label (l_eq); + emit_move_insn (op0, const0_rtx); + emit_jump (l_end); + emit_label (l_gt); + emit_move_insn (op0, const1_rtx); + if (!flag_finite_math_only) + { + emit_jump (l_end); + emit_label (l_unordered); + rtx unord_val = op3 == const0_rtx ? const2_rtx : op3; + emit_move_insn (op0, unord_val); + } + emit_label (l_end); +} + /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL. We need to emit DTP-relative relocations. 
*/ diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 02bc149b0fb..63261a9422e 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -1526,6 +1526,27 @@ operands[0] = SET_DEST (PATTERN (curr_insn)); }) +; Restrict the spaceship optab to z13 or later (64-bit only) since there we +; have LOAD HALFWORD IMMEDIATE ON CONDITION. TImode also requires TARGET_VX. + +(define_mode_iterator SPACESHIP_INT [(TI "TARGET_VX") DI SI HI QI]) +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:SPACESHIP_INT 1 "register_operand") + (match_operand:SPACESHIP_INT 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "TARGET_Z13 && TARGET_64BIT" + "s390_expand_int_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;") + +(define_mode_iterator SPACESHIP_BFP [TF DF SF]) +(define_expand "spaceship<mode>4" + [(match_operand:SI 0 "register_operand") + (match_operand:SPACESHIP_BFP 1 "register_operand") + (match_operand:SPACESHIP_BFP 2 "register_operand") + (match_operand:SI 3 "const_int_operand")] + "TARGET_Z13 && TARGET_64BIT && TARGET_HARD_FLOAT" + "s390_expand_fp_spaceship (operands[0], operands[1], operands[2], operands[3]); DONE;") + ; (TF|DF|SF|TD|DD|SD) instructions diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c new file mode 100644 index 00000000000..56c3d77de3c --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-1.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 2\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else if (x > y) \ + return 1; \ + 
else \ + return 2; \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c new file mode 100644 index 00000000000..0c6e6b6aeb4 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-2.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -ffinite-math-only -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 2\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tc[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tk[edx]br\t} } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else if (x > y) \ + return 1; \ + else \ + return 2; \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c new file mode 100644 index 00000000000..2f567d16e19 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-3.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 42\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else if (x > y) \ + return 1; \ + else \ + return 42; \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c b/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c new file 
mode 100644 index 00000000000..4531ecb6cd3 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-fp-4.c @@ -0,0 +1,53 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 0\)} 3 optimized } } */ +/* { dg-final { scan-assembler-times {\tk[edx]br\t} 3 } } */ +/* { dg-final { scan-assembler-not {\tloc} } } */ +/* { dg-final { scan-assembler-not {\tbrc} } } */ +/* { dg-final { scan-assembler-not {\tc[edx]br\t} } } */ + +/* At the time of writing this we emit + + kebr %f0,%f2 + jo .L2 + je .L3 + jnh .L10 + jg f3@PLT +.L10: + jg f2@PLT +.L3: + jg f1@PLT +.L2: + jg f4@PLT + + which is not optimal. Instead we could fold the conditional branch with the + unconditional one into something along the lines of + + kebr %f0,%f2 + jo f4@PLT + je f1@PLT + jnh f2@PLT + jg f3@PLT +*/ + +void f1 (void); +void f2 (void); +void f3 (void); +void f4 (void); + +#define TEST(T, U) \ + void test_##U (T x, T y) \ + { \ + if (x == y) \ + f1 (); \ + else if (x < y) \ + f2 (); \ + else if (x > y) \ + f3 (); \ + else \ + f4 (); \ + } + +TEST (float, float) +TEST (double, double) +TEST (long double, longdouble) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-1.c b/gcc/testsuite/gcc.target/s390/spaceship-int-1.c new file mode 100644 index 00000000000..8ca2677527a --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-int-1.c @@ -0,0 +1,30 @@ +/* { dg-do compile { target lp64 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 4 optimized } } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 5 optimized } } */ +/* { dg-final { scan-assembler-times {\tlhi} 9 } } */ +/* { dg-final { scan-assembler-times {\tloc} 18 } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else 
\ + return 1; \ + } + +TEST(signed char, schar) +TEST(unsigned char, uchar) +TEST(char, char) + +TEST(short, sshort) +TEST(unsigned short, ushort) + +TEST(int, sint) +TEST(unsigned int, uint) + +TEST(long, slong) +TEST(unsigned long, ulong) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-2.c b/gcc/testsuite/gcc.target/s390/spaceship-int-2.c new file mode 100644 index 00000000000..5f7975c1ab4 --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-int-2.c @@ -0,0 +1,24 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -mzarch -march=z13 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 1 optimized } } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 1 optimized } } */ +/* { dg-final { scan-assembler-times {\tvecg} 1 } } */ +/* { dg-final { scan-assembler-times {\tveclg} 1 } } */ +/* { dg-final { scan-assembler-times {\tvchlgs} 2 } } */ +/* { dg-final { scan-assembler-times {\tvceqgs} 2 } } */ +/* { dg-final { scan-assembler-times {\tlhi} 2 } } */ +/* { dg-final { scan-assembler-times {\tloc} 4 } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else \ + return 1; \ + } + +TEST(__int128, sint128) +TEST(unsigned __int128, uint128) diff --git a/gcc/testsuite/gcc.target/s390/spaceship-int-3.c b/gcc/testsuite/gcc.target/s390/spaceship-int-3.c new file mode 100644 index 00000000000..46b0e4a0bfe --- /dev/null +++ b/gcc/testsuite/gcc.target/s390/spaceship-int-3.c @@ -0,0 +1,21 @@ +/* { dg-do compile { target int128 } } */ +/* { dg-options "-O2 -march=z17 -fdump-tree-optimized" } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, -1\)} 1 optimized } } */ +/* { dg-final { scan-tree-dump-times {\.SPACESHIP \([^,]+, [^,]+, 1\)} 1 optimized } } */ +/* { dg-final { scan-assembler-times {\tvecq\t} 1 } } */ +/* { dg-final { scan-assembler-times {\tveclq\t} 1 } } */ +/* { 
dg-final { scan-assembler-times {\tloc} 4 } } */ + +#define TEST(T, U) \ + int test_##U (T x, T y) \ + { \ + if (x == y) \ + return 0; \ + else if (x < y) \ + return -1; \ + else \ + return 1; \ + } + +TEST(__int128, sint128) +TEST(unsigned __int128, uint128) -- 2.49.0