This patch improves code generation for 64-bit (DImode) comparisons in the PRU backend. Committed to trunk.
Manually expanding into 32-bit comparisons is much more efficient than the default expansion into word-size comparisons. Note that word for PRU is 8-bit. PR target/106562 gcc/ChangeLog: * config/pru/pru-protos.h (pru_noteq_condition): New function declaration. * config/pru/pru.cc (pru_noteq_condition): New function. * config/pru/pru.md (cbranchdi4): Define new pattern. gcc/testsuite/ChangeLog: * gcc.target/pru/pr106562-1.c: New test. * gcc.target/pru/pr106562-2.c: New test. * gcc.target/pru/pr106562-3.c: New test. * gcc.target/pru/pr106562-4.c: New test. Signed-off-by: Dimitar Dimitrov <dimi...@dinux.eu> --- gcc/config/pru/pru-protos.h | 1 + gcc/config/pru/pru.cc | 21 +++ gcc/config/pru/pru.md | 180 ++++++++++++++++++++++ gcc/testsuite/gcc.target/pru/pr106562-1.c | 9 ++ gcc/testsuite/gcc.target/pru/pr106562-2.c | 9 ++ gcc/testsuite/gcc.target/pru/pr106562-3.c | 9 ++ gcc/testsuite/gcc.target/pru/pr106562-4.c | 159 +++++++++++++++++++ 7 files changed, 388 insertions(+) create mode 100644 gcc/testsuite/gcc.target/pru/pr106562-1.c create mode 100644 gcc/testsuite/gcc.target/pru/pr106562-2.c create mode 100644 gcc/testsuite/gcc.target/pru/pr106562-3.c create mode 100644 gcc/testsuite/gcc.target/pru/pr106562-4.c diff --git a/gcc/config/pru/pru-protos.h b/gcc/config/pru/pru-protos.h index 4b190c98206..517fa02e272 100644 --- a/gcc/config/pru/pru-protos.h +++ b/gcc/config/pru/pru-protos.h @@ -52,6 +52,7 @@ extern const char *pru_output_signed_cbranch (rtx *, bool); extern const char *pru_output_signed_cbranch_ubyteop2 (rtx *, bool); extern const char *pru_output_signed_cbranch_zeroop2 (rtx *, bool); +extern enum rtx_code pru_noteq_condition (enum rtx_code code); extern rtx pru_expand_fp_compare (rtx comparison, machine_mode mode); extern void pru_emit_doloop (rtx *, int); diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc index 04eca90b255..0029dcbc6aa 100644 --- a/gcc/config/pru/pru.cc +++ b/gcc/config/pru/pru.cc @@ -895,6 +895,27 @@ pru_init_libfuncs (void) 
set_optab_libfunc (udivmod_optab, DImode, "__pruabi_divremull"); } +/* Given a comparison CODE, return a similar comparison but without + the "equals" condition. In other words, it strips GE/GEU/LE/LEU + and instead returns GT/GTU/LT/LTU. */ + +enum rtx_code +pru_noteq_condition (enum rtx_code code) +{ + switch (code) + { + case GT: return GT; + case GTU: return GTU; + case GE: return GT; + case GEU: return GTU; + case LT: return LT; + case LTU: return LTU; + case LE: return LT; + case LEU: return LTU; + default: + gcc_unreachable (); + } +} /* Emit comparison instruction if necessary, returning the expression that holds the compare result in the proper mode. Return the comparison diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md index 53ffff07708..bdc5ad79ba0 100644 --- a/gcc/config/pru/pru.md +++ b/gcc/config/pru/pru.md @@ -1309,6 +1309,186 @@ (define_expand "cbranch<mode>4" operands[2] = XEXP (t, 1); }) +;; Expand the cbranchdi pattern in order to avoid the default +;; expansion into word_mode operations, which is not efficient for PRU. +;; In pseudocode this expansion outputs: +;; +;; /* EQ */ +;; if (OP1_hi {reverse_condition (cmp)} OP2_hi) +;; goto fallthrough +;; if (OP1_lo {cmp} OP2_lo) +;; goto label3 +;; fallthrough: +;; +;; /* NE */ +;; if (OP1_hi {cmp} OP2_hi) +;; goto label3 +;; if (OP1_lo {cmp} OP2_lo) +;; goto label3 +;; +;; The LT comparisons with zero take one machine instruction to simply +;; check the sign bit. The GT comparisons with zero take two - one +;; to check the sign bit, and one to check for zero. Hence arrange +;; the expand such that only LT comparison is used for OP1_HI, because +;; OP2_HI is const0_rtx. +;; +;; The LTU comparisons with zero will be removed by subsequent passes. +;; +;; /* LT/LTU/LE/LEU */ +;; if (OP1_hi {noteq_condition (cmp)} OP2_hi) +;; goto label3 /* DI comparison obviously true. */ +;; if (OP1_hi != OP2_hi) +;; goto fallthrough /* DI comparison obviously not true. 
*/ +;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo) +;; goto label3 /* Comparison was deferred to lo parts. */ +;; fallthrough: + +;; /* GT/GTU/GE/GEU */ +;; if (OP1_hi {noteq_condition (reverse_condition (cmp))} OP2_hi) +;; goto fallthrough /* DI comparison obviously not true. */ +;; if (OP1_hi != OP2_hi) +;; goto label3 /* DI comparison obviously true. */ +;; if (OP1_lo {unsigned_condition (cmp)} OP2_lo) +;; goto label3 /* Comparison was deferred to lo parts. */ +;; fallthrough: + +(define_expand "cbranchdi4" + [(set (pc) + (if_then_else + (match_operator 0 "ordered_comparison_operator" + [(match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_ubyte_operand")]) + (label_ref (match_operand 3 "")) + (pc)))] + "" +{ + const enum rtx_code code = GET_CODE (operands[0]); + rtx label3 = operands[3]; + rtx op1_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0); + rtx op1_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4); + rtx op2_lo = simplify_gen_subreg (SImode, operands[2], DImode, 0); + rtx op2_hi = simplify_gen_subreg (SImode, operands[2], DImode, 4); + rtx j; + + if (code == EQ) + { + rtx label_fallthrough = gen_label_rtx (); + rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough); + + rtx cond_hi = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label_fallthrough_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label_fallthrough; + LABEL_NUSES (label_fallthrough)++; + + rtx label3_ref = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (EQ, VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + emit_label (label_fallthrough); + DONE; + } + if (code == NE) + { + rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_hi = 
gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label3_ref1, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (NE, VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref2, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + DONE; + } + + if (code == LT || code == LTU || code == LE || code == LEU) + { + /* Check for "DI comparison obviously true". */ + rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_hi = gen_rtx_fmt_ee (pru_noteq_condition (code), + VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label3_ref1, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + /* Check for "DI comparison obviously not true". */ + rtx label_fallthrough = gen_label_rtx (); + rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough); + rtx cond_hine = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hine = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hine, + label_fallthrough_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hine)); + JUMP_LABEL (j) = label_fallthrough; + LABEL_NUSES (label_fallthrough)++; + + /* Comparison deferred to the lo parts. 
*/ + rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (unsigned_condition (code), + VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref2, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + emit_label (label_fallthrough); + DONE; + } + + if (code == GT || code == GTU || code == GE || code == GEU) + { + /* Check for "DI comparison obviously not true". */ + const enum rtx_code reversed_code = reverse_condition (code); + rtx label_fallthrough = gen_label_rtx (); + rtx label_fallthrough_ref = gen_rtx_LABEL_REF (Pmode, label_fallthrough); + rtx cond_hi = gen_rtx_fmt_ee (pru_noteq_condition (reversed_code), + VOIDmode, op1_hi, op2_hi); + rtx check_hi = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hi, + label_fallthrough_ref, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hi)); + JUMP_LABEL (j) = label_fallthrough; + LABEL_NUSES (label_fallthrough)++; + + /* Check for "DI comparison obviously true". */ + rtx label3_ref1 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_hine = gen_rtx_fmt_ee (NE, VOIDmode, op1_hi, op2_hi); + rtx check_hine = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_hine, + label3_ref1, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_hine)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + /* Comparison deferred to the lo parts. 
*/ + rtx label3_ref2 = gen_rtx_LABEL_REF (Pmode, label3); + rtx cond_lo = gen_rtx_fmt_ee (unsigned_condition (code), + VOIDmode, op1_lo, op2_lo); + rtx check_lo = gen_rtx_IF_THEN_ELSE (VOIDmode, cond_lo, + label3_ref2, pc_rtx); + j = emit_jump_insn (gen_rtx_SET (pc_rtx, check_lo)); + JUMP_LABEL (j) = label3; + LABEL_NUSES (label3)++; + + emit_label (label_fallthrough); + DONE; + } + gcc_unreachable (); +}) + ; ; Bit test branch diff --git a/gcc/testsuite/gcc.target/pru/pr106562-1.c b/gcc/testsuite/gcc.target/pru/pr106562-1.c new file mode 100644 index 00000000000..5bfbe52bb01 --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/pr106562-1.c @@ -0,0 +1,9 @@ +/* { dg-do assemble } */ +/* { dg-options "-Os" } */ +/* { dg-final { object-size text <= 40 } } */ + + +char test(unsigned long long a, unsigned long long b) +{ + return a && b; +} diff --git a/gcc/testsuite/gcc.target/pru/pr106562-2.c b/gcc/testsuite/gcc.target/pru/pr106562-2.c new file mode 100644 index 00000000000..ec5f82ad89f --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/pr106562-2.c @@ -0,0 +1,9 @@ +/* { dg-do assemble } */ +/* { dg-options "-Os" } */ +/* { dg-final { object-size text <= 32 } } */ + + +char test(long long a) +{ + return a > 10; +} diff --git a/gcc/testsuite/gcc.target/pru/pr106562-3.c b/gcc/testsuite/gcc.target/pru/pr106562-3.c new file mode 100644 index 00000000000..d0980581f1d --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/pr106562-3.c @@ -0,0 +1,9 @@ +/* { dg-do assemble } */ +/* { dg-options "-Os" } */ +/* { dg-final { object-size text <= 32 } } */ + + +char test(long long a) +{ + return a < 10; +} diff --git a/gcc/testsuite/gcc.target/pru/pr106562-4.c b/gcc/testsuite/gcc.target/pru/pr106562-4.c new file mode 100644 index 00000000000..b29e426b012 --- /dev/null +++ b/gcc/testsuite/gcc.target/pru/pr106562-4.c @@ -0,0 +1,159 @@ +/* Functional test for DI comparisons. 
*/ + +/* { dg-do run } */ +/* { dg-options "-pedantic-errors" } */ + +/* The default test suite options use "-ansi", which + generates spurious errors by enabling "-Wlong-long". + Thus override the options and drop "-ansi", in order + to freely use 64-bit (long long) types for PRU. */ + +#include <stddef.h> +#include <stdint.h> + +extern void abort (void); + +char __attribute__((noinline)) test_lt (int64_t a, int64_t b) +{ + return a < b; +} + +char __attribute__((noinline)) test_ltu (uint64_t a, uint64_t b) +{ + return a < b; +} + +char __attribute__((noinline)) test_le (int64_t a, int64_t b) +{ + return a <= b; +} + +char __attribute__((noinline)) test_leu (uint64_t a, uint64_t b) +{ + return a <= b; +} + +char __attribute__((noinline)) test_gt (int64_t a, int64_t b) +{ + return a > b; +} + +char __attribute__((noinline)) test_gtu (uint64_t a, uint64_t b) +{ + return a > b; +} + +char __attribute__((noinline)) test_ge (int64_t a, int64_t b) +{ + return a >= b; +} + +char __attribute__((noinline)) test_geu (uint64_t a, uint64_t b) +{ + return a >= b; +} + +char __attribute__((noinline)) test_eq (uint64_t a, uint64_t b) +{ + return a == b; +} + +char __attribute__((noinline)) test_ne (uint64_t a, uint64_t b) +{ + return a != b; +} + +struct test_case { + uint64_t a; + uint64_t b; + char lt; + char ltu; + char le; + char leu; + char gt; + char gtu; + char ge; + char geu; + char eq; + char ne; +}; + +const struct test_case cases[] = { + /* LT,LTU,LE,LEU,GT,GTU,GE,GEU,EQ,NE */ + { 0x1234567800112233ULL, + 0x1234567800112233ULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, + { 0x0000000000000000ULL, + 0x0000000000000000ULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, + { 0xffffffffffffffffULL, + 0xffffffffffffffffULL, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0 }, + + { 0xffffffffffffffefULL, + 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + { 0x8000000000000000ULL, + 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + { 0x80000000ffffffffULL, + 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 
1 }, + { 0x80000000ffffffffULL, + 0xffffffff00000000ULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + { 0xffefffffffffffffULL, + 0xffffffffffffffffULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, + + { 0x0000000000000000ULL, + 0xffffffffffffffffULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, + { 0x0000000000000001ULL, + 0xffffffffffffffffULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, + { 0x0000000000000001ULL, + 0x8000000000000000ULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, + { 0x7fffffffffffffffULL, + 0x8000000000000000ULL, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1 }, + + /* Ensure lo uses unsigned comparison if hi parts are same. */ + { 0x12345678ffffffffULL, + 0x1234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 }, + { 0xf23456780fffffffULL, + 0xf234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 }, + { 0xf2345678ffffffffULL, + 0xf234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 }, + { 0x1234567800000002ULL, + 0x1234567800000001ULL, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1 }, + { 0x1234567800000002ULL, + 0x1234567800000003ULL, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1 }, +}; + +int +main (void) +{ + size_t i; + + for (i = 0; i < (sizeof (cases)/sizeof (cases[0])); i++) + { + const int64_t sa = (int64_t)cases[i].a; + const int64_t sb = (int64_t)cases[i].b; + const uint64_t ua = cases[i].a; + const uint64_t ub = cases[i].b; + + if (cases[i].lt != test_lt (sa, sb)) + abort (); + if (cases[i].ltu != test_ltu (ua, ub)) + abort (); + if (cases[i].le != test_le (sa, sb)) + abort (); + if (cases[i].leu != test_leu (ua, ub)) + abort (); + if (cases[i].gt != test_gt (sa, sb)) + abort (); + if (cases[i].gtu != test_gtu (ua, ub)) + abort (); + if (cases[i].ge != test_ge (sa, sb)) + abort (); + if (cases[i].geu != test_geu (ua, ub)) + abort (); + if (cases[i].eq != test_eq (ua, ub)) + abort (); + if (cases[i].ne != test_ne (ua, ub)) + abort (); + } + + return 0; +} + -- 2.37.3