This patch improves code generation for the PRU backend. Committed to trunk.
If the number of shift positions is a constant, then the DI shift
operation is expanded to a sequence of 2 to 4 machine instructions.
That is more efficient than the default action to call libgcc.

gcc/ChangeLog:

	* config/pru/pru.md (lshrdi3): New expand pattern.
	(ashldi3): Ditto.

gcc/testsuite/ChangeLog:

	* gcc.target/pru/ashiftdi-1.c: New test.
	* gcc.target/pru/lshiftrtdi-1.c: New test.

Signed-off-by: Dimitar Dimitrov <dimi...@dinux.eu>
---
 gcc/config/pru/pru.md                       | 196 ++++++++++++++++++++
 gcc/testsuite/gcc.target/pru/ashiftdi-1.c   |  53 ++++++
 gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c |  53 ++++++
 3 files changed, 302 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/pru/ashiftdi-1.c
 create mode 100644 gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c

diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md
index 144cd35d809..53ffff07708 100644
--- a/gcc/config/pru/pru.md
+++ b/gcc/config/pru/pru.md
@@ -703,6 +703,202 @@ (define_insn "ashr<mode>3_single"
   [(set_attr "type" "alu")
    (set_attr "length" "12")])
+
+; 64-bit LSHIFTRT with a constant shift count can be expanded into
+; more efficient code sequence than a variable register shift.
+;
+; 1. For shift >= 32:
+;    dst_lo = (src_hi >> (shift - 32))
+;    dst_hi = 0
+;
+; 2. For shift==1 there is no need for a temporary:
+;    dst_lo = (src_lo >> 1)
+;    if (src_hi & 1)
+;        dst_lo |= (1 << 31)
+;    dst_hi = (src_hi >> 1)
+;
+; 3. For shift < 32:
+;    dst_lo = (src_lo >> shift)
+;    tmp = (src_hi << (32 - shift))
+;    dst_lo |= tmp
+;    dst_hi = (src_hi >> shift)
+;
+; 4. For shift in a register:
+;    Fall back to calling libgcc.
+(define_expand "lshrdi3"
+  [(set (match_operand:DI 0 "register_operand")
+        (lshiftrt:DI
+          (match_operand:DI 1 "register_operand")
+          (match_operand:QI 2 "const_int_operand")))]
+  ""
+{
+  gcc_assert (CONST_INT_P (operands[2]));
+  /* Split both DI operands into SImode halves: offset 0 = lo, 4 = hi.  */
+  const int nshifts = INTVAL (operands[2]);
+  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+  /* Count of 32 or more: only SRC_HI contributes; DST_HI becomes zero.  */
+  if (nshifts >= 32)
+    {
+      emit_insn (gen_rtx_SET (dst_lo,
+                              gen_rtx_LSHIFTRT (SImode,
+                                                src_hi,
+                                                GEN_INT (nshifts - 32))));
+      emit_insn (gen_rtx_SET (dst_hi, const0_rtx));
+      DONE;
+    }
+  /* The remaining cases may allocate new pseudo registers.  */
+  gcc_assert (can_create_pseudo_p ());
+
+  /* The expansions which follow are safe only if DST_LO and SRC_HI
+     do not overlap.  If they do, then fix by using a temporary register.
+     Overlapping of DST_HI and SRC_LO is safe because by the time DST_HI
+     is set, SRC_LO is no longer live.  */
+  if (reg_overlap_mentioned_p (dst_lo, src_hi))
+    {
+      rtx new_src_hi = gen_reg_rtx (SImode);
+
+      emit_move_insn (new_src_hi, src_hi);
+      src_hi = new_src_hi;
+    }
+  /* Single-bit shift: carry bit 0 of SRC_HI into bit 31 of DST_LO.  */
+  if (nshifts == 1)
+    {
+      rtx_code_label *skip_hiset_label;
+      rtx j;
+
+      emit_insn (gen_rtx_SET (dst_lo,
+                              gen_rtx_LSHIFTRT (SImode, src_lo, const1_rtx)));
+      /* Presumably branches when bit 0 of SRC_HI is clear -- confirm.  */
+      /* The code generated by `genemit' would create a LABEL_REF.  */
+      skip_hiset_label = gen_label_rtx ();
+      j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
+                                                  SImode,
+                                                  src_hi,
+                                                  GEN_INT (0),
+                                                  skip_hiset_label));
+      JUMP_LABEL (j) = skip_hiset_label;
+      LABEL_NUSES (skip_hiset_label)++;
+      /* Fall-through path only: set bit 31 of DST_LO.  */
+      emit_insn (gen_iorsi3 (dst_lo, dst_lo, GEN_INT (1 << 31)));
+      emit_label (skip_hiset_label);
+      emit_insn (gen_rtx_SET (dst_hi,
+                              gen_rtx_LSHIFTRT (SImode, src_hi, const1_rtx)));
+      DONE;
+    }
+  /* NOTE(review): a count of 0 would make the ASHIFT below use 32.  */
+  if (nshifts < 32)
+    {
+      rtx tmpval = gen_reg_rtx (SImode);
+      /* TMPVAL collects the SRC_HI bits that cross into DST_LO.  */
+      emit_insn (gen_rtx_SET (dst_lo,
+                              gen_rtx_LSHIFTRT (SImode,
+                                                src_lo,
+                                                GEN_INT (nshifts))));
+      emit_insn (gen_rtx_SET (tmpval,
+                              gen_rtx_ASHIFT (SImode,
+                                              src_hi,
+                                              GEN_INT (32 - nshifts))));
+      emit_insn (gen_iorsi3 (dst_lo, dst_lo, tmpval));
+      emit_insn (gen_rtx_SET (dst_hi,
+                              gen_rtx_LSHIFTRT (SImode,
+                                                src_hi,
+                                                GEN_INT (nshifts))));
+      DONE;
+    }
+  gcc_unreachable ();
+})
+
+; 64-bit ASHIFT with a constant shift count can be expanded into
+; more efficient code sequence than the libgcc call required by
+; a variable shift in a register.
+
+(define_expand "ashldi3"
+  [(set (match_operand:DI 0 "register_operand")
+        (ashift:DI
+          (match_operand:DI 1 "register_operand")
+          (match_operand:QI 2 "const_int_operand")))]
+  ""
+{
+  gcc_assert (CONST_INT_P (operands[2]));
+  /* Split both DI operands into SImode halves: offset 0 = lo, 4 = hi.  */
+  const int nshifts = INTVAL (operands[2]);
+  rtx dst_lo = simplify_gen_subreg (SImode, operands[0], DImode, 0);
+  rtx dst_hi = simplify_gen_subreg (SImode, operands[0], DImode, 4);
+  rtx src_lo = simplify_gen_subreg (SImode, operands[1], DImode, 0);
+  rtx src_hi = simplify_gen_subreg (SImode, operands[1], DImode, 4);
+  /* Count of 32 or more: only SRC_LO contributes; DST_LO becomes zero.  */
+  if (nshifts >= 32)
+    {
+      emit_insn (gen_rtx_SET (dst_hi,
+                              gen_rtx_ASHIFT (SImode,
+                                              src_lo,
+                                              GEN_INT (nshifts - 32))));
+      emit_insn (gen_rtx_SET (dst_lo, const0_rtx));
+      DONE;
+    }
+  /* The remaining cases may allocate new pseudo registers.  */
+  gcc_assert (can_create_pseudo_p ());
+
+  /* The expansions which follow are safe only if DST_HI and SRC_LO
+     do not overlap.  If they do, then fix by using a temporary register.
+     Overlapping of DST_LO and SRC_HI is safe because by the time DST_LO
+     is set, SRC_HI is no longer live.  */
+  if (reg_overlap_mentioned_p (dst_hi, src_lo))
+    {
+      rtx new_src_lo = gen_reg_rtx (SImode);
+
+      emit_move_insn (new_src_lo, src_lo);
+      src_lo = new_src_lo;
+    }
+  /* Single-bit shift: carry bit 31 of SRC_LO into bit 0 of DST_HI.  */
+  if (nshifts == 1)
+    {
+      rtx_code_label *skip_hiset_label;
+      rtx j;
+
+      emit_insn (gen_rtx_SET (dst_hi,
+                              gen_rtx_ASHIFT (SImode, src_hi, const1_rtx)));
+      /* Presumably branches when bit 31 of SRC_LO is clear -- confirm.  */
+      skip_hiset_label = gen_label_rtx ();
+      j = emit_jump_insn (gen_cbranch_qbbx_const (EQ,
+                                                  SImode,
+                                                  src_lo,
+                                                  GEN_INT (31),
+                                                  skip_hiset_label));
+      JUMP_LABEL (j) = skip_hiset_label;
+      LABEL_NUSES (skip_hiset_label)++;
+      /* Fall-through path only: set bit 0 of DST_HI.  */
+      emit_insn (gen_iorsi3 (dst_hi, dst_hi, GEN_INT (1 << 0)));
+      emit_label (skip_hiset_label);
+      emit_insn (gen_rtx_SET (dst_lo,
+                              gen_rtx_ASHIFT (SImode, src_lo, const1_rtx)));
+      DONE;
+    }
+  /* NOTE(review): a count of 0 would make the LSHIFTRT below use 32.  */
+  if (nshifts < 32)
+    {
+      rtx tmpval = gen_reg_rtx (SImode);
+      /* TMPVAL collects the SRC_LO bits that cross into DST_HI.  */
+      emit_insn (gen_rtx_SET (dst_hi,
+                              gen_rtx_ASHIFT (SImode,
+                                              src_hi,
+                                              GEN_INT (nshifts))));
+      emit_insn (gen_rtx_SET (tmpval,
+                              gen_rtx_LSHIFTRT (SImode,
+                                                src_lo,
+                                                GEN_INT (32 - nshifts))));
+      emit_insn (gen_iorsi3 (dst_hi, dst_hi, tmpval));
+      emit_insn (gen_rtx_SET (dst_lo,
+                              gen_rtx_ASHIFT (SImode,
+                                              src_lo,
+                                              GEN_INT (nshifts))));
+      DONE;
+    }
+  gcc_unreachable ();
+})
 ;; Include ALU patterns with zero-extension of operands.  That's where
 ;; the real insns are defined.
diff --git a/gcc/testsuite/gcc.target/pru/ashiftdi-1.c b/gcc/testsuite/gcc.target/pru/ashiftdi-1.c
new file mode 100644
index 00000000000..516e5a86102
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/ashiftdi-1.c
@@ -0,0 +1,53 @@
+/* Functional test for DI left shift.
*/
+
+/* { dg-do run } */
+/* { dg-options "-pedantic-errors" } */
+
+#include <stddef.h>
+#include <stdint.h>
+
+extern void abort (void);
+/* Count 1: presumably meant to exercise the single-bit expansion.  */
+uint64_t __attribute__((noinline)) ashift_1 (uint64_t a)
+{
+  return a << 1;
+}
+/* Count 10: presumably meant to exercise the generic count < 32 case.  */
+uint64_t __attribute__((noinline)) ashift_10 (uint64_t a)
+{
+  return a << 10;
+}
+/* Count 32: the smallest count handled by the count >= 32 case.  */
+uint64_t __attribute__((noinline)) ashift_32 (uint64_t a)
+{
+  return a << 32;
+}
+/* Count 36: a count >= 32 that leaves a nonzero residual shift of 4.  */
+uint64_t __attribute__((noinline)) ashift_36 (uint64_t a)
+{
+  return a << 36;
+}
+/* Compare each shift with a precomputed constant; abort on mismatch.  */
+int
+main (int argc, char** argv)
+{
+  if (ashift_1 (0xaaaa5555aaaa5555ull) != 0x5554aaab5554aaaaull)
+    abort();
+  if (ashift_10 (0xaaaa5555aaaa5555ull) != 0xa95556aaa9555400ull)
+    abort();
+  if (ashift_32 (0xaaaa5555aaaa5555ull) != 0xaaaa555500000000ull)
+    abort();
+  if (ashift_36 (0xaaaa5555aaaa5555ull) != 0xaaa5555000000000ull)
+    abort();
+  /* Same four shift counts applied to a second input value.  */
+  if (ashift_1 (0x1234567822334455ull) != 0x2468acf0446688aaull)
+    abort();
+  if (ashift_10 (0x1234567822334455ull) != 0xd159e088cd115400ull)
+    abort();
+  if (ashift_32 (0x1234567822334455ull) != 0x2233445500000000ull)
+    abort();
+  if (ashift_36 (0x1234567822334455ull) != 0x2334455000000000ull)
+    abort();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c b/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c
new file mode 100644
index 00000000000..7adae6ccc13
--- /dev/null
+++ b/gcc/testsuite/gcc.target/pru/lshiftrtdi-1.c
@@ -0,0 +1,53 @@
+/* Functional test for DI right shift.
*/
+
+/* { dg-do run } */
+/* { dg-options "-pedantic-errors" } */
+
+#include <stddef.h>
+#include <stdint.h>
+
+extern void abort (void);
+/* Count 1: presumably meant to exercise the single-bit expansion.  */
+uint64_t __attribute__((noinline)) lshift_1 (uint64_t a)
+{
+  return a >> 1;
+}
+/* Count 10: presumably meant to exercise the generic count < 32 case.  */
+uint64_t __attribute__((noinline)) lshift_10 (uint64_t a)
+{
+  return a >> 10;
+}
+/* Count 32: the smallest count handled by the count >= 32 case.  */
+uint64_t __attribute__((noinline)) lshift_32 (uint64_t a)
+{
+  return a >> 32;
+}
+/* Count 36: a count >= 32 that leaves a nonzero residual shift of 4.  */
+uint64_t __attribute__((noinline)) lshift_36 (uint64_t a)
+{
+  return a >> 36;
+}
+/* Compare each shift with a precomputed constant; abort on mismatch.  */
+int
+main (int argc, char** argv)
+{
+  if (lshift_1 (0xaaaa5555aaaa5555ull) != 0x55552aaad5552aaaull)
+    abort();
+  if (lshift_10 (0xaaaa5555aaaa5555ull) != 0x002aaa95556aaa95ull)
+    abort();
+  if (lshift_32 (0xaaaa5555aaaa5555ull) != 0x00000000aaaa5555ull)
+    abort();
+  if (lshift_36 (0xaaaa5555aaaa5555ull) != 0x000000000aaaa555ull)
+    abort();
+  /* Same four shift counts applied to a second input value.  */
+  if (lshift_1 (0x1234567822334455ull) != 0x091a2b3c1119a22aull)
+    abort();
+  if (lshift_10 (0x1234567822334455ull) != 0x00048d159e088cd1ull)
+    abort();
+  if (lshift_32 (0x1234567822334455ull) != 0x0000000012345678ull)
+    abort();
+  if (lshift_36 (0x1234567822334455ull) != 0x0000000001234567ull)
+    abort();
+
+  return 0;
+}
-- 
2.37.3