This kind of transformation seems pretty generic and might be a candidate for adding to the middle-end, perhaps as part of combine.
I noticed that these cases occur more often with LRA, which is why I went down this track of low-hanging-fruit micro-optimizations; they are such an itch once you notice them while inspecting the generated code for libgcc. Unfortunately, this one improves coremark by only a few cycles at the beginning or end (<0.0005%) for cris-elf -march=v10. The size of the coremark code is down by 0.4% (0.22% pre-LRA). An iterator is used from the start because other binary operations will be added, and their define_peephole2's would look exactly the same in the .md part. Some existing AND-peephole2-related tests were affected, because many of them used masks consisting only of contiguous 1s; their values have been adjusted. Also, some scan-assembler strings that were prone to spurious identifier or file-name matches were spotted and fixed by adding a space. gcc: * config/cris/cris.cc (cris_split_constant): New function. * config/cris/cris.md (splitop): New iterator. (opsplit1): New define_peephole2. * config/cris/cris-protos.h (cris_split_constant): Declare. (cris_splittable_constant_p): New macro. gcc/testsuite: * gcc.target/cris/peep2-andsplit1.c: New test. * gcc.target/cris/peep2-andu1.c, gcc.target/cris/peep2-andu2.c, gcc.target/cris/peep2-xsrand.c, gcc.target/cris/peep2-xsrand2.c: Adjust values to avoid interference with "opsplit1" with AND. Add whitespace to match-strings that may be confused with identifiers or file names. 
--- gcc/config/cris/cris-protos.h | 6 ++ gcc/config/cris/cris.cc | 78 +++++++++++++++++++ gcc/config/cris/cris.md | 26 +++++++ .../gcc.target/cris/peep2-andsplit1.c | 25 ++++++ gcc/testsuite/gcc.target/cris/peep2-andu1.c | 4 +- gcc/testsuite/gcc.target/cris/peep2-andu2.c | 6 +- gcc/testsuite/gcc.target/cris/peep2-xsrand.c | 6 +- gcc/testsuite/gcc.target/cris/peep2-xsrand2.c | 6 +- 8 files changed, 146 insertions(+), 11 deletions(-) create mode 100644 gcc/testsuite/gcc.target/cris/peep2-andsplit1.c diff --git a/gcc/config/cris/cris-protos.h b/gcc/config/cris/cris-protos.h index de9eacbae2aa..666e04f9eeec 100644 --- a/gcc/config/cris/cris-protos.h +++ b/gcc/config/cris/cris-protos.h @@ -44,6 +44,12 @@ extern rtx cris_emit_movem_store (rtx, rtx, int, bool); extern rtx_insn *cris_emit_insn (rtx x); extern void cris_order_for_addsi3 (rtx *, int); extern void cris_emit_trap_for_misalignment (rtx); +extern int cris_split_constant (HOST_WIDE_INT, enum rtx_code, + machine_mode, bool, + bool generate = false, + rtx dest = NULL_RTX, + rtx op = NULL_RTX); +#define cris_splittable_constant_p cris_split_constant #endif /* RTX_CODE */ extern void cris_asm_output_label_ref (FILE *, char *); extern void cris_asm_output_ident (const char *); diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc index 05dead9c0778..331f5908a538 100644 --- a/gcc/config/cris/cris.cc +++ b/gcc/config/cris/cris.cc @@ -2626,6 +2626,84 @@ cris_split_movdx (rtx *operands) return val; } +/* Try to split the constant WVAL into a number of separate insns of less cost + for the rtx operation CODE and the metric SPEED than using val as-is. + Generate those insns if GENERATE. DEST holds the destination, and OP holds + the other operand for binary operations; NULL when CODE is SET. Return the + number of insns for the operation or 0 if the constant can't be usefully + split (because it's already minimal or is not within range for the known + methods). Parts stolen from arm.cc. 
*/ + +int +cris_split_constant (HOST_WIDE_INT wval, enum rtx_code code, + machine_mode mode, bool speed ATTRIBUTE_UNUSED, + bool generate, rtx dest, rtx op) +{ + int32_t ival = (int32_t) wval; + uint32_t uval = (uint32_t) wval; + + if (code != AND || IN_RANGE (ival, -32, 31) + /* Implemented using movu.[bw] elsewhere. */ + || ival == 255 || ival == 65535 + /* Implemented using clear.[bw] elsewhere. */ + || uval == 0xffffff00 || uval == 0xffff0000) + return 0; + + int i; + + int msb_zeros = 0; + int lsb_zeros = 0; + + /* Count number of leading zeros; the probe bit is unsigned so bit 31 is never shifted into a sign bit. */ + for (i = 31; i >= 0; i--) + { + if ((uval & (1U << i)) == 0) + msb_zeros++; + else + break; + } + + /* Count number of trailing zeros. */ + for (i = 0; i <= 31; i++) + { + if ((uval & (1U << i)) == 0) + lsb_zeros++; + else + break; + } + + /* Is there a lowest or highest part that is zero (but not both) + and the non-zero part is just ones? */ + if (exact_log2 ((uval >> lsb_zeros) + 1) > 0 + && (lsb_zeros != 0) != (msb_zeros != 0)) + { + /* If so, we can shift OP in the zero direction into DEST, then shift DEST back, so DEST and OP need not be the same register. We don't + nominally win anything for uval < 256, except that the insns are split + into slottable insns so it's always beneficial. */ + if (generate) + { + if (mode != SImode) + { + dest = gen_rtx_REG (SImode, REGNO (dest)); + op = gen_rtx_REG (SImode, REGNO (op)); + } + if (msb_zeros) + { + emit_insn (gen_ashlsi3 (dest, op, GEN_INT (msb_zeros))); + emit_insn (gen_lshrsi3 (dest, dest, GEN_INT (msb_zeros))); + } + else + { + emit_insn (gen_lshrsi3 (dest, op, GEN_INT (lsb_zeros))); + emit_insn (gen_ashlsi3 (dest, dest, GEN_INT (lsb_zeros))); + } + } + return 2; + } + + return 0; +} + /* Try to change a comparison against a constant to be against zero, and an unsigned compare against zero to be an equality test. Beware: only valid for compares of integer-type operands. 
Also, note that we diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md index 366b4bc304bf..e72943b942e5 100644 --- a/gcc/config/cris/cris.md +++ b/gcc/config/cris/cris.md @@ -208,6 +208,9 @@ (define_code_iterator plusminusumin [plus minus umin]) ;; Ditto, commutative operators (i.e. not minus). (define_code_iterator plusumin [plus umin]) +;; For opsplit1. +(define_code_iterator splitop [and]) + ;; The addsubbo and nd code-attributes form a hack. We need to output ;; "addu.b", "subu.b" but "bound.b" (no "u"-suffix) which means we'd ;; need to refer to one iterator from the next. But, that can't be @@ -2888,6 +2891,29 @@ (define_peephole2 ; andqu operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]), QImode)); }) +;; Large (read: non-quick) numbers can sometimes be AND:ed by other means. +;; Testcase: gcc.target/cris/peep2-andsplit1.c +(define_peephole2 ; opsplit1 + [(parallel + [(set (match_operand 0 "register_operand") + (splitop + (match_operand 1 "register_operand") + (match_operand 2 "const_int_operand"))) + (clobber (reg:CC CRIS_CC0_REGNUM))])] + ;; Operands 0 and 1 can be separate identical objects, at least + ;; after matching peepholes above. */ + "REGNO (operands[0]) == REGNO (operands[1]) + && cris_splittable_constant_p (INTVAL (operands[2]), <CODE>, + GET_MODE (operands[0]), + optimize_function_for_speed_p (cfun))" + [(const_int 0)] +{ + cris_split_constant (INTVAL (operands[2]), <CODE>, GET_MODE (operands[0]), + optimize_function_for_speed_p (cfun), + true, operands[0], operands[0]); + DONE; +}) + ;; Fix a decomposed szext: fuse it with the memory operand of the ;; load. This is typically the sign-extension part of a decomposed ;; "indirect offset" address. 
diff --git a/gcc/testsuite/gcc.target/cris/peep2-andsplit1.c b/gcc/testsuite/gcc.target/cris/peep2-andsplit1.c new file mode 100644 index 000000000000..18b5cb8b17b1 --- /dev/null +++ b/gcc/testsuite/gcc.target/cris/peep2-andsplit1.c @@ -0,0 +1,25 @@ +/* Check that "opsplit1" with AND does its job. */ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int al0 (int x) +{ + return x & 0x7fffffff; +} + +int alN (int x) +{ + return x & 63; +} + +int ar0 (int x) +{ + return x & (-32*2); +} + +int arN (int x) +{ + return x & 0x80000000; +} + +/* { dg-final { scan-assembler-not "\[ \t\]and" } } */ diff --git a/gcc/testsuite/gcc.target/cris/peep2-andu1.c b/gcc/testsuite/gcc.target/cris/peep2-andu1.c index 3b54c3295860..ab307b4fec5e 100644 --- a/gcc/testsuite/gcc.target/cris/peep2-andu1.c +++ b/gcc/testsuite/gcc.target/cris/peep2-andu1.c @@ -20,13 +20,13 @@ clearb (int x, int *y) int andb (int x, int *y) { - return *y & 0x3f; + return *y & 0x3d; } int andw (int x, int *y) { - return *y & 0xfff; + return *y & 0xffd; } int diff --git a/gcc/testsuite/gcc.target/cris/peep2-andu2.c b/gcc/testsuite/gcc.target/cris/peep2-andu2.c index fd19cdd906a9..f16f28861ac1 100644 --- a/gcc/testsuite/gcc.target/cris/peep2-andu2.c +++ b/gcc/testsuite/gcc.target/cris/peep2-andu2.c @@ -1,6 +1,6 @@ /* { dg-do assemble } */ -/* { dg-final { scan-assembler "movu.w \\\$r10,\\\$|movu.w 2047," } } */ -/* { dg-final { scan-assembler "and.w 2047,\\\$|and.d \\\$r10," } } */ +/* { dg-final { scan-assembler "movu.w \\\$r10,\\\$|movu.w 2045," } } */ +/* { dg-final { scan-assembler "and.w 2045,\\\$|and.d \\\$r10," } } */ /* { dg-final { scan-assembler-not "move.d \\\$r10,\\\$" } } */ /* { dg-final { scan-assembler "movu.b \\\$r10,\\\$|movu.b 95," } } */ /* { dg-final { scan-assembler "and.b 95,\\\$|and.d \\\$r10," } } */ @@ -19,7 +19,7 @@ unsigned int and_peep2_hi (unsigned int y, unsigned int *x) { - *x = y & 0x7ff; + *x = y & 0x7fd; return y; } diff --git a/gcc/testsuite/gcc.target/cris/peep2-xsrand.c 
b/gcc/testsuite/gcc.target/cris/peep2-xsrand.c index df0e76886b47..9531f0a10ffe 100644 --- a/gcc/testsuite/gcc.target/cris/peep2-xsrand.c +++ b/gcc/testsuite/gcc.target/cris/peep2-xsrand.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-final { scan-assembler "and.w " } } */ /* { dg-final { scan-assembler "and.b " } } */ -/* { dg-final { scan-assembler-not "and.d" } } */ +/* { dg-final { scan-assembler-not "and.d " } } */ /* { dg-options "-O2" } */ /* Test the "asrandb", "asrandw", "lsrandb" and "lsrandw" peephole2:s @@ -10,7 +10,7 @@ unsigned int andwlsr (unsigned int x) { - return (x >> 17) & 0x7ff; + return (x >> 17) & 0x7fd; } unsigned int @@ -22,7 +22,7 @@ andblsr (unsigned int x) int andwasr (int x) { - return (x >> 17) & 0x7ff; + return (x >> 17) & 0x7fd; } int diff --git a/gcc/testsuite/gcc.target/cris/peep2-xsrand2.c b/gcc/testsuite/gcc.target/cris/peep2-xsrand2.c index 5d6ca788d73a..12f26dfb0fc0 100644 --- a/gcc/testsuite/gcc.target/cris/peep2-xsrand2.c +++ b/gcc/testsuite/gcc.target/cris/peep2-xsrand2.c @@ -1,9 +1,9 @@ /* { dg-do compile } */ /* { dg-final { scan-assembler "and.w -137," } } */ -/* { dg-final { scan-assembler "and.b -64," } } */ +/* { dg-final { scan-assembler "and.b -62," } } */ /* { dg-final { scan-assembler "and.w -139," } } */ /* { dg-final { scan-assembler "and.b -63," } } */ -/* { dg-final { scan-assembler-not "and.d" } } */ +/* { dg-final { scan-assembler-not "and.d " } } */ /* { dg-options "-O2" } */ /* PR target/17984. Test-case based on @@ -18,7 +18,7 @@ andwlsr (unsigned int x) unsigned int andblsr (unsigned int x) { - return (x >> 24) & 0xc0; + return (x >> 24) & 0xc2; } int -- 2.30.2