Hi all, This patch adds an optimisation that exploits the AArch64 BFXIL instruction when or-ing the result of two bitwise and operations with non-overlapping bitmasks (e.g. (a & 0xFFFF0000) | (b & 0x0000FFFF)).
Example:

unsigned long long
combine (unsigned long long a, unsigned long long b)
{
  return (a & 0xffffffff00000000ll) | (b & 0x00000000ffffffffll);
}

void
read2 (unsigned long long a, unsigned long long b, unsigned long long *c,
       unsigned long long *d)
{
  *c = combine (a, b);
  *d = combine (b, a);
}

When compiled with -O2, read2 would result in:

read2:
	and	x5, x1, #0xffffffff
	and	x4, x0, #0xffffffff00000000
	orr	x4, x4, x5
	and	x1, x1, #0xffffffff00000000
	and	x0, x0, #0xffffffff
	str	x4, [x2]
	orr	x0, x0, x1
	str	x0, [x3]
	ret

But with this patch results in:

read2:
	mov	x4, x0
	bfxil	x4, x1, 0, 32
	str	x4, [x2]
	bfxil	x1, x0, 0, 32
	str	x1, [x3]
	ret

(Note that BFXIL keeps the destination's high bits and inserts the source's
low bits, so the destination must be the register masked with the
left-consecutive bitmask.)

Bootstrapped and regtested on aarch64-none-linux-gnu and aarch64-none-elf with no regressions.

gcc/
2018-07-11  Sam Tebbs  <sam.te...@arm.com>

	* config/aarch64/aarch64.md (*aarch64_bfxil, *aarch64_bfxil_alt):
	Define.
	* config/aarch64/aarch64-protos.h (aarch64_is_left_consecutive):
	Declare.
	* config/aarch64/aarch64.c (aarch64_is_left_consecutive): New function.

gcc/testsuite/
2018-07-11  Sam Tebbs  <sam.te...@arm.com>

	* gcc.target/aarch64/combine_bfxil.c: New test.
	* gcc.target/aarch64/combine_bfxil_2.c: New test.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 514ddc4..b025cd6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -558,4 +558,6 @@ rtl_opt_pass *make_pass_fma_steering (gcc::context *ctxt);
 
 poly_uint64 aarch64_regmode_natural_size (machine_mode);
 
+bool aarch64_is_left_consecutive (HOST_WIDE_INT);
+
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d75d45f..884958b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1439,6 +1439,18 @@ aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned,
     return SImode;
 }
 
+/* Return true if I is a nonzero value whose set bits form a single
+   consecutive run starting at the most-significant bit, i.e. I looks
+   like 0b11...100...0 (all-ones included).  The arithmetic is done in
+   unsigned HOST_WIDE_INT so that I - 1 cannot overflow when I is the
+   minimum value.  Previously (I | (I - 1)) == HOST_WIDE_INT_M1 was
+   also true for I == 0, which is not a valid mask here.  */
+bool
+aarch64_is_left_consecutive (HOST_WIDE_INT i)
+{
+  unsigned HOST_WIDE_INT x = i;
+  return x != 0 && (x | (x - 1)) == HOST_WIDE_INT_M1U;
+}
+
 /* Implement TARGET_CONSTANT_ALIGNMENT.  Make strings word-aligned so
    that strcpy from constants will be faster.
*/ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index a014a01..383d699 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4844,6 +4844,42 @@ [(set_attr "type" "rev")] ) +(define_insn "*aarch64_bfxil" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (and:DI (match_operand:DI 1 "register_operand" "r") + (match_operand 3 "const_int_operand")) + (and:DI (match_operand:DI 2 "register_operand" "0") + (match_operand 4 "const_int_operand"))))] + "INTVAL (operands[3]) == ~INTVAL (operands[4]) + && aarch64_is_left_consecutive (INTVAL (operands[3]))" + { + HOST_WIDE_INT op4 = INTVAL (operands[4]); + operands[3] = GEN_INT (64 - ceil_log2 (op4)); + output_asm_insn ("bfxil\\t%0, %1, 0, %3", operands); + return ""; + } + [(set_attr "type" "bfx")] +) + +; An alternate bfxil pattern where the second bitmask is the smallest, and so +; the first register used is changed instead of the second +(define_insn "*aarch64_bfxil_alt" + [(set (match_operand:DI 0 "register_operand" "=r") + (ior:DI (and:DI (match_operand:DI 1 "register_operand" "0") + (match_operand 3 "const_int_operand")) + (and:DI (match_operand:DI 2 "register_operand" "r") + (match_operand 4 "const_int_operand"))))] + "INTVAL (operands[3]) == ~INTVAL (operands[4]) + && aarch64_is_left_consecutive (INTVAL (operands[4]))" + { + HOST_WIDE_INT op3 = INTVAL (operands[3]); + operands[3] = GEN_INT (64 - ceil_log2 (op3)); + output_asm_insn ("bfxil\\t%0, %2, 0, %3", operands); + return ""; + } + [(set_attr "type" "bfx")] +) + ;; There are no canonicalisation rules for the position of the lshiftrt, ashift ;; operations within an IOR/AND RTX, therefore we have two patterns matching ;; each valid permutation. 
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
new file mode 100644
index 0000000..a0c6be4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/combine_bfxil.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 --save-temps -fno-inline" } */
+
+extern void abort (void);
+
+unsigned long long
+combine_balanced (unsigned long long a, unsigned long long b)
+{
+  return (a & 0xffffffff00000000ll) | (b & 0x00000000ffffffffll);
+}
+
+unsigned long long
+combine_unbalanced (unsigned long long a, unsigned long long b)
+{
+  return (a & 0xffffffffff000000ll) | (b & 0x0000000000ffffffll);
+}
+
+void
+foo2 (unsigned long long a, unsigned long long b, unsigned long long *c,
+      unsigned long long *d)
+{
+  *c = combine_balanced (a, b);
+  *d = combine_balanced (b, a);
+}
+
+void
+foo3 (unsigned long long a, unsigned long long b, unsigned long long *c,
+      unsigned long long *d)
+{
+  *c = combine_unbalanced (a, b);
+  *d = combine_unbalanced (b, a);
+}
+
+/* Executing the combinations (rather than only counting BFXILs in the
+   assembly) catches an insertion taken from the wrong register.  */
+int
+main (void)
+{
+  unsigned long long a = 0x0123456789abcdefULL;
+  unsigned long long b = 0xfedcba9876543210ULL;
+  unsigned long long c, d;
+
+  foo2 (a, b, &c, &d);
+  if (c != 0x0123456776543210ULL || d != 0xfedcba9889abcdefULL)
+    abort ();
+  foo3 (a, b, &c, &d);
+  if (c != 0x0123456789543210ULL || d != 0xfedcba9876abcdefULL)
+    abort ();
+  return 0;
+}
+
+/* { dg-final { scan-assembler-times "bfxil\\t" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/combine_bfxil_2.c b/gcc/testsuite/gcc.target/aarch64/combine_bfxil_2.c
new file mode 100644
index 0000000..8237d94
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/combine_bfxil_2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+unsigned long long
+combine_non_consecutive (unsigned long long a, unsigned long long b)
+{
+  return (a & 0xfffffff200f00000ll) | (b & 0x00001000ffffffffll);
+}
+
+void
+foo4 (unsigned long long a, unsigned long long b, unsigned long long *c,
+      unsigned long long *d) {
+  /* { dg-final { scan-assembler-not "bfxil\\t" } } */
+  *c = combine_non_consecutive (a, b);
+  *d = combine_non_consecutive (b, a);
+}