Re: [PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
On Tue, Jan 27, 2015 at 02:31:14PM +0000, Jiong Wang wrote: testcase changed to execution version, and moved to gcc.dg. the compile time only take several seconds. (previously I am using cc1 built by O0 which at most take 24s) ok to install? Ok for the testcase. The config/aarch64/ bits I'll defer to aarch64 maintainers. 2015-01-19 Ramana Radhakrishnan ramana.radhakrish...@arm.com Jiong Wang jiong.w...@arm.com gcc/ * config/aarch64/aarch64.md (tb<optab><mode>1): Clobber CC reg instead of scratch reg. (cb<optab><mode>1): Likewise. * config/aarch64/iterators.md (bcond): New define_code_attr. gcc/testsuite/ * gcc.dg/long_branch.c: New testcase. Jakub
Re: [PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
On 27 January 2015 at 14:31, Jiong Wang jiong.w...@arm.com wrote: 2015-01-19 Ramana Radhakrishnan ramana.radhakrish...@arm.com Jiong Wang jiong.w...@arm.com gcc/ * config/aarch64/aarch64.md (tboptabmode1): Clobber CC reg instead of scratch reg. (cboptabmode1): Likewise. * config/aarch64/iterators.md (bcond): New define_code_attr. OK /Marcus gcc/testsuite/ * gcc.dg/long_branch.c: New testcase.
Re: [PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
On 19/01/15 10:58, Jakub Jelinek wrote: On Mon, Jan 19, 2015 at 10:52:14AM +, Ramana Radhakrishnan wrote: What is aarch64 specific on the testcase? The number of if-then-else's required to get the compiler to generate cmp branch sequences rather than the tbnz instruction. That doesn't mean the same testcase couldn't be tested on other targets and perhaps find bugs in there. That said, if the testcase is too expensive to compile (several seconds is ok, minutes is not), then perhaps it shouldn't be included at all, or should be guarded with run_expensive_tests target. Jakub testcase changed to execution version, and moved to gcc.dg. the compile time only take several seconds. (previously I am using cc1 built by O0 which at most take 24s) ok to install? Thanks. 2015-01-19 Ramana Radhakrishnan ramana.radhakrish...@arm.com Jiong Wang jiong.w...@arm.com gcc/ * config/aarch64/aarch64.md (tboptabmode1): Clobber CC reg instead of scratch reg. (cboptabmode1): Likewise. * config/aarch64/iterators.md (bcond): New define_code_attr. gcc/testsuite/ * gcc.dg/long_branch.c: New testcase. 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 597ff8c..1e00396 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -466,13 +466,17 @@ (const_int 0)) (label_ref (match_operand 2 )) (pc))) - (clobber (match_scratch:DI 3 =r))] + (clobber (reg:CC CC_REGNUM))] - * - if (get_attr_length (insn) == 8) -return \ubfx\\t%w3, %w0, %1, #1\;cbz\\t%w3, %l2\; - return \tbz\\t%w0, %1, %l2\; - + { +if (get_attr_length (insn) == 8) + { + operands[1] = GEN_INT (HOST_WIDE_INT_1U UINTVAL (operands[1])); + return tst\t%w0, %1\;bcond\t%l2; + } +else + return tbz\t%w0, %1, %l2; + } [(set_attr type branch) (set (attr length) (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768)) @@ -486,13 +490,21 @@ (const_int 0)) (label_ref (match_operand 1 )) (pc))) - (clobber (match_scratch:DI 2 =r))] + (clobber (reg:CC CC_REGNUM))] - * - if (get_attr_length (insn) == 8) -return \ubfx\\t%w2, %w0, sizem1, #1\;cbz\\t%w2, %l1\; - return \tbz\\t%w0, sizem1, %l1\; - + { +if (get_attr_length (insn) == 8) + { + char buf[64]; + uint64_t val = ((uint64_t ) 1) + (GET_MODE_SIZE (MODEmode) * BITS_PER_UNIT - 1); + sprintf (buf, tst\t%%w0, %PRId64, val); + output_asm_insn (buf, operands); + return bcond\t%l1; + } +else + return tbz\t%w0, sizem1, %l1; + } [(set_attr type branch) (set (attr length) (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768)) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 7dd3917..bd144f9 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -823,6 +823,9 @@ (smax s) (umax u) (smin s) (umin u)]) +;; Emit conditional branch instructions. +(define_code_attr bcond [(eq beq) (ne bne) (lt bne) (ge beq)]) + ;; Emit cbz/cbnz depending on comparison type. 
(define_code_attr cbz [(eq cbz) (ne cbnz) (lt cbnz) (ge cbz)]) diff --git a/gcc/testsuite/gcc.dg/long_branch.c b/gcc/testsuite/gcc.dg/long_branch.c new file mode 100644 index 000..f388a80 --- /dev/null +++ b/gcc/testsuite/gcc.dg/long_branch.c @@ -0,0 +1,198 @@ +/* { dg-do run } */ +/* { dg-options -O2 -fno-reorder-blocks } */ + +void abort (); + +__attribute__((noinline, noclone)) int +restore (int a, int b) +{ + return a * b; +} + +__attribute__((noinline, noclone)) void +do_nothing (int *input) +{ + *input = restore (*input, 1); + return; +} + +#define CASE_ENTRY(n) \ + case n: \ +sum = sum / (n + 1); \ +sum = restore (sum, n + 1); \ +if (sum == (n + addend)) \ + break;\ +sum = sum / (n + 2); \ +sum = restore (sum, n + 2); \ +sum = sum / (n + 3); \ +sum = restore (sum, n + 3); \ +sum = sum / (n + 4); \ +sum = restore (sum, n + 4); \ +sum = sum / (n + 5); \ +sum = restore (sum, n + 5); \ +sum = sum / (n + 6); \ +sum = restore (sum, n + 6); \ +sum = sum / (n + 7); \ +sum = restore (sum, n + 7); \ +sum = sum / (n + 8); \ +sum = restore (sum, n + 8); \ +sum = sum / (n + 9); \ +sum = restore (sum, n + 9); \ +sum = sum / (n + 10); \ +sum = restore (sum, n + 10); \ +sum = sum / (n + 11); \ +sum = restore (sum, n + 11); \ +sum = sum / (n + 12); \ +sum = restore (sum, n + 12); \ +sum = sum / (n + 13); \ +sum = restore (sum, n + 13); \ +sum = sum / (n + 14); \ +sum = restore (sum, n + 14); \ +sum = sum / (n + 15); \ +sum = restore (sum, n + 15); \ +sum = sum / (n + 16); \ +sum = restore (sum, n + 16); \ +sum = sum / (n + 17); \ +sum = restore (sum, n + 17); \ +sum = sum / (n + 18); \ +sum = restore (sum, n + 18); \ +sum = sum / (n + 19); \ +sum = restore (sum, n + 19);
Re: [PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
On Mon, Jan 19, 2015 at 10:52:14AM +0000, Ramana Radhakrishnan wrote: What is aarch64 specific on the testcase? The number of if-then-else's required to get the compiler to generate cmp branch sequences rather than the tbnz instruction. That doesn't mean the same testcase couldn't be tested on other targets and perhaps find bugs in there. That said, if the testcase is too expensive to compile (several seconds is ok, minutes is not), then perhaps it shouldn't be included at all, or should be guarded with run_expensive_tests target. Jakub
Re: [PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
On Mon, Jan 19, 2015 at 10:28:47AM +, Jiong Wang wrote: On 14/01/15 22:59, Richard Henderson wrote: On 12/15/2014 07:36 AM, Jiong Wang wrote: + char buf[64]; + uint64_t val = ((uint64_t) 1) UINTVAL (operands[1]); + sprintf (buf, tst\t%%w0, %PRId64, val); + output_asm_insn (buf, operands); + return bcond\t%l2; Better to simply modify the operand, as in operands[1] = GEN_INT (HOST_WIDE_INT_1U UINTVAL (operands[1])); return tst\t%w0, %1\;bcond\t%l2; thanks, fixed. ok for trunk ? gcc/ 2015-01-19 Ramana radhakrishnanramana.radhakrish...@arm.com Jiong wangjiong.w...@arm.com That is not the right name/email format for ChangeLog entries. --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c What is aarch64 specific on the testcase? Best would be to turn into into an executable testcase (add __attribute__((noinline, noclone)) to dec and define somehow, perhaps with asm volatile with memory clobber in it) to check that it also works fine at runtime, but even if you don't, putting it into gcc.c-torture/compile/ might be preferrable over putting it into aarch64 specific dir. Jakub
Re: [PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
On 14/01/15 22:59, Richard Henderson wrote: On 12/15/2014 07:36 AM, Jiong Wang wrote: + char buf[64]; + uint64_t val = ((uint64_t) 1) UINTVAL (operands[1]); + sprintf (buf, tst\t%%w0, %PRId64, val); + output_asm_insn (buf, operands); + return bcond\t%l2; Better to simply modify the operand, as in operands[1] = GEN_INT (HOST_WIDE_INT_1U UINTVAL (operands[1])); return tst\t%w0, %1\;bcond\t%l2; thanks, fixed. ok for trunk ? gcc/ 2015-01-19 Ramana radhakrishnanramana.radhakrish...@arm.com Jiong wangjiong.w...@arm.com * config/aarch64/aarch64.md (tboptabmode1): Clobber CC reg instead of scratch reg. (cboptabmode1): Likewise. * config/aarch64/iterators.md (bcond): New define_code_attr. diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 597ff8c..1e00396 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -466,13 +466,17 @@ (const_int 0)) (label_ref (match_operand 2 )) (pc))) - (clobber (match_scratch:DI 3 =r))] + (clobber (reg:CC CC_REGNUM))] - * - if (get_attr_length (insn) == 8) -return \ubfx\\t%w3, %w0, %1, #1\;cbz\\t%w3, %l2\; - return \tbz\\t%w0, %1, %l2\; - + { +if (get_attr_length (insn) == 8) + { + operands[1] = GEN_INT (HOST_WIDE_INT_1U UINTVAL (operands[1])); + return tst\t%w0, %1\;bcond\t%l2; + } +else + return tbz\t%w0, %1, %l2; + } [(set_attr type branch) (set (attr length) (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768)) @@ -486,13 +490,21 @@ (const_int 0)) (label_ref (match_operand 1 )) (pc))) - (clobber (match_scratch:DI 2 =r))] + (clobber (reg:CC CC_REGNUM))] - * - if (get_attr_length (insn) == 8) -return \ubfx\\t%w2, %w0, sizem1, #1\;cbz\\t%w2, %l1\; - return \tbz\\t%w0, sizem1, %l1\; - + { +if (get_attr_length (insn) == 8) + { + char buf[64]; + uint64_t val = ((uint64_t ) 1) + (GET_MODE_SIZE (MODEmode) * BITS_PER_UNIT - 1); + sprintf (buf, tst\t%%w0, %PRId64, val); + output_asm_insn (buf, operands); + return bcond\t%l1; + } +else + return tbz\t%w0, sizem1, %l1; + } 
[(set_attr type branch) (set (attr length) (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768)) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 7dd3917..bd144f9 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -823,6 +823,9 @@ (smax s) (umax u) (smin s) (umin u)]) +;; Emit conditional branch instructions. +(define_code_attr bcond [(eq beq) (ne bne) (lt bne) (ge beq)]) + ;; Emit cbz/cbnz depending on comparison type. (define_code_attr cbz [(eq cbz) (ne cbnz) (lt cbnz) (ge cbz)]) diff --git a/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c b/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c new file mode 100644 index 000..d4782e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c @@ -0,0 +1,166 @@ +int dec (int); + +#define CASE_ENTRY(n) \ + case n: \ +sum = a / n; \ +sum = sum * (n - 1); \ +sum = dec (sum); \ +sum = sum / (n + 1); \ +sum = dec (sum); \ +sum = sum / (n + 2); \ +sum = dec (sum); \ +sum = sum / (n + 3); \ +sum = dec (sum); \ +sum = sum / (n + 4); \ +sum = dec (sum); \ +sum = sum / (n + 5); \ +sum = dec (sum); \ +sum = sum / (n + 6); \ +sum = dec (sum); \ +sum = sum / (n + 7); \ +sum = dec (sum); \ +sum = sum / (n + 8); \ +sum = dec (sum); \ +sum = sum / (n + 9); \ +sum = dec (sum); \ +sum = sum / (n + 10); \ +sum = dec (sum); \ +sum = sum / (n + 11); \ +sum = dec (sum); \ +sum = sum / (n + 12); \ +sum = dec (sum); \ +sum = sum / (n + 13); \ +sum = dec (sum); \ +sum = sum / (n + 14); \ +sum = dec (sum); \ +sum = sum / (n + 15); \ +sum = dec (sum); \ +sum = sum / (n + 16); \ +sum = dec (sum); \ +sum = sum / (n + 17); \ +sum = dec (sum); \ +sum = sum / (n + 18); \ +sum = dec (sum); \ +sum = sum / (n + 19); \ +sum = dec (sum); \ +sum = sum / (n + 20); \ +sum = dec (sum); \ +sum = sum / (n + 21); \ +sum = dec (sum); \ +sum = sum / (n + 22); \ +sum = dec (sum); \ +sum = sum / (n + 23); \ +sum = dec (sum); \ 
+sum = sum / (n + 24); \ +sum = dec (sum); \ +sum = sum / (n + 25); \ +sum = dec (sum); \ +sum = sum / (n + 26); \ +sum = dec (sum); \ +sum = sum / (n + 27); \ +sum = dec (sum); \ +sum = sum / (n + 28); \ +sum = dec (sum); \ +sum = sum / (n + 29); \ +sum = dec (sum); \ +sum = sum / (n + 30); \ +sum = dec (sum); \ +sum = sum / (n + 31); \ +break; + +int +cbranch (int a, int b, int c, int d, long long addend) +{ + long long sum; + if (a 0x2) +{ +start: + sum = b * c; + sum = sum +
Re: [PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
On 19/01/15 10:34, Jakub Jelinek wrote: On Mon, Jan 19, 2015 at 10:28:47AM +, Jiong Wang wrote: On 14/01/15 22:59, Richard Henderson wrote: On 12/15/2014 07:36 AM, Jiong Wang wrote: + char buf[64]; + uint64_t val = ((uint64_t) 1) UINTVAL (operands[1]); + sprintf (buf, tst\t%%w0, %PRId64, val); + output_asm_insn (buf, operands); + return bcond\t%l2; Better to simply modify the operand, as in operands[1] = GEN_INT (HOST_WIDE_INT_1U UINTVAL (operands[1])); return tst\t%w0, %1\;bcond\t%l2; thanks, fixed. ok for trunk ? gcc/ 2015-01-19 Ramana radhakrishnanramana.radhakrish...@arm.com Jiong wangjiong.w...@arm.com That is not the right name/email format for ChangeLog entries. --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c What is aarch64 specific on the testcase? Best would be to turn into into an executable testcase (add __attribute__((noinline, noclone)) to dec and define somehow, perhaps with asm volatile with memory clobber in it) to check that it also works fine at runtime, but even if you don't, putting it into gcc.c-torture/compile/ might be preferrable over putting it into aarch64 specific dir. Jakub Jakub, Thanks for review. As I have written in the initial email, the testcase included in the patch is for reproduce/record purpose only. I do not plan to commit it. it could verify the long branch situation, while because of the code is quite big, it takes a couple of seconds to compile, so will not commit it. change log updated 2015-01-19 Ramana Radhakrishnan ramana.radhakrish...@arm.com Jiong Wang jiong.w...@arm.com gcc/ * config/aarch64/aarch64.md (tboptabmode1): Clobber CC reg instead of scratch reg. (cboptabmode1): Likewise. * config/aarch64/iterators.md (bcond): New define_code_attr. 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 597ff8c..1e00396 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -466,13 +466,17 @@ (const_int 0)) (label_ref (match_operand 2 )) (pc))) - (clobber (match_scratch:DI 3 =r))] + (clobber (reg:CC CC_REGNUM))] - * - if (get_attr_length (insn) == 8) -return \ubfx\\t%w3, %w0, %1, #1\;cbz\\t%w3, %l2\; - return \tbz\\t%w0, %1, %l2\; - + { +if (get_attr_length (insn) == 8) + { + operands[1] = GEN_INT (HOST_WIDE_INT_1U UINTVAL (operands[1])); + return tst\t%w0, %1\;bcond\t%l2; + } +else + return tbz\t%w0, %1, %l2; + } [(set_attr type branch) (set (attr length) (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768)) @@ -486,13 +490,21 @@ (const_int 0)) (label_ref (match_operand 1 )) (pc))) - (clobber (match_scratch:DI 2 =r))] + (clobber (reg:CC CC_REGNUM))] - * - if (get_attr_length (insn) == 8) -return \ubfx\\t%w2, %w0, sizem1, #1\;cbz\\t%w2, %l1\; - return \tbz\\t%w0, sizem1, %l1\; - + { +if (get_attr_length (insn) == 8) + { + char buf[64]; + uint64_t val = ((uint64_t ) 1) + (GET_MODE_SIZE (MODEmode) * BITS_PER_UNIT - 1); + sprintf (buf, tst\t%%w0, %PRId64, val); + output_asm_insn (buf, operands); + return bcond\t%l1; + } +else + return tbz\t%w0, sizem1, %l1; + } [(set_attr type branch) (set (attr length) (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768)) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 7dd3917..bd144f9 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -823,6 +823,9 @@ (smax s) (umax u) (smin s) (umin u)]) +;; Emit conditional branch instructions. +(define_code_attr bcond [(eq beq) (ne bne) (lt bne) (ge beq)]) + ;; Emit cbz/cbnz depending on comparison type. 
(define_code_attr cbz [(eq cbz) (ne cbnz) (lt cbnz) (ge cbz)]) diff --git a/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c b/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c new file mode 100644 index 000..d4782e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c @@ -0,0 +1,166 @@ +int dec (int); + +#define CASE_ENTRY(n) \ + case n: \ +sum = a / n; \ +sum = sum * (n - 1); \ +sum = dec (sum); \ +sum = sum / (n + 1); \ +sum = dec (sum); \ +sum = sum / (n + 2); \ +sum = dec (sum); \ +sum = sum / (n + 3); \ +sum = dec (sum); \ +sum = sum / (n + 4); \ +sum = dec (sum); \ +sum = sum / (n + 5); \ +sum = dec (sum); \ +sum = sum / (n + 6); \ +sum = dec (sum); \ +sum = sum / (n + 7); \ +sum = dec (sum); \ +sum = sum / (n + 8); \ +sum = dec (sum); \ +sum = sum / (n + 9); \ +sum = dec (sum); \ +sum = sum / (n + 10); \ +sum = dec (sum); \ +sum = sum / (n + 11); \ +sum = dec (sum); \ +sum = sum / (n + 12); \ +sum = dec (sum); \ +sum = sum / (n + 13);
Re: [PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
What is aarch64 specific on the testcase? The number of if-then-else's required to get the compiler to generate cmp branch sequences rather than the tbnz instruction. Ramana
Re: [PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
On 12/15/2014 07:36 AM, Jiong Wang wrote: + char buf[64]; + uint64_t val = ((uint64_t) 1) << UINTVAL (operands[1]); + sprintf (buf, "tst\t%%w0, %" PRId64, val); + output_asm_insn (buf, operands); + return "<bcond>\t%l2"; Better to simply modify the operand, as in operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1])); return "tst\t%w0, %1\;<bcond>\t%l2"; r~
Re: [PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
On 15/12/14 15:36, Jiong Wang wrote: from the discussion here https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01949.html the other problem it exposed is the unnecessary clobber of register x19 which is a callee-saved register, then there are unnecessary push/pop in pro/epilogue. the reason comes from the following pattern: (define_insn "tb<optab><mode>1") (define_insn "cb<optab><mode>1") they always declare (clobber (match_scratch:DI 3 "=r")) while that register is used only when get_attr_length (insn) == 8. actually, we could clobber CC register instead of scratch register to avoid wasting of general purpose registers. this patch fix this, and give slightly improvement on spec2k. bootstrap OK, no regression on aarch64 bare-metal. ok for trunk? the testcase included in the patch is for verification purpose only. it could verify the long branch situation, while because of the code is very big, it takes a couple of seconds to compile. will not commit it. gcc/ 2014-12-15 Ramana Radhakrishnan ramana.radhakrish...@arm.com Jiong Wang jiong.w...@arm.com * config/aarch64/aarch64.md (tb<optab><mode>1): Clobber CC reg instead of scratch reg. (cb<optab><mode>1): Likewise. * config/aarch64/iterators.md (bcond): New define_code_attr. Ping~
[PATCH][AArch64] Improve bit-test-branch pattern to avoid unnecessary register clobber
from the discussion here https://gcc.gnu.org/ml/gcc-patches/2014-11/msg01949.html the other problem it exposed is the unnecessary clobber of register x19 which is a callee-saved register, then there are unnecessary push/pop in pro/epilogue. the reason comes from the following pattern: (define_insn tboptabmode1 (define_insn cboptabmode1 they always declare (clobber (match_scratch:DI 3 =r)) while that register is used only when get_attr_length (insn) == 8. actually, we could clobber CC register instead of scratch register to avoid wasting of general purpose registers. this patch fix this, and give slightly improvement on spec2k. bootstrap OK, no regression on aarch64 bare-metal. ok for trunk? the testcase included in the patch is for verification purpose only. it could verify the long branch situation, while because of the code is very big, it takes a couple of seconds to compile. will not commit it. gcc/ 2014-12-15 Ramana Radhakrishnan ramana.radhakrish...@arm.com Jiong Wang jiong.w...@arm.com * config/aarch64/aarch64.md (tboptabmode1): Clobber CC reg instead of scratch reg. (cboptabmode1): Likewise. 
* config/aarch64/iterators.md (bcond): New define_code_attr.diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 597ff8c..abf8e3f 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -466,13 +466,20 @@ (const_int 0)) (label_ref (match_operand 2 )) (pc))) - (clobber (match_scratch:DI 3 =r))] + (clobber (reg:CC CC_REGNUM))] - * - if (get_attr_length (insn) == 8) -return \ubfx\\t%w3, %w0, %1, #1\;cbz\\t%w3, %l2\; - return \tbz\\t%w0, %1, %l2\; - + { +if (get_attr_length (insn) == 8) + { + char buf[64]; + uint64_t val = ((uint64_t) 1) UINTVAL (operands[1]); + sprintf (buf, tst\t%%w0, %PRId64, val); + output_asm_insn (buf, operands); + return bcond\t%l2; + } +else + return tbz\t%w0, %1, %l2; + } [(set_attr type branch) (set (attr length) (if_then_else (and (ge (minus (match_dup 2) (pc)) (const_int -32768)) @@ -486,13 +493,21 @@ (const_int 0)) (label_ref (match_operand 1 )) (pc))) - (clobber (match_scratch:DI 2 =r))] + (clobber (reg:CC CC_REGNUM))] - * - if (get_attr_length (insn) == 8) -return \ubfx\\t%w2, %w0, sizem1, #1\;cbz\\t%w2, %l1\; - return \tbz\\t%w0, sizem1, %l1\; - + { +if (get_attr_length (insn) == 8) + { + char buf[64]; + uint64_t val = ((uint64_t ) 1) + (GET_MODE_SIZE (MODEmode) * BITS_PER_UNIT - 1); + sprintf (buf, tst\t%%w0, %PRId64, val); + output_asm_insn (buf, operands); + return bcond\t%l1; + } +else + return tbz\t%w0, sizem1, %l1; + } [(set_attr type branch) (set (attr length) (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -32768)) diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 7dd3917..bd144f9 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -823,6 +823,9 @@ (smax s) (umax u) (smin s) (umin u)]) +;; Emit conditional branch instructions. +(define_code_attr bcond [(eq beq) (ne bne) (lt bne) (ge beq)]) + ;; Emit cbz/cbnz depending on comparison type. 
(define_code_attr cbz [(eq cbz) (ne cbnz) (lt cbnz) (ge cbz)]) diff --git a/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c b/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c new file mode 100644 index 000..d4782e9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/long_range_bit_test_branch_1.c @@ -0,0 +1,166 @@ +int dec (int); + +#define CASE_ENTRY(n) \ + case n: \ +sum = a / n; \ +sum = sum * (n - 1); \ +sum = dec (sum); \ +sum = sum / (n + 1); \ +sum = dec (sum); \ +sum = sum / (n + 2); \ +sum = dec (sum); \ +sum = sum / (n + 3); \ +sum = dec (sum); \ +sum = sum / (n + 4); \ +sum = dec (sum); \ +sum = sum / (n + 5); \ +sum = dec (sum); \ +sum = sum / (n + 6); \ +sum = dec (sum); \ +sum = sum / (n + 7); \ +sum = dec (sum); \ +sum = sum / (n + 8); \ +sum = dec (sum); \ +sum = sum / (n + 9); \ +sum = dec (sum); \ +sum = sum / (n + 10); \ +sum = dec (sum); \ +sum = sum / (n + 11); \ +sum = dec (sum); \ +sum = sum / (n + 12); \ +sum = dec (sum); \ +sum = sum / (n + 13); \ +sum = dec (sum); \ +sum = sum / (n + 14); \ +sum = dec (sum); \ +sum = sum / (n + 15); \ +sum = dec (sum); \ +sum = sum / (n + 16); \ +sum = dec (sum); \ +sum = sum / (n + 17); \ +sum = dec (sum); \ +sum = sum / (n + 18); \ +sum = dec (sum); \ +sum = sum / (n + 19); \ +sum = dec (sum); \ +sum = sum / (n + 20); \ +sum = dec (sum); \ +sum = sum / (n + 21); \ +sum = dec (sum); \ +sum = sum / (n + 22); \ +sum = dec (sum); \ +sum = sum / (n + 23); \ +sum = dec (sum); \ +sum = sum / (n + 24); \ +