Hi all, This is a patch to demonstrate some unusual behavior I have encountered in combine.
A summary of the behaviour is: when combining A -> B, the register equivalence notes of A are checked, but the register equivalence notes of B are not checked. Is this expected behaviour? From combine.c:1484 in combine_instructions: /* Try this insn with each REG_EQUAL note it links back to. */ FOR_EACH_LOG_LINK (links, insn) { rtx set, note; rtx_insn *temp = links->insn; if ((set = single_set (temp)) != 0 && (note = find_reg_equal_equiv_note (temp)) != 0 && (note = XEXP (note, 0), GET_CODE (note)) != EXPR_LIST The register equivalence notes of temp are checked, but the register equivalence notes of insn are not checked. To reproduce: With the patch applied: Compile the following function: void bar (float *a, int *b) { int i; for (i = 0; i < 1024; i++) a[i] = (((float)b[i])/ 4.0f); } Combine does not check the REG_EQUAL note on insn 12, and so does not try the equivalent pattern that uses a const_vector instead of register 99. Trying 10 -> 12: 10: r97:V4SF=float(r96:V4SI) REG_DEAD r96:V4SI 12: r98:V4SF=r97:V4SF*r99:V4SF REG_DEAD r97:V4SF REG_EQUAL r97:V4SF*const_vector Failed to match this instruction: (set (reg:V4SF 98 [ D.3422 ]) (mult:V4SF (float:V4SF (reg:V4SI 96 [ D.3420 ])) (reg:V4SF 99))) For comparison, in a similar pattern in which the REG_EQUAL note is attached to the first insn, the REG_EQUAL note is checked and the equivalent constant is used: 
foo (float *a, int *b) { int i; for (i = 0; i < 1024; i++) b[i] = a[i] * 4.0f; } Trying 11 -> 12: 11: r97:V4SF=r96:V4SF*r98:V4SF REG_DEAD r96:V4SF REG_EQUAL r96:V4SF*const_vector 12: r99:V4SI=fix(unspec[r97:V4SF] 23) REG_DEAD r97:V4SF Failed to match this instruction: (set (reg:V4SI 99 [ D.3432 ]) (fix:V4SI (unspec:V4SI [ (mult:V4SF (reg:V4SF 96 [ D.3430 ]) (reg:V4SF 98)) ] UNSPEC_FRINTZ))) Trying 11 -> 12: 11: r97:V4SF=r96:V4SF*const_vector REG_DEAD r96:V4SF REG_EQUAL r96:V4SF*const_vector 12: r99:V4SI=fix(unspec[r97:V4SF] 23) REG_DEAD r97:V4SF Successfully matched this instruction: (set (reg:V4SI 99 [ D.3432 ]) (fix:V4SI (unspec:V4SI [ (mult:V4SF (reg:V4SF 96 [ D.3430 ]) (const_vector:V4SF [ (const_double:SF 4.0e+0 [0x0.8p+3]) repeated x4 ])) ] UNSPEC_FRINTZ))) Built from current trunk $gcc -v COLLECT_GCC=$BUILD/install/bin/aarch64-none-elf-gcc COLLECT_LTO_WRAPPER=$BUILD/install/libexec/gcc/aarch64-none-elf/10.0.0/lto-wrapper Target: aarch64-none-elf Configured with: $SRC/gcc/configure --target=aarch64-none-elf --prefix=$BUILD/install/ --with-gmp=$BUILD/host-tools --with-mpfr=$BUILD/host-tools --with-mpc=$BUILD/host-tools --with-isl=$BUILD/host-tools --disable-shared --disable-nls --disable-threads --disable-tls --enable-checking=yes --enable-languages=c,c++,fortran --with-newlib --with-pkgversion=unknown Thread model: single gcc version 10.0.0 20190524 (experimental) (unknown) Test cases compiled with: aarch64-none-elf-gcc -S -mcpu=cortex-a53 -O2 tmp.c -ftree-vectorize -fno-inline -fdump-rtl-all -fno-vect-cost-model -dp -fdump-rtl-combine-all -fdump-tree-optimized -o -
From 7e744509575030ca5b3fa6042d02d27171fbfbfd Mon Sep 17 00:00:00 2001 From: Joel Hutton <joel.hut...@arm.com> Date: Tue, 11 Jun 2019 10:10:07 +0100 Subject: [PATCH] Minimal pattern to demonstrate combine behaviour --- gcc/config/aarch64/aarch64-protos.h | 1 + gcc/config/aarch64/aarch64-simd.md | 13 +++++++++++++ gcc/config/aarch64/aarch64.c | 6 ++++++ gcc/config/aarch64/predicates.md | 3 +++ 4 files changed, 23 insertions(+) diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index a0723266f22..ff1787c37ed 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -483,6 +483,7 @@ enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); enum reg_class aarch64_regno_regclass (unsigned); int aarch64_asm_preferred_eh_data_format (int, int); int aarch64_fpconst_pow_of_2 (rtx); +int aarch64_fp_const_vec (rtx); machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned, machine_mode); int aarch64_uxt_size (int, HOST_WIDE_INT); diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index d4c48d2aa61..698b49c006f 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -2133,6 +2133,19 @@ "TARGET_SIMD" {}) +(define_insn "*aarch64_combine_scvtf" + [(set (match_operand 0 "register_operand" "=w") + (mult + (float + (match_operand 1 "" "w")) + (match_operand 2 "aarch64_fp_const_vec" "")) + )] + "" + { + return "test_match"; + } +) + (define_insn "<optab><fcvt_target><VHSDF:mode>2" [(set (match_operand:VHSDF 0 "register_operand" "=w") (FLOATUORS:VHSDF diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 83453d03095..f836246e184 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -18327,6 +18327,12 @@ aarch64_fpconst_pow_of_2 (rtx x) return exact_log2 (real_to_integer (r)); } +int +aarch64_fp_const_vec (rtx x) +{ + return GET_CODE (x) == CONST_VECTOR; +} + /* If X is a vector of 
equal CONST_DOUBLE values and that value is Y, return the aarch64_fpconst_pow_of_2 of Y. Otherwise return -1. */ diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 10100ca830a..8fece3811b9 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -101,6 +101,9 @@ (define_predicate "aarch64_fp_vec_pow2" (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0")) +(define_predicate "aarch64_fp_const_vec" + (match_test "aarch64_fp_const_vec (op)")) + (define_predicate "aarch64_sve_cnt_immediate" (and (match_code "const_poly_int") (match_test "aarch64_sve_cnt_immediate_p (op)"))) -- 2.17.1