Hi all,

This is a patch to demonstrate some unusual behavior I have encountered in 
combine.

A summary of the behavior is:
when combining A -> B, the register equivalence notes of A are checked, but the
register equivalence notes of B are not.

Is this expected behaviour?

From combine.c:1484, in combine_instructions:

      /* Try this insn with each REG_EQUAL note it links back to.  */
      FOR_EACH_LOG_LINK (links, insn)
        {
          rtx set, note;
          rtx_insn *temp = links->insn;
          if ((set = single_set (temp)) != 0
          && (note = find_reg_equal_equiv_note (temp)) != 0
          && (note = XEXP (note, 0), GET_CODE (note)) != EXPR_LIST

The register equivalence notes of temp are checked, but the register
equivalence notes of insn are not checked.

To reproduce:
With the patch applied:

Compile the following function
void
bar (float *a, int *b)
{
  int i;
  for (i = 0; i < 1024; i++)
    a[i] = (((float)b[i])/ 4.0f);
}

Combine does not check the REG_EQUAL note on insn 12, and does not try the
equivalent pattern, which uses a const_vector in place of register 99.

Trying 10 -> 12:
   10: r97:V4SF=float(r96:V4SI)
      REG_DEAD r96:V4SI
   12: r98:V4SF=r97:V4SF*r99:V4SF
      REG_DEAD r97:V4SF
      REG_EQUAL r97:V4SF*const_vector
Failed to match this instruction:
(set (reg:V4SF 98 [ D.3422 ])
    (mult:V4SF (float:V4SF (reg:V4SI 96 [ D.3420 ]))
        (reg:V4SF 99)))


For comparison, in a similar pattern where the REG_EQUAL note is attached to
the first insn, the REG_EQUAL note is checked and the equivalent constant is
used.

foo (float *a, int *b)
{
  int i;
  for (i = 0; i < 1024; i++)
    b[i] = a[i] * 4.0f;
}

Trying 11 -> 12:
   11: r97:V4SF=r96:V4SF*r98:V4SF
      REG_DEAD r96:V4SF
      REG_EQUAL r96:V4SF*const_vector
   12: r99:V4SI=fix(unspec[r97:V4SF] 23)
      REG_DEAD r97:V4SF
Failed to match this instruction:
(set (reg:V4SI 99 [ D.3432 ])
    (fix:V4SI (unspec:V4SI [
                (mult:V4SF (reg:V4SF 96 [ D.3430 ])
                    (reg:V4SF 98))
            ] UNSPEC_FRINTZ)))

Trying 11 -> 12:
   11: r97:V4SF=r96:V4SF*const_vector
      REG_DEAD r96:V4SF
      REG_EQUAL r96:V4SF*const_vector
   12: r99:V4SI=fix(unspec[r97:V4SF] 23)
      REG_DEAD r97:V4SF
Successfully matched this instruction:
(set (reg:V4SI 99 [ D.3432 ])
    (fix:V4SI (unspec:V4SI [
                (mult:V4SF (reg:V4SF 96 [ D.3430 ])
                    (const_vector:V4SF [
                            (const_double:SF 4.0e+0 [0x0.8p+3]) repeated x4
                        ]))
            ] UNSPEC_FRINTZ)))
            
Built from current trunk
$gcc -v
COLLECT_GCC=$BUILD/install/bin/aarch64-none-elf-gcc
COLLECT_LTO_WRAPPER=$BUILD/install/libexec/gcc/aarch64-none-elf/10.0.0/lto-wrapper
Target: aarch64-none-elf
Configured with: $SRC/gcc/configure --target=aarch64-none-elf 
--prefix=$BUILD/install/ --with-gmp=$BUILD/host-tools 
--with-mpfr=$BUILD/host-tools --with-mpc=$BUILD/host-tools 
--with-isl=$BUILD/host-tools --disable-shared --disable-nls --disable-threads 
--disable-tls --enable-checking=yes --enable-languages=c,c++,fortran 
--with-newlib --with-pkgversion=unknown
Thread model: single
gcc version 10.0.0 20190524 (experimental) (unknown)

Test cases compiled with:
aarch64-none-elf-gcc -S -mcpu=cortex-a53 -O2 tmp.c -ftree-vectorize -fno-inline 
-fdump-rtl-all -fno-vect-cost-model -dp -fdump-rtl-combine-all 
-fdump-tree-optimized -o -
From 7e744509575030ca5b3fa6042d02d27171fbfbfd Mon Sep 17 00:00:00 2001
From: Joel Hutton <joel.hut...@arm.com>
Date: Tue, 11 Jun 2019 10:10:07 +0100
Subject: [PATCH] Minimal pattern to demonstrate combine behaviour

---
 gcc/config/aarch64/aarch64-protos.h |  1 +
 gcc/config/aarch64/aarch64-simd.md  | 13 +++++++++++++
 gcc/config/aarch64/aarch64.c        |  6 ++++++
 gcc/config/aarch64/predicates.md    |  3 +++
 4 files changed, 23 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index a0723266f22..ff1787c37ed 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -483,6 +483,7 @@ enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
 enum reg_class aarch64_regno_regclass (unsigned);
 int aarch64_asm_preferred_eh_data_format (int, int);
 int aarch64_fpconst_pow_of_2 (rtx);
+int aarch64_fp_const_vec (rtx);
 machine_mode aarch64_hard_regno_caller_save_mode (unsigned, unsigned,
 						       machine_mode);
 int aarch64_uxt_size (int, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index d4c48d2aa61..698b49c006f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2133,6 +2133,19 @@
   "TARGET_SIMD"
   {})
 
+(define_insn "*aarch64_combine_scvtf"
+  [(set (match_operand 0 "register_operand" "=w")
+	(mult
+	 (float
+	  (match_operand 1 "" "w"))
+	 (match_operand 2 "aarch64_fp_const_vec" ""))
+	)]
+  ""
+  {
+    return "test_match";
+  }
+)
+
 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
   [(set (match_operand:VHSDF 0 "register_operand" "=w")
 	(FLOATUORS:VHSDF
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 83453d03095..f836246e184 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18327,6 +18327,12 @@ aarch64_fpconst_pow_of_2 (rtx x)
   return exact_log2 (real_to_integer (r));
 }
 
+int
+aarch64_fp_const_vec (rtx x)
+{
+  return GET_CODE (x) == CONST_VECTOR;
+}
+
 /* If X is a vector of equal CONST_DOUBLE values and that value is
    Y, return the aarch64_fpconst_pow_of_2 of Y.  Otherwise return -1.  */
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 10100ca830a..8fece3811b9 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -101,6 +101,9 @@
 (define_predicate "aarch64_fp_vec_pow2"
   (match_test "aarch64_vec_fpconst_pow_of_2 (op) > 0"))
 
+(define_predicate "aarch64_fp_const_vec"
+  (match_test "aarch64_fp_const_vec (op)"))
+
 (define_predicate "aarch64_sve_cnt_immediate"
   (and (match_code "const_poly_int")
        (match_test "aarch64_sve_cnt_immediate_p (op)")))
-- 
2.17.1

Reply via email to