Hi,

Please find attached a patch that implements automodify (writeback) load and
store scheduling for ThunderX2T99.
The patch does not change SPEC benchmark results but improves other benchmarks.

Bootstrapped and regression tested on aarch64-thunder-linux.
Please review the patch and let us know if it is okay for Stage 1.

Thanks,
Naveen

2017-03-06  Julian Brown  <jul...@codesourcery.com>
            Naveen H.S  <naveen.hurugalaw...@cavium.com>

        * config/aarch64/aarch64-protos.h (aarch64_automod_addr_only_dep): Add
        prototype.
        * config/aarch64/aarch64.c (aarch64_automod_addr_only_dep): New
        function.
        * config/aarch64/thunderx2t99.md (thunderx2t99_load_basic)
        (thunderx2t99_store_basic, thunderx2t99_storepair_basic)
        (thunderx2t99_fp_load_basic, thunderx2t99_fp_loadpair_basic)
        (thunderx2t99_fp_store_basic, thunderx2t99_fp_storepair_basic): Add
        aarch64_mem_type_p test.
        (thunderx2t99_load_automod, thunderx2t99_load_regoffset)
        (thunderx2t99_load_scale_ext, thunderx2t99_store_automod)
        (thunderx2t99_store_regoffset_scale_ext, thunderx2t99_fp_load_automod)
        (thunderx2t99_storepair_automod, thunderx2t99_fp_load_regoffset)
        (thunderx2t99_fp_load_scale_ext, thunderx2t99_fp_loadpair_automod)
        (thunderx2t99_fp_store_automod, thunderx2t99_fp_storepair_automod)
        (thunderx2t99_fp_store_regoffset_scale_ext): New insn reservations.
        (thunderx2t99_load_automod, thunderx2t99_fp_load_automod)
        (thunderx2t99_fp_loadpair_automod): Add bypass for output address-only
        dependencies.
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e045df8..7472d98 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -488,5 +488,6 @@ std::string aarch64_get_extension_string_for_isa_flags (unsigned long,
 							unsigned long);
 
 rtl_opt_pass *make_pass_fma_steering (gcc::context *ctxt);
+int aarch64_automod_addr_only_dep (rtx_insn *, rtx_insn *);
 
 #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 62f5461..c674c51 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14875,6 +14875,98 @@ aarch64_run_selftests (void)
 
 #endif /* #if CHECKING_P */
 
+/* Return the register written by SET if SET loads it from memory,
+   optionally through a zero or sign extension; otherwise return
+   NULL_RTX.  */
+
+static rtx
+aarch64_automod_loaded_reg (rtx set)
+{
+  rtx src = SET_SRC (set);
+
+  if (GET_CODE (src) == ZERO_EXTEND || GET_CODE (src) == SIGN_EXTEND)
+    src = XEXP (src, 0);
+
+  return MEM_P (src) ? SET_DEST (set) : NULL_RTX;
+}
+
+/* Return nonzero if CONSUMER mentions REG in any of its SETs.  A CONSUMER
+   whose pattern is neither a single SET nor a PARALLEL is conservatively
+   treated as mentioning REG.  */
+
+static int
+aarch64_automod_consumer_mentions_p (rtx reg, rtx_insn *consumer)
+{
+  rtx cons_set = single_set (consumer);
+
+  if (cons_set)
+    return reg_overlap_mentioned_p (reg, cons_set);
+
+  rtx cons_pat = PATTERN (consumer);
+
+  if (GET_CODE (cons_pat) != PARALLEL)
+    return 1;
+
+  for (int i = 0; i < XVECLEN (cons_pat, 0); i++)
+    {
+      rtx elt = XVECEXP (cons_pat, 0, i);
+
+      if (GET_CODE (elt) == SET && reg_overlap_mentioned_p (reg, elt))
+	return 1;
+    }
+
+  return 0;
+}
+
+/* Return nonzero if the CONSUMER has a dependency only on an automodify
+   address in PRODUCER (a load instruction, i.e. the dependency is not on the
+   loaded value).  Used as the guard of a define_bypass, so a nonzero result
+   selects the shorter address-only latency.
+
+   A PRODUCER that is a PARALLEL may contain a SET that updates the base
+   register (its source is not a MEM) next to the SETs that perform the
+   loads; such SETs are skipped, since a dependency on the updated address
+   is exactly the case being detected.  */
+
+int
+aarch64_automod_addr_only_dep (rtx_insn *producer, rtx_insn *consumer)
+{
+  rtx prod_set = single_set (producer);
+
+  if (prod_set)
+    {
+      rtx dst = aarch64_automod_loaded_reg (prod_set);
+
+      /* Not a load we understand: do not claim the shorter latency.  */
+      if (!dst)
+	return 0;
+
+      return !aarch64_automod_consumer_mentions_p (dst, consumer);
+    }
+
+  rtx prod_pat = PATTERN (producer);
+
+  if (GET_CODE (prod_pat) == PARALLEL)
+    for (int i = 0; i < XVECLEN (prod_pat, 0); i++)
+      {
+	rtx elt = XVECEXP (prod_pat, 0, i);
+
+	if (GET_CODE (elt) != SET)
+	  continue;
+
+	rtx dst = aarch64_automod_loaded_reg (elt);
+
+	/* Skip the address update and anything else that is not a load.  */
+	if (!dst)
+	  continue;
+
+	if (aarch64_automod_consumer_mentions_p (dst, consumer))
+	  return 0;
+      }
+
+  return 1;
+}
+
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST aarch64_address_cost
 
diff --git a/gcc/config/aarch64/thunderx2t99.md b/gcc/config/aarch64/thunderx2t99.md
index 936078c..add3707 100644
--- a/gcc/config/aarch64/thunderx2t99.md
+++ b/gcc/config/aarch64/thunderx2t99.md
@@ -123,24 +123,73 @@
 
 (define_insn_reservation "thunderx2t99_load_basic" 4
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "load1"))
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+					      | AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01")
 
+(define_insn_reservation "thunderx2t99_load_automod" 4
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01+thunderx2t99_i012")
+
+(define_insn_reservation "thunderx2t99_load_regoffset" 5
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG)"))
+  "thunderx2t99_i012,thunderx2t99_ls01")
+
+(define_insn_reservation "thunderx2t99_load_scale_ext" 6
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "load1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_SHIFT
+					      | AARCH64_ADDR_REG_EXT
+					      | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "thunderx2t99_i012,thunderx2t99_i012,thunderx2t99_ls01")
+
 (define_insn_reservation "thunderx2t99_loadpair" 5
   (and (eq_attr "tune" "thunderx2t99")
        (eq_attr "type" "load2"))
   "thunderx2t99_i012,thunderx2t99_ls01")
 
-(define_insn_reservation "thunderx2t99_store_basic" 1
+(define_insn_reservation "thunderx2t99_store_basic" 0
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store1"))
+       (eq_attr "type" "store1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+					      | AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
-(define_insn_reservation "thunderx2t99_storepair_basic" 1
+(define_insn_reservation "thunderx2t99_store_automod" 0
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "store2"))
+       (eq_attr "type" "store1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01,(thunderx2t99_sd+thunderx2t99_i012)")
+
+(define_insn_reservation "thunderx2t99_store_regoffset_scale_ext" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "store1")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG
+					      | AARCH64_ADDR_REG_SHIFT
+					      | AARCH64_ADDR_REG_EXT
+					      | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "thunderx2t99_i012,thunderx2t99_ls01,thunderx2t99_sd")
+
+(define_insn_reservation "thunderx2t99_storepair_basic" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "store2")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
+(define_insn_reservation "thunderx2t99_storepair_automod" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "store2")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01,(thunderx2t99_sd+thunderx2t99_i012)")
+
 ;; FP data processing instructions.
 
 (define_insn_reservation "thunderx2t99_fp_simple" 5
@@ -204,24 +253,81 @@
 
 (define_insn_reservation "thunderx2t99_fp_load_basic" 4
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "f_loads,f_loadd"))
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+					      | AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01")
 
+(define_insn_reservation "thunderx2t99_fp_load_automod" 4
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01,thunderx2t99_i012")
+
+(define_insn_reservation "thunderx2t99_fp_load_regoffset" 5
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG)"))
+  "thunderx2t99_ls01,thunderx2t99_i012")
+
+(define_insn_reservation "thunderx2t99_fp_load_scale_ext" 6
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "f_loads,f_loadd")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_SHIFT
+					      | AARCH64_ADDR_REG_EXT
+					      | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "thunderx2t99_ls01,thunderx2t99_i012")
+
 (define_insn_reservation "thunderx2t99_fp_loadpair_basic" 4
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_load1_2reg"))
+       (eq_attr "type" "neon_load1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01*2")
 
-(define_insn_reservation "thunderx2t99_fp_store_basic" 1
+(define_insn_reservation "thunderx2t99_fp_loadpair_automod" 4
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "f_stores,f_stored"))
+       (eq_attr "type" "neon_load1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "(thunderx2t99_ls01+thunderx2t99_i012),thunderx2t99_ls01")
+
+(define_insn_reservation "thunderx2t99_fp_store_basic" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "f_stores,f_stored")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+					      | AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01,thunderx2t99_sd")
 
-(define_insn_reservation "thunderx2t99_fp_storepair_basic" 1
+(define_insn_reservation "thunderx2t99_fp_store_automod" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "f_stores,f_stored")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01,(thunderx2t99_sd+thunderx2t99_i012)")
+
+(define_insn_reservation "thunderx2t99_fp_store_regoffset_scale_ext" 0
   (and (eq_attr "tune" "thunderx2t99")
-       (eq_attr "type" "neon_store1_2reg"))
+       (eq_attr "type" "f_stores,f_stored")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG
+					      | AARCH64_ADDR_REG_SHIFT
+					      | AARCH64_ADDR_REG_EXT
+					      | AARCH64_ADDR_REG_SHIFT_EXT)"))
+  "thunderx2t99_i012,thunderx2t99_ls01,thunderx2t99_sd")
+
+(define_insn_reservation "thunderx2t99_fp_storepair_basic" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "neon_store1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+					      | AARCH64_ADDR_LO_SUM)"))
   "thunderx2t99_ls01,(thunderx2t99_ls01+thunderx2t99_sd),thunderx2t99_sd")
 
+(define_insn_reservation "thunderx2t99_fp_storepair_automod" 0
+  (and (eq_attr "tune" "thunderx2t99")
+       (eq_attr "type" "neon_store1_2reg")
+       (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+  "thunderx2t99_ls01,(thunderx2t99_ls01+thunderx2t99_sd+thunderx2t99_i012),thunderx2t99_sd")
+
 ;; ASIMD integer instructions.
 
 (define_insn_reservation "thunderx2t99_asimd_int" 7
@@ -443,6 +549,16 @@
        (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q"))
   "thunderx2t99_ls01,thunderx2t99_f01")
 
+;; Bypasses for automodify load insns.
+
+; For automodify loads, the address should be available before the loaded data.
+
+(define_bypass 1
+  "thunderx2t99_load_automod,thunderx2t99_fp_load_automod,\
+   thunderx2t99_fp_loadpair_automod"
+  "thunderx2t99_*"
+  "aarch64_automod_addr_only_dep")
+
 ;; Crypto extensions.
 
 ; FIXME: Forwarding path for aese/aesmc or aesd/aesimc pairs?

Reply via email to