From: Aaron Sawdey <acsaw...@linux.ibm.com>

This patch adds the first couple of patterns to support p10 fusion. These
will allow combine to create a single insn for a pair of instructions
that power10 can fuse and execute. These particular ones have the
requirement that only cr0 can be used when fusing a load with a compare
immediate of -1/0/1, so we want combine to put that requirement in, and
if it doesn't work out later the splitter can be used.

This also adds option -mpower10-fusion which defaults to on for power10 and
will gate all these fusion patterns. In addition I have added an
undocumented option -mpower10-fusion-ld-cmpi (which may be removed later)
that just controls the load+compare-immediate patterns. I have made
these default to on for power10 but they are not disallowed for earlier
processors because it is still valid code. This allows us to test the
correctness of fusion code generation by turning it on explicitly.

The intention is to work through more patterns of this style to support
the rest of the power10 fusion pairs.

Bootstrap and regtest look good on ppc64le power9 with these patterns
enabled in stage2/stage3 and for regtest. Ok for trunk?

gcc/ChangeLog:

        * config/rs6000/predicates.md: Add const_m1_to_1_operand.
        * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION and
        OPTION_MASK_P10_FUSION_LD_CMPI to ISA_3_1_MASKS_SERVER.
        * config/rs6000/rs6000-protos.h (address_ok_for_form): Add
        prototype.
        * config/rs6000/rs6000.c (rs6000_option_override_internal):
        Automatically set -mpower10-fusion and -mpower10-fusion-ld-cmpi
        if target is power10.  (rs6000_opt_masks): Allow -mpower10-fusion
        in function attributes.  (address_ok_for_form): New function.
        * config/rs6000/rs6000.h: Add MASK_P10_FUSION.
        * config/rs6000/rs6000.md (*ld_cmpi_cr0): New
        define_insn_and_split.
        (*lwa_cmpdi_cr0): New define_insn_and_split.
        (*lwa_cmpwi_cr0): New define_insn_and_split.
        * config/rs6000/rs6000.opt: Add -mpower10-fusion
        and -mpower10-fusion-ld-cmpi.
---
 gcc/config/rs6000/predicates.md   |  5 +++
 gcc/config/rs6000/rs6000-cpus.def |  6 ++-
 gcc/config/rs6000/rs6000-protos.h |  2 +
 gcc/config/rs6000/rs6000.c        | 34 ++++++++++++++++
 gcc/config/rs6000/rs6000.h        |  1 +
 gcc/config/rs6000/rs6000.md       | 68 +++++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000.opt      |  8 ++++
 7 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 4c2fe7fa312..b75c1ddfb69 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -297,6 +297,11 @@ (define_predicate "const_0_to_1_operand"
   (and (match_code "const_int")
        (match_test "IN_RANGE (INTVAL (op), 0, 1)")))
 
+;; Match op = -1, op = 0, or op = 1.
+(define_predicate "const_m1_to_1_operand"
+  (and (match_code "const_int")
+       (match_test "IN_RANGE (INTVAL (op), -1, 1)")))
+
 ;; Match op = 0..3.
 (define_predicate "const_0_to_3_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 8d2c1ffd6cf..3e65289d8df 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -82,7 +82,9 @@
 
 #define ISA_3_1_MASKS_SERVER   (ISA_3_0_MASKS_SERVER                   \
                                 | OPTION_MASK_POWER10                  \
-                                | OTHER_POWER10_MASKS)
+                                | OTHER_POWER10_MASKS                  \
+                                | OPTION_MASK_P10_FUSION               \
+                                | OPTION_MASK_P10_FUSION_LD_CMPI)
 
 /* Flags that need to be turned off if -mno-power9-vector.  */
 #define OTHER_P9_VECTOR_MASKS  (OPTION_MASK_FLOAT128_HW                \
@@ -129,6 +131,8 @@
                                 | OPTION_MASK_FLOAT128_KEYWORD         \
                                 | OPTION_MASK_FPRND                    \
                                 | OPTION_MASK_POWER10                  \
+                                | OPTION_MASK_P10_FUSION               \
+                                | OPTION_MASK_P10_FUSION_LD_CMPI       \
                                 | OPTION_MASK_HTM                      \
                                 | OPTION_MASK_ISEL                     \
                                 | OPTION_MASK_MFCRF                    \
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 25fa5dd57cd..d8a344245e6 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -190,6 +190,8 @@ enum non_prefixed_form {
 
 extern enum insn_form address_to_insn_form (rtx, machine_mode,
                                            enum non_prefixed_form);
+extern bool address_ok_for_form (rtx, machine_mode,
+                                enum non_prefixed_form);
 extern bool prefixed_load_p (rtx_insn *);
 extern bool prefixed_store_p (rtx_insn *);
 extern bool prefixed_paddi_p (rtx_insn *);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 4d528a39a37..b8de318a0bc 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4406,6 +4406,12 @@ rs6000_option_override_internal (bool global_init_p)
   if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
     rs6000_isa_flags |= OPTION_MASK_MMA;
 
+  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) 
== 0)
+    rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
+
+  if (TARGET_POWER10 && (rs6000_isa_flags_explicit & 
OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
+    rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
+
   /* Turn off vector pair/mma options on non-power10 systems.  */
   else if (!TARGET_POWER10 && TARGET_MMA)
     {
@@ -23391,6 +23397,7 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "power9-minmax",           OPTION_MASK_P9_MINMAX,          false, true  },
   { "power9-misc",             OPTION_MASK_P9_MISC,            false, true  },
   { "power9-vector",           OPTION_MASK_P9_VECTOR,          false, true  },
+  { "power10-fusion",          OPTION_MASK_P10_FUSION,         false, true  },
   { "powerpc-gfxopt",          OPTION_MASK_PPC_GFXOPT,         false, true  },
   { "powerpc-gpopt",           OPTION_MASK_PPC_GPOPT,          false, true  },
   { "prefixed",                        OPTION_MASK_PREFIXED,           false, 
true  },
@@ -25482,6 +25489,33 @@ address_to_insn_form (rtx addr,
   return INSN_FORM_BAD;
 }
 
+bool
+address_ok_for_form (rtx addr,
+                    machine_mode mode,
+                    enum non_prefixed_form non_prefixed_format)
+{
+  enum insn_form result_form;
+
+  result_form = address_to_insn_form (addr, mode, non_prefixed_format);
+
+  switch (non_prefixed_format)
+    {
+    case NON_PREFIXED_DS:
+      switch (result_form)
+       {
+       case INSN_FORM_DS:
+       case INSN_FORM_BASE_REG:
+         return true;
+       default:
+         break;
+       }
+      break;
+    default:
+      break;
+    }
+  return false;
+}
+
 /* Helper function to see if we're potentially looking at lfs/stfs.
    - PARALLEL containing a SET and a CLOBBER
    - stfs:
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index bbd8060e143..884452fc6d9 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -539,6 +539,7 @@ extern int rs6000_vector_align[];
 #define MASK_UPDATE                    OPTION_MASK_UPDATE
 #define MASK_VSX                       OPTION_MASK_VSX
 #define MASK_POWER10                   OPTION_MASK_POWER10
+#define MASK_P10_FUSION                        OPTION_MASK_P10_FUSION
 
 #ifndef IN_LIBGCC2
 #define MASK_POWERPC64                 OPTION_MASK_POWERPC64
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dc060143104..bbcc6abe0f9 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1896,6 +1896,74 @@ (define_insn_and_split "*add<mode>3_imm_dot2"
    (set_attr "dot" "yes")
    (set_attr "length" "4,8")])
 
+;; Define an insn for ld+cmpi so we can force it to use CR0 on p10
+;; immediate has to be -1/0/1
+(define_insn_and_split "*ld_cmpi_cr0"
+  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+       (compare:CC (match_operand:DI 1 "memory_operand" "m")
+                   (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
+   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
+       (match_dup 1))
+   ]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
+  "ld %0,%1\;cmpi 0,1,%0,%3"
+  "&& reload_completed
+   && (cc_reg_not_cr0_operand (operands[2], CCmode)
+       || !address_ok_for_form (XEXP (operands[1],0), DImode, 
NON_PREFIXED_DS))"
+  [(set (match_dup 0) (match_dup 1))
+   (set (match_dup 2)
+        (compare:CC (match_dup 0)
+                   (match_dup 3)))]
+  ""
+  [(set_attr "type" "load")
+   (set_attr "length" "8")])
+
+;; Define an insn for lwa+cmpdi so we can force it to use CR0 on p10
+;; immediate is -1/0/1
+(define_insn_and_split "*lwa_cmpdi_cr0"
+  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+       (compare:CC (sign_extend:DI (match_operand:SI 1 "memory_operand" "m"))
+                   (match_operand:GPR 3 "const_m1_to_1_operand" "n")))
+   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
+       (sign_extend:DI (match_dup 1)))
+   ]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
+  "lwa %0,%1\;cmpdi %0,%3"
+  "&& reload_completed
+   && (cc_reg_not_cr0_operand (operands[2], CCmode)
+       || !address_ok_for_form (XEXP (operands[1],0), DImode, 
NON_PREFIXED_DS))"
+  [(set (match_dup 0)
+       (sign_extend:DI (match_dup 1)))
+   (set (match_dup 2)
+        (compare:CC (match_dup 0)
+                   (match_dup 3)))]
+  ""
+  [(set_attr "type" "load")
+   (set_attr "length" "8")])
+
+;; Define an insn for lwa+cmpwi so we can force it to use CR0 on p10
+;; immediate is -1/0/1
+(define_insn_and_split "*lwa_cmpwi_cr0"
+  [(set (match_operand:CC 2 "cc_reg_operand" "=x")
+       (compare:CC (match_operand:SI 1 "memory_operand" "m")
+                   (match_operand:SI 3 "const_m1_to_1_operand" "n")))
+   (set (match_operand:DI 0 "gpc_reg_operand" "=r")
+       (sign_extend:DI (match_dup 1)))
+   ]
+  "(TARGET_P10_FUSION && TARGET_P10_FUSION_LD_CMPI)"
+  "lwa %0,%1\;cmpwi %0,%3"
+  "&& reload_completed
+   && (cc_reg_not_cr0_operand (operands[2], CCmode)
+       || !address_ok_for_form (XEXP (operands[1],0), DImode, 
NON_PREFIXED_DS))"
+  [(set (match_dup 0)
+       (sign_extend:DI (match_dup 1)))
+   (set (match_dup 2)
+        (compare:CC (match_dup 0)
+                   (match_dup 3)))]
+  ""
+  [(set_attr "type" "load")
+   (set_attr "length" "8")])
+
 ;; Split an add that we can't do in one insn into two insns, each of which
 ;; does one 16-bit part.  This is used by combine.  Note that the low-order
 ;; add should be last in case the result gets used in an address.
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index b2a70e88ca8..63457efb607 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -479,6 +479,14 @@ mpower8-vector
 Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
 Use vector and scalar instructions added in ISA 2.07.
 
+mpower10-fusion
+Target Report Mask(P10_FUSION) Var(rs6000_isa_flags)
+Fuse certain integer operations together for better performance on power10.
+
+mpower10-fusion-ld-cmpi
+Target Undocumented Mask(P10_FUSION_LD_CMPI) Var(rs6000_isa_flags)
+Fuse certain integer operations together for better performance on power10.
+
 mcrypto
 Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
 Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
-- 
2.18.4

Reply via email to