PATCH v6 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-09-17 Thread Ajit Agarwal via Gcc-patches
This new version (v6) of the patch improves the ree pass for the rs6000
target using defined ABI interfaces.
Bootstrapped and regtested on powerpc64-linux-gnu.

Review comments incorporated.

Thanks & Regards
Ajit


ree: Improve ree pass for rs6000 target using defined abi interfaces

For the rs6000 target we see redundant zero and sign extensions.  Improve
the ree pass to eliminate such redundant zero and sign extensions using
defined ABI interfaces.

2023-09-18  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
defined abi interfaces.
(add_removable_extension): Use of defined abi interfaces for no
reaching defs.
(abi_extension_candidate_return_reg_p): New function.
(abi_extension_candidate_p): New function.
(abi_extension_candidate_argno_p): New function.
(abi_handle_regs_without_defs_p): New function.
(abi_target_promote_function_mode): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim-3.C
---
 gcc/ree.cc| 145 +-
 .../g++.target/powerpc/zext-elim-3.C  |  13 ++
 2 files changed, 155 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..e395af6b1bd 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
 if (REGNO (DF_REF_REG (def)) == REGNO (reg))
   break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -750,6 +751,118 @@ get_extended_src_reg (rtx src)
   return src;
 }
 
+/* Return TRUE if target mode is equal to source mode of zero_extend
+   or sign_extend otherwise false.  */
+
+static bool
+abi_target_promote_function_mode (machine_mode mode)
+{
+  int unsignedp;
+  machine_mode tgt_mode =
+targetm.calls.promote_function_mode (NULL_TREE, mode, ,
+NULL_TREE, 1);
+
+  if (tgt_mode == mode)
+return true;
+  else
+return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an return  registers.  */
+
+static bool
+abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_VALUE_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if reg source operand of zero_extend is argument registers
+   and not return registers and source and destination operand are same
+   and mode of source and destination operand are not same.  */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) != ZERO_EXTEND)
+return false;
+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set),0);
+
+  bool copy_needed
+= (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+
+  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
+  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an argument registers.  */
+
+static bool
+abi_extension_candidate_argno_p (rtx_code code, int regno)
+{
+  if (code != ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_ARG_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
+
+static bool
+abi_handle_regs_without_defs_p (rtx_insn *insn)
+{
+  if (side_effects_p (PATTERN (insn)))
+return false;
+
+  struct df_link *uses = get_uses (insn, SET_DEST (PATTERN (insn)));
+
+  if (!uses)
+return false;
+
+  for (df_link *use = uses; use; use = use->next)
+{
+  if (!use->ref)
+   return false;
+
+  if (BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+   return false;
+
+  rtx_insn *use_insn = DF_REF_INSN (use->ref);
+
+  if (GET_CODE (PATTERN (use_insn)) == SET)
+   {
+ rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
+
+ if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+ || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ || GET_RTX_CLASS (code) == RTX_UNARY)
+   return false;
+   }
+ }
+  return true;
+}
+
 /* This function goes through all reaching defs of the source
of the candidate for elimination (CAND) and tries to combine
the extension with the definition instruction.  The changes
@@ -770,6 +883,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
 
   state->defs_list.truncate (0);
   state->copies_list.truncate (0);
+  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
+
+  if 

[PATCH v1] rs6000: unnecessary clear after vctzlsbb in vec_first_match_or_eos_index

2023-09-12 Thread Ajit Agarwal via Gcc-patches
This patch removes zero extension from vctzlsbb as it already zero extends.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: unnecessary clear after vctzlsbb in vec_first_match_or_eos_index

For the rs6000 target we don't need a zero_extend after vctzlsbb, as
vctzlsbb already zero-extends its result.

2023-09-12  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/vsx.md (vctzlsbb_zext_): New define_insn.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/altivec-19.C: New testcase.
---
 gcc/config/rs6000/vsx.md  | 17 ++---
 gcc/testsuite/g++.target/powerpc/altivec-19.C | 10 ++
 2 files changed, 24 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/altivec-19.C

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 19abfeb565a..42379409e5f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5846,11 +5846,22 @@
   [(set_attr "type" "vecsimple")])
 
 ;; Vector Count Trailing Zero Least-Significant Bits Byte
-(define_insn "vctzlsbb_"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+(define_insn "vctzlsbb_zext_"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
(unspec:SI
 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
-UNSPEC_VCTZLSBB))]
+UNSPEC_VCTZLSBB)))]
+  "TARGET_P9_VECTOR"
+  "vctzlsbb %0,%1"
+  [(set_attr "type" "vecsimple")])
+
+;; Vector Count Trailing Zero Least-Significant Bits Byte
+(define_insn "vctzlsbb_"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+(unspec:SI
+ [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
+ UNSPEC_VCTZLSBB))]
   "TARGET_P9_VECTOR"
   "vctzlsbb %0,%1"
   [(set_attr "type" "vecsimple")])
diff --git a/gcc/testsuite/g++.target/powerpc/altivec-19.C 
b/gcc/testsuite/g++.target/powerpc/altivec-19.C
new file mode 100644
index 000..e49e5076af8
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/altivec-19.C
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 " } */ 
+
+#include 
+
+unsigned int foo (vector unsigned char a, vector unsigned char b) {
+  return vec_first_match_or_eos_index (a, b);
+}
+/* { dg-final { scan-assembler-not {\mrldicl\M} } } */
-- 
2.39.3



[PING^3] [PATCH v8] tree-ssa-sink: Improve code sinking pass.

2023-09-12 Thread Ajit Agarwal via Gcc-patches



Ping!
 Forwarded Message 
Subject: [PING^2] [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Mon, 21 Aug 2023 12:14:03 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Richard Biener , Jeff Law 
, Segher Boessenkool , Peter 
Bergner , rashmi.srid...@ibm.com

Ping!


 Forwarded Message 
Subject: [PING^1] [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Tue, 1 Aug 2023 13:47:10 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Richard Biener , Jeff Law 
, Peter Bergner , Segher 
Boessenkool , rashmi.srid...@ibm.com

Ping! 


 Forwarded Message 
Subject: [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Tue, 18 Jul 2023 19:03:37 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Richard Biener , Jeff Law 
, Segher Boessenkool , Peter 
Bergner 

Hello All:

This patch improves the code sinking pass to sink statements before a call,
to reduce register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-07-18  Ajit Kumar Agarwal  

gcc/ChangeLog:

PR tree-optimization/81953
* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

PR tree-optimization/81953
* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 ++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +++
 gcc/tree-ssa-sink.cc| 59 -
 3 files changed, 67 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..e7190323abe 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -173,7 +173,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,11 +191,22 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+ operands, then increase the threshold by 7% as those are even more

[PING^5] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-09-12 Thread Ajit Agarwal via Gcc-patches


Ping!

 Forwarded Message 
Subject: [PING^4] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using 
defined ABI interfaces.
Date: Mon, 21 Aug 2023 12:16:44 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Jeff Law , Richard Biener 
, Segher Boessenkool , 
Peter Bergner , rashmi.srid...@ibm.com


Ping!

 Forwarded Message 
Subject: [PING^3] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using 
defined ABI interfaces.
Date: Tue, 1 Aug 2023 13:48:58 +0530
From: Ajit Agarwal 
To: gcc-patches , Jeff Law , 
Richard Biener , Peter Bergner 
, Segher Boessenkool , 
rashmi.srid...@ibm.com

Ping!


 Forwarded Message 
Subject: [PING^2] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using 
defined ABI interfaces.
Date: Tue, 18 Jul 2023 13:28:08 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Jeff Law , Richard Biener 
, Segher Boessenkool , 
Peter Bergner 


Ping^2.

Please review.

Thanks & Regards
Ajit


This new version of patch 4 improves the ree pass for the rs6000 target using
defined ABI interfaces.
Bootstrapped and regtested on powerpc64-linux-gnu.

Review comments incorporated.

Thanks & Regards
Ajit

Improve ree pass for rs6000 target using defined abi interfaces

For the rs6000 target we see redundant zero and sign
extensions.  Improve the ree pass to eliminate
such redundant zero and sign extensions using defined
ABI interfaces.

2023-06-01  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
defined abi interfaces.
(add_removable_extension): Use of defined abi interfaces for no
reaching defs.
(abi_extension_candidate_return_reg_p): New function.
(abi_extension_candidate_p): New function.
(abi_extension_candidate_argno_p): New function.
(abi_handle_regs_without_defs_p): New function.
(abi_target_promote_function_mode): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim-3.C
---
 gcc/ree.cc| 199 +++---
 .../g++.target/powerpc/zext-elim-3.C  |  13 ++
 2 files changed, 183 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..2025a7c43da 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
 if (REGNO (DF_REF_REG (def)) == REGNO (reg))
   break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -750,6 +751,120 @@ get_extended_src_reg (rtx src)
   return src;
 }
 
+/* Return TRUE if target mode is equal to source mode of zero_extend
+   or sign_extend otherwise false.  */
+
+static bool
+abi_target_promote_function_mode (machine_mode mode)
+{
+  int unsignedp;
+  machine_mode tgt_mode =
+targetm.calls.promote_function_mode (NULL_TREE, mode, ,
+NULL_TREE, 1);
+
+  if (tgt_mode == mode)
+return true;
+  else
+return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an return  registers.  */
+
+static bool
+abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_VALUE_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if reg source operand of zero_extend is argument registers
+   and not return registers and source and destination operand are same
+   and mode of source and destination operand are not same.  */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set),0);
+
+  bool copy_needed
+= (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+
+  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
+  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an argument registers.  */
+
+static bool
+abi_extension_candidate_argno_p (rtx_code code, int regno)
+{
+  if (code !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_ARG_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
+
+static bool
+abi_handle_regs_without_defs_p (rtx_insn *insn)
+{
+  if (side_effects_p (PATTERN (insn)))
+return false;
+
+  struct df_link *uses
+= get_uses (insn, SET_DEST (PATTERN (insn)));
+
+  if (!uses)
+return false;
+
+  for (df_link *use = uses; use; use = use->next)
+ 

[PING ^0][PATCH 3/4] Improve functionality of ree pass.

2023-09-12 Thread Ajit Agarwal via Gcc-patches


Ping!

 Forwarded Message 
Subject: [PATCH 3/4] Improve functionality of ree pass.
Date: Mon, 4 Sep 2023 13:27:42 +0530
From: Ajit Agarwal via Gcc-patches 
Reply-To: Ajit Agarwal 
To: Jeff Law , gcc-patches 
CC: Peter Bergner , Segher Boessenkool 



Hello Jeff:

This patch eliminates redundant zero and sign extension with ree pass for rs6000
target.

Bootstrapped and regtested for powerpc64-linux-gnu.

Thanks & Regards
Ajit


ree: Improve ree pass

For the rs6000 target we see redundant zero and sign extensions, and the ree
pass is improved to eliminate such redundant zero and sign extensions.  This
adds support for zero_extend/sign_extend/AND.

2023-09-04  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(rtx_is_zext_p): New function.
(feasible_cfg): New function.
* rtl.h (reg_used_set_between_p): Add prototype.
* rtlanal.cc (reg_used_set_between_p): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim.C: New testcase.
* g++.target/powerpc/zext-elim-1.C: New testcase.
* g++.target/powerpc/zext-elim-2.C: New testcase.
* g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/ree.cc| 487 --
 gcc/rtl.h |   1 +
 gcc/rtlanal.cc|  15 +
 gcc/testsuite/g++.target/powerpc/sext-elim.C  |  17 +
 .../g++.target/powerpc/zext-elim-1.C  |  19 +
 .../g++.target/powerpc/zext-elim-2.C  |  11 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
 7 files changed, 534 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..931b9b08821 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,77 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+{
+  rtx set = XEXP (insn, 0);
+  if (REG_P (set))
+   {
+ rtx src = XEXP (insn, 1);
+ machine_mode m_mode = GET_MODE (set);
+
+ if (CONST_INT_P (src)
+ && (INTVAL (src) == 1
+ || (m_mode == QImode && INTVAL (src) == 0x7)
+ || (m_mode == QImode && INTVAL (src) == 0x007F)
+ || (m_mode == HImode && INTVAL (src) == 0x7FFF)
+ || (m_mode == SImode && INTVAL (src) == 0x007F)))
+   return true;
+
+   }
+  else
+   return false;
+}
+
+  return false;
+}
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx_insn *insn)
+{
+  rtx body = single_set (insn);
+
+  if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+ rtx set = XEXP (SET_SRC (body), 0);
+
+ if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
+   {
+ rtx src = XEXP (SET_SRC (body), 1);
+ machine_mode m_mode = GET_MODE (set);
+
+ if (CONST_INT_P (src)
+ && (INTVAL (src) == 1
+ || (m_mode == QImode && INTVAL (src) == 0x7)
+ || (m_mode == QImode && INTVAL (src) == 0x007F)
+ || (m_mode == HImode && INTVAL (src) == 0x7FFF)
+ || (m_mode == SImode && INTVAL (src) == 0x007F)))
+   return true;
+   }
+ else
+  return false;
+   }
+
+   return false;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -319,7 +390,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   machine_mode orig_mode = GET_MODE (SET_DE

[PING ^0] [PATCH] rs6000: unnecessary clear after vctzlsbb in vec_first_match_or_eos_index

2023-09-12 Thread Ajit Agarwal via Gcc-patches


Ping!

 Forwarded Message 
Subject: [PATCH] rs6000: unnecessary clear after vctzlsbb in 
vec_first_match_or_eos_index
Date: Thu, 31 Aug 2023 16:14:46 +0530
From: Ajit Agarwal via Gcc-patches 
Reply-To: Ajit Agarwal 
To: gcc-patches 
CC: Peter Bergner , Segher Boessenkool 



This patch removes zero extension from vctzlsbb as it already zero extends.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: unnecessary clear after vctzlsbb in vec_first_match_or_eos_index

For the rs6000 target we don't need a zero_extend after vctzlsbb, as
vctzlsbb already zero-extends its result.

2023-08-31  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/vsx.md: Add new pattern.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/altivec-19.C: New testcase.
---
 gcc/config/rs6000/vsx.md  | 17 ++---
 gcc/testsuite/g++.target/powerpc/altivec-19.C | 11 +++
 2 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/altivec-19.C

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 19abfeb565a..09d21a6d00a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5846,11 +5846,22 @@
   [(set_attr "type" "vecsimple")])
 
 ;; Vector Count Trailing Zero Least-Significant Bits Byte
-(define_insn "vctzlsbb_"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+(define_insn "vctzlsbbzext_"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
(unspec:SI
 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
-UNSPEC_VCTZLSBB))]
+UNSPEC_VCTZLSBB)))]
+  "TARGET_P9_VECTOR"
+  "vctzlsbb %0,%1"
+  [(set_attr "type" "vecsimple")])
+
+;; Vector Count Trailing Zero Least-Significant Bits Byte
+(define_insn "vctzlsbb_"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+(unspec:SI
+ [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
+ UNSPEC_VCTZLSBB))]
   "TARGET_P9_VECTOR"
   "vctzlsbb %0,%1"
   [(set_attr "type" "vecsimple")])
diff --git a/gcc/testsuite/g++.target/powerpc/altivec-19.C 
b/gcc/testsuite/g++.target/powerpc/altivec-19.C
new file mode 100644
index 000..2d630b2fc1f
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/altivec-19.C
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2 " } */ 
+
+#include 
+
+unsigned int foo (vector unsigned char a, vector unsigned char b) {
+  return vec_first_match_or_eos_index (a, b);
+}
+/* { dg-final { scan-assembler-not "rldicl" } } */
-- 
2.39.3



[PATCH 3/4] Improve functionality of ree pass.

2023-09-04 Thread Ajit Agarwal via Gcc-patches


Hello Jeff:

This patch eliminates redundant zero and sign extension with ree pass for rs6000
target.

Bootstrapped and regtested for powerpc64-linux-gnu.

Thanks & Regards
Ajit


ree: Improve ree pass

For the rs6000 target we see redundant zero and sign extensions, and the ree
pass is improved to eliminate such redundant zero and sign extensions.  This
adds support for zero_extend/sign_extend/AND.

2023-09-04  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(rtx_is_zext_p): New function.
(feasible_cfg): New function.
* rtl.h (reg_used_set_between_p): Add prototype.
* rtlanal.cc (reg_used_set_between_p): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim.C: New testcase.
* g++.target/powerpc/zext-elim-1.C: New testcase.
* g++.target/powerpc/zext-elim-2.C: New testcase.
* g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/ree.cc| 487 --
 gcc/rtl.h |   1 +
 gcc/rtlanal.cc|  15 +
 gcc/testsuite/g++.target/powerpc/sext-elim.C  |  17 +
 .../g++.target/powerpc/zext-elim-1.C  |  19 +
 .../g++.target/powerpc/zext-elim-2.C  |  11 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
 7 files changed, 534 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..931b9b08821 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,77 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+{
+  rtx set = XEXP (insn, 0);
+  if (REG_P (set))
+   {
+ rtx src = XEXP (insn, 1);
+ machine_mode m_mode = GET_MODE (set);
+
+ if (CONST_INT_P (src)
+ && (INTVAL (src) == 1
+ || (m_mode == QImode && INTVAL (src) == 0x7)
+ || (m_mode == QImode && INTVAL (src) == 0x007F)
+ || (m_mode == HImode && INTVAL (src) == 0x7FFF)
+ || (m_mode == SImode && INTVAL (src) == 0x007F)))
+   return true;
+
+   }
+  else
+   return false;
+}
+
+  return false;
+}
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx_insn *insn)
+{
+  rtx body = single_set (insn);
+
+  if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+ rtx set = XEXP (SET_SRC (body), 0);
+
+ if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
+   {
+ rtx src = XEXP (SET_SRC (body), 1);
+ machine_mode m_mode = GET_MODE (set);
+
+ if (CONST_INT_P (src)
+ && (INTVAL (src) == 1
+ || (m_mode == QImode && INTVAL (src) == 0x7)
+ || (m_mode == QImode && INTVAL (src) == 0x007F)
+ || (m_mode == HImode && INTVAL (src) == 0x7FFF)
+ || (m_mode == SImode && INTVAL (src) == 0x007F)))
+   return true;
+   }
+ else
+  return false;
+   }
+
+   return false;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -319,7 +390,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_set;
+  rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
 
   /* If the extension's source/destination registers are not the same
@@ -359,27 +430,41 @@ combine_set_extension (ext_cand *cand, rtx_insn 
*curr_insn, rtx *orig_set)
   else if (GET_CODE (orig_src) == cand->code)
 {
   /* Here is a sequence of two extensions.  

[PATCH] rs6000: unnecessary clear after vctzlsbb in vec_first_match_or_eos_index

2023-08-31 Thread Ajit Agarwal via Gcc-patches
This patch removes zero extension from vctzlsbb as it already zero extends.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: unnecessary clear after vctzlsbb in vec_first_match_or_eos_index

For the rs6000 target we don't need a zero_extend after vctzlsbb, as
vctzlsbb already zero-extends its result.

2023-08-31  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/vsx.md: Add new pattern.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/altivec-19.C: New testcase.
---
 gcc/config/rs6000/vsx.md  | 17 ++---
 gcc/testsuite/g++.target/powerpc/altivec-19.C | 11 +++
 2 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/altivec-19.C

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 19abfeb565a..09d21a6d00a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5846,11 +5846,22 @@
   [(set_attr "type" "vecsimple")])
 
 ;; Vector Count Trailing Zero Least-Significant Bits Byte
-(define_insn "vctzlsbb_"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+(define_insn "vctzlsbbzext_"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
(unspec:SI
 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
-UNSPEC_VCTZLSBB))]
+UNSPEC_VCTZLSBB)))]
+  "TARGET_P9_VECTOR"
+  "vctzlsbb %0,%1"
+  [(set_attr "type" "vecsimple")])
+
+;; Vector Count Trailing Zero Least-Significant Bits Byte
+(define_insn "vctzlsbb_"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+(unspec:SI
+ [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
+ UNSPEC_VCTZLSBB))]
   "TARGET_P9_VECTOR"
   "vctzlsbb %0,%1"
   [(set_attr "type" "vecsimple")])
diff --git a/gcc/testsuite/g++.target/powerpc/altivec-19.C 
b/gcc/testsuite/g++.target/powerpc/altivec-19.C
new file mode 100644
index 000..2d630b2fc1f
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/altivec-19.C
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2 " } */ 
+
+#include 
+
+unsigned int foo (vector unsigned char a, vector unsigned char b) {
+  return vec_first_match_or_eos_index (a, b);
+}
+/* { dg-final { scan-assembler-not "rldicl" } } */
-- 
2.39.3



[PATCH] rs6000: unnecessary clear after vctzlsbb in vec_first_match_or_eos_index

2023-08-31 Thread Ajit Agarwal via Gcc-patches


This patch removes zero extension from vctzlsbb as it already zero extends.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: unnecessary clear after vctzlsbb in vec_first_match_or_eos_index

For the rs6000 target we don't need a zero_extend after vctzlsbb, as
vctzlsbb already zero-extends its result.

2023-08-31  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/vsx.md: Add new pattern.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/altivec-19.C: New testcase.
---
 gcc/config/rs6000/vsx.md  | 17 ++---
 gcc/testsuite/g++.target/powerpc/altivec-19.C | 11 +++
 2 files changed, 25 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/altivec-19.C

diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 19abfeb565a..09d21a6d00a 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5846,11 +5846,22 @@
   [(set_attr "type" "vecsimple")])
 
 ;; Vector Count Trailing Zero Least-Significant Bits Byte
-(define_insn "vctzlsbb_"
-  [(set (match_operand:SI 0 "register_operand" "=r")
+(define_insn "vctzlsbbzext_"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
(unspec:SI
 [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
-UNSPEC_VCTZLSBB))]
+UNSPEC_VCTZLSBB)))]
+  "TARGET_P9_VECTOR"
+  "vctzlsbb %0,%1"
+  [(set_attr "type" "vecsimple")])
+
+;; Vector Count Trailing Zero Least-Significant Bits Byte
+(define_insn "vctzlsbb_"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+(unspec:SI
+ [(match_operand:VSX_EXTRACT_I 1 "altivec_register_operand" "v")]
+ UNSPEC_VCTZLSBB))]
   "TARGET_P9_VECTOR"
   "vctzlsbb %0,%1"
   [(set_attr "type" "vecsimple")])
diff --git a/gcc/testsuite/g++.target/powerpc/altivec-19.C 
b/gcc/testsuite/g++.target/powerpc/altivec-19.C
new file mode 100644
index 000..2d630b2fc1f
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/altivec-19.C
@@ -0,0 +1,11 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2 " } */ 
+
+#include <altivec.h>
+
+unsigned int foo (vector unsigned char a, vector unsigned char b) {
+  return vec_first_match_or_eos_index (a, b);
+}
+/* { dg-final { scan-assembler-not "rldicl" } } */
-- 
2.39.3



[PING^4] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-08-21 Thread Ajit Agarwal via Gcc-patches


Ping!

 Forwarded Message 
Subject: [PING^3] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using 
defined ABI interfaces.
Date: Tue, 1 Aug 2023 13:48:58 +0530
From: Ajit Agarwal 
To: gcc-patches , Jeff Law , 
Richard Biener , Peter Bergner 
, Segher Boessenkool , 
rashmi.srid...@ibm.com

Ping!


 Forwarded Message 
Subject: [PING^2] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using 
defined ABI interfaces.
Date: Tue, 18 Jul 2023 13:28:08 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Jeff Law , Richard Biener 
, Segher Boessenkool , 
Peter Bergner 


Ping^2.

Please review.

Thanks & Regards
Ajit


This new version of patch 4 improves the ree pass for the rs6000 target using
defined ABI interfaces.
Bootstrapped and regtested on powerpc64-linux-gnu.

Review comments incorporated.

Thanks & Regards
Ajit

Improve ree pass for rs6000 target using defined abi interfaces

For the rs6000 target we see redundant zero and sign
extensions.  This patch improves the ree pass to eliminate
such redundant zero and sign extensions using the defined
ABI interfaces.

2023-06-01  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
defined abi interfaces.
(add_removable_extension): Use of defined abi interfaces for no
reaching defs.
(abi_extension_candidate_return_reg_p): New function.
(abi_extension_candidate_p): New function.
(abi_extension_candidate_argno_p): New function.
(abi_handle_regs_without_defs_p): New function.
(abi_target_promote_function_mode): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim-3.C
---
 gcc/ree.cc| 199 +++---
 .../g++.target/powerpc/zext-elim-3.C  |  13 ++
 2 files changed, 183 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..2025a7c43da 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
 if (REGNO (DF_REF_REG (def)) == REGNO (reg))
   break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -750,6 +751,120 @@ get_extended_src_reg (rtx src)
   return src;
 }
 
+/* Return TRUE if target mode is equal to source mode of zero_extend
+   or sign_extend otherwise false.  */
+
+static bool
+abi_target_promote_function_mode (machine_mode mode)
+{
+  int unsignedp;
+  machine_mode tgt_mode =
+targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
+NULL_TREE, 1);
+
+  if (tgt_mode == mode)
+return true;
+  else
+return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an return  registers.  */
+
+static bool
+abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_VALUE_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if reg source operand of zero_extend is argument registers
+   and not return registers and source and destination operand are same
+   and mode of source and destination operand are not same.  */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set),0);
+
+  bool copy_needed
+= (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+
+  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
+  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an argument registers.  */
+
+static bool
+abi_extension_candidate_argno_p (rtx_code code, int regno)
+{
+  if (code !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_ARG_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
+
+static bool
+abi_handle_regs_without_defs_p (rtx_insn *insn)
+{
+  if (side_effects_p (PATTERN (insn)))
+return false;
+
+  struct df_link *uses
+= get_uses (insn, SET_DEST (PATTERN (insn)));
+
+  if (!uses)
+return false;
+
+  for (df_link *use = uses; use; use = use->next)
+{
+  if (!use->ref)
+   return false;
+
+  if (BLOCK_FOR_INSN (insn)
+ != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+   return false;
+
+  rtx_insn *use_insn = DF_REF_INSN (use->ref);
+
+  if (GET_CODE (PATTERN (use_insn)) == SET)
+   {
+ rtx_code code = GET_CODE 

[PING^4] [PATCH 3/4] ree: Improve functionality of ree pass for rs6000 target.

2023-08-21 Thread Ajit Agarwal via Gcc-patches
Ping!


 Forwarded Message 
Subject: PING^3] [PATCH 3/4] ree: Improve functionality of ree pass for rs6000 
target.
Date: Tue, 1 Aug 2023 13:50:21 +0530
From: Ajit Agarwal 
To: gcc-patches , Jeff Law , 
Richard Biener , Peter Bergner 
, Segher Boessenkool , 
rashmi.srid...@ibm.com


Ping!

 Forwarded Message 
Subject: [PING^2] [PATCH 3/4] ree: Improve functionality of ree pass for rs6000 
target.
Date: Tue, 18 Jul 2023 13:31:27 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Jeff Law , Richard Biener 
, Segher Boessenkool , 
Peter Bergner 

Ping^2.

Please review.

Thanks & Regards
Ajit


This patch provides functionality to improve the ree pass for the rs6000 target.
It eliminates sign_extend/zero_extend/AND with varying constants.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target

For the rs6000 target we see redundant zero and sign extensions.  This patch
improves the ree pass to eliminate such redundant zero and sign extensions.  It
adds support for zero_extend/sign_extend/AND, including AND used as an extension
with constants other than 1.

2023-06-07  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(rtx_is_zext_p): New function.
(feasible_cfg): New function.
* rtl.h (reg_used_set_between_p): Add prototype.
* rtlanal.cc (reg_used_set_between_p): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim.C: New testcase.
* g++.target/powerpc/zext-elim-1.C: New testcase.
* g++.target/powerpc/zext-elim-2.C: New testcase.
* g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/ree.cc| 476 --
 gcc/rtl.h |   1 +
 gcc/rtlanal.cc|  15 +
 gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
 .../g++.target/powerpc/zext-elim-1.C  |  19 +
 .../g++.target/powerpc/zext-elim-2.C  |  11 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
 7 files changed, 524 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..dc6da21ec16 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,66 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+{
+  rtx set = XEXP (insn, 0);
+  if (REG_P (set))
+   {
+ rtx src = XEXP (insn, 1);
+
+ if (CONST_INT_P (src)
+ && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
+   return true;
+   }
+  else
+   return false;
+}
+
+  return false;
+}
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx_insn *insn)
+{
+  rtx body = single_set (insn);
+
+  if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+ rtx set = XEXP (SET_SRC (body), 0);
+
+ if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
+   {
+ rtx src = XEXP (SET_SRC (body), 1);
+
+ if (CONST_INT_P (src)
+ && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
+   return true;
+   }
+ else
+  return false;
+   }
+
+   return false;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -319,7 +379,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_set;
+  rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
 
   /* If the extension's source/destination registers are not the same
@@ -359,27 +419,41 @@ combine_set_extension (ext_cand 

[PING^2] [PATCH v8] tree-ssa-sink: Improve code sinking pass.

2023-08-21 Thread Ajit Agarwal via Gcc-patches
Ping!


 Forwarded Message 
Subject: [PING^1] [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Tue, 1 Aug 2023 13:47:10 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Richard Biener , Jeff Law 
, Peter Bergner , Segher 
Boessenkool , rashmi.srid...@ibm.com

Ping! 


 Forwarded Message 
Subject: [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Tue, 18 Jul 2023 19:03:37 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Richard Biener , Jeff Law 
, Segher Boessenkool , Peter 
Bergner 

Hello All:

This patch improves the code sinking pass to sink statements before calls, which
reduces register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-07-18  Ajit Kumar Agarwal  

gcc/ChangeLog:

PR tree-optimization/81953
* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

PR tree-optimization/81953
* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 ++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +++
 gcc/tree-ssa-sink.cc| 59 -
 3 files changed, 67 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..e7190323abe 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -173,7 +173,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,11 +191,22 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+ operands, then increase the threshold by 7% as those are even more
+ profitable to avoid, clamping at 100%.  */
+  threshold = param_sink_frequency_threshold;
+  if (gimple_vuse (stmt) || gimple_vdef (stmt))
+{
+  threshold += 7;
+  if (threshold > 100)
+   threshold = 100;
+}
 
   while (temp_bb != early_bb)
 {
@@ -203,34 

PING^3] [PATCH 3/4] ree: Improve functionality of ree pass for rs6000 target.

2023-08-01 Thread Ajit Agarwal via Gcc-patches


Ping!

 Forwarded Message 
Subject: [PING^2] [PATCH 3/4] ree: Improve functionality of ree pass for rs6000 
target.
Date: Tue, 18 Jul 2023 13:31:27 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Jeff Law , Richard Biener 
, Segher Boessenkool , 
Peter Bergner 

Ping^2.

Please review.

Thanks & Regards
Ajit


This patch provides functionality to improve the ree pass for the rs6000 target.
It eliminates sign_extend/zero_extend/AND with varying constants.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target

For the rs6000 target we see redundant zero and sign extensions.  This patch
improves the ree pass to eliminate such redundant zero and sign extensions.  It
adds support for zero_extend/sign_extend/AND, including AND used as an extension
with constants other than 1.

2023-06-07  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(rtx_is_zext_p): New function.
(feasible_cfg): New function.
* rtl.h (reg_used_set_between_p): Add prototype.
* rtlanal.cc (reg_used_set_between_p): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim.C: New testcase.
* g++.target/powerpc/zext-elim-1.C: New testcase.
* g++.target/powerpc/zext-elim-2.C: New testcase.
* g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/ree.cc| 476 --
 gcc/rtl.h |   1 +
 gcc/rtlanal.cc|  15 +
 gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
 .../g++.target/powerpc/zext-elim-1.C  |  19 +
 .../g++.target/powerpc/zext-elim-2.C  |  11 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
 7 files changed, 524 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..dc6da21ec16 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,66 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+{
+  rtx set = XEXP (insn, 0);
+  if (REG_P (set))
+   {
+ rtx src = XEXP (insn, 1);
+
+ if (CONST_INT_P (src)
+ && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
+   return true;
+   }
+  else
+   return false;
+}
+
+  return false;
+}
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx_insn *insn)
+{
+  rtx body = single_set (insn);
+
+  if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+ rtx set = XEXP (SET_SRC (body), 0);
+
+ if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
+   {
+ rtx src = XEXP (SET_SRC (body), 1);
+
+ if (CONST_INT_P (src)
+ && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
+   return true;
+   }
+ else
+  return false;
+   }
+
+   return false;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -319,7 +379,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_set;
+  rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
 
   /* If the extension's source/destination registers are not the same
@@ -359,27 +419,41 @@ combine_set_extension (ext_cand *cand, rtx_insn 
*curr_insn, rtx *orig_set)
   else if (GET_CODE (orig_src) == cand->code)
 {
   /* Here is a sequence of two extensions.  Try to merge them.  */
-  rtx temp_extension
-   = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
+  rtx temp_extension = 

[PING^3] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-08-01 Thread Ajit Agarwal via Gcc-patches
Ping!


 Forwarded Message 
Subject: [PING^2] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using 
defined ABI interfaces.
Date: Tue, 18 Jul 2023 13:28:08 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Jeff Law , Richard Biener 
, Segher Boessenkool , 
Peter Bergner 


Ping^2.

Please review.

Thanks & Regards
Ajit


This new version of patch 4 improves the ree pass for the rs6000 target using
defined ABI interfaces.
Bootstrapped and regtested on powerpc64-linux-gnu.

Review comments incorporated.

Thanks & Regards
Ajit

Improve ree pass for rs6000 target using defined abi interfaces

For the rs6000 target we see redundant zero and sign
extensions.  This patch improves the ree pass to eliminate
such redundant zero and sign extensions using the defined
ABI interfaces.

2023-06-01  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
defined abi interfaces.
(add_removable_extension): Use of defined abi interfaces for no
reaching defs.
(abi_extension_candidate_return_reg_p): New function.
(abi_extension_candidate_p): New function.
(abi_extension_candidate_argno_p): New function.
(abi_handle_regs_without_defs_p): New function.
(abi_target_promote_function_mode): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim-3.C
---
 gcc/ree.cc| 199 +++---
 .../g++.target/powerpc/zext-elim-3.C  |  13 ++
 2 files changed, 183 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..2025a7c43da 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
 if (REGNO (DF_REF_REG (def)) == REGNO (reg))
   break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -750,6 +751,120 @@ get_extended_src_reg (rtx src)
   return src;
 }
 
+/* Return TRUE if target mode is equal to source mode of zero_extend
+   or sign_extend otherwise false.  */
+
+static bool
+abi_target_promote_function_mode (machine_mode mode)
+{
+  int unsignedp;
+  machine_mode tgt_mode =
+targetm.calls.promote_function_mode (NULL_TREE, mode, &unsignedp,
+NULL_TREE, 1);
+
+  if (tgt_mode == mode)
+return true;
+  else
+return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an return  registers.  */
+
+static bool
+abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_VALUE_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if reg source operand of zero_extend is argument registers
+   and not return registers and source and destination operand are same
+   and mode of source and destination operand are not same.  */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set),0);
+
+  bool copy_needed
+= (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+
+  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
+  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an argument registers.  */
+
+static bool
+abi_extension_candidate_argno_p (rtx_code code, int regno)
+{
+  if (code !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_ARG_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
+
+static bool
+abi_handle_regs_without_defs_p (rtx_insn *insn)
+{
+  if (side_effects_p (PATTERN (insn)))
+return false;
+
+  struct df_link *uses
+= get_uses (insn, SET_DEST (PATTERN (insn)));
+
+  if (!uses)
+return false;
+
+  for (df_link *use = uses; use; use = use->next)
+{
+  if (!use->ref)
+   return false;
+
+  if (BLOCK_FOR_INSN (insn)
+ != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+   return false;
+
+  rtx_insn *use_insn = DF_REF_INSN (use->ref);
+
+  if (GET_CODE (PATTERN (use_insn)) == SET)
+   {
+ rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
+
+ if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+ || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ || GET_RTX_CLASS (code) == RTX_UNARY)
+   return false;
+   }
+ }
+  return true;
+}
+
 /* This function goes through all reaching defs of the source

[PING^1] [PATCH v8] tree-ssa-sink: Improve code sinking pass.

2023-08-01 Thread Ajit Agarwal via Gcc-patches
Ping! 


 Forwarded Message 
Subject: [PATCH v8] tree-ssa-sink: Improve code sinking pass.
Date: Tue, 18 Jul 2023 19:03:37 +0530
From: Ajit Agarwal 
To: gcc-patches 
CC: Richard Biener , Jeff Law 
, Segher Boessenkool , Peter 
Bergner 

Hello All:

This patch improves the code sinking pass to sink statements before calls, which
reduces register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-07-18  Ajit Kumar Agarwal  

gcc/ChangeLog:

PR tree-optimization/81953
* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

PR tree-optimization/81953
* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 ++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +++
 gcc/tree-ssa-sink.cc| 59 -
 3 files changed, 67 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..e7190323abe 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -173,7 +173,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,11 +191,22 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+ operands, then increase the threshold by 7% as those are even more
+ profitable to avoid, clamping at 100%.  */
+  threshold = param_sink_frequency_threshold;
+  if (gimple_vuse (stmt) || gimple_vdef (stmt))
+{
+  threshold += 7;
+  if (threshold > 100)
+   threshold = 100;
+}
 
   while (temp_bb != early_bb)
 {
@@ -203,34 +215,31 @@ select_best_block (basic_block early_bb,
   if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
best_bb = temp_bb;
 
+  /* Placing a statement before a setjmp-like function would be invalid
+(it cannot be reevaluated when execution follows an 

[PATCH v8] tree-ssa-sink: Improve code sinking pass.

2023-07-18 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch improves the code sinking pass to sink statements before calls, which
reduces register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-07-18  Ajit Kumar Agarwal  

gcc/ChangeLog:

PR tree-optimization/81953
* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

PR tree-optimization/81953
* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 ++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +++
 gcc/tree-ssa-sink.cc| 59 -
 3 files changed, 67 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..e7190323abe 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -173,7 +173,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,11 +191,22 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+ operands, then increase the threshold by 7% as those are even more
+ profitable to avoid, clamping at 100%.  */
+  threshold = param_sink_frequency_threshold;
+  if (gimple_vuse (stmt) || gimple_vdef (stmt))
+{
+  threshold += 7;
+  if (threshold > 100)
+   threshold = 100;
+}
 
   while (temp_bb != early_bb)
 {
@@ -203,34 +215,31 @@ select_best_block (basic_block early_bb,
   if (bb_loop_depth (temp_bb) < bb_loop_depth (best_bb))
best_bb = temp_bb;
 
+  /* Placing a statement before a setjmp-like function would be invalid
+(it cannot be reevaluated when execution follows an abnormal edge).
+If we selected a block with abnormal predecessors, just punt.  */
+  if (bb_has_abnormal_pred (temp_bb))
+   return early_bb;
+
+  /* if we have temp_bb post dominated by use block block then immediate
+   * 

Re: PING^1 [PATCH v7] tree-ssa-sink: Improve code sinking pass

2023-07-18 Thread Ajit Agarwal via Gcc-patches



On 18/07/23 4:38 pm, Prathamesh Kulkarni wrote:
> On Tue, 18 Jul 2023 at 13:26, Ajit Agarwal via Gcc-patches
>  wrote:
>>
>>
>> Ping!
>>
>> please review.
>>
>> Thanks & Regards
>> Ajit
>>
>>
>> This patch improves code sinking pass to sink statements before call to 
>> reduce
>> register pressure.
>> Review comments are incorporated.
>>
>> For example :
>>
>> void bar();
>> int j;
>> void foo(int a, int b, int c, int d, int e, int f)
>> {
>>   int l;
>>   l = a + b + c + d +e + f;
>>   if (a != 5)
>> {
>>   bar();
>>   j = l;
>> }
>> }
>>
>> Code Sinking does the following:
>>
>> void bar();
>> int j;
>> void foo(int a, int b, int c, int d, int e, int f)
>> {
>>   int l;
>>
>>   if (a != 5)
>> {
>>   l = a + b + c + d +e + f;
>>   bar();
>>   j = l;
>> }
>> }
>>
>> Bootstrapped regtested on powerpc64-linux-gnu.
>>
>> Thanks & Regards
>> Ajit
>>
>>
>> tree-ssa-sink: Improve code sinking pass
>>
>> Currently, code sinking will sink code after function calls.  This increases
>> register pressure for callee-saved registers.  The following patch improves
>> code sinking by placing the sunk code before calls in the use block or in
>> the immediate dominator of the use blocks.
>>
>> 2023-06-01  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>> PR tree-optimization/81953
>> * tree-ssa-sink.cc (statement_sink_location): Move statements before
>> calls.
>> (def_use_same_block): New function.
>> (select_best_block): Add heuristics to select the best blocks in the
>> immediate post dominator.
>>
>> gcc/testsuite/ChangeLog:
>>
>> PR tree-optimization/81953
>> * gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
>> * gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
>> ---
>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 
>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +
>>  gcc/tree-ssa-sink.cc| 79 ++---
>>  3 files changed, 87 insertions(+), 26 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
>> new file mode 100644
>> index 000..d3b79ca5803
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
>> @@ -0,0 +1,15 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
>> +void bar();
>> +int j;
>> +void foo(int a, int b, int c, int d, int e, int f)
>> +{
>> +  int l;
>> +  l = a + b + c + d +e + f;
>> +  if (a != 5)
>> +{
>> +  bar();
>> +  j = l;
>> +}
>> +}
>> +/* { dg-final { scan-tree-dump 
>> {l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>> new file mode 100644
>> index 000..84e7938c54f
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>> @@ -0,0 +1,19 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-sink-stats" } */
>> +void bar();
>> +int j, x;
>> +void foo(int a, int b, int c, int d, int e, int f)
>> +{
>> +  int l;
>> +  l = a + b + c + d +e + f;
>> +  if (a != 5)
>> +{
>> +  bar();
>> +  if (b != 3)
>> +x = 3;
>> +  else
>> +x = 5;
>> +  j = l;
>> +}
>> +}
>> +/* { dg-final { scan-tree-dump 
>> {l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
>> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
>> index b1ba7a2ad6c..113c89d0967 100644
>> --- a/gcc/tree-ssa-sink.cc
>> +++ b/gcc/tree-ssa-sink.cc
>> @@ -171,9 +171,28 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
>> bool *debug_stmts)
>>return commondom;
>>  }
>>
>> +/* Return TRUE if immediate defs of STMT and STMT are in same
>> + * block, FALSE otherwise.  */
>> +
>> +static bool
>> +def_use_same_block (gimple *stmt)
>> +{
>> +  def_operand_p def;
>> +  ssa

[PING^2] [PATCH 3/4] ree: Improve functionality of ree pass for rs6000 target.

2023-07-18 Thread Ajit Agarwal via Gcc-patches
Ping^2.

Please review.

Thanks & Regards
Ajit


This patch provides functionality to improve the ree pass for the rs6000 target.
It eliminates redundant sign_extend/zero_extend/AND with varying constants.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target

For the rs6000 target we see redundant zero and sign extensions.  This patch
improves the ree pass to eliminate such redundant extensions.  It adds support
for zero_extend/sign_extend/AND, and also supports AND-based extension with
constants other than 1.

2023-06-07  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(rtx_is_zext_p): New function.
(feasible_cfg): New function.
* rtl.h (reg_used_set_between_p): Add prototype.
* rtlanal.cc (reg_used_set_between_p): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim.C: New testcase.
* g++.target/powerpc/zext-elim-1.C: New testcase.
* g++.target/powerpc/zext-elim-2.C: New testcase.
* g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/ree.cc| 476 --
 gcc/rtl.h |   1 +
 gcc/rtlanal.cc|  15 +
 gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
 .../g++.target/powerpc/zext-elim-1.C  |  19 +
 .../g++.target/powerpc/zext-elim-2.C  |  11 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
 7 files changed, 524 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..dc6da21ec16 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,66 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+{
+  rtx set = XEXP (insn, 0);
+  if (REG_P (set))
+   {
+ rtx src = XEXP (insn, 1);
+
+ if (CONST_INT_P (src)
+ && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
+   return true;
+   }
+  else
+   return false;
+}
+
+  return false;
+}
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx_insn *insn)
+{
+  rtx body = single_set (insn);
+
+  if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+ rtx set = XEXP (SET_SRC (body), 0);
+
+ if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
+   {
+ rtx src = XEXP (SET_SRC (body), 1);
+
+ if (CONST_INT_P (src)
+ && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
+   return true;
+   }
+ else
+  return false;
+   }
+
+   return false;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -319,7 +379,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_set;
+  rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
 
   /* If the extension's source/destination registers are not the same
@@ -359,27 +419,41 @@ combine_set_extension (ext_cand *cand, rtx_insn 
*curr_insn, rtx *orig_set)
   else if (GET_CODE (orig_src) == cand->code)
 {
   /* Here is a sequence of two extensions.  Try to merge them.  */
-  rtx temp_extension
-   = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
+  rtx temp_extension = NULL_RTX;
+  if (GET_CODE (SET_SRC (cand_pat)) == AND)
+   temp_extension
+   = gen_rtx_AND (cand->mode, XEXP (orig_src, 0), XEXP (orig_src, 1));
+  else
+   temp_extension
+= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
   rtx 

[PING^2] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-07-18 Thread Ajit Agarwal via Gcc-patches


Ping^2.

Please review.

Thanks & Regards
Ajit


This new version of patch 4 improves the ree pass for the rs6000 target using
defined ABI interfaces.
Bootstrapped and regtested on powerpc64-linux-gnu.

Review comments incorporated.

Thanks & Regards
Ajit

Improve ree pass for rs6000 target using defined abi interfaces

For the rs6000 target we see redundant zero and sign
extensions.  This patch improves the ree pass to eliminate
such redundant zero and sign extensions using defined
ABI interfaces.

2023-06-01  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
defined abi interfaces.
(add_removable_extension): Use of defined abi interfaces for no
reaching defs.
(abi_extension_candidate_return_reg_p): New function.
(abi_extension_candidate_p): New function.
(abi_extension_candidate_argno_p): New function.
(abi_handle_regs_without_defs_p): New function.
(abi_target_promote_function_mode): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim-3.C: New testcase.
---
 gcc/ree.cc| 199 +++---
 .../g++.target/powerpc/zext-elim-3.C  |  13 ++
 2 files changed, 183 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..2025a7c43da 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
 if (REGNO (DF_REF_REG (def)) == REGNO (reg))
   break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -750,6 +751,120 @@ get_extended_src_reg (rtx src)
   return src;
 }
 
+/* Return TRUE if target mode is equal to source mode of zero_extend
+   or sign_extend otherwise false.  */
+
+static bool
+abi_target_promote_function_mode (machine_mode mode)
+{
+  int unsignedp;
+  machine_mode tgt_mode =
+targetm.calls.promote_function_mode (NULL_TREE, mode, ,
+NULL_TREE, 1);
+
+  if (tgt_mode == mode)
+return true;
+  else
+return false;
+}
+
+/* Return TRUE if the candidate insn is a zero extend and regno is
+   a return register.  */
+
+static bool
+abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_VALUE_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the source register of the zero_extend is an argument
+   register and not a return register, the source and destination registers
+   are the same, and their modes differ.  */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set),0);
+
+  bool copy_needed
+= (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+
+  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
+  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn is a zero extend and regno is
+   an argument register.  */
+
+static bool
+abi_extension_candidate_argno_p (rtx_code code, int regno)
+{
+  if (code !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_ARG_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
+
+static bool
+abi_handle_regs_without_defs_p (rtx_insn *insn)
+{
+  if (side_effects_p (PATTERN (insn)))
+return false;
+
+  struct df_link *uses
+= get_uses (insn, SET_DEST (PATTERN (insn)));
+
+  if (!uses)
+return false;
+
+  for (df_link *use = uses; use; use = use->next)
+{
+  if (!use->ref)
+   return false;
+
+  if (BLOCK_FOR_INSN (insn)
+ != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+   return false;
+
+  rtx_insn *use_insn = DF_REF_INSN (use->ref);
+
+  if (GET_CODE (PATTERN (use_insn)) == SET)
+   {
+ rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
+
+ if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+ || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ || GET_RTX_CLASS (code) == RTX_UNARY)
+   return false;
+   }
+ }
+  return true;
+}
+
 /* This function goes through all reaching defs of the source
of the candidate for elimination (CAND) and tries to combine
the extension with the definition instruction.  The changes
@@ -770,6 +885,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
 
   state->defs_list.truncate (0);
   

PING^1 [PATCH v7] tree-ssa-sink: Improve code sinking pass

2023-07-18 Thread Ajit Agarwal via Gcc-patches


Ping!

please review.

Thanks & Regards
Ajit


This patch improves the code sinking pass to sink statements before calls,
which reduces register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-06-01  Ajit Kumar Agarwal  

gcc/ChangeLog:

PR tree-optimization/81953
* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

PR tree-optimization/81953
* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +
 gcc/tree-ssa-sink.cc| 79 ++---
 3 files changed, 87 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..113c89d0967 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -171,9 +171,28 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
   return commondom;
 }
 
+/* Return TRUE if immediate defs of STMT and STMT are in same
+ * block, FALSE otherwise.  */
+
+static bool
+def_use_same_block (gimple *stmt)
+{
+  def_operand_p def;
+  ssa_op_iter iter;
+
+  FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF)
+{
+  gimple *def_stmt = SSA_NAME_DEF_STMT (DEF_FROM_PTR (def));
+  if ((gimple_bb (def_stmt) == gimple_bb (stmt)))
+   return true;
+ }
+  return false;
+}
+
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,11 +209,22 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+ operands, then increase the threshold by 7% as those are even more
+ profitable to avoid, clamping at 100%.  */
+  threshold = param_sink_frequency_threshold;
+  if (gimple_vuse (stmt) || gimple_vdef (stmt))
+{
+  threshold += 7;
+  if (threshold > 100)
+   threshold = 100;
+}
 
   while (temp_bb != early_bb)
 {
@@ -203,34 +233,33 @@ select_best_block (basic_block 

[PATCH v7] tree-ssa-sink: Improve code sinking pass

2023-06-27 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch improves the code sinking pass to sink statements before calls,
which reduces register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-06-01  Ajit Kumar Agarwal  

gcc/ChangeLog:

PR tree-optimization/81953
* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

PR tree-optimization/81953
* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 +
 gcc/tree-ssa-sink.cc| 79 ++---
 3 files changed, 87 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..113c89d0967 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -171,9 +171,28 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
   return commondom;
 }
 
+/* Return TRUE if immediate defs of STMT and STMT are in same
+ * block, FALSE otherwise.  */
+
+static bool
+def_use_same_block (gimple *stmt)
+{
+  def_operand_p def;
+  ssa_op_iter iter;
+
+  FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF)
+{
+  gimple *def_stmt = SSA_NAME_DEF_STMT (DEF_FROM_PTR (def));
+  if ((gimple_bb (def_stmt) == gimple_bb (stmt)))
+   return true;
+ }
+  return false;
+}
+
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,11 +209,22 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
   int threshold;
+  /* Get the sinking threshold.  If the statement to be moved has memory
+ operands, then increase the threshold by 7% as those are even more
+ profitable to avoid, clamping at 100%.  */
+  threshold = param_sink_frequency_threshold;
+  if (gimple_vuse (stmt) || gimple_vdef (stmt))
+{
+  threshold += 7;
+  if (threshold > 100)
+   threshold = 100;
+}
 
   while (temp_bb != early_bb)
 {
@@ -203,34 +233,33 @@ select_best_block (basic_block early_bb,
   if (bb_loop_depth 

[PING] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-06-26 Thread Ajit Agarwal via Gcc-patches
All:

Ok for trunk. Please review.

Thanks & Regards
Ajit

On 26/06/23 6:12 pm, Ajit Agarwal via Gcc-patches wrote:
> All:
> 
> Ok for trunk. Please review.
> 
> Thanks & Regards
> Ajit
> 
> On 01/06/23 10:53 am, Ajit Agarwal via Gcc-patches wrote:
>> Hello All:
>>
>> This new version of patch 4 improves the ree pass for the rs6000 target using
>> defined ABI interfaces.
>> Bootstrapped and regtested on power64-linux-gnu.
>>
>> Review comments incorporated.
>>
>> Thanks & Regards
>> Ajit
>>
>> Improve ree pass for rs6000 target using defined abi interfaces
>>
>> For the rs6000 target we see redundant zero and sign
>> extensions. This patch improves the ree pass to eliminate
>> such redundant zero and sign extensions using defined
>> ABI interfaces.
>>
>> 2023-06-01  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>>  * ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
>>  defined abi interfaces.
>>  (add_removable_extension): Use of defined abi interfaces for no
>>  reaching defs.
>>  (abi_extension_candidate_return_reg_p): New function.
>>  (abi_extension_candidate_p): New function.
>>  (abi_extension_candidate_argno_p): New function.
>>  (abi_handle_regs_without_defs_p): New function.
>>  (abi_target_promote_function_mode): New function.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * g++.target/powerpc/zext-elim-3.C
>> ---
>>  gcc/ree.cc| 199 +++---
>>  .../g++.target/powerpc/zext-elim-3.C  |  13 ++
>>  2 files changed, 183 insertions(+), 29 deletions(-)
>>  create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>>
>> diff --git a/gcc/ree.cc b/gcc/ree.cc
>> index fc04249fa84..2025a7c43da 100644
>> --- a/gcc/ree.cc
>> +++ b/gcc/ree.cc
>> @@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
>>  if (REGNO (DF_REF_REG (def)) == REGNO (reg))
>>break;
>>  
>> -  gcc_assert (def != NULL);
>> +  if (def == NULL)
>> +return NULL;
>>  
>>ref_chain = DF_REF_CHAIN (def);
>>  
>> @@ -750,6 +751,120 @@ get_extended_src_reg (rtx src)
>>return src;
>>  }
>>  
>> +/* Return TRUE if target mode is equal to source mode of zero_extend
>> +   or sign_extend otherwise false.  */
>> +
>> +static bool
>> +abi_target_promote_function_mode (machine_mode mode)
>> +{
>> +  int unsignedp;
>> +  machine_mode tgt_mode =
>> +targetm.calls.promote_function_mode (NULL_TREE, mode, ,
>> + NULL_TREE, 1);
>> +
>> +  if (tgt_mode == mode)
>> +return true;
>> +  else
>> +return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   an return  registers.  */
>> +
>> +static bool
>> +abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
>> +{
>> +  rtx set = single_set (insn);
>> +
>> +  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
>> +return false;
>> +
>> +  if (FUNCTION_VALUE_REGNO_P (regno))
>> +return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>> +   and not return registers and source and destination operand are same
>> +   and mode of source and destination operand are not same.  */
>> +
>> +static bool
>> +abi_extension_candidate_p (rtx_insn *insn)
>> +{
>> +  rtx set = single_set (insn);
>> +
>> +  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
>> +return false;
>> +
>> +  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
>> +  rtx orig_src = XEXP (SET_SRC (set),0);
>> +
>> +  bool copy_needed
>> += (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
>> +
>> +  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
>> +  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>> +  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
>> +return true;
>> +
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if the candidate insn is zero extend and regno is
>> +   an argument registers.  */
>> +
>> +static bool
>> +abi_extension_candidate_argno_p (rtx_code code, int regno)
>> +{
>> +  if (code !=  ZERO_EXTEND)
>> +return false;
>> +
>> +  if (FUNCTION_ARG_REGN

Re: PATCH v5 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-06-26 Thread Ajit Agarwal via Gcc-patches
All:

Ok for trunk. Please review.

Thanks & Regards
Ajit

On 01/06/23 10:53 am, Ajit Agarwal via Gcc-patches wrote:
> Hello All:
> 
> This new version of patch 4 improves the ree pass for the rs6000 target using
> defined ABI interfaces.
> Bootstrapped and regtested on power64-linux-gnu.
> 
> Review comments incorporated.
> 
> Thanks & Regards
> Ajit
> 
> Improve ree pass for rs6000 target using defined abi interfaces
> 
> For the rs6000 target we see redundant zero and sign
> extensions. This patch improves the ree pass to eliminate
> such redundant zero and sign extensions using defined
> ABI interfaces.
> 
> 2023-06-01  Ajit Kumar Agarwal  
> 
> gcc/ChangeLog:
> 
>   * ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
>   defined abi interfaces.
>   (add_removable_extension): Use of defined abi interfaces for no
>   reaching defs.
>   (abi_extension_candidate_return_reg_p): New function.
>   (abi_extension_candidate_p): New function.
>   (abi_extension_candidate_argno_p): New function.
>   (abi_handle_regs_without_defs_p): New function.
>   (abi_target_promote_function_mode): New function.
> 
> gcc/testsuite/ChangeLog:
> 
> * g++.target/powerpc/zext-elim-3.C
> ---
>  gcc/ree.cc| 199 +++---
>  .../g++.target/powerpc/zext-elim-3.C  |  13 ++
>  2 files changed, 183 insertions(+), 29 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C
> 
> diff --git a/gcc/ree.cc b/gcc/ree.cc
> index fc04249fa84..2025a7c43da 100644
> --- a/gcc/ree.cc
> +++ b/gcc/ree.cc
> @@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
>  if (REGNO (DF_REF_REG (def)) == REGNO (reg))
>break;
>  
> -  gcc_assert (def != NULL);
> +  if (def == NULL)
> +return NULL;
>  
>ref_chain = DF_REF_CHAIN (def);
>  
> @@ -750,6 +751,120 @@ get_extended_src_reg (rtx src)
>return src;
>  }
>  
> +/* Return TRUE if target mode is equal to source mode of zero_extend
> +   or sign_extend otherwise false.  */
> +
> +static bool
> +abi_target_promote_function_mode (machine_mode mode)
> +{
> +  int unsignedp;
> +  machine_mode tgt_mode =
> +targetm.calls.promote_function_mode (NULL_TREE, mode, ,
> +  NULL_TREE, 1);
> +
> +  if (tgt_mode == mode)
> +return true;
> +  else
> +return false;
> +}
> +
> +/* Return TRUE if the candidate insn is zero extend and regno is
> +   an return  registers.  */
> +
> +static bool
> +abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
> +{
> +  rtx set = single_set (insn);
> +
> +  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
> +return false;
> +
> +  if (FUNCTION_VALUE_REGNO_P (regno))
> +return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if reg source operand of zero_extend is argument registers
> +   and not return registers and source and destination operand are same
> +   and mode of source and destination operand are not same.  */
> +
> +static bool
> +abi_extension_candidate_p (rtx_insn *insn)
> +{
> +  rtx set = single_set (insn);
> +
> +  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
> +return false;
> +
> +  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
> +  rtx orig_src = XEXP (SET_SRC (set),0);
> +
> +  bool copy_needed
> += (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
> +
> +  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
> +  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
> +  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
> +return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if the candidate insn is zero extend and regno is
> +   an argument registers.  */
> +
> +static bool
> +abi_extension_candidate_argno_p (rtx_code code, int regno)
> +{
> +  if (code !=  ZERO_EXTEND)
> +return false;
> +
> +  if (FUNCTION_ARG_REGNO_P (regno))
> +return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if the candidate insn doesn't have defs and have
> + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
> +
> +static bool
> +abi_handle_regs_without_defs_p (rtx_insn *insn)
> +{
> +  if (side_effects_p (PATTERN (insn)))
> +return false;
> +
> +  struct df_link *uses
> += get_uses (insn, SET_DEST (PATTERN (insn)));
> +
> +  if (!uses)
> +return false;
> +
> +  for (df_link *use = uses; use; use = use->next)
> +{
> +  if (!use->ref)
> + return fal

[PING] [PATCH 3/4] ree: Improve functionality of ree pass for rs6000 target.

2023-06-26 Thread Ajit Agarwal via Gcc-patches
All:

Ok for trunk. Please review.

Thanks & Regards
Ajit

On 07/06/23 3:55 pm, Ajit Agarwal via Gcc-patches wrote:
> Hello All:
> 
> This patch provides functionality to improve the ree pass for the rs6000 target.
> It eliminates redundant sign_extend/zero_extend/AND with varying constants.
> 
> Bootstrapped and regtested on powerpc64-linux-gnu.
> 
> Thanks & Regards
> Ajit
> 
> ree: Improve ree pass for rs6000 target
> 
> For the rs6000 target we see redundant zero and sign extensions. This patch
> improves the ree pass to eliminate such redundant extensions. It supports
> zero_extend, sign_extend and AND, including AND used as an extension with
> constants other than 1.
> 
> 2023-06-07  Ajit Kumar Agarwal  
> 
> gcc/ChangeLog:
> 
>   * ree.cc (eliminate_across_bbs_p): Add checks to enable extension
>   elimination across and within basic blocks.
>   (def_arith_p): New function to check definition has arithmetic
>   operation.
>   (combine_set_extension): Modification to incorporate AND
>   and current zero_extend and sign_extend instruction.
>   (merge_def_and_ext): Add calls to eliminate_across_bbs_p and
>   zero_extend sign_extend and AND instruction.
>   (rtx_is_zext_p): New function.
>   (feasible_cfg): New function.
>   * rtl.h (reg_used_set_between_p): Add prototype.
>   * rtlanal.cc (reg_used_set_between_p): New function.
> 
> gcc/testsuite/ChangeLog:
> 
>   * g++.target/powerpc/zext-elim.C: New testcase.
>   * g++.target/powerpc/zext-elim-1.C: New testcase.
>   * g++.target/powerpc/zext-elim-2.C: New testcase.
>   * g++.target/powerpc/sext-elim.C: New testcase.
> ---
>  gcc/ree.cc| 476 --
>  gcc/rtl.h |   1 +
>  gcc/rtlanal.cc|  15 +
>  gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
>  .../g++.target/powerpc/zext-elim-1.C  |  19 +
>  .../g++.target/powerpc/zext-elim-2.C  |  11 +
>  gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
>  7 files changed, 524 insertions(+), 46 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
>  create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
>  create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
>  create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C
> 
> diff --git a/gcc/ree.cc b/gcc/ree.cc
> index fc04249fa84..dc6da21ec16 100644
> --- a/gcc/ree.cc
> +++ b/gcc/ree.cc
> @@ -253,6 +253,66 @@ struct ext_cand
>  
>  static int max_insn_uid;
>  
> +/* Return TRUE if OP can be considered a zero extension from one or
> +   more sub-word modes to larger modes up to a full word.
> +
> +   For example (and:DI (reg) (const_int X))
> +
> +   Depending on the value of X could be considered a zero extension
> +   from QI, HI and SI to larger modes up to DImode.  */
> +
> +static bool
> +rtx_is_zext_p (rtx insn)
> +{
> +  if (GET_CODE (insn) == AND)
> +{
> +  rtx set = XEXP (insn, 0);
> +  if (REG_P (set))
> + {
> +   rtx src = XEXP (insn, 1);
> +
> +   if (CONST_INT_P (src)
> +   && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
> + return true;
> + }
> +  else
> + return false;
> +}
> +
> +  return false;
> +}
> +/* Return TRUE if OP can be considered a zero extension from one or
> +   more sub-word modes to larger modes up to a full word.
> +
> +   For example (and:DI (reg) (const_int X))
> +
> +   Depending on the value of X could be considered a zero extension
> +   from QI, HI and SI to larger modes up to DImode.  */
> +
> +static bool
> +rtx_is_zext_p (rtx_insn *insn)
> +{
> +  rtx body = single_set (insn);
> +
> +  if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) == AND)
> +   {
> + rtx set = XEXP (SET_SRC (body), 0);
> +
> + if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
> +   {
> +   rtx src = XEXP (SET_SRC (body), 1);
> +
> +   if (CONST_INT_P (src)
> +   && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
> + return true;
> +   }
> + else
> +  return false;
> +   }
> +
> +   return false;
> +}
> +
>  /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
>  
>  static bool
> @@ -319,7 +379,7 @@ combine_set_extension (ext_cand *cand, rtx_insn 
> *curr_insn, rtx *orig_set)
>  {
>rtx orig_src = SET_SRC (*orig_set);
>machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
> -  rtx new_set;
> +  rtx new_set = NULL_RTX;
>rtx ca

[PING] PATCH v5 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-06-26 Thread Ajit Agarwal via Gcc-patches
All:

Ok for trunk. Please review.


Thanks & Regards
Ajit

On 01/06/23 10:53 am, Ajit Agarwal via Gcc-patches wrote:
> Hello All:
> 
> This new version of patch 4 improves the ree pass for the rs6000 target using
> defined ABI interfaces.
> Bootstrapped and regtested on power64-linux-gnu.
> 
> Review comments incorporated.
> 
> Thanks & Regards
> Ajit
> 
> Improve ree pass for rs6000 target using defined abi interfaces
> 
> For the rs6000 target we see redundant zero and sign
> extensions. This patch improves the ree pass to eliminate
> such redundant zero and sign extensions using defined
> ABI interfaces.
> 
> 2023-06-01  Ajit Kumar Agarwal  
> 
> gcc/ChangeLog:
> 
>   * ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
>   defined abi interfaces.
>   (add_removable_extension): Use of defined abi interfaces for no
>   reaching defs.
>   (abi_extension_candidate_return_reg_p): New function.
>   (abi_extension_candidate_p): New function.
>   (abi_extension_candidate_argno_p): New function.
>   (abi_handle_regs_without_defs_p): New function.
>   (abi_target_promote_function_mode): New function.
> 
> gcc/testsuite/ChangeLog:
> 
> * g++.target/powerpc/zext-elim-3.C
> ---
>  gcc/ree.cc| 199 +++---
>  .../g++.target/powerpc/zext-elim-3.C  |  13 ++
>  2 files changed, 183 insertions(+), 29 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C
> 
> diff --git a/gcc/ree.cc b/gcc/ree.cc
> index fc04249fa84..2025a7c43da 100644
> --- a/gcc/ree.cc
> +++ b/gcc/ree.cc
> @@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
>  if (REGNO (DF_REF_REG (def)) == REGNO (reg))
>break;
>  
> -  gcc_assert (def != NULL);
> +  if (def == NULL)
> +return NULL;
>  
>ref_chain = DF_REF_CHAIN (def);
>  
> @@ -750,6 +751,120 @@ get_extended_src_reg (rtx src)
>return src;
>  }
>  
> +/* Return TRUE if target mode is equal to source mode of zero_extend
> +   or sign_extend otherwise false.  */
> +
> +static bool
> +abi_target_promote_function_mode (machine_mode mode)
> +{
> +  int unsignedp;
> +  machine_mode tgt_mode =
> +targetm.calls.promote_function_mode (NULL_TREE, mode, ,
> +  NULL_TREE, 1);
> +
> +  if (tgt_mode == mode)
> +return true;
> +  else
> +return false;
> +}
> +
> +/* Return TRUE if the candidate insn is zero extend and regno is
> +   an return  registers.  */
> +
> +static bool
> +abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
> +{
> +  rtx set = single_set (insn);
> +
> +  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
> +return false;
> +
> +  if (FUNCTION_VALUE_REGNO_P (regno))
> +return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if reg source operand of zero_extend is argument registers
> +   and not return registers and source and destination operand are same
> +   and mode of source and destination operand are not same.  */
> +
> +static bool
> +abi_extension_candidate_p (rtx_insn *insn)
> +{
> +  rtx set = single_set (insn);
> +
> +  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
> +return false;
> +
> +  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
> +  rtx orig_src = XEXP (SET_SRC (set),0);
> +
> +  bool copy_needed
> += (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
> +
> +  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
> +  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
> +  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
> +return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if the candidate insn is zero extend and regno is
> +   an argument registers.  */
> +
> +static bool
> +abi_extension_candidate_argno_p (rtx_code code, int regno)
> +{
> +  if (code !=  ZERO_EXTEND)
> +return false;
> +
> +  if (FUNCTION_ARG_REGNO_P (regno))
> +return true;
> +
> +  return false;
> +}
> +
> +/* Return TRUE if the candidate insn doesn't have defs and have
> + * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
> +
> +static bool
> +abi_handle_regs_without_defs_p (rtx_insn *insn)
> +{
> +  if (side_effects_p (PATTERN (insn)))
> +return false;
> +
> +  struct df_link *uses
> += get_uses (insn, SET_DEST (PATTERN (insn)));
> +
> +  if (!uses)
> +return false;
> +
> +  for (df_link *use = uses; use; use = use->next)
> +{
> +  if (!use->ref)
> + return fal

[PATCH v6] tree-ssa-sink: Improve code sinking pass

2023-06-23 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-06-24  Ajit Kumar Agarwal  

gcc/ChangeLog:

PR tree-optimization/81953
* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

PR tree-optimization/81953
* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 +
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 ++
 gcc/tree-ssa-sink.cc| 68 ++---
 3 files changed, 92 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..791d44249f9 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -171,9 +171,28 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
   return commondom;
 }
 
+/* Return TRUE if immediate uses of the defs in
+   STMT occur in the same block as STMT, FALSE otherwise.  */
+
+static bool
+def_use_same_block (gimple *stmt)
+{
+  def_operand_p def;
+  ssa_op_iter iter;
+
+  FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF)
+{
+  gimple *def_stmt = SSA_NAME_DEF_STMT (DEF_FROM_PTR (def));
+  if ((gimple_bb (def_stmt) == gimple_bb (stmt)))
+   return true;
+ }
+  return false;
+}
+
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,7 +209,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
@@ -237,7 +257,37 @@ select_best_block (basic_block early_bb,
   /* If result of comparsion is unknown, prefer EARLY_BB.
 Thus use !(...>=..) rather than (...<...)  */
   && !(best_bb->count * 100 >= early_bb->count * threshold))
-return best_bb;
+{
+  basic_block new_best_bb = get_immediate_dominator (CDI_DOMINATORS, 
best_bb);
+  /* Return best_bb if def and use are in same block otherwise new_best_bb.
+
+Things to consider:
+
+  new_best_bb is not equal to best_bb 

Re: [PATCH v5] tree-ssa-sink: Improve code sinking pass

2023-06-23 Thread Ajit Agarwal via Gcc-patches



On 23/06/23 7:44 am, Peter Bergner wrote:
> On 6/1/23 11:54 PM, Ajit Agarwal via Gcc-patches wrote:
>>
>>
>> On 01/06/23 2:06 pm, Bernhard Reutner-Fischer wrote:
>>> On 1 June 2023 09:20:08 CEST, Ajit Agarwal  wrote:
>>>> Hello All:
>>>>
>>>> This patch improves code sinking pass to sink statements before call to 
>>>> reduce
>>>> register pressure.
>>>> Review comments are incorporated.
>>>
>>> Hi Ajit!
>>>
>>> I had two comments for v4 that you did not address in v5 or followed up.
>>> thanks,
>>
>> Which comments didn't I address? Please let me know.
> 
> I believe he's referring to these two comments:
> 
>   > +   && dominated_by_p (CDI_DOMINATORS, new_best_bb, early_bb))
>   > + {
>   > +   if (def_use_same_block (use))
>   > + return best_bb;
>   > +
>   > +   return new_best_bb;
>   > + }
>   > + return best_bb;
>   > +}
>   >  
> 
>   Many returns.
>   I'd have said
> && !def_use_same_block (use)
>   return new_best_bb;
>   else
>   return best_bb;
> 
>   and rephrase the comment above list of Things to consider accordingly.
> 
> 
> I agree with Bernhard's comment that it could be rewritten to be clearer.
> Although, the "else" isn't really required.  So Bernhard's version would
> look like:
> 
>   if (new_best_bb
>   && use
>   && new_best_bb != best_bb
>   && new_best_bb != early_bb
>   && !is_gimple_call (stmt)
>   && gsi_end_p (gsi_start_phis (new_best_bb))
>   && gimple_bb (use) != early_bb
>   && !is_gimple_call (use)
>   && dominated_by_p (CDI_POST_DOMINATORS, new_best_bb, gimple_bb (use))
>   && dominated_by_p (CDI_DOMINATORS, new_best_bb, early_bb)
>   && !def_use_same_block (use))
> return new_best_bb;
>   else
> return best_bb;
> 
> ...or just:
> 
>   if (new_best_bb
>   && use
>   && new_best_bb != best_bb
>   && new_best_bb != early_bb
>   && !is_gimple_call (stmt)
>   && gsi_end_p (gsi_start_phis (new_best_bb))
>   && gimple_bb (use) != early_bb
>   && !is_gimple_call (use)
>   && dominated_by_p (CDI_POST_DOMINATORS, new_best_bb, gimple_bb (use))
>   && dominated_by_p (CDI_DOMINATORS, new_best_bb, early_bb)
>   && !def_use_same_block (use))
> return new_best_bb;
> 
>   return best_bb;
> 
> 
> Either works.

Thanks Peter. I will incorporate and send the new version of the patch.

> 
> 
> Peter
> 


[committed] [PATCH v1] rs6000: Update powerpc test fold-vec-extract-int.p8.c

2023-06-13 Thread Ajit Agarwal via Gcc-patches
commit 17714c08e9013b51cf8d04ac39f844d355c923f2 (HEAD -> master, origin/master, 
origin/HEAD)
Author: “Ajit Kumar Agarwal” 
Date:   Fri May 19 02:30:44 2023 -0500

testsuite: Update powerpc test fold-vec-extract-int.p8.c

Update powerpc tests with extra zero_extend removal with default ree pass.

2023-06-13  Ajit Kumar Agarwal  

gcc/testsuite/ChangeLog:

PR testsuite/109880
* gcc.target/powerpc/fold-vec-extract-int.p8.c: Update test.

On 13/06/23 1:58 pm, Kewen.Lin wrote:
> PR testsuite/109880


[PING] [PATCH v2] rs6000: fmr gets used instead of faster xxlor [PR93571]

2023-06-12 Thread Ajit Agarwal via Gcc-patches
Hello Segher:

Please review and let me know your feedback to submit in trunk.

Thanks & Regards
Ajit

On 25/02/23 3:20 pm, Ajit Agarwal via Gcc-patches wrote:
> Hello All:
> 
> Here is the patch that uses xxlor instead of fmr where possible.
> Performance results show that fmr is better on power9 and
> power10 architectures, whereas xxlor is better on power7 and
> power8 architectures. fmr is the only option before p7.
> 
> Bootstrapped and regtested on powerpc64-linux-gnu
> 
> Thanks & Regards
> Ajit
> 
>   rs6000: Use xxlor instead of fmr where possible
> 
>   Replaces fmr with xxlor instruction for power7 and power8
>   architectures whereas for power9 and power10 keep fmr
>   instruction.
> 
>   Perf measurement results:
> 
>   Power9 fmr:  201,847,661 cycles.
>   Power9 xxlor: 201,877,78 cycles.
>   Power8 fmr: 200,901,043 cycles.
>   Power8 xxlor: 201,020,518 cycles.
>   Power7 fmr: 201,059,524 cycles.
>   Power7 xxlor: 201,042,851 cycles.
> 
>   2023-02-25  Ajit Kumar Agarwal  
> 
> gcc/ChangeLog:
> 
>   * config/rs6000/rs6000.md (*movdf_hardfloat64): Use xxlor for power7
>   and power8 and fmr for power9 and power10.
> ---
>  gcc/config/rs6000/rs6000.md | 44 +++--
>  1 file changed, 28 insertions(+), 16 deletions(-)
> 
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 81bffb04ceb..e101f7f5fc1 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -354,7 +354,7 @@ (define_attr "cpu"
>(const (symbol_ref "(enum attr_cpu) rs6000_tune")))
>  
>  ;; The ISA we implement.
> -(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
> +(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p7p8v,p9,p9v,p9kf,p9tf,p10"
>(const_string "any"))
>  
>  ;; Is this alternative enabled for the current CPU/ISA/etc.?
> @@ -402,6 +402,11 @@ (define_attr "enabled" ""
>   (and (eq_attr "isa" "p10")
> (match_test "TARGET_POWER10"))
>   (const_int 1)
> +  
> + (and (eq_attr "isa" "p7p8v")
> +   (match_test "TARGET_VSX && !TARGET_P9_VECTOR"))
> + (const_int 1)
> +
>  ] (const_int 0)))
>  
>  ;; If this instruction is microcoded on the CELL processor
> @@ -8436,27 +8441,29 @@ (define_insn "*mov_softfloat32"
>  
>  (define_insn "*mov_hardfloat64"
>[(set (match_operand:FMOVE64 0 "nonimmediate_operand"
> -   "=m,   d,  d,  ,   wY,
> - ,Z,  ,  ,  !r,
> +   "=m,   d,  ,  ,   wY,
> + ,Z,  wa, ,  !r,
>   YZ,  r,  !r, *c*l,   !r,
> -*h,   r,  ,   wa")
> +*h,   r,  ,   d,  wn,
> +wa")
>   (match_operand:FMOVE64 1 "input_operand"
> -"d,   m,  d,  wY, ,
> - Z,   ,   ,  ,  ,
> +"d,   m,  ,  wY, ,
> + Z,   ,   wa, ,  ,
>   r,   YZ, r,  r,  *h,
> - 0,   ,   r,  eP"))]
> + 0,   ,   r,  d,  wn,
> + eP"))]
>"TARGET_POWERPC64 && TARGET_HARD_FLOAT
> && (gpc_reg_operand (operands[0], mode)
> || gpc_reg_operand (operands[1], mode))"
>"@
> stfd%U0%X0 %1,%0
> lfd%U1%X1 %0,%1
> -   fmr %0,%1
> +   xxlor %x0,%x1,%x1
> lxsd %0,%1
> stxsd %1,%0
> lxsdx %x0,%y1
> stxsdx %x1,%y0
> -   xxlor %x0,%x1,%x1
> +   fmr %0,%1
> xxlxor %x0,%x0,%x0
> li %0,0
> std%U0%X0 %1,%0
> @@ -8467,23 +8474,28 @@ (define_insn "*mov_hardfloat64"
> nop
> mfvsrd %0,%x1
> mtvsrd %x0,%1
> +   fmr %0,%1
> +   fmr %0,%1
> #"
>[(set_attr "type"
> -"fpstore, fpload, fpsimple,   fpload, fpstore,
> +"fpstore, fpload, veclogical, fpload, fpstore,
>   fpload,  fpstore,veclogical, veclogical, integer,
>   store,   load,   *,  mtjmpr, mfjmpr,
> - *,   mfvsr,  mtvsr,  vecperm")
> + *,   mfvsr,  mtvsr,  fpsimple,   fpsimple,
> +   

[PATCH 3/4] ree: Improve functionality of ree pass for rs6000 target.

2023-06-07 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch provides functionality to improve the ree pass for the rs6000 target.
It eliminates redundant sign_extend/zero_extend/AND with varying constants.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target

For the rs6000 target we see redundant zero and sign extensions. This patch
improves the ree pass to eliminate such redundant extensions. It supports
zero_extend, sign_extend and AND, including AND used as an extension with
constants other than 1.

2023-06-07  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(rtx_is_zext_p): New function.
(feasible_cfg): New function.
* rtl.h (reg_used_set_between_p): Add prototype.
* rtlanal.cc (reg_used_set_between_p): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim.C: New testcase.
* g++.target/powerpc/zext-elim-1.C: New testcase.
* g++.target/powerpc/zext-elim-2.C: New testcase.
* g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/ree.cc| 476 --
 gcc/rtl.h |   1 +
 gcc/rtlanal.cc|  15 +
 gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
 .../g++.target/powerpc/zext-elim-1.C  |  19 +
 .../g++.target/powerpc/zext-elim-2.C  |  11 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
 7 files changed, 524 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..dc6da21ec16 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,66 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+{
+  rtx set = XEXP (insn, 0);
+  if (REG_P (set))
+   {
+ rtx src = XEXP (insn, 1);
+
+ if (CONST_INT_P (src)
+ && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
+   return true;
+   }
+  else
+   return false;
+}
+
+  return false;
+}
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx_insn *insn)
+{
+  rtx body = single_set (insn);
+
+  if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+ rtx set = XEXP (SET_SRC (body), 0);
+
+ if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
+   {
+ rtx src = XEXP (SET_SRC (body), 1);
+
+ if (CONST_INT_P (src)
+ && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
+   return true;
+   }
+ else
+  return false;
+   }
+
+   return false;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -319,7 +379,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_set;
+  rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
 
   /* If the extension's source/destination registers are not the same
@@ -359,27 +419,41 @@ combine_set_extension (ext_cand *cand, rtx_insn 
*curr_insn, rtx *orig_set)
   else if (GET_CODE (orig_src) == cand->code)
 {
   /* Here is a sequence of two extensions.  Try to merge them.  */
-  rtx temp_extension
-   = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
+  rtx temp_extension = NULL_RTX;
+  if (GET_CODE (SET_SRC (cand_pat)) == AND)
+   temp_extension
+   = gen_rtx_AND (cand->mode, XEXP (orig_src, 0), XEXP (orig_src, 1));
+  else
+   temp_extension
+= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
   rtx simplified_temp_extension = simplify_rtx 

Re: [PATCH v5] tree-ssa-sink: Improve code sinking pass

2023-06-01 Thread Ajit Agarwal via Gcc-patches



On 01/06/23 2:06 pm, Bernhard Reutner-Fischer wrote:
> On 1 June 2023 09:20:08 CEST, Ajit Agarwal  wrote:
>> Hello All:
>>
>> This patch improves code sinking pass to sink statements before call to 
>> reduce
>> register pressure.
>> Review comments are incorporated.
> 
> Hi Ajit!
> 
> I had two comments for v4 that you did not address in v5 or followed up.
> thanks,

Which comments did I not address? Please let me know.

Thanks & Regards
Ajit


[PATCH v5] tree-ssa-sink: Improve code sinking pass

2023-06-01 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


tree-ssa-sink: Improve code sinking pass

Currently, code sinking will sink code after function calls.  This increases
register pressure for callee-saved registers.  The following patch improves
code sinking by placing the sunk code before calls in the use block or in
the immediate dominator of the use blocks.

2023-06-01  Ajit Kumar Agarwal  

gcc/ChangeLog:

PR tree-optimization/81953
* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

PR tree-optimization/81953
* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 +
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 ++
 gcc/tree-ssa-sink.cc| 71 ++---
 3 files changed, 95 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..d3b79ca5803
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..f1d25f1a0f8 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -171,9 +171,28 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
   return commondom;
 }
 
+/* Return TRUE if immediate uses of the defs in
+   STMT occur in the same block as STMT, FALSE otherwise.  */
+
+static bool
+def_use_same_block (gimple *stmt)
+{
+  def_operand_p def;
+  ssa_op_iter iter;
+
+  FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF)
+{
+  gimple *def_stmt = SSA_NAME_DEF_STMT (DEF_FROM_PTR (def));
+  if ((gimple_bb (def_stmt) == gimple_bb (stmt)))
+   return true;
+ }
+  return false;
+}
+
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be an immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,7 +209,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
@@ -237,7 +257,40 @@ select_best_block (basic_block early_bb,
   /* If result of comparsion is unknown, prefer EARLY_BB.
 Thus use !(...>=..) rather than (...<...)  */
   && !(best_bb->count * 100 >= early_bb->count * threshold))
-return best_bb;
+{
+  basic_block new_best_bb = get_immediate_dominator (CDI_DOMINATORS, 
best_bb);
+  /* Return best_bb if def and use are in same block otherwise new_best_bb.
+
+Things to consider:
+
+  new_best_bb is not equal to best_bb 

PATCH v5 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-05-31 Thread Ajit Agarwal via Gcc-patches
Hello All:

This new version of patch 4 improves the ree pass for the rs6000 target using
defined ABI interfaces.
Bootstrapped and regtested on powerpc64-linux-gnu.

Review comments incorporated.

Thanks & Regards
Ajit

Improve ree pass for rs6000 target using defined abi interfaces

For the rs6000 target we see redundant zero and sign
extensions. This patch improves the ree pass to eliminate
such redundant zero and sign extensions using the defined
ABI interfaces.

2023-06-01  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (combine_reaching_defs): Use of  zero_extend and sign_extend
defined abi interfaces.
(add_removable_extension): Use of defined abi interfaces for no
reaching defs.
(abi_extension_candidate_return_reg_p): New function.
(abi_extension_candidate_p): New function.
(abi_extension_candidate_argno_p): New function.
(abi_handle_regs_without_defs_p): New function.
(abi_target_promote_function_mode): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim-3.C
---
 gcc/ree.cc| 199 +++---
 .../g++.target/powerpc/zext-elim-3.C  |  13 ++
 2 files changed, 183 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index fc04249fa84..2025a7c43da 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -514,7 +514,8 @@ get_uses (rtx_insn *insn, rtx reg)
 if (REGNO (DF_REF_REG (def)) == REGNO (reg))
   break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -750,6 +751,120 @@ get_extended_src_reg (rtx src)
   return src;
 }
 
+/* Return TRUE if target mode is equal to source mode of zero_extend
+   or sign_extend otherwise false.  */
+
+static bool
+abi_target_promote_function_mode (machine_mode mode)
+{
+  int unsignedp;
+  machine_mode tgt_mode =
+targetm.calls.promote_function_mode (NULL_TREE, mode, ,
+NULL_TREE, 1);
+
+  if (tgt_mode == mode)
+return true;
+  else
+return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an return  registers.  */
+
+static bool
+abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_VALUE_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if reg source operand of zero_extend is argument registers
+   and not return registers and source and destination operand are same
+   and mode of source and destination operand are not same.  */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set),0);
+
+  bool copy_needed
+= (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+
+  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
+  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn is zero extend and regno is
+   an argument registers.  */
+
+static bool
+abi_extension_candidate_argno_p (rtx_code code, int regno)
+{
+  if (code !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_ARG_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
+
+static bool
+abi_handle_regs_without_defs_p (rtx_insn *insn)
+{
+  if (side_effects_p (PATTERN (insn)))
+return false;
+
+  struct df_link *uses
+= get_uses (insn, SET_DEST (PATTERN (insn)));
+
+  if (!uses)
+return false;
+
+  for (df_link *use = uses; use; use = use->next)
+{
+  if (!use->ref)
+   return false;
+
+  if (BLOCK_FOR_INSN (insn)
+ != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+   return false;
+
+  rtx_insn *use_insn = DF_REF_INSN (use->ref);
+
+  if (GET_CODE (PATTERN (use_insn)) == SET)
+   {
+ rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
+
+ if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+ || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ || GET_RTX_CLASS (code) == RTX_UNARY)
+   return false;
+   }
+ }
+  return true;
+}
+
 /* This function goes through all reaching defs of the source
of the candidate for elimination (CAND) and tries to combine
the extension with the definition instruction.  The changes
@@ -770,6 +885,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
 
   state->defs_list.truncate (0);
   state->copies_list.truncate (0);
+  rtx orig_src = XEXP (SET_SRC 

[PATCH v4] tree-ssa-sink: Improve code sinking pass

2023-05-31 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking sinks statements after calls.  This increases register
pressure for callee-saved registers.  This patch improves code sinking by
placing the sunk code before calls in the use block or in the immediate
dominator of the use blocks.

2023-05-24  Ajit Kumar Agarwal  

gcc/ChangeLog:

* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 +
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 ++
 gcc/tree-ssa-sink.cc| 74 +
 3 files changed, 96 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..49d5019ab93
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..ee8988bbb2c 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -171,9 +171,28 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
   return commondom;
 }
 
+/* Return TRUE if immediate uses of the defs in
+   STMT occur in the same block as STMT, FALSE otherwise.  */
+
+bool
+def_use_same_block (gimple *stmt)
+{
+  def_operand_p def;
+  ssa_op_iter iter;
+
+  FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF)
+{
+  gimple *def_stmt = SSA_NAME_DEF_STMT (DEF_FROM_PTR (def));
+  if ((gimple_bb (def_stmt) == gimple_bb (stmt)))
+   return true;
+ }
+  return false;
+}
+
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be in immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,7 +209,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
@@ -230,14 +250,46 @@ select_best_block (basic_block early_bb,
   if (threshold > 100)
threshold = 100;
 }
-
   /* If BEST_BB is at the same nesting level, then require it to have
  significantly lower execution frequency to avoid gratuitous movement.  */
   if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
   /* If result of comparsion is unknown, prefer EARLY_BB.
 Thus use !(...>=..) rather than (...<...)  */
   && !(best_bb->count * 100 >= early_bb->count * threshold))
-return best_bb;
+{
+  basic_block new_best_bb = get_immediate_dominator (CDI_DOMINATORS, 
best_bb);
+  /* Return 

[PATCH v4] tree-ssa-sink: Improve code sinking pass

2023-05-31 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass

Currently, code sinking sinks statements after calls.  This increases register
pressure for callee-saved registers.  This patch improves code sinking by
placing the sunk code before calls in the use block or in the immediate
dominator of the use blocks.

2023-05-24  Ajit Kumar Agarwal  

gcc/ChangeLog:

* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 +
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 ++
 gcc/tree-ssa-sink.cc| 74 +
 3 files changed, 96 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..49d5019ab93
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-sink-stats" } */
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_12\s+=\s+_4\s+\+\s+f_11\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..84e7938c54f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump 
{l_13\s+=\s+_4\s+\+\s+f_12\(D\);\n\s+bar\s+\(\)} sink1 } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..ee8988bbb2c 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -171,9 +171,28 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
   return commondom;
 }
 
+/* Return TRUE if immediate uses of the defs in
+   STMT occur in the same block as STMT, FALSE otherwise.  */
+
+bool
+def_use_same_block (gimple *stmt)
+{
+  def_operand_p def;
+  ssa_op_iter iter;
+
+  FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF)
+{
+  gimple *def_stmt = SSA_NAME_DEF_STMT (DEF_FROM_PTR (def));
+  if ((gimple_bb (def_stmt) == gimple_bb (stmt)))
+   return true;
+ }
+  return false;
+}
+
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be in immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,7 +209,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
@@ -230,14 +250,46 @@ select_best_block (basic_block early_bb,
   if (threshold > 100)
threshold = 100;
 }
-
   /* If BEST_BB is at the same nesting level, then require it to have
  significantly lower execution frequency to avoid gratuitous movement.  */
   if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
   /* If result of comparsion is unknown, prefer EARLY_BB.
 Thus use !(...>=..) rather than (...<...)  */
   && !(best_bb->count * 100 >= early_bb->count * threshold))
-return best_bb;
+{
+  basic_block new_best_bb = get_immediate_dominator (CDI_DOMINATORS, 
best_bb);
+  /* Return 

[PING] [PATCH v3 3/4] ree: Main functionality to Improve ree pass for rs6000 target

2023-05-30 Thread Ajit Agarwal via Gcc-patches
Hello Jeff:

Please review Jeff.

Thanks & Regards
Ajit

On 12/05/23 4:48 pm, Ajit Agarwal via Gcc-patches wrote:
> Hello Jeff:
> 
> 
> On 29/04/23 3:40 am, Jeff Law wrote:
>>
>>
>> On 4/20/23 15:03, Ajit Agarwal wrote:
>>
>>>
>>> Currently I support AND with const1_rtx, which is equivalent to the zero
>>> extension instruction in the Power instruction set. Regarding the many other
>>> constants you mention: could you please specify which other constants need
>>> to be supported, and how to determine the input and output modes?
>> x AND (constant) will result in a zero-extended representation for a variety 
>> of constants, not just 1.  For example
>>
>> For example x AND 3, x AND 7, x AND 15, etc.
>>
>> If (const_int 1) is really that special here, then I've either completely 
>> misunderstood the intention of your patch or there's something quite special 
>> about the PPC port that I'm not aware of.
>>
> 
> Here is the patch to address above.
> 
>   ree: Improve ree pass for rs6000 target
> 
>   For the rs6000 target we see redundant zero and sign
>   extensions. This patch improves the ree pass to eliminate
>   such redundant zero and sign extensions. It adds support
>   for treating AND with constants other than 1 as an
>   extension.
> 
>   2023-05-12  Ajit Kumar Agarwal  
> 
> gcc/ChangeLog:
> 
>   * ree.cc (rtx_is_zext_p): Add AND with varying constants as
>   extensions.
> ---
>  gcc/ree.cc | 15 ++-
>  1 file changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/gcc/ree.cc b/gcc/ree.cc
> index 96fda1ac658..ddda5f194bb 100644
> --- a/gcc/ree.cc
> +++ b/gcc/ree.cc
> @@ -269,8 +269,11 @@ rtx_is_zext_p (rtx insn)
>rtx set = XEXP (insn, 0);
>if (REG_P (set))
>   {
> -if (XEXP (insn, 1) == const1_rtx)
> -  return true;
> +   rtx src = XEXP (insn, 1);
> +
> +   if (CONST_INT_P (src)
> +   && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
> + return true;
>   }
>else
>   return false;
> @@ -297,9 +300,11 @@ rtx_is_zext_p (rtx_insn *insn)
>  
>   if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
> {
> -  if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (body)))
> -  >= GET_MODE_UNIT_SIZE (GET_MODE (set)))
> -return true;
> +   rtx src = XEXP (SET_SRC (body), 1);
> +
> +   if (CONST_INT_P (src)
> +   && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
> + return true;
> }
>   else
>return false;


Re: [PATCH v1] tree-ssa-sink: Improve code sinking pass.

2023-05-30 Thread Ajit Agarwal via Gcc-patches
Hello Richard:

On 30/05/23 12:34 pm, Richard Biener wrote:
> On Tue, May 30, 2023 at 7:06 AM Ajit Agarwal  wrote:
>>
>> Hello Richard:
>>
>> On 22/05/23 6:26 pm, Richard Biener wrote:
>>> On Thu, May 18, 2023 at 9:14 AM Ajit Agarwal  wrote:

 Hello All:

 This patch improves code sinking pass to sink statements before call to 
 reduce
 register pressure.
 Review comments are incorporated.

 Bootstrapped and regtested on powerpc64-linux-gnu.

 Thanks & Regards
 Ajit


 tree-ssa-sink: Improve code sinking pass.

 Code Sinking sinks the blocks after call. This increases
 register pressure for callee-saved registers. Improves
 code sinking before call in the use blocks or immediate
 dominator of use blocks.

 2023-05-18  Ajit Kumar Agarwal  

 gcc/ChangeLog:

 * tree-ssa-sink.cc (statement_sink_location): Modified to
 move statements before calls.
 (block_call_p): New function.
 (def_use_same_block): New function.
 (select_best_block): Add heuristics to select the best
 blocks in the immediate post dominator.

 gcc/testsuite/ChangeLog:

 * gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
 * gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
 ---
  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c |  16 ++
  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c |  20 +++
  gcc/tree-ssa-sink.cc| 159 ++--
  3 files changed, 185 insertions(+), 10 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
 b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 new file mode 100644
 index 000..716bc1f9257
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 @@ -0,0 +1,16 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -fdump-tree-sink -fdump-tree-optimized 
 -fdump-tree-sink-stats" } */
 +
 +void bar();
 +int j;
 +void foo(int a, int b, int c, int d, int e, int f)
 +{
 +  int l;
 +  l = a + b + c + d +e + f;
 +  if (a != 5)
 +{
 +  bar();
 +  j = l;
 +}
 +}
 +/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
>>>
>>> this doesn't verify the place we sink to?
>>>
>>
>> I am not sure how to verify the place we sink to with dg-final.
> 
> I think dejagnu supports matching multi-line regexps so I suggest
> to scan for the sunk expr RHS to be followed by the call?
> 

Do you mean using dg-begin-multiline-output and dg-end-multiline-output?

Thanks & Regards
Ajit
 diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
 b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
 new file mode 100644
 index 000..ff41e2ea8ae
 --- /dev/null
 +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
 @@ -0,0 +1,20 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -fdump-tree-sink-stats -fdump-tree-sink-stats" } */
 +
 +void bar();
 +int j, x;
 +void foo(int a, int b, int c, int d, int e, int f)
 +{
 +  int l;
 +  l = a + b + c + d +e + f;
 +  if (a != 5)
 +{
 +  bar();
 +  if (b != 3)
 +x = 3;
 +  else
 +x = 5;
 +  j = l;
 +}
 +}
 +/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
>>>
>>> likewise.  So both tests already pass before the patch?
>>>
 diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
 index 87b1d40c174..76556e7795b 100644
 --- a/gcc/tree-ssa-sink.cc
 +++ b/gcc/tree-ssa-sink.cc
 @@ -171,6 +171,72 @@ nearest_common_dominator_of_uses (def_operand_p 
 def_p, bool *debug_stmts)
return commondom;
  }

 +/* Return TRUE if immediate uses of the defs in
 +   USE occur in the same block as USE, FALSE otherwise.  */
 +
 +bool
 +def_use_same_block (gimple *stmt)
 +{
 +  use_operand_p use_p;
 +  def_operand_p def_p;
 +  imm_use_iterator imm_iter;
 +  ssa_op_iter iter;
 +
 +  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, iter, SSA_OP_DEF)
 +{
 +  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
 +   {
 + if (is_gimple_debug (USE_STMT (use_p)))
 +   continue;
 +
 + if (use_p
>>>
>>> use_p is never null
>>>
 + && (gimple_bb (USE_STMT (use_p)) == gimple_bb (stmt)))
 +   return true;
>>>
>>> the function behavior is obviously odd ...
>>>
 +   }
 + }
 +  return false;
 +}
 +
 +/* Return TRUE if the block has only calls, FALSE otherwise. */
 +
 +bool
 +block_call_p (basic_block bb)
 +{
 +  

Re: [PATCH v1] tree-ssa-sink: Improve code sinking pass.

2023-05-29 Thread Ajit Agarwal via Gcc-patches
Hello Richard:

On 22/05/23 6:26 pm, Richard Biener wrote:
> On Thu, May 18, 2023 at 9:14 AM Ajit Agarwal  wrote:
>>
>> Hello All:
>>
>> This patch improves code sinking pass to sink statements before call to 
>> reduce
>> register pressure.
>> Review comments are incorporated.
>>
>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>
>> Thanks & Regards
>> Ajit
>>
>>
>> tree-ssa-sink: Improve code sinking pass.
>>
>> Code Sinking sinks the blocks after call. This increases
>> register pressure for callee-saved registers. Improves
>> code sinking before call in the use blocks or immediate
>> dominator of use blocks.
>>
>> 2023-05-18  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>> * tree-ssa-sink.cc (statement_sink_location): Modified to
>> move statements before calls.
>> (block_call_p): New function.
>> (def_use_same_block): New function.
>> (select_best_block): Add heuristics to select the best
>> blocks in the immediate post dominator.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
>> * gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
>> ---
>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c |  16 ++
>>  gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c |  20 +++
>>  gcc/tree-ssa-sink.cc| 159 ++--
>>  3 files changed, 185 insertions(+), 10 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
>>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>>
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
>> new file mode 100644
>> index 000..716bc1f9257
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
>> @@ -0,0 +1,16 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-sink -fdump-tree-optimized 
>> -fdump-tree-sink-stats" } */
>> +
>> +void bar();
>> +int j;
>> +void foo(int a, int b, int c, int d, int e, int f)
>> +{
>> +  int l;
>> +  l = a + b + c + d +e + f;
>> +  if (a != 5)
>> +{
>> +  bar();
>> +  j = l;
>> +}
>> +}
>> +/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
> 
> this doesn't verify the place we sink to?
>

I am not sure how to verify the place we sink to with dg-final.
 
>> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
>> b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>> new file mode 100644
>> index 000..ff41e2ea8ae
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
>> @@ -0,0 +1,20 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -fdump-tree-sink-stats -fdump-tree-sink-stats" } */
>> +
>> +void bar();
>> +int j, x;
>> +void foo(int a, int b, int c, int d, int e, int f)
>> +{
>> +  int l;
>> +  l = a + b + c + d +e + f;
>> +  if (a != 5)
>> +{
>> +  bar();
>> +  if (b != 3)
>> +x = 3;
>> +  else
>> +x = 5;
>> +  j = l;
>> +}
>> +}
>> +/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
> 
> likewise.  So both tests already pass before the patch?
> 
>> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
>> index 87b1d40c174..76556e7795b 100644
>> --- a/gcc/tree-ssa-sink.cc
>> +++ b/gcc/tree-ssa-sink.cc
>> @@ -171,6 +171,72 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
>> bool *debug_stmts)
>>return commondom;
>>  }
>>
>> +/* Return TRUE if immediate uses of the defs in
>> +   USE occur in the same block as USE, FALSE otherwise.  */
>> +
>> +bool
>> +def_use_same_block (gimple *stmt)
>> +{
>> +  use_operand_p use_p;
>> +  def_operand_p def_p;
>> +  imm_use_iterator imm_iter;
>> +  ssa_op_iter iter;
>> +
>> +  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, iter, SSA_OP_DEF)
>> +{
>> +  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
>> +   {
>> + if (is_gimple_debug (USE_STMT (use_p)))
>> +   continue;
>> +
>> + if (use_p
> 
> use_p is never null
> 
>> + && (gimple_bb (USE_STMT (use_p)) == gimple_bb (stmt)))
>> +   return true;
> 
> the function behavior is obviously odd ...
> 
>> +   }
>> + }
>> +  return false;
>> +}
>> +
>> +/* Return TRUE if the block has only calls, FALSE otherwise. */
>> +
>> +bool
>> +block_call_p (basic_block bb)
>> +{
>> +  int i = 0;
>> +  bool is_call = false;
>> +  gimple_stmt_iterator gsi = gsi_last_bb (bb);
>> +  gimple *last_stmt = gsi_stmt (gsi);
>> +
>> +  if (last_stmt && gimple_code (last_stmt) == GIMPLE_COND)
>> +{
>> +  if (!gsi_end_p (gsi))
>> +   gsi_prev ();
>> +
>> +   for (; !gsi_end_p (gsi);)
>> +{
>> +  gimple *stmt = gsi_stmt (gsi);
>> +
>> +  /* We have already seen a call.  */
>> +  if (is_call)
>> +return false;
> 
> Likewise.  Do you want to check whether a block has
> a single stmt and that is a call and that is followed by
> a condition?  It looks like a very convoluted way to 

[PATCH v3] tree-ssa-sink: Improve code sinking pass

2023-05-24 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit
  
tree-ssa-sink: Improve code sinking pass

Code sinking sinks statements into blocks after a call. This increases
register pressure for callee-saved registers. Improve code sinking to place
statements before the call, in the use blocks or the immediate dominator of
the use blocks.

2023-05-24  Ajit Kumar Agarwal  

gcc/ChangeLog:

* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 15 +
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 19 ++
 gcc/tree-ssa-sink.cc| 74 +
 3 files changed, 96 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..69fa6d32e7c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-sink-stats" } */
+
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..b34959c8a4d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..ee8988bbb2c 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -171,9 +171,28 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
   return commondom;
 }
 
+/* Return TRUE if immediate uses of the defs in
+   STMT occur in the same block as STMT, FALSE otherwise.  */
+
+bool
+def_use_same_block (gimple *stmt)
+{
+  def_operand_p def;
+  ssa_op_iter iter;
+
+  FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF)
+{
+  gimple *def_stmt = SSA_NAME_DEF_STMT (DEF_FROM_PTR (def));
+  if ((gimple_bb (def_stmt) == gimple_bb (stmt)))
+   return true;
+ }
+  return false;
+}
+
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
-   statements.
+   statements. The best basic block should be in immediate dominator of
+   best basic block if the use stmt is after the call.
 
We want the most control dependent block in the shallowest loop nest.
 
@@ -190,7 +209,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
@@ -230,14 +250,46 @@ select_best_block (basic_block early_bb,
   if (threshold > 100)
threshold = 100;
 }
-
   /* If BEST_BB is at the same nesting level, then require it to have
  significantly lower execution frequency to avoid gratuitous movement.  */
   if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
   /* If result of comparsion is unknown, prefer EARLY_BB.
 Thus use !(...>=..) rather than (...<...)  */
   && !(best_bb->count * 100 >= early_bb->count * threshold))
-return best_bb;
+{
+  basic_block new_best_bb = get_immediate_dominator (CDI_DOMINATORS, 
best_bb);
+  /* Return best_bb if def and use are in same block otherwise new_best_bb.
+
+Things to 

[PATCH v2] tree-ssa-sink: Improve code sinking pass

2023-05-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

For example :

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  l = a + b + c + d +e + f;
  if (a != 5)
{
  bar();
  j = l;
}
}

Code Sinking does the following:

void bar();
int j;
void foo(int a, int b, int c, int d, int e, int f)
{
  int l;
  
  if (a != 5)
{
  l = a + b + c + d +e + f; 
  bar();
  j = l;
}
}

Bootstrapped regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


tree-ssa-sink: Improve code sinking pass

Code sinking sinks statements into blocks after a call. This increases
register pressure for callee-saved registers. Improve code sinking to place
statements before the call, in the use blocks or the immediate dominator of
the use blocks.

2023-05-18  Ajit Kumar Agarwal  

gcc/ChangeLog:

* tree-ssa-sink.cc (statement_sink_location): Move statements before
calls.
(block_call_p): New function.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best blocks in the
immediate post dominator.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c |  15 ++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c |  19 +++
 gcc/tree-ssa-sink.cc| 160 ++--
 3 files changed, 183 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..69fa6d32e7c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,15 @@
+/* { dg-options "-O2 -fdump-tree-optimized -fdump-tree-sink-stats" } */
+
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..b34959c8a4d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,19 @@
+/* { dg-options "-O2 -fdump-tree-sink-stats" } */
+
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index b1ba7a2ad6c..091aa90d289 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -171,6 +171,71 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
   return commondom;
 }
 
+/* Return TRUE if immediate uses of the defs in
+   STMT occur in the same block as STMT, FALSE otherwise.  */
+
+bool
+def_use_same_block (gimple *stmt)
+{
+  use_operand_p use;
+  def_operand_p def;
+  imm_use_iterator imm_iter;
+  ssa_op_iter iter;
+
+  FOR_EACH_SSA_DEF_OPERAND (def, stmt, iter, SSA_OP_DEF)
+{
+  FOR_EACH_IMM_USE_FAST (use, imm_iter, DEF_FROM_PTR (def))
+   {
+ if (is_gimple_debug (USE_STMT (use)))
+   continue;
+
+ if (use && (gimple_bb (USE_STMT (use)) == gimple_bb (stmt)))
+   return true;
+   }
+ }
+  return false;
+}
+
+/* Return TRUE if the block has only one call statement, FALSE otherwise. */
+
+bool
+block_call_p (basic_block bb)
+{
+  int i = 0;
+  bool is_call = false;
+  gimple_stmt_iterator gsi = gsi_last_bb (bb);
+  gimple *last_stmt = gsi_stmt (gsi);
+
+  if (last_stmt && gimple_code (last_stmt) == GIMPLE_COND)
+{
+  if (!gsi_end_p (gsi))
+   gsi_prev ();
+
+   for (; !gsi_end_p (gsi);)
+{
+  gimple *stmt = gsi_stmt (gsi);
+
+  /* We have already seen a call.  */
+  if (is_call)
+return false;
+
+  if (is_gimple_call (stmt))
+is_call = true;
+  else
+return false;
+
+  if (!gsi_end_p (gsi))
+gsi_prev ();
+
+   ++i;
+   }
+ }
+  if (is_call && i == 1)
+return true;
+
+  return false;
+}
+
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
statements.
@@ -190,7 +255,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  

[PATCH v1] rs6000: Update powerpc test fold-vec-extract-int.p8.c

2023-05-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

Update the powerpc tests for both little-endian and big-endian to account
for the extra removal of zero extension and sign extension with the ree pass
enabled by default for the rs6000 target.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: Update powerpc test fold-vec-extract-int.p8.c

Update powerpc tests with extra zero_extend removal with default ree pass.

2023-05-19  Ajit Kumar Agarwal  

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/fold-vec-extract-int.p8.c: Update test.
---
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c
index 75eaf25943b..f5f953320d8 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c
@@ -13,8 +13,8 @@
 
 /* { dg-final { scan-assembler-times {\mvspltw\M} 3 { target lp64 } } } */
 /* { dg-final { scan-assembler-times {\mmfvsrwz\M} 3 { target lp64 } } } */
-/* { dg-final { scan-assembler-times {\mrldicl\M} 7 { target { le } } } } */
-/* { dg-final { scan-assembler-times {\mrldicl\M} 4 { target { lp64 && be } } 
} } */
+/* { dg-final { scan-assembler-times {\mrldicl\M} 5 { target { le } } } } */
+/* { dg-final { scan-assembler-times {\mrldicl\M} 2 { target { lp64 && be } } 
} } */
 /* { dg-final { scan-assembler-times {\msubfic\M} 3 { target { le } } } } */
 /* { dg-final { scan-assembler-times {\msldi\M} 3  { target lp64 } } } */
 /* { dg-final { scan-assembler-times {\mmtvsrd\M} 3 { target lp64 } } } */
-- 
2.31.1



[PATCH] rs6000: Update powerpc test fold-vec-extract-int.p8.c

2023-05-18 Thread Ajit Agarwal via Gcc-patches


Hello All:

Update powerpc tests with extra zero_extend removal with default ree pass.
Bootstrapped and Regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


rs6000: Update powerpc test fold-vec-extract-int.p8.c

Update powerpc tests with extra zero_extend removal with default ree pass.

2023-04-16  Ajit Kumar Agarwal  

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/fold-vec-extract-int.p8.c: Update test.
---
 gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c 
b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c
index 75eaf25943b..e8f1055ddc0 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-extract-int.p8.c
@@ -13,7 +13,7 @@
 
 /* { dg-final { scan-assembler-times {\mvspltw\M} 3 { target lp64 } } } */
 /* { dg-final { scan-assembler-times {\mmfvsrwz\M} 3 { target lp64 } } } */
-/* { dg-final { scan-assembler-times {\mrldicl\M} 7 { target { le } } } } */
+/* { dg-final { scan-assembler-times {\mrldicl\M} 5 { target { le } } } } */
 /* { dg-final { scan-assembler-times {\mrldicl\M} 4 { target { lp64 && be } } 
} } */
 /* { dg-final { scan-assembler-times {\msubfic\M} 3 { target { le } } } } */
 /* { dg-final { scan-assembler-times {\msldi\M} 3  { target lp64 } } } */
-- 
2.31.1



[PATCH v1] tree-ssa-sink: Improve code sinking pass.

2023-05-18 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch improves code sinking pass to sink statements before call to reduce
register pressure.
Review comments are incorporated.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


tree-ssa-sink: Improve code sinking pass.

Code Sinking sinks the blocks after call. This increases
register pressure for callee-saved registers. Improves
code sinking before call in the use blocks or immediate
dominator of use blocks.

2023-05-18  Ajit Kumar Agarwal  

gcc/ChangeLog:

* tree-ssa-sink.cc (statement_sink_location): Modified to
move statements before calls.
(block_call_p): New function.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best
blocks in the immediate post dominator.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c |  16 ++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c |  20 +++
 gcc/tree-ssa-sink.cc| 159 ++--
 3 files changed, 185 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..716bc1f9257
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink -fdump-tree-optimized 
-fdump-tree-sink-stats" } */
+
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..ff41e2ea8ae
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */ 
+/* { dg-options "-O2 -fdump-tree-sink-stats -fdump-tree-sink-stats" } */
+
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index 87b1d40c174..76556e7795b 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -171,6 +171,72 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
   return commondom;
 }
 
+/* Return TRUE if immediate uses of the defs in
+   USE occur in the same block as USE, FALSE otherwise.  */
+
+bool
+def_use_same_block (gimple *stmt)
+{
+  use_operand_p use_p;
+  def_operand_p def_p;
+  imm_use_iterator imm_iter;
+  ssa_op_iter iter;
+
+  FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, iter, SSA_OP_DEF)
+{
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
+   {
+ if (is_gimple_debug (USE_STMT (use_p)))
+   continue;
+
+ if (use_p
+ && (gimple_bb (USE_STMT (use_p)) == gimple_bb (stmt)))
+   return true;
+   }
+ }
+  return false;
+}
+
+/* Return TRUE if the block has only calls, FALSE otherwise. */
+
+bool
+block_call_p (basic_block bb)
+{
+  int i = 0;
+  bool is_call = false;
+  gimple_stmt_iterator gsi = gsi_last_bb (bb);
+  gimple *last_stmt = gsi_stmt (gsi);
+
+  if (last_stmt && gimple_code (last_stmt) == GIMPLE_COND)
+{
+  if (!gsi_end_p (gsi))
+   gsi_prev ();
+
+   for (; !gsi_end_p (gsi);)
+{
+  gimple *stmt = gsi_stmt (gsi);
+
+  /* We have already seen a call.  */
+  if (is_call)
+return false;
+
+  if (is_gimple_call (stmt))
+is_call = true;
+  else
+return false;
+
+  if (!gsi_end_p (gsi))
+gsi_prev ();
+
+   ++i;
+   }
+ }
+  if (is_call && i == 1)
+return true;
+
+  return false;
+}
+
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
statements.
@@ -190,7 +256,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
@@ -230,14 +297,47 @@ select_best_block (basic_block early_bb,
   if (threshold > 100)
threshold = 100;
 }
-
   /* If BEST_BB is at the same nesting level, then 

[committed] rs6000: Enable REE pass by default

2023-05-16 Thread Ajit Agarwal via Gcc-patches
rs6000: Enable REE pass by default

Add ree pass as a default pass for rs6000 target for
O2 and above.

2023-05-16  Ajit Kumar Agarwal  

gcc/ChangeLog:

* common/config/rs6000/rs6000-common.cc: Add REE pass as a
default rs6000 target pass for O2 and above.
* doc/invoke.texi: Document -free
---
 gcc/common/config/rs6000/rs6000-common.cc | 2 ++
 gcc/doc/invoke.texi   | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
b/gcc/common/config/rs6000/rs6000-common.cc
index 2140c442ba9..968db215028 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -34,6 +34,8 @@ static const struct default_options 
rs6000_option_optimization_table[] =
 { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Enable -fsched-pressure for first pass instruction scheduling.  */
 { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
+/* Enable -free for zero extension and sign extension elimination.*/
+{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
 /* Enable -munroll-only-small-loops with -funroll-loops to unroll small
loops at -O2 and above by default.  */
 { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b92b8576027..2c525762171 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12455,8 +12455,8 @@ Attempt to remove redundant extension instructions.  
This is especially
 helpful for the x86-64 architecture, which implicitly zero-extends in 64-bit
 registers after writing to their lower 32-bit half.

-Enabled for Alpha, AArch64 and x86 at levels @option{-O2},
-@option{-O3}, @option{-Os}.
+Enabled for Alpha, AArch64, PowerPC, RISC-V, SPARC, h83000 and x86 at levels
+@option{-O2}, @option{-O3}, @option{-Os}.

 @opindex fno-lifetime-dse
 @opindex flifetime-dse
-- 
2.31.1


Re: [PATCH v4 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-05-16 Thread Ajit Agarwal via Gcc-patches



On 29/04/23 5:03 am, Jeff Law wrote:
> 
> 
> On 4/28/23 16:42, Hans-Peter Nilsson wrote:
>> On Sat, 22 Apr 2023, Ajit Agarwal via Gcc-patches wrote:
>>
>>> Hello All:
>>>
>>> This new version of patch 4 use improve ree pass for rs6000 target using 
>>> defined ABI interfaces.
>>> Bootstrapped and regtested on power64-linux-gnu.
>>>
>>> Thanks & Regards
>>> Ajit
>>>
>>>
>>> ree: Improve ree pass for rs6000 target using defined abi interfaces
>>>
>>>  For rs6000 target we see redundant zero and sign
>>>  extension and done to improve ree pass to eliminate
>>>  such redundant zero and sign extension using defines
>>>  ABI interfaces.
>>>
>>>  2023-04-22  Ajit Kumar Agarwal  
>>>
>>> gcc/ChangeLog:
>>>
>>>  * ree.cc (combline_reaching_defs): Add zero_extend
>>>  using defined abi interfaces.
>>>  (add_removable_extension): use of defined abi interfaces
>>>  for no reaching defs.
>>>  (abi_extension_candidate_return_reg_p): New defined ABI function.
>>>  (abi_extension_candidate_p): New defined ABI function.
>>>  (abi_extension_candidate_argno_p): New defined ABI function.
>>>  (abi_handle_regs_without_defs_p): New defined ABI function.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>>  * g++.target/powerpc/zext-elim-3.C
>>> ---
>>>   gcc/ree.cc    | 176 +++---
>>>   .../g++.target/powerpc/zext-elim-3.C  |  16 ++
>>>   2 files changed, 162 insertions(+), 30 deletions(-)
>>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C
>>>
>>> diff --git a/gcc/ree.cc b/gcc/ree.cc
>>> index 413aec7c8eb..0de96b1ece1 100644
>>> --- a/gcc/ree.cc
>>> +++ b/gcc/ree.cc
>>> @@ -473,7 +473,8 @@ get_defs (rtx_insn *insn, rtx reg, vec 
>>> *dest)
>>>   break;
>>>   }
>>>   -  gcc_assert (use != NULL);
>>> +  if (use == NULL)
>>> +    return NULL;
>>>       ref_chain = DF_REF_CHAIN (use);
>>>   @@ -514,7 +515,8 @@ get_uses (rtx_insn *insn, rtx reg)
>>>   if (REGNO (DF_REF_REG (def)) == REGNO (reg))
>>>     break;
>>>   -  gcc_assert (def != NULL);
>>> +  if (def == NULL)
>>> +    return NULL;
>>>       ref_chain = DF_REF_CHAIN (def);
>>>   @@ -750,6 +752,103 @@ get_extended_src_reg (rtx src)
>>>     return src;
>>>   }
>>>   +/* Return TRUE if the candidate insn is zero extend and regno is
>>> +   an return  registers.  */
>>> +
>>> +static bool
>>> +abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
>>> +{
>>> +  rtx set = single_set (insn);
>>> +
>>> +  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
>>> +    return false;
>>> +
>>> +  if (FUNCTION_VALUE_REGNO_P (regno))
>>> +    return true;
>>> +
>>> +  return false;
>>> +}
>>> +
>>> +/* Return TRUE if reg source operand of zero_extend is argument registers
>>> +   and not return registers and source and destination operand are same
>>> +   and mode of source and destination operand are not same.  */
>>> +
>>> +static bool
>>> +abi_extension_candidate_p (rtx_insn *insn)
>>> +{
>>> +  rtx set = single_set (insn);
>>> +
>>> +  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
>>> +    return false;
>>> +
>>> +  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
>>> +  rtx orig_src = XEXP (SET_SRC (set),0);
>>> +
>>> +  bool copy_needed
>>> +    = (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
>>> +
>>> +  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
>>> +  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
>>> +  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
>>> +    return true;
>>> +
>>> +  return false;
>>> +}
>>> +
>>> +/* Return TRUE if the candidate insn is zero extend and regno is
>>> +   an argument registers.  */
>>> +
>>> +static bool
>>> +abi_extension_candidate_argno_p (rtx_code code, int regno)
>>> +{
>>> +  if (code !=  ZERO_EXTEND)
>>> +    return false;
>>> +
>>> +  if (FUNCTION_ARG_REGNO_P (regno))
>>> +    return true;
>>> +
>>> +  return false;
>>> +}
>>
>> I don't see anything in those functions that checks if
>> ZERO_EXTEND is actually a feature of the ABI, e.g. as opposed to
>> no extension or SIGN_EXTEND.  Do I miss something?
> I don't think you missed anything.  That was one of the points I was making 
> last week.  Somewhere, somehow we need to describe what the ABI mandates and 
> guarantees.
> 
> So while what Ajit has done is a step forward, at some point the actual 
> details of the ABI need to be described in a way that can be checked and 
> consumed by REE.


The ABI information we need for the ree pass is the set of argument registers
and return registers. Based on that I have described the interfaces that we
need. Other than that we don't need any other ABI hooks. I have used the
FUNCTION_VALUE_REGNO_P and FUNCTION_ARG_REGNO_P ABI hooks.

Thanks & Regards
Ajit
> 
> Jeff


[PATCH v5 1/4] rs6000: Enable REE pass by default

2023-05-16 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch enables the ree pass as a default pass for the rs6000 target.
Bootstrapped and regtested for powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: Enable REE pass by default

Add ree pass as a default pass for rs6000 target for
O2 and above.

2023-05-16  Ajit Kumar Agarwal  

gcc/ChangeLog:

* common/config/rs6000/rs6000-common.cc: Add REE pass as a
default rs6000 target pass for O2 and above.
* doc/invoke.texi: Document -free
---
 gcc/common/config/rs6000/rs6000-common.cc | 2 ++
 gcc/doc/invoke.texi   | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
b/gcc/common/config/rs6000/rs6000-common.cc
index 2140c442ba9..968db215028 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -34,6 +34,8 @@ static const struct default_options 
rs6000_option_optimization_table[] =
 { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Enable -fsched-pressure for first pass instruction scheduling.  */
 { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
+/* Enable -free for zero extension and sign extension elimination.*/
+{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
 /* Enable -munroll-only-small-loops with -funroll-loops to unroll small
loops at -O2 and above by default.  */
 { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index b92b8576027..168fcc88b1d 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12455,8 +12455,8 @@ Attempt to remove redundant extension instructions.  
This is especially
 helpful for the x86-64 architecture, which implicitly zero-extends in 64-bit
 registers after writing to their lower 32-bit half.
 
-Enabled for Alpha, AArch64 and x86 at levels @option{-O2},
-@option{-O3}, @option{-Os}.
+Enabled for Alpha, AArch64, RS/6000, RISC-V, SPARC, h83000 and x86 at levels 
+@option{-O2}, @option{-O3}, @option{-Os}.
 
 @opindex fno-lifetime-dse
 @opindex flifetime-dse
-- 
2.31.1



Re: [PATCH v3 3/4] ree: Main functionality to Improve ree pass for rs6000 target

2023-05-12 Thread Ajit Agarwal via Gcc-patches
Hello Jeff:


On 29/04/23 3:40 am, Jeff Law wrote:
> 
> 
> On 4/20/23 15:03, Ajit Agarwal wrote:
> 
>>
>> Currently I support AND with const1_rtx. This is equivalent to the zero 
>> extension instruction in the Power instruction set. You mention many other 
>> constants; could you please specify which other constants need to be 
>> supported and how to determine the input and output modes?
> x AND <constant> will result in a zero-extended representation for a variety 
> of constants, not just 1.  For example
> 
> For example x AND 3, x AND 7, x AND 15, etc.
> 
> If (const_int 1) is really that special here, then I've either completely 
> misunderstood the intention of your patch or there's something quite special 
> about the PPC port that I'm not aware of.
>

Here is the patch to address above.

ree: Improve ree pass for rs6000 target

For the rs6000 target we see redundant zero and sign
extensions. Improve the ree pass to eliminate such
redundant zero and sign extensions. Add support for
treating AND with constants other than 1 as an
extension.

2023-05-12  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (rtx_is_zext_p): Add AND with varying constants as
extensions.
---
 gcc/ree.cc | 15 ++-
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/gcc/ree.cc b/gcc/ree.cc
index 96fda1ac658..ddda5f194bb 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -269,8 +269,11 @@ rtx_is_zext_p (rtx insn)
   rtx set = XEXP (insn, 0);
   if (REG_P (set))
{
-  if (XEXP (insn, 1) == const1_rtx)
-return true;
+ rtx src = XEXP (insn, 1);
+
+ if (CONST_INT_P (src)
+ && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
+   return true;
}
   else
return false;
@@ -297,9 +300,11 @@ rtx_is_zext_p (rtx_insn *insn)
 
  if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
{
-if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (body)))
->= GET_MODE_UNIT_SIZE (GET_MODE (set)))
-  return true;
+ rtx src = XEXP (SET_SRC (body), 1);
+
+ if (CONST_INT_P (src)
+ && IN_RANGE (exact_log2 (UINTVAL (src)), 0, 7))
+   return true;
}
  else
   return false;
-- 
2.31.1


 
> Jeff


Re: [PATCH v3 4/4] ree: Using ABI interfaces to improve ree pass for rs6000 target.

2023-04-22 Thread Ajit Agarwal via Gcc-patches
Hello Jeff:

On 20/04/23 3:29 am, Jeff Law wrote:
> 
> 
> On 4/19/23 12:03, Ajit Agarwal wrote:
>> Hello All:
>>
>> This is patch-4 to improve ree pass for rs6000 target.
>> Use ABI interfaces support.
>>
>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>
>> Thanks & Regards
>> Ajit
>>
>> ree: Improve ree pass for rs6000 target.
>>
>> For rs6000 target we see redundant zero and sign
>> extension and done to improve ree pass to eliminate
>> such redundant zero and sign extension. Support of
>> ABI interfaces.
>>
>> 2023-04-19  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>> * ree.cc (combline_reaching_defs): Add zero_extend and sign_extend.
>> Add FUNCTION_ARG_REGNO_P abi interfaces calls and
>> FUNCTION_VALUE_REGNO_P support.
>> (add_removable_extension): Add FUNCTION_ARG_REGNO_P abi
>> interface calls.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * g++.target/powerpc/zext-elim-3.C
> So my general comment on this code is we need to expose properties of the ABI 
> so they can be queried.  ie, just because you found that a REGNO happens to 
> be a function argument doesn't mean you know anything about its extension 
> status.   We need a way to describe the extension property of the ABI.  
> Ideally there'll be something pre-existing that we can query, but I'm not 
> sure that's the case.
> 
> The overarching point is what you're doing is highly dependent on the precise 
> semantics of the ABI.  But nowhere do you ask the question "does the ABI 
> mandate a particular sign/zero extension state for this argument?"  So it's 
> just wrong as-written as far as I can tell.
>

I have submitted a new version of the patch which incorporates the above. Please 
review and let me know your feedback.

Thanks & Regards
Ajit
 
> Jeff
> 



Re: [PATCH v4 1/4] rs6000: Enable REE pass by default

2023-04-22 Thread Ajit Agarwal via Gcc-patches
Hello Segher:

On 22/04/23 6:56 pm, Segher Boessenkool wrote:
> Hi!
> 
> Please look at
> 
> and reply to that message, with answers to the questions?  And make sure
> you are listed in MAINTAINERS before anything else.  Thanks!
> 

Done.

Thanks & Regards
Ajit
> 
> Segher


Re: [PATCH v3 1/4] ree: Default ree pass for O2 and above for rs6000 target.

2023-04-22 Thread Ajit Agarwal via Gcc-patches
Hello Segher:

On 20/04/23 1:30 am, Segher Boessenkool wrote:
> Hi!
> 
> The subject should be something like
> 
> rs6000: Enable REE pass by default
> 
> (and no period at the end).
> 
> On Wed, Apr 19, 2023 at 11:23:07PM +0530, Ajit Agarwal wrote:
>> This is the patch-1 for improving ree pass for rs6000 target.
> 
> It actually just enables it :-)
> 
> The mail body should be the proposed commit message.  Nothing more,
> nothing less.  If you need (or want) to talk about more things, that is
> what a "0/4" message is for (you create that with --cover).  Your patch
> messages here do not thread properly, how did you create them?  Things
> work fine if you use  git format-patch --thread  :-)
> 
>>  ree: Improve ree pass for rs6000 target.
>>
>>  Add ree pass as a default pass for rs6000 target.
>>
>>  2023-04-19  Ajit Kumar Agarwal  
> 
> You aren't in MAINTAINERS yet, please fix that first!
> 
>>

Done. Already added Write after approval in MAINTAINERS and pushed the changes.

>> gcc/ChangeLog:
>>
>>  * common/config/rs6000/rs6000-common.cc: Add REE pass as a
>>  default rs6000 target pass for O2 and above.
> 
> Why only for -O2?  Only when optimising at all makes sense, people use
> -O0 only when they want to skip as many optimisations as possible, maybe
> because of compilation time concerns, maybe to avoid an ICE or other
> bug.  Isn't REE *always* a good thing, it never degrades code quality?
> Or are there situations where it results in worse code?
> 

I think it should be enabled at O2 and above; I am not sure how it behaves at O0.
In my view, REE is always a good optimization to have, and I don't think 
it degrades performance or code quality. I don't see any situation where it 
results in worse code. It tries to remove extensions and combine them, which 
will surely improve performance and code quality rather than worsen the code.

Thanks & Regards
Ajit

 
> Segher


[PATCH v4 4/4] ree: Improve ree pass for rs6000 target using defined ABI interfaces.

2023-04-22 Thread Ajit Agarwal via Gcc-patches
Hello All:

This new version of patch 4 improves the ree pass for the rs6000 target using 
defined ABI interfaces.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


ree: Improve ree pass for rs6000 target using defined abi interfaces

For the rs6000 target we see redundant zero and sign
extensions. Improve the ree pass to eliminate
such redundant zero and sign extensions using defined
ABI interfaces.

2023-04-22  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (combine_reaching_defs): Add zero_extend
using defined abi interfaces.
(add_removable_extension): use of defined abi interfaces
for no reaching defs.
(abi_extension_candidate_return_reg_p): New defined ABI function.
(abi_extension_candidate_p): New defined ABI function.
(abi_extension_candidate_argno_p): New defined ABI function.
(abi_handle_regs_without_defs_p): New defined ABI function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim-3.C
---
 gcc/ree.cc| 176 +++---
 .../g++.target/powerpc/zext-elim-3.C  |  16 ++
 2 files changed, 162 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..0de96b1ece1 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -473,7 +473,8 @@ get_defs (rtx_insn *insn, rtx reg, vec *dest)
break;
 }
 
-  gcc_assert (use != NULL);
+  if (use == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (use);
 
@@ -514,7 +515,8 @@ get_uses (rtx_insn *insn, rtx reg)
 if (REGNO (DF_REF_REG (def)) == REGNO (reg))
   break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -750,6 +752,103 @@ get_extended_src_reg (rtx src)
   return src;
 }
 
+/* Return TRUE if the candidate insn is a zero extend and REGNO is
+   a return register.  */
+
+static bool
+abi_extension_candidate_return_reg_p (rtx_insn *insn, int regno)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_VALUE_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if reg source operand of zero_extend is argument registers
+   and not return registers and source and destination operand are same
+   and mode of source and destination operand are not same.  */
+
+static bool
+abi_extension_candidate_p (rtx_insn *insn)
+{
+  rtx set = single_set (insn);
+
+  if (GET_CODE (SET_SRC (set)) !=  ZERO_EXTEND)
+return false;
+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
+  rtx orig_src = XEXP (SET_SRC (set),0);
+
+  bool copy_needed
+= (REGNO (SET_DEST (set)) != REGNO (XEXP (SET_SRC (set), 0)));
+
+  if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
+  && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+  && !abi_extension_candidate_return_reg_p (insn, REGNO (orig_src)))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if CODE is a zero extend and REGNO is
+   an argument register.  */
+
+static bool
+abi_extension_candidate_argno_p (rtx_code code, int regno)
+{
+  if (code !=  ZERO_EXTEND)
+return false;
+
+  if (FUNCTION_ARG_REGNO_P (regno))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if the candidate insn doesn't have defs and have
+ * uses without RTX_BIN_ARITH/RTX_COMM_ARITH/RTX_UNARY rtx class.  */
+
+static bool
+abi_handle_regs_without_defs_p (rtx_insn *insn)
+{
+  if (side_effects_p (PATTERN (insn)))
+return false;
+
+  struct df_link *uses
+= get_uses (insn, SET_DEST (PATTERN (insn)));
+
+  if (!uses)
+return false;
+
+  for (df_link *use = uses; use; use = use->next)
+{
+  if (!use->ref)
+   return false;
+
+  if (BLOCK_FOR_INSN (insn)
+ != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+   return false;
+
+  rtx_insn *use_insn = DF_REF_INSN (use->ref);
+
+  if (GET_CODE (PATTERN (use_insn)) == SET)
+   {
+ rtx_code code = GET_CODE (SET_SRC (PATTERN (use_insn)));
+
+ if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+ || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+ || GET_RTX_CLASS (code) == RTX_UNARY)
+   return false;
+   }
+ }
+  return true;
+}
+
 /* This function goes through all reaching defs of the source
of the candidate for elimination (CAND) and tries to combine
the extension with the definition instruction.  The changes
@@ -770,6 +869,11 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
 
   state->defs_list.truncate (0);
   state->copies_list.truncate (0);
+  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
+
+  if (abi_extension_candidate_p (cand->insn)
+  && (!get_defs (cand->insn, orig_src, NULL)))
+return abi_handle_regs_without_defs_p (cand->insn);
 
   outcome = make_defs_and_copies_lists 

[PATCH v4 3/4] ree: Main functionality to improve ree pass for rs6000 target.

2023-04-21 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch is the new version of patch-3 to improve ree pass for rs6000 target.
Bootstrapped and regtested on power64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target

For the rs6000 target we see redundant zero and sign
extensions. This patch improves the ree pass to eliminate
such redundant zero and sign extensions. It adds support for
zero_extend/sign_extend/AND.

2023-04-21  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(rtx_is_zext_p): New function.
(feasible_cfg): New function.
* rtl.h (reg_used_set_between_p): Add prototype.
* rtlanal.cc (reg_used_set_between_p): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim.C: New testcase.
* g++.target/powerpc/zext-elim-1.C: New testcase.
* g++.target/powerpc/zext-elim-2.C: New testcase.
* g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/ree.cc| 471 --
 gcc/rtl.h |   1 +
 gcc/rtlanal.cc|  15 +
 gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
 .../g++.target/powerpc/zext-elim-1.C  |  19 +
 .../g++.target/powerpc/zext-elim-2.C  |  11 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
 7 files changed, 519 insertions(+), 46 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..96fda1ac658 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,61 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+{
+  rtx set = XEXP (insn, 0);
+  if (REG_P (set))
+   {
+  if (XEXP (insn, 1) == const1_rtx)
+return true;
+   }
+  else
+   return false;
+}
+
+  return false;
+}
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static bool
+rtx_is_zext_p (rtx_insn *insn)
+{
+  rtx body = single_set (insn);
+
+  if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+ rtx set = XEXP (SET_SRC (body), 0);
+
+ if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
+   {
+if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (body)))
+>= GET_MODE_UNIT_SIZE (GET_MODE (set)))
+  return true;
+   }
+ else
+  return false;
+   }
+
+   return false;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -319,7 +374,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_set;
+  rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
 
   /* If the extension's source/destination registers are not the same
@@ -359,27 +414,41 @@ combine_set_extension (ext_cand *cand, rtx_insn 
*curr_insn, rtx *orig_set)
   else if (GET_CODE (orig_src) == cand->code)
 {
   /* Here is a sequence of two extensions.  Try to merge them.  */
-  rtx temp_extension
-   = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
+  rtx temp_extension = NULL_RTX;
+  if (GET_CODE (SET_SRC (cand_pat)) == AND)
+   temp_extension
+   = gen_rtx_AND (cand->mode, XEXP (orig_src, 0), XEXP (orig_src, 1));
+  else
+   temp_extension
+= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
   rtx simplified_temp_extension = simplify_rtx (temp_extension);
   if (simplified_temp_extension)
 temp_extension = simplified_temp_extension;
+
   new_set = gen_rtx_SET (new_reg, temp_extension);
 }
   else if (GET_CODE 

[PATCH v4 1/4] rs6000: Enable REE pass by default

2023-04-21 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch enables the REE pass by default at O2 and above.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: Enable REE pass by default

Add ree pass as a default pass for rs6000 target for
O2 and above.

2023-04-21  Ajit Kumar Agarwal  

gcc/ChangeLog:

* common/config/rs6000/rs6000-common.cc: Add REE pass as a
default rs6000 target pass for O2 and above.
---
 gcc/common/config/rs6000/rs6000-common.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
b/gcc/common/config/rs6000/rs6000-common.cc
index 2140c442ba9..968db215028 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -34,6 +34,8 @@ static const struct default_options 
rs6000_option_optimization_table[] =
 { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Enable -fsched-pressure for first pass instruction scheduling.  */
 { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
+/* Enable -free for zero extension and sign extension elimination.*/
+{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
 /* Enable -munroll-only-small-loops with -funroll-loops to unroll small
loops at -O2 and above by default.  */
 { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
-- 
2.31.1



Re: [PATCH v3 3/4] ree: Main functionality to Improve ree pass for rs6000 target

2023-04-20 Thread Ajit Agarwal via Gcc-patches
Hello Jeff:

On 21/04/23 2:33 am, Ajit Agarwal wrote:
> Hello Jeff:
> 
> On 20/04/23 3:23 am, Jeff Law wrote:
>>
>>
>> On 4/19/23 12:00, Ajit Agarwal wrote:
>>> Hello All:
>>>
>>> This is patch-3 to improve ree pass for rs6000 target.
>>> Main functionality routines to imprve ree pass.
>>>
>>> Bootstrapped and regtested on powerpc64-gnu-linux.
>>>
>>> Thanks & Regards
>>> Ajit
>>>
>>> ree: Improve ree pass for rs6000 target.
>>>
>>> For rs6000 target we see redundant zero and sign
>>> extension and done to improve ree pass to eliminate
>>> such redundant zero and sign extension. Support of
>>> zero_extend/sign_extend/AND.
>>>
>>> 2023-04-19  Ajit Kumar Agarwal  
>>>
>>> gcc/ChangeLog:
>>>
>>> * ree.cc (eliminate_across_bbs_p): Add checks to enable extension
>>> elimination across and within basic blocks.
>>> (def_arith_p): New function to check definition has arithmetic
>>> operation.
>>> (combine_set_extension): Modification to incorporate AND
>>> and current zero_extend and sign_extend instruction.
>>> (merge_def_and_ext): Add calls to eliminate_across_bbs_p and
>>> zero_extend sign_extend and AND instruction.
>>> (rtx_is_zext_p): New function.
>>> (reg_used_set_between_p): New function.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * g++.target/powerpc/zext-elim.C: New testcase.
>>> * g++.target/powerpc/zext-elim-1.C: New testcase.
>>> * g++.target/powerpc/zext-elim-2.C: New testcase.
>>> * g++.target/powerpc/sext-elim.C: New testcase.
>>> ---
>>>   gcc/ree.cc    | 451 --
>>>   gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
>>>   .../g++.target/powerpc/zext-elim-1.C  |  19 +
>>>   .../g++.target/powerpc/zext-elim-2.C  |  11 +
>>>   gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
>>>   5 files changed, 482 insertions(+), 47 deletions(-)
>>>   create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
>>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
>>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
>>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C
>>>
>>> diff --git a/gcc/ree.cc b/gcc/ree.cc
>>> index 413aec7c8eb..053db2e8ff3 100644
>>> --- a/gcc/ree.cc
>>> +++ b/gcc/ree.cc
>>> @@ -253,6 +253,71 @@ struct ext_cand
>>>     static int max_insn_uid;
>>>   +bool
>>> +reg_used_set_between_p (rtx set, rtx_insn *def_insn, rtx_insn *insn
>>> +{
>>> +  if (reg_used_between_p (set, def_insn, insn)
>>> +  || reg_set_between_p (set, def_insn, insn))
>>> +    return true;
>>> +
>>> +  return false;
>>> +}
>> This seems general enough that it should go into the same file as 
>> reg_used_between_p and reg_set_between_p.  It needs a function comment as 
>> well.
>>
>>
>>> +static unsigned int
>>> +rtx_is_zext_p (rtx insn)
>>> +{
>>> +  if (GET_CODE (insn) == AND)
>>> +    {
>>> +  rtx set = XEXP (insn, 0);
>>> +  if (REG_P (set))
>>> +    {
>>> +  if (XEXP (insn, 1) == const1_rtx)
>>> +    return 1;
>>> +    }
>>> +  else
>>> +    return 0;
>>> +    }
>>> +
>>> +  return 0;
>>> +}
>> So my comment from the prior version stands.  Testing for const1_rtx is just 
>> wrong.  The optimization you're trying to perform (If I understand it 
>> correctly) works for many other constants and the set of constants supported 
>> will vary based on the input and output modes.
>>
>> Similarly in rtx_is_zext_p.
>>
>> You still have numerous formatting issues which makes reading the patch more 
>> difficult than it should be.  Please review the formatting guidelines and 
>> follow them.   In particular please review how to indent multi-line 
>> conditionals.
>>
>>
> 
> Currently I support AND with const1_rtx. This is what is equivalent to zero 
> extension instruction in power instruction set. When you specify many other 
> constants and Could you please specify what other constants needs to be 
> supported and how to determine on the Input and output modes.

On top of that, I support eliminating zero_extend and sign_extend when the 
result mode of the def insn is not equal to that of the source operand of the 
zero_extend or sign_extend.

Thanks & Regards
Ajit
>>
>>
>>
>> You sti
>>> @@ -698,6 +777,226 @@ get_sub_rtx (rtx_insn *def_insn)
>>>     return sub_rtx;
>>>   }
>>>   +/* Check if the def insn is ASHIFT and LSHIFTRT.
>>> +  Inputs: insn for which def has to be checked.
>>> +  source operand rtx.
>>> +   Output: True or false if def has arithmetic
>>> +   peration like ASHIFT and LSHIFTRT.  */
>> This still needs work.  Between the comments and code, I still don't know 
>> what you're really trying to do here.  I can make some guesses, but it's 
>> really your job to write clear comments about what you're doing so that a 
>> review or someone looking at the code in the future don't have to guess.
>>
>> It looks like you want to look at all the reaching definitions of INSN for 
>> ORIG_SRC and if they 

Re: [PATCH v3 3/4] ree: Main functionality to Improve ree pass for rs6000 target

2023-04-20 Thread Ajit Agarwal via Gcc-patches
Hello Jeff:


On 21/04/23 2:33 am, Ajit Agarwal via Gcc-patches wrote:
> Hello Jeff:
> 
> On 20/04/23 3:23 am, Jeff Law wrote:
>>
>>
>> On 4/19/23 12:00, Ajit Agarwal wrote:
>>> Hello All:
>>>
>>> This is patch-3 to improve ree pass for rs6000 target.
>>> Main functionality routines to imprve ree pass.
>>>
>>> Bootstrapped and regtested on powerpc64-gnu-linux.
>>>
>>> Thanks & Regards
>>> Ajit
>>>
>>> ree: Improve ree pass for rs6000 target.
>>>
>>> For rs6000 target we see redundant zero and sign
>>> extension and done to improve ree pass to eliminate
>>> such redundant zero and sign extension. Support of
>>> zero_extend/sign_extend/AND.
>>>
>>> 2023-04-19  Ajit Kumar Agarwal  
>>>
>>> gcc/ChangeLog:
>>>
>>> * ree.cc (eliminate_across_bbs_p): Add checks to enable extension
>>> elimination across and within basic blocks.
>>> (def_arith_p): New function to check definition has arithmetic
>>> operation.
>>> (combine_set_extension): Modification to incorporate AND
>>> and current zero_extend and sign_extend instruction.
>>> (merge_def_and_ext): Add calls to eliminate_across_bbs_p and
>>> zero_extend sign_extend and AND instruction.
>>> (rtx_is_zext_p): New function.
>>> (reg_used_set_between_p): New function.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> * g++.target/powerpc/zext-elim.C: New testcase.
>>> * g++.target/powerpc/zext-elim-1.C: New testcase.
>>> * g++.target/powerpc/zext-elim-2.C: New testcase.
>>> * g++.target/powerpc/sext-elim.C: New testcase.
>>> ---
>>>   gcc/ree.cc    | 451 --
>>>   gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
>>>   .../g++.target/powerpc/zext-elim-1.C  |  19 +
>>>   .../g++.target/powerpc/zext-elim-2.C  |  11 +
>>>   gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
>>>   5 files changed, 482 insertions(+), 47 deletions(-)
>>>   create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
>>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
>>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
>>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C
>>>
>>> diff --git a/gcc/ree.cc b/gcc/ree.cc
>>> index 413aec7c8eb..053db2e8ff3 100644
>>> --- a/gcc/ree.cc
>>> +++ b/gcc/ree.cc
>>> @@ -253,6 +253,71 @@ struct ext_cand
>>>     static int max_insn_uid;
>>>   +bool
>>> +reg_used_set_between_p (rtx set, rtx_insn *def_insn, rtx_insn *insn
>>> +{
>>> +  if (reg_used_between_p (set, def_insn, insn)
>>> +  || reg_set_between_p (set, def_insn, insn))
>>> +    return true;
>>> +
>>> +  return false;
>>> +}
>> This seems general enough that it should go into the same file as 
>> reg_used_between_p and reg_set_between_p.  It needs a function comment as 
>> well.
>>
>>
>>> +static unsigned int
>>> +rtx_is_zext_p (rtx insn)
>>> +{
>>> +  if (GET_CODE (insn) == AND)
>>> +    {
>>> +  rtx set = XEXP (insn, 0);
>>> +  if (REG_P (set))
>>> +    {
>>> +  if (XEXP (insn, 1) == const1_rtx)
>>> +    return 1;
>>> +    }
>>> +  else
>>> +    return 0;
>>> +    }
>>> +
>>> +  return 0;
>>> +}
>> So my comment from the prior version stands.  Testing for const1_rtx is just 
>> wrong.  The optimization you're trying to perform (If I understand it 
>> correctly) works for many other constants and the set of constants supported 
>> will vary based on the input and output modes.
>>
>> Similarly in rtx_is_zext_p.
>>
>> You still have numerous formatting issues which makes reading the patch more 
>> difficult than it should be.  Please review the formatting guidelines and 
>> follow them.   In particular please review how to indent multi-line 
>> conditionals.
>>
>>
> 
> Currently I support AND with const1_rtx. This is what is equivalent to zero 
> extension instruction in power instruction set. When you specify many other 
> constants and Could you please specify what other constants needs to be 
> supported and how to determine on the Input and output modes.
>>
On top of that I suppor

Re: [PATCH v3 3/4] ree: Main functionality to Improve ree pass for rs6000 target

2023-04-20 Thread Ajit Agarwal via Gcc-patches
Hello Jeff:

On 20/04/23 3:23 am, Jeff Law wrote:
> 
> 
> On 4/19/23 12:00, Ajit Agarwal wrote:
>> Hello All:
>>
>> This is patch-3 to improve ree pass for rs6000 target.
>> Main functionality routines to imprve ree pass.
>>
>> Bootstrapped and regtested on powerpc64-gnu-linux.
>>
>> Thanks & Regards
>> Ajit
>>
>> ree: Improve ree pass for rs6000 target.
>>
>> For rs6000 target we see redundant zero and sign
>> extension and done to improve ree pass to eliminate
>> such redundant zero and sign extension. Support of
>> zero_extend/sign_extend/AND.
>>
>> 2023-04-19  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>> * ree.cc (eliminate_across_bbs_p): Add checks to enable extension
>> elimination across and within basic blocks.
>> (def_arith_p): New function to check definition has arithmetic
>> operation.
>> (combine_set_extension): Modification to incorporate AND
>> and current zero_extend and sign_extend instruction.
>> (merge_def_and_ext): Add calls to eliminate_across_bbs_p and
>> zero_extend sign_extend and AND instruction.
>> (rtx_is_zext_p): New function.
>> (reg_used_set_between_p): New function.
>>
>> gcc/testsuite/ChangeLog:
>>
>> * g++.target/powerpc/zext-elim.C: New testcase.
>> * g++.target/powerpc/zext-elim-1.C: New testcase.
>> * g++.target/powerpc/zext-elim-2.C: New testcase.
>> * g++.target/powerpc/sext-elim.C: New testcase.
>> ---
>>   gcc/ree.cc    | 451 --
>>   gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
>>   .../g++.target/powerpc/zext-elim-1.C  |  19 +
>>   .../g++.target/powerpc/zext-elim-2.C  |  11 +
>>   gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
>>   5 files changed, 482 insertions(+), 47 deletions(-)
>>   create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
>>   create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C
>>
>> diff --git a/gcc/ree.cc b/gcc/ree.cc
>> index 413aec7c8eb..053db2e8ff3 100644
>> --- a/gcc/ree.cc
>> +++ b/gcc/ree.cc
>> @@ -253,6 +253,71 @@ struct ext_cand
>>     static int max_insn_uid;
>>   +bool
>> +reg_used_set_between_p (rtx set, rtx_insn *def_insn, rtx_insn *insn
>> +{
>> +  if (reg_used_between_p (set, def_insn, insn)
>> +  || reg_set_between_p (set, def_insn, insn))
>> +    return true;
>> +
>> +  return false;
>> +}
> This seems general enough that it should go into the same file as 
> reg_used_between_p and reg_set_between_p.  It needs a function comment as 
> well.
> 
> 
>> +static unsigned int
>> +rtx_is_zext_p (rtx insn)
>> +{
>> +  if (GET_CODE (insn) == AND)
>> +    {
>> +  rtx set = XEXP (insn, 0);
>> +  if (REG_P (set))
>> +    {
>> +  if (XEXP (insn, 1) == const1_rtx)
>> +    return 1;
>> +    }
>> +  else
>> +    return 0;
>> +    }
>> +
>> +  return 0;
>> +}
> So my comment from the prior version stands.  Testing for const1_rtx is just 
> wrong.  The optimization you're trying to perform (If I understand it 
> correctly) works for many other constants and the set of constants supported 
> will vary based on the input and output modes.
> 
> Similarly in rtx_is_zext_p.
> 
> You still have numerous formatting issues which makes reading the patch more 
> difficult than it should be.  Please review the formatting guidelines and 
> follow them.   In particular please review how to indent multi-line 
> conditionals.
> 
> 

Currently I support AND with const1_rtx. This is what is equivalent to the zero 
extension instruction in the Power instruction set. You mention that many other 
constants are possible. Could you please specify which other constants need to be 
supported, and how to determine them from the input and output modes?
> 
> 
> 
> You sti
>> @@ -698,6 +777,226 @@ get_sub_rtx (rtx_insn *def_insn)
>>     return sub_rtx;
>>   }
>>   +/* Check if the def insn is ASHIFT and LSHIFTRT.
>> +  Inputs: insn for which def has to be checked.
>> +  source operand rtx.
>> +   Output: True or false if def has arithmetic
>> +   peration like ASHIFT and LSHIFTRT.  */
> This still needs work.  Between the comments and code, I still don't know 
> what you're really trying to do here.  I can make some guesses, but it's 
> really your job to write clear comments about what you're doing so that a 
> review or someone looking at the code in the future don't have to guess.
> 
> It looks like you want to look at all the reaching definitions of INSN for 
> ORIG_SRC and if they are ASHIFT/LSHIFTRT do...  what?
> 
> Why are ASHIFT/LSHIFTRT interesting here?  Why are you looking for them?
> 
> 
> 
>> +
>> +/* Find feasibility of extension elimination
>> +   across basic blocks.
>> +   Input: candiate to check the feasibility.
>> +  def_insn of candidate.
>> +   Output: Returns true or false if feasible or not.  */
> Function comments 

[PATCH v3 4/4] ree: Using ABI interfaces to improve ree pass for rs6000 target.

2023-04-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

This is patch-4 to improve ree pass for rs6000 target.
Use ABI interfaces support.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target.

For the rs6000 target we see redundant zero and sign
extensions. This patch improves the ree pass to eliminate
such redundant zero and sign extensions. It adds support for
ABI interfaces.

2023-04-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (combine_reaching_defs): Add zero_extend and sign_extend.
Add FUNCTION_ARG_REGNO_P abi interfaces calls and
FUNCTION_VALUE_REGNO_P support.
(add_removable_extension): Add FUNCTION_ARG_REGNO_P abi
interface calls.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim-3.C
---
 gcc/ree.cc| 127 +-
 .../g++.target/powerpc/zext-elim-3.C  |  16 +++
 2 files changed, 113 insertions(+), 30 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..33c803f16ce 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -473,7 +473,8 @@ get_defs (rtx_insn *insn, rtx reg, vec *dest)
break;
 }
 
-  gcc_assert (use != NULL);
+  if (use == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (use);
 
@@ -514,7 +515,8 @@ get_uses (rtx_insn *insn, rtx reg)
 if (REGNO (DF_REF_REG (def)) == REGNO (reg))
   break;
 
-  gcc_assert (def != NULL);
+  if (def == NULL)
+return NULL;
 
   ref_chain = DF_REF_CHAIN (def);
 
@@ -771,6 +773,58 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
   state->defs_list.truncate (0);
   state->copies_list.truncate (0);
 
+  if (cand->code == ZERO_EXTEND)
+{
+  rtx orig_src = XEXP (SET_SRC (cand->expr),0);
+  rtx set = single_set (cand->insn);
+
+  if (!set)
+   return false;
+
+  machine_mode ext_dst_mode = GET_MODE (SET_DEST (set));
+
+  if (!get_defs (cand->insn, orig_src, NULL))
+   {
+  bool copy_needed
+= (REGNO (SET_DEST (cand->expr)) != REGNO (XEXP (SET_SRC 
(cand->expr), 0)));
+
+ if (!copy_needed && ext_dst_mode != GET_MODE (orig_src)
+ && FUNCTION_ARG_REGNO_P (REGNO (orig_src))
+ && !FUNCTION_VALUE_REGNO_P (REGNO (orig_src)))
+{
+   if (side_effects_p (PATTERN (cand->insn)))
+ return false;
+
+   struct df_link *uses
+ = get_uses (cand->insn, SET_DEST (PATTERN (cand->insn)));
+
+   if (!uses) return false;
+
+   for (df_link *use = uses; use; use = use->next)
+ {
+   if (!use->ref)
+ return false;
+
+   if (BLOCK_FOR_INSN (cand->insn)
+   != BLOCK_FOR_INSN (DF_REF_INSN (use->ref)))
+ return false;
+
+   rtx_insn *insn = DF_REF_INSN (use->ref);
+
+   if (GET_CODE (PATTERN (insn)) == SET)
+ {
+   rtx_code code = GET_CODE (SET_SRC (PATTERN (insn)));
+   if (GET_RTX_CLASS (code) == RTX_BIN_ARITH
+   || GET_RTX_CLASS (code) == RTX_COMM_ARITH
+   || GET_RTX_CLASS (code) == RTX_UNARY)
+ return false;
+  }
+   }
+return true;
+}
+}
+}
+
   outcome = make_defs_and_copies_lists (cand->insn, set_pat, state);
 
   if (!outcome)
@@ -1112,26 +1166,35 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
   rtx reg = XEXP (src, 0);
   struct df_link *defs, *def;
   ext_cand *cand;
+  defs = get_defs (insn, reg, NULL);
 
   /* Zero-extension of an undefined value is partly defined (it's
 completely undefined for sign-extension, though).  So if there exists
 a path from the entry to this zero-extension that leaves this register
 uninitialized, removing the extension could change the behavior of
 correct programs.  So first, check it is not the case.  */
-  if (code == ZERO_EXTEND && !bitmap_bit_p (init_regs, REGNO (reg)))
+  if (!defs && code == ZERO_EXTEND && FUNCTION_ARG_REGNO_P (REGNO (reg)))
{
- if (dump_file)
-   {
- fprintf (dump_file, "Cannot eliminate extension:\n");
- print_rtl_single (dump_file, insn);
- fprintf (dump_file, " because it can operate on uninitialized"
- " data\n");
-   }
+ ext_cand e = {expr, code, mode, insn};
+ insn_list->safe_push (e);
  return;
}
 
+
+   if ((code == ZERO_EXTEND
+   && !bitmap_bit_p (init_regs, REGNO (reg
+ {
+   if (dump_file)
+ {
+   fprintf (dump_file, "Cannot eliminate 

[PATCH v3 3/4] ree: Main functionality to Improve ree pass for rs6000 target

2023-04-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

This is patch-3 to improve the ree pass for the rs6000 target.
It contains the main functionality routines to improve the ree pass.

Bootstrapped and regtested on powerpc64-gnu-linux.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target.

For the rs6000 target we see redundant zero and sign
extensions. This patch improves the ree pass to eliminate
such redundant zero and sign extensions. It adds support for
zero_extend/sign_extend/AND.

2023-04-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(rtx_is_zext_p): New function.
(reg_used_set_between_p): New function.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim.C: New testcase.
* g++.target/powerpc/zext-elim-1.C: New testcase.
* g++.target/powerpc/zext-elim-2.C: New testcase.
* g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/ree.cc| 451 --
 gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
 .../g++.target/powerpc/zext-elim-1.C  |  19 +
 .../g++.target/powerpc/zext-elim-2.C  |  11 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 ++
 5 files changed, 482 insertions(+), 47 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..053db2e8ff3 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,71 @@ struct ext_cand
 
 static int max_insn_uid;
 
+bool
+reg_used_set_between_p (rtx set, rtx_insn *def_insn, rtx_insn *insn)
+{
+  if (reg_used_between_p (set, def_insn, insn)
+  || reg_set_between_p (set, def_insn, insn))
+return true;
+
+  return false;
+}
+
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static unsigned int
+rtx_is_zext_p (rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+{
+  rtx set = XEXP (insn, 0);
+  if (REG_P (set))
+   {
+ if (XEXP (insn, 1) == const1_rtx)
+   return 1;
+   }
+  else
+   return 0;
+}
+
+  return 0;
+}
+
+/* Return TRUE if OP can be considered a zero extension from one or
+   more sub-word modes to larger modes up to a full word.
+
+   For example (and:DI (reg) (const_int X))
+
+   Depending on the value of X could be considered a zero extension
+   from QI, HI and SI to larger modes up to DImode.  */
+
+static unsigned int
+rtx_is_zext_p (rtx_insn *insn)
+{
+  rtx body = single_set (insn);
+
+  if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+ rtx set = XEXP (SET_SRC (body), 0);
+
+ if (REG_P (set) && GET_MODE (SET_DEST (body)) == GET_MODE (set))
+   {
+if (XEXP (SET_SRC (body), 1) == const1_rtx)
+  return 1;
+   }
+ else
+  return 0;
+   }
+
+   return 0;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -319,7 +384,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_set;
+  rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
 
   /* If the extension's source/destination registers are not the same
@@ -359,27 +424,41 @@ combine_set_extension (ext_cand *cand, rtx_insn 
*curr_insn, rtx *orig_set)
   else if (GET_CODE (orig_src) == cand->code)
 {
   /* Here is a sequence of two extensions.  Try to merge them.  */
-  rtx temp_extension
-   = gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
+  rtx temp_extension = NULL_RTX;
+  if (GET_CODE (SET_SRC (cand_pat)) == AND)
+   temp_extension
+   = gen_rtx_AND (cand->mode, XEXP (orig_src, 0), XEXP (orig_src, 1));
+  else
+   temp_extension
+= gen_rtx_fmt_e (cand->code, cand->mode, XEXP (orig_src, 0));
   rtx simplified_temp_extension = simplify_rtx (temp_extension);
   if (simplified_temp_extension)
 temp_extension = simplified_temp_extension;
+
   new_set = gen_rtx_SET (new_reg, temp_extension);
 }
   else if (GET_CODE (orig_src) == IF_THEN_ELSE)
 {
   /* 

[PATCH v3 2/4] ree : Code movement to avoid adding prototype to improve ree pass for rs6000 target.

2023-04-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

This is the patch-2 to improve ree pass for rs6000 target.
Bootstrapped and regtested on powerpc64-gnu-linux.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target.

For the rs6000 target we see redundant zero and sign
extensions; the ree pass is improved to eliminate
such redundant zero and sign extensions.

2023-04-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (get_defs): Move function to avoid prototype.
---
 gcc/ree.cc | 94 +++---
 1 file changed, 47 insertions(+), 47 deletions(-)

diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..93d300a9e96 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,53 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Get all the reaching definitions of an instruction.  The definitions are
+   desired for REG used in INSN.  Return the definition list or NULL if a
+   definition is missing.  If DEST is non-NULL, additionally push the INSN
+   of the definitions onto DEST.  */
+
+static struct df_link *
+get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
+{
+  df_ref use;
+  struct df_link *ref_chain, *ref_link;
+
+  FOR_EACH_INSN_USE (use, insn)
+{
+  if (GET_CODE (DF_REF_REG (use)) == SUBREG)
+   return NULL;
+  if (REGNO (DF_REF_REG (use)) == REGNO (reg))
+   break;
+}
+
+  if (use == NULL)
+return NULL;
+
+  ref_chain = DF_REF_CHAIN (use);
+
+  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+{
+  /* Problem getting some definition for this instruction.  */
+  if (ref_link->ref == NULL)
+   return NULL;
+  if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
+   return NULL;
+  /* As global regs are assumed to be defined at each function call
+dataflow can report a call_insn as being a definition of REG.
+But we can't do anything with that in this pass so proceed only
+if the instruction really sets REG in a way that can be deduced
+from the RTL structure.  */
+  if (global_regs[REGNO (reg)]
+ && !set_of (reg, DF_REF_INSN (ref_link->ref)))
+   return NULL;
+}
+
+  if (dest)
+for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+  dest->safe_push (DF_REF_INSN (ref_link->ref));
+
+  return ref_chain;
+}
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -454,53 +501,6 @@ transform_ifelse (ext_cand *cand, rtx_insn *def_insn)
   return false;
 }
 
-/* Get all the reaching definitions of an instruction.  The definitions are
-   desired for REG used in INSN.  Return the definition list or NULL if a
-   definition is missing.  If DEST is non-NULL, additionally push the INSN
-   of the definitions onto DEST.  */
-
-static struct df_link *
-get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
-{
-  df_ref use;
-  struct df_link *ref_chain, *ref_link;
-
-  FOR_EACH_INSN_USE (use, insn)
-{
-  if (GET_CODE (DF_REF_REG (use)) == SUBREG)
-return NULL;
-  if (REGNO (DF_REF_REG (use)) == REGNO (reg))
-   break;
-}
-
-  gcc_assert (use != NULL);
-
-  ref_chain = DF_REF_CHAIN (use);
-
-  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
-{
-  /* Problem getting some definition for this instruction.  */
-  if (ref_link->ref == NULL)
-return NULL;
-  if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
-return NULL;
-  /* As global regs are assumed to be defined at each function call
-dataflow can report a call_insn as being a definition of REG.
-But we can't do anything with that in this pass so proceed only
-if the instruction really sets REG in a way that can be deduced
-from the RTL structure.  */
-  if (global_regs[REGNO (reg)]
- && !set_of (reg, DF_REF_INSN (ref_link->ref)))
-   return NULL;
-}
-
-  if (dest)
-for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
-  dest->safe_push (DF_REF_INSN (ref_link->ref));
-
-  return ref_chain;
-}
-
 /* Get all the reaching uses of an instruction.  The uses are desired for REG
set in INSN.  Return use list or NULL if a use is missing or irregular.  */
 
-- 
2.31.1



[PATCH v3 1/4] ree: Default ree pass for O2 and above for rs6000 target.

2023-04-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

This is the patch-1 for improving ree pass for rs6000 target.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

ree: Improve ree pass for rs6000 target.

Add ree pass as a default pass for rs6000 target.

2023-04-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* common/config/rs6000/rs6000-common.cc: Add REE pass as a
default rs6000 target pass for O2 and above.
---
 gcc/common/config/rs6000/rs6000-common.cc | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
b/gcc/common/config/rs6000/rs6000-common.cc
index 2140c442ba9..968db215028 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -34,6 +34,8 @@ static const struct default_options 
rs6000_option_optimization_table[] =
 { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Enable -fsched-pressure for first pass instruction scheduling.  */
 { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
+/* Enable -free for zero extension and sign extension elimination.*/
+{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
 /* Enable -munroll-only-small-loops with -funroll-loops to unroll small
loops at -O2 and above by default.  */
 { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
-- 
2.31.1



[PATCH] tree-ssa-sink: Improve code sinking pass.

2023-04-16 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch improves the code sinking pass so that statements are sunk before
calls, into the use blocks or their immediate dominator blocks, which reduces
register pressure.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Improve code sinking pass.

Code Sinking sinks the blocks after call. This increases
register pressure for callee-saved registers. Improves
code sinking before call in the use blocks or immediate
dominator of use blocks.

2023-04-16  Ajit Kumar Agarwal  

gcc/ChangeLog:

* tree-ssa-sink.cc (statement_sink_location): Modified to
move statements before calls.
(block_call_p): New function.
(def_use_same_block): New function.
(select_best_block): Add heuristics to select the best
blocks in the immediate post dominator.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c |  16 +++
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c |  20 +++
 gcc/tree-ssa-sink.cc| 134 +++-
 3 files changed, 164 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..716bc1f9257
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink -fdump-tree-optimized 
-fdump-tree-sink-stats" } */
+
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..ff41e2ea8ae
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */ 
+/* { dg-options "-O2 -fdump-tree-sink-stats -fdump-tree-sink-stats" } */
+
+void bar();
+int j, x;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  if (b != 3)
+x = 3;
+  else
+x = 5;
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink" } } */
diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index 87b1d40c174..12babf73321 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -171,6 +171,70 @@ nearest_common_dominator_of_uses (def_operand_p def_p, 
bool *debug_stmts)
   return commondom;
 }
 
+/* Check def and use stmts are in same block.  */
+
+bool
+def_use_same_block (gimple *use)
+{
+  use_operand_p use_p;
+  def_operand_p def_p;
+  imm_use_iterator imm_iter;
+  ssa_op_iter iter;
+
+  FOR_EACH_SSA_DEF_OPERAND (def_p, use, iter, SSA_OP_DEF)
+{
+  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
+   {
+ if (is_gimple_debug (USE_STMT (use_p)))
+   continue;
+
+ if (use_p
+ && (gimple_bb(USE_STMT (use_p)) == gimple_bb (use)))
+   return true;
+   }
+ }
+  return false;
+}
+
+/* Check if the block has only calls.  */
+
+bool
+block_call_p (basic_block bb)
+{
+  int i = 0;
+  bool is_call = false;
+  gimple_stmt_iterator gsi = gsi_last_bb (bb);
+  gimple *last_stmt = gsi_stmt (gsi);
+
+  if (last_stmt && gimple_code (last_stmt) == GIMPLE_COND)
+{
+  if (!gsi_end_p (gsi))
+   gsi_prev (&gsi);
+
+   for (; !gsi_end_p (gsi);)
+{
+  gimple *stmt = gsi_stmt (gsi);
+
+  if (is_gimple_debug (stmt))
+return false;
+
+  if (is_gimple_call (stmt))
+is_call = true;
+  else
+return false;
+
+  if (!gsi_end_p (gsi))
+gsi_prev (&gsi);
+
+   ++i;
+   }
+ }
+  if (is_call && i == 1)
+return true;
+
+  return false;
+}
+
 /* Given EARLY_BB and LATE_BB, two blocks in a path through the dominator
tree, return the best basic block between them (inclusive) to place
statements.
@@ -190,7 +254,8 @@ nearest_common_dominator_of_uses (def_operand_p def_p, bool 
*debug_stmts)
 static basic_block
 select_best_block (basic_block early_bb,
   basic_block late_bb,
-  gimple *stmt)
+  gimple *stmt,
+  gimple *use = 0)
 {
   basic_block best_bb = late_bb;
   basic_block temp_bb = late_bb;
@@ -230,7 +295,28 @@ select_best_block (basic_block early_bb,
   if (threshold > 100)
threshold = 100;
 }
+  if (bb_loop_depth (best_bb) == bb_loop_depth (early_bb)
+  && !(best_bb->count * 100 >= 

Re: PATCH] tree-ssa-sink: Add heuristics for code sinking

2023-04-14 Thread Ajit Agarwal via Gcc-patches
Hello Richard:

On 14/04/23 2:29 pm, Richard Biener wrote:
> On Fri, Apr 14, 2023 at 10:42 AM Ajit Agarwal via Gcc-patches
>  wrote:
>>
>> Hello All:
>>
>> This patch adds heuristics for code sinking opportunities.
>> Bootstrapped and regtested for powerpc64-linux-gnu.
>>
>> Thanks & Regards
>> Ajit
>>
>> tree-ssa-sink: Add heuristics for code sinking.
>>
>> Add following code sinking heuristics:
>>
>> 1. from code block dominates the call.
>> 2. To Code block have uses inside the function call.
>> 3. Loop headers.
>> 4. Sinking from code block after call increases register
>> pressure.
>> 5. Sinking calls.
>>
>> 2023-04-14  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>> * tree-ssa-sink.cc (statement_sink_location): Add heuristics
>> for code sinking.
>> ---
>>  gcc/tree-ssa-sink.cc | 33 +
>>  1 file changed, 33 insertions(+)
>>
>> diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
>> index 87b1d40c174..8de88b259a3 100644
>> --- a/gcc/tree-ssa-sink.cc
>> +++ b/gcc/tree-ssa-sink.cc
>> @@ -465,6 +465,39 @@ statement_sink_location (gimple *stmt, basic_block 
>> frombb,
>>   if (sinkbb == frombb)
>> return false;
>>
>> + auto_vec<basic_block> h;
>> + h = get_all_dominated_blocks (CDI_DOMINATORS,
>> +   frombb);
>> + bool is_call = false;
>> + while (h.length ())
>> +   {
>> + basic_block bb = h.pop ();
>> +
>> + if (bb == frombb)
>> +   continue;
>> +
>> + for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p 
>> (gsi);)
>> +   {
>> + gimple *stmt = gsi_stmt (gsi);
>> +
>> + if (is_gimple_call (stmt))
>> +   {
>> + is_call = true;
>> + break;
>> +   }
>> +
>> +  if (!gsi_end_p (gsi))
>> +gsi_prev (&gsi);
>> +   }
>> +}
>> +
>> +   if (!is_gimple_call (stmt)
>> +   && (gimple_bb (use) != frombb)
>> +   && !is_gimple_call (use)
>> +   && dominated_by_p (CDI_DOMINATORS, sinkbb, frombb)
>> +   && is_call)
>> +  return false;
>> +
> 
> Sorry, but this lacks a comment, it doesn't explain why the existing 
> heuristics
> are not enough (select_best_block), it repeats dominance computing.
> 
> More so it lacks a testcase demonstrating the effect.
> 

Added testcases and comments in the code.
The heuristics are added to relieve register pressure.

Thanks & Regards
Ajit

Here is the patch.

tree-ssa-sink: Add heuristics for code sinking.

Add following code sinking heuristics:

1. from code block dominates the call.
2. To Code block have uses inside the function call.
3. Loop headers.
4. Sinking from code block after call increases register
pressure.
5. Sinking calls.

2023-04-14  Ajit Kumar Agarwal  

gcc/ChangeLog:

* tree-ssa-sink.cc (statement_sink_location): Add heuristics
for code sinking.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/ssa-sink-20.c: New testcase.
* gcc.dg/tree-ssa/ssa-sink-21.c: New testcase.
---
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c | 16 
 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c | 20 
 gcc/tree-ssa-sink.cc|  6 ++
 3 files changed, 42 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
new file mode 100644
index 000..ed2aefc01aa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-20.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-sink -fdump-tree-optimized" } */
+
+void bar();
+int j;
+void foo(int a, int b, int c, int d, int e, int f)
+{
+  int l;
+  l = a + b + c + d +e + f;
+  if (a != 5)
+{
+  bar();
+  j = l;
+}
+}
+/* { dg-final { scan-tree-dump-times "Sunk statements: 1" 0 "sink1" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-21.c
new file mode 100644
index 000..a39724df8e

PATCH] tree-ssa-sink: Add heuristics for code sinking

2023-04-14 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch adds heuristics for code sinking opportunities.
Bootstrapped and regtested for powerpc64-linux-gnu.

Thanks & Regards
Ajit

tree-ssa-sink: Add heuristics for code sinking.

Add following code sinking heuristics:

1. from code block dominates the call.
2. To Code block have uses inside the function call.
3. Loop headers.
4. Sinking from code block after call increases register
pressure.
5. Sinking calls.

2023-04-14  Ajit Kumar Agarwal  

gcc/ChangeLog:

* tree-ssa-sink.cc (statement_sink_location): Add heuristics
for code sinking.
---
 gcc/tree-ssa-sink.cc | 33 +
 1 file changed, 33 insertions(+)

diff --git a/gcc/tree-ssa-sink.cc b/gcc/tree-ssa-sink.cc
index 87b1d40c174..8de88b259a3 100644
--- a/gcc/tree-ssa-sink.cc
+++ b/gcc/tree-ssa-sink.cc
@@ -465,6 +465,39 @@ statement_sink_location (gimple *stmt, basic_block frombb,
  if (sinkbb == frombb)
return false;
 
+ auto_vec<basic_block> h;
+ h = get_all_dominated_blocks (CDI_DOMINATORS,
+   frombb);
+ bool is_call = false;
+ while (h.length ())
+   {
+ basic_block bb = h.pop ();
+
+ if (bb == frombb)
+   continue;
+
+ for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p 
(gsi);)
+   {
+ gimple *stmt = gsi_stmt (gsi);
+
+ if (is_gimple_call (stmt))
+   {
+ is_call = true;
+ break;
+   }
+
+  if (!gsi_end_p (gsi))
+gsi_prev (&gsi);
+   }
+}
+
+   if (!is_gimple_call (stmt)
+   && (gimple_bb (use) != frombb)
+   && !is_gimple_call (use)
+   && dominated_by_p (CDI_DOMINATORS, sinkbb, frombb)
+   && is_call)
+  return false;
+
  if (sinkbb == gimple_bb (use))
*togsi = gsi_for_stmt (use);
  else
-- 
2.31.1



[PATCH v2] ree: Improve ree pass for rs6000 target.

2023-04-06 Thread Ajit Agarwal via Gcc-patches
Hello All:

Eliminate redundant extensions within and across basic blocks.
For the rs6000 target we see redundant zero and sign extensions; this patch
improves the ree pass to eliminate such redundant zero and sign extensions.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


ree: Improve ree pass for rs6000 target.

Eliminate unnecessary redundant extension within basic
and across basic blocks. For rs6000 target we see
redundant zero and sign extension and done to improve
ree pass to eliminate such redundant zero and sign
extension.

2023-04-06  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(combine_reaching_defs): Add zero_extend and sign_extend.
Add FUNCTION_ARG_REGNO_P abi interfaces calls and
FUNCTION_VALUE_REGNO_P support.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(insn_is_zext_p): New function.
(add_removable_extension): Add FUNCTION_ARG_REGNO_P abi
interface calls.
* common/config/rs6000/rs6000-common.cc: Add REE pass as a
default rs6000 target pass for O2 and above.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/zext-elim.C: New testcase.
* g++.target/powerpc/zext-elim-1.C: New testcase.
* g++.target/powerpc/zext-elim-2.C: New testcase.
* g++.target/powerpc/zext-elim-3.C: New testcase.
* g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/common/config/rs6000/rs6000-common.cc |   2 +
 gcc/ree.cc| 655 ++
 gcc/testsuite/g++.target/powerpc/sext-elim.C  |  18 +
 .../g++.target/powerpc/zext-elim-1.C  |  19 +
 .../g++.target/powerpc/zext-elim-2.C  |  11 +
 .../g++.target/powerpc/zext-elim-3.C  |  16 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C  |  30 +
 7 files changed, 606 insertions(+), 145 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-1.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-2.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim-3.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
b/gcc/common/config/rs6000/rs6000-common.cc
index 2140c442ba9..968db215028 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -34,6 +34,8 @@ static const struct default_options 
rs6000_option_optimization_table[] =
 { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Enable -fsched-pressure for first pass instruction scheduling.  */
 { OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
+/* Enable -free for zero extension and sign extension elimination.*/
+{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
 /* Enable -munroll-only-small-loops with -funroll-loops to unroll small
loops at -O2 and above by default.  */
 { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..8057f0325f4 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,101 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Get all the reaching definitions of an instruction.  The definitions are
+   desired for REG used in INSN.  Return the definition list or NULL if a
+   definition is missing.  If DEST is non-NULL, additionally push the INSN
+   of the definitions onto DEST.  */
+
+static struct df_link *
+get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
+{
+  df_ref use;
+  struct df_link *ref_chain, *ref_link;
+
+  FOR_EACH_INSN_USE (use, insn)
+{
+  if (GET_CODE (DF_REF_REG (use)) == SUBREG)
+   return NULL;
+  if (REGNO (DF_REF_REG (use)) == REGNO (reg))
+   break;
+}
+
+  if (use == NULL)
+return NULL;
+
+  ref_chain = DF_REF_CHAIN (use);
+
+  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+{
+  /* Problem getting some definition for this instruction.  */
+  if (ref_link->ref == NULL)
+   return NULL;
+  if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
+   return NULL;
+  /* As global regs are assumed to be defined at each function call
+dataflow can report a call_insn as being a definition of REG.
+But we can't do anything with that in this pass so proceed only
+if the instruction really sets REG in a way that can be deduced
+from the RTL structure.  */
+  if (global_regs[REGNO (reg)]
+ && !set_of 

[PATCH] ree: Improvement of ree pass for rs6000 target.

2023-04-04 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch eliminates redundant extensions within and across basic blocks.
For the rs6000 target we see redundant zero and sign extensions; this patch
improves the ree pass to eliminate such redundant zero and sign extensions.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


ree: Improvement of ree pass for rs6000 target.

Eliminate unnecessary redundant extension within basic
and across basic blocks. For rs6000 target we see
redundant zero and sign extension and done to improve
ree pass to eliminate such redundant zero and sign
extension.

2023-04-04  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc (eliminate_across_bbs_p): Add checks to enable extension
elimination across and within basic blocks.
(def_arith_p): New function to check definition has arithmetic
operation.
(combine_set_extension): Modification to incorporate AND
and current zero_extend and sign_extend instruction.
(combine_reaching_defs): Add zero_extend and sign_extend.
Add FUNCTION_ARG_REGNO_P abi interfaces calls and
FUNCTION_VALUE_REGNO_P support.
(merge_def_and_ext): Add calls to eliminate_across_bbs_p and
zero_extend sign_extend and AND instruction.
(insn_is_zext_p): New function.
(add_removable_extension): Add FUNCTION_ARG_REGNO_P abi
interface calls.
* common/config/rs6000/rs6000-common.cc: Add REE pass as a
default rs6000 target pass for O2 and above.
testsuite/g++.target/powerpc/zext-elim.C: New testcase.
testsuite/g++.target/powerpc/sext-elim.C: New testcase.
---
 gcc/common/config/rs6000/rs6000-common.cc|   4 +-
 gcc/ree.cc   | 662 ++-
 gcc/testsuite/g++.target/powerpc/sext-elim.C |  18 +
 gcc/testsuite/g++.target/powerpc/zext-elim.C |  30 +
 4 files changed, 563 insertions(+), 151 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
b/gcc/common/config/rs6000/rs6000-common.cc
index 2140c442ba9..a9f518478a4 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -30,6 +30,8 @@
 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
 static const struct default_options rs6000_option_optimization_table[] =
   {
+/* Enable -free for zero extension and sign extension elimination.*/
+{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
 /* Split multi-word types early.  */
 { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Enable -fsched-pressure for first pass instruction scheduling.  */
@@ -38,11 +40,9 @@ static const struct default_options 
rs6000_option_optimization_table[] =
loops at -O2 and above by default.  */
 { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
 { OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL, 1 },
-
 /* -frename-registers leads to non-optimal codegen and performance
on rs6000, turn it off by default.  */
 { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
-
 /* Double growth factor to counter reduced min jump length.  */
 { OPT_LEVELS_ALL, OPT__param_max_grow_copy_bb_insns_, NULL, 16 },
 { OPT_LEVELS_NONE, 0, NULL, 0 }
diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..038bb71baaf 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,102 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Get all the reaching definitions of an instruction.  The definitions are
+   desired for REG used in INSN.  Return the definition list or NULL if a
+   definition is missing.  If DEST is non-NULL, additionally push the INSN
+   of the definitions onto DEST.  */
+
+static struct df_link *
+get_defs (rtx_insn *insn, rtx reg, vec<rtx_insn *> *dest)
+{
+  df_ref use;
+  struct df_link *ref_chain, *ref_link;
+
+  FOR_EACH_INSN_USE (use, insn)
+{
+  if (GET_CODE (DF_REF_REG (use)) == SUBREG)
+   return NULL;
+  if (REGNO (DF_REF_REG (use)) == REGNO (reg))
+   break;
+}
+
+  if (use == NULL)
+return NULL;
+
+  ref_chain = DF_REF_CHAIN (use);
+
+  for (ref_link = ref_chain; ref_link; ref_link = ref_link->next)
+{
+  /* Problem getting some definition for this instruction.  */
+  if (ref_link->ref == NULL)
+   return NULL;
+  if (DF_REF_INSN_INFO (ref_link->ref) == NULL)
+   return NULL;
+  /* As global regs are assumed to be defined at each function call
+dataflow can report a call_insn as being a definition of REG.
+But we can't do anything with that in this pass so proceed only
+if the instruction really sets REG in a way that can be deduced
+from the RTL structure.  */
+  if (global_regs[REGNO (reg)]
+ && !set_of (reg, DF_REF_INSN (ref_link->ref)))
+   

Re: [PATCH v2] rtl-optimization: ppc backend generates unnecessary extension.

2023-03-30 Thread Ajit Agarwal via Gcc-patches
 && GET_CODE (SET_SRC (PATTERN (def_insn))) == ASHIFT)
+   || GET_CODE (PATTERN (def_insn)) == PARALLEL)
+  return false;
+ }
+   XDELETEVEC (dest);
+  }
+
return true;
 }
 
@@ -873,7 +956,8 @@ merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, 
ext_state *state)
 
   if (!feasible) return false;
 
-  if (((!copy_needed && (insn_is_zext_p (cand->insn))
+  if (((!copy_needed && (insn_is_zext_p (cand->insn)
+   || (cand->code == ZERO_EXTEND && ext_src_mode == QImode))
&& (GET_MODE (SET_DEST (*sub_rtx)) != ext_src_mode
&& state->modified[INSN_UID (def_insn)].kind == EXT_MODIFIED_NONE))
|| ((state->modified[INSN_UID (def_insn)].kind
@@ -1211,15 +1295,22 @@ combine_reaching_defs (ext_cand *cand, const_rtx 
set_pat, ext_state *state)
 definitions could be merged.  */
   if (apply_change_group ())
 {
-  if (dump_file)
-fprintf (dump_file, "All merges were successful.\n");
+ if (state->modified_list.length() == 0) return false;
+
+ if (cand->code == ZERO_EXTEND
+ && GET_CODE (PATTERN (state->modified_list[0])) == SET
+ && GET_CODE (SET_SRC (PATTERN (state->modified_list[0]))) != XOR)
+return false;
+
+  if (dump_file)
+fprintf (dump_file, "All merges were successful.\n");
 
  FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
{
  ext_modified *modified = &state->modified[INSN_UID (def_insn)];
  if (modified->kind == EXT_MODIFIED_NONE)
modified->kind = (cand->code == ZERO_EXTEND  ? EXT_MODIFIED_ZEXT
-   : 
EXT_MODIFIED_SEXT);
+: 
EXT_MODIFIED_SEXT);
 
  if (copy_needed)
modified->do_not_reextend = 1;
@@ -1228,6 +1319,26 @@ combine_reaching_defs (ext_cand *cand, const_rtx 
set_pat, ext_state *state)
 }
else
  {
+   if (state->modified_list.length() == 0) return false;
+
+   if (cand->code == ZERO_EXTEND
+   && GET_CODE (PATTERN(state->modified_list[0])) == SET
+   && GET_CODE (SET_SRC (PATTERN (state->modified_list[0]))) != 
XOR)
+ return false;
+
+   if (cand->code == ZERO_EXTEND)
+ {
+   FOR_EACH_VEC_ELT (state->modified_list, i, def_insn)
+ {
+   ext_modified *modified = &state->modified[INSN_UID 
(def_insn)];
+   if (modified->kind == EXT_MODIFIED_NONE)
+ modified->kind = (cand->code == ZERO_EXTEND ? 
EXT_MODIFIED_ZEXT
+ : 
EXT_MODIFIED_SEXT);
+
+modified->do_not_reextend = 1;
+  }
+     return true;
+     }
/* Changes need not be cancelled explicitly as apply_change_group
does it.  Print list of definitions in the dump_file for debug
purposes.  This extension cannot be deleted.  */
-- 
2.31.1



On 28/03/23 10:19 pm, Ajit Agarwal via Gcc-patches wrote:
> Hello All:
> 
> This patch makes REE pass as a default pass in rs6000 target. And
> add necessary subroutines to eliminate extensions across basic blocks.
> 
> Bootstrapped and regtested on powerpc64-linux-gnu.
> 
> Thanks & Regards
> Ajit
> 
> 
>   rtl-optimization: ppc backend generates unnecessary
>   extension.
> 
>   Eliminate unnecessary redundant zero extension across basic
>   blocks.
> 
>   2023-03-28  Ajit Kumar Agarwal  
> 
> gcc/ChangeLog:
> 
>   * ree.cc(insn_is_zext_p): New function.
>   * ree.cc(is_feasible_elim_across_basic_blocks):
>   New function.
>   * ree.cc(merge_def_and_ext): Add call to
>   is_feasible_elim_across_basic_blocks to check feasibility
>   of extension elimination across basic blocks.
>   * common/config/rs6000/rs6000-common.cc: Add free pass
>   as default pass in rs6000 target.
> ---
>  gcc/common/config/rs6000/rs6000-common.cc |   3 +-
>  gcc/ree.cc| 269 +-
>  2 files changed, 209 insertions(+), 63 deletions(-)
> 
> diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
> b/gcc/common/config/rs6000/rs6000-common.cc
> index 2140c442ba9..e7780dc0c5d 100644
> --- a/gcc/common/config/rs6000/rs6000-common.cc
> +++ b/gcc/common/config/rs6000/rs6000-common.cc
> @@ -30,6 +30,8 @@
>  /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
>  static const struct default_options rs6000_option_optimization_table[] =
>{
> +/* Enable -f

[PATCH v2] rtl-optimization: ppc backend generates unnecessary extension.

2023-03-28 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch makes REE a default pass for the rs6000 target and adds the
subroutines necessary to eliminate extensions across basic blocks.

Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit


rtl-optimization: ppc backend generates unnecessary
extension.

Eliminate unnecessary redundant zero extension across basic
blocks.

2023-03-28  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc(insn_is_zext_p): New function.
* ree.cc(is_feasible_elim_across_basic_blocks):
New function.
* ree.cc(merge_def_and_ext): Add call to
is_feasible_elim_across_basic_blocks to check feasibility
of extension elimination across basic blocks.
* common/config/rs6000/rs6000-common.cc: Add free pass
as default pass in rs6000 target.
---
 gcc/common/config/rs6000/rs6000-common.cc |   3 +-
 gcc/ree.cc| 269 +-
 2 files changed, 209 insertions(+), 63 deletions(-)

diff --git a/gcc/common/config/rs6000/rs6000-common.cc 
b/gcc/common/config/rs6000/rs6000-common.cc
index 2140c442ba9..e7780dc0c5d 100644
--- a/gcc/common/config/rs6000/rs6000-common.cc
+++ b/gcc/common/config/rs6000/rs6000-common.cc
@@ -30,6 +30,8 @@
 /* Implement TARGET_OPTION_OPTIMIZATION_TABLE.  */
 static const struct default_options rs6000_option_optimization_table[] =
   {
+/* Enable -free for zero extension and sign extension elimination.*/
+{ OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
 /* Split multi-word types early.  */
 { OPT_LEVELS_ALL, OPT_fsplit_wide_types_early, NULL, 1 },
 /* Enable -fsched-pressure for first pass instruction scheduling.  */
@@ -42,7 +44,6 @@ static const struct default_options 
rs6000_option_optimization_table[] =
 /* -frename-registers leads to non-optimal codegen and performance
on rs6000, turn it off by default.  */
 { OPT_LEVELS_ALL, OPT_frename_registers, NULL, 0 },
-
 /* Double growth factor to counter reduced min jump length.  */
 { OPT_LEVELS_ALL, OPT__param_max_grow_copy_bb_insns_, NULL, 16 },
 { OPT_LEVELS_NONE, 0, NULL, 0 }
diff --git a/gcc/ree.cc b/gcc/ree.cc
index 63d8cf9f237..d05d37f9a23 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -253,6 +253,53 @@ struct ext_cand
 
 static int max_insn_uid;
 
+/* Identify instruction AND with identical zero extension.  */
+
+static unsigned int
+insn_is_zext_p(rtx insn)
+{
+  if (GET_CODE (insn) == AND)
+   {
+ rtx set = XEXP (insn, 0);
+ if (REG_P(set))
+   {
+if (CONST_INT_P (XEXP (insn, 1))
+&& INTVAL (XEXP (insn, 1)) == 1)
+  return 1;
+   }
+ else
+   return 0;
+   }
+
+  return 0;
+}
+
+/* Identify instruction AND with identical zero extension.  */
+
+static unsigned int
+insn_is_zext_p(rtx_insn * insn)
+{
+  rtx body = PATTERN (insn);
+
+  if (GET_CODE (body) == PARALLEL) return 0;
+
+  if (GET_CODE(body) == SET && GET_CODE (SET_SRC (body)) == AND)
+   {
+ rtx set = XEXP (SET_SRC(body), 0);
+
+ if (REG_P(set) && GET_MODE(SET_DEST(body))
+== GET_MODE(set))
+   {
+if (CONST_INT_P (XEXP (SET_SRC (body), 1))
+ && INTVAL (XEXP (SET_SRC (body), 1)) == 1)
+  return 1;
+   }
+ else
+   return 0;
+   }
+   return 0;
+}
+
 /* Update or remove REG_EQUAL or REG_EQUIV notes for INSN.  */
 
 static bool
@@ -297,6 +344,31 @@ update_reg_equal_equiv_notes (rtx_insn *insn, machine_mode 
new_mode,
   return true;
 }
 
+/* Return true if INSN is
+ (SET (reg REGNO (def_reg)) (if_then_else (cond) (REG x1) (REG x2)))
+   and store x1 and x2 in REG_1 and REG_2.  */
+
+static bool
+is_cond_copy_insn (rtx_insn *insn, rtx *reg1, rtx *reg2)
+{
+  rtx expr = single_set (insn);
+
+  if (expr != NULL_RTX
+  && GET_CODE (expr) == SET
+  && GET_CODE (SET_DEST (expr)) == REG
+  && GET_CODE (SET_SRC (expr))  == IF_THEN_ELSE
+  && GET_CODE (XEXP (SET_SRC (expr), 1)) == REG
+  && GET_CODE (XEXP (SET_SRC (expr), 2)) == REG)
+{
+  *reg1 = XEXP (SET_SRC (expr), 1);
+  *reg2 = XEXP (SET_SRC (expr), 2);
+  return true;
+}
+
+  return false;
+}
+
+
 /* Given a insn (CURR_INSN), an extension candidate for removal (CAND)
and a pointer to the SET rtx (ORIG_SET) that needs to be modified,
this code modifies the SET rtx to a new SET rtx that extends the
@@ -321,6 +393,9 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
   rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
+  if (insn_is_zext_p(cand->insn)
+   && CONST_INT_P (orig_src) && INTVAL (orig_src) != 0)
+return false;
 
   /* If the extension's source/destination registers are not the same
  then we need to change the original load to reference the destination
@@ -359,8 +434,14 @@ combine_set_extension (ext_cand *cand, rtx_insn 
*curr_insn, rtx *orig_set)
   

Re: [PATCH] rtl-optimization: ppc backend generates unnecessary signed extension.

2023-03-23 Thread Ajit Agarwal via Gcc-patches



On 23/03/23 7:17 pm, Jeff Law wrote:
> 
> 
> On 3/23/23 04:38, Ajit Agarwal wrote:
>>
>> Hello All:
>>
>> This patch removed unnecessary signed extension elimination in ree pass.
>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>
>>
>> Thanks & Regards
>> Ajit
>>
>> rtl-optimization: ppc backend generates unnecessary signed extension.
>>
>> Eliminate unnecessary redundant signed extension.
>>
>> 2023-03-23  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>> * ree.cc: Modification for  AND opcode support to eliminate
>> unnecessary signed extension.
>> * testsuite/g++.target/powerpc/sext-elim.C: New tests.
> Just a note.  I'll look at this once the trunk is open for gcc-14 
> development.  It's really not appropriate for gcc-13.

Thanks Jeff.
> 
> jeff


Re: [PATCH] rtl-optimization: ppc backend generates unnecessary signed extension.

2023-03-23 Thread Ajit Agarwal via Gcc-patches
Hello Peter:

On 23/03/23 6:08 pm, Peter Bergner wrote:
> On 3/23/23 5:38 AM, Ajit Agarwal wrote:
>> This patch removed unnecessary signed extension elimination in ree pass.
>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>
>>
>> Thanks & Regards
>> Ajit
>>
>>  rtl-optimization: ppc backend generates unnecessary signed extension.
>>
>>  Eliminate unnecessary redundant signed extension.
>>
>>  2023-03-23  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>>  * ree.cc: Modification for  AND opcode support to eliminate
>>  unnecessary signed extension.
>>  * testsuite/g++.target/powerpc/sext-elim.C: New tests.
> 
> Not a review of the patch, but we talked offline about other bugzillas
> regarding unnecessary sign and zero extensions.  Doing a quick scan, I
> see the following bugs.  Please have a look at 1) whether these are
> still a problem with unpatched trunk, and if they are, 2) whether your
> patch fixes them or could fix them.  Thanks.
> 
> https://gcc.gnu.org/PR41742

These are not addressed in the trunk patch, because int c is not initialized 
with registers and for this reason we cannot eliminate them. If we initialize 
int c then zero extension goes away.

> https://gcc.gnu.org/PR65010
> https://gcc.gnu.org/PR82940
> https://gcc.gnu.org/PR107949
>

My patch fixes these PR's which were not fixed in trunk patch.

Thanks & Regards
Ajit
 
> Peter
> 


Re: [PATCH] rtl-optimization: ppc backend generates unnecessary signed extension.

2023-03-23 Thread Ajit Agarwal via Gcc-patches
Hello Peter:

On 23/03/23 6:08 pm, Peter Bergner wrote:
> On 3/23/23 5:38 AM, Ajit Agarwal wrote:
>> This patch removed unnecessary signed extension elimination in ree pass.
>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>
>>
>> Thanks & Regards
>> Ajit
>>
>>  rtl-optimization: ppc backend generates unnecessary signed extension.
>>
>>  Eliminate unnecessary redundant signed extension.
>>
>>  2023-03-23  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>>  * ree.cc: Modification for  AND opcode support to eliminate
>>  unnecessary signed extension.
>>  * testsuite/g++.target/powerpc/sext-elim.C: New tests.
> 
> Not a review of the patch, but we talked offline about other bugzillas
> regarding unnecessary sign and zero extensions.  Doing a quick scan, I
> see the following bugs.  Please have a look at 1) whether these are
> still a problem with unpatched trunk, and if they are, 2) whether your
> patch fixes them or could fix them.  Thanks.
> 
> https://gcc.gnu.org/PR41742

These are not addressed in the trunk patch, because int c is not initialized 
with registers and for this reason we cannot eliminate them. If we initialize 
int c then zero extension goes away.

> https://gcc.gnu.org/PR65010
> https://gcc.gnu.org/PR82940
> https://gcc.gnu.org/PR107949
>

My patch fixes these PR's which were not fixed in trunk patch.

Thanks & Regards
Ajit
 
> Peter
> 


[PATCH] rtl-optimization: ppc backend generates unnecessary signed extension.

2023-03-23 Thread Ajit Agarwal via Gcc-patches


Hello All:

This patch eliminates unnecessary sign extensions in the ree pass.
Bootstrapped and regtested on powerpc64-linux-gnu.


Thanks & Regards
Ajit

rtl-optimization: ppc backend generates unnecessary signed extension.

Eliminate unnecessary redundant signed extension.

2023-03-23  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc: Modification for  AND opcode support to eliminate
unnecessary signed extension.
* testsuite/g++.target/powerpc/sext-elim.C: New tests.
---
 gcc/ree.cc   | 24 +---
 gcc/testsuite/g++.target/powerpc/sext-elim.C | 19 
 2 files changed, 40 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/powerpc/sext-elim.C

diff --git a/gcc/ree.cc b/gcc/ree.cc
index d09f55149b1..63d8cf9f237 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -364,6 +364,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
   rtx simplified_temp_extension = simplify_rtx (temp_extension);
   if (simplified_temp_extension)
 temp_extension = simplified_temp_extension;
+
   new_set = gen_rtx_SET (new_reg, temp_extension);
 }
   else if (GET_CODE (orig_src) == IF_THEN_ELSE)
@@ -375,11 +376,21 @@ combine_set_extension (ext_cand *cand, rtx_insn 
*curr_insn, rtx *orig_set)
   else
 {
   /* This is the normal case.  */
-  rtx temp_extension
-   = gen_rtx_fmt_e (cand->code, cand->mode, orig_src);
+  rtx temp_extension = NULL_RTX;
+
+  if (GET_CODE (SET_SRC (cand_pat)) == AND)
+   temp_extension
+   = gen_rtx_fmt_ee (cand->code, cand->mode,orig_src,
+ XEXP (SET_SRC (cand_pat), 1));
+  else
+   temp_extension
+   = gen_rtx_fmt_e (cand->code, cand->mode,orig_src);
+
   rtx simplified_temp_extension = simplify_rtx (temp_extension);
+
   if (simplified_temp_extension)
 temp_extension = simplified_temp_extension;
+
   new_set = gen_rtx_SET (new_reg, temp_extension);
 }
 
@@ -1047,7 +1058,14 @@ combine_reaching_defs (ext_cand *cand, const_rtx 
set_pat, ext_state *state)
 cannot be merged, we entirely give up.  In the future, we should allow
 extensions to be partially eliminated along those paths where the
 definitions could be merged.  */
-  if (apply_change_group ())
+   int num_clobbers = 0;
+   int icode = recog (cand->insn, cand->insn,
+ (GET_CODE (cand->expr) == SET
+  && ! reload_completed
+  && ! reload_in_progress)
+  ? _clobbers : 0);
+
+  if (apply_change_group () || (icode < 0))
 {
   if (dump_file)
 fprintf (dump_file, "All merges were successful.\n");
diff --git a/gcc/testsuite/g++.target/powerpc/sext-elim.C 
b/gcc/testsuite/g++.target/powerpc/sext-elim.C
new file mode 100644
index 000..1180b9ce268
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/sext-elim.C
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2 -free" } */
+
+unsigned long c2l(unsigned char* p)
+{
+  unsigned long res = *p + *(p+1);
+  return res;
+}
+
+long c2sl(signed char* p)
+{
+  long res = *p + *(p+1);
+  return res;
+}
+
+/* { dg-final { scan-assembler-not "rldicl" } } */
+/* { dg-final { scan-assembler-not "extsw" } } */
-- 
2.31.1



[PATCH testsuite] rs6000: suboptimal code for returning bool value on target ppc.

2023-03-19 Thread Ajit Agarwal via Gcc-patches

Hello All:

This patch adds a new test to check unnecessary zero extension removal.
Regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: suboptimal code for returning bool value on target ppc.

Tests to check unnecessary redundant zero extension removal.

2023-03-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* testsuite/g++.target/powerpc/zext-elim.C: New test.
---
 gcc/testsuite/g++.target/powerpc/zext-elim.C | 30 
 1 file changed, 30 insertions(+)
 create mode 100644 gcc/testsuite/g++.target/powerpc/zext-elim.C

diff --git a/gcc/testsuite/g++.target/powerpc/zext-elim.C 
b/gcc/testsuite/g++.target/powerpc/zext-elim.C
new file mode 100644
index 000..56eabbe0c19
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/zext-elim.C
@@ -0,0 +1,30 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mcpu=power9 -O2 -free" } */
+
+#include 
+
+bool foo (int a, int b)
+{
+  if (a > 2)
+return false;
+
+  if (b < 10)
+return true;
+
+  return true;
+}
+
+int bar (int a, int b)
+{
+  if (a > 2)
+return 0;
+
+  if (b < 10)
+return 1;
+
+  return 0;
+}
+
+/* { dg-final { scan-assembler-not "rldicl" } } */
-- 
2.31.1



[PATCH v2] rs6000: suboptimal code for returning bool value on target ppc

2023-03-19 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch eliminates unnecessary zero extension with the ree pass.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: suboptimal code for returning bool value on 
target ppc.

Eliminate unnecessary redundant zero extension.

2023-03-19  Ajit Kumar Agarwal  

gcc/ChangeLog:

* ree.cc: Add support of AND opcode to eliminate unnecessary
zero extension.
---
 gcc/ree.cc | 22 +-
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..d09f55149b1 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -319,7 +319,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   rtx orig_src = SET_SRC (*orig_set);
   machine_mode orig_mode = GET_MODE (SET_DEST (*orig_set));
-  rtx new_set;
+  rtx new_set = NULL_RTX;
   rtx cand_pat = single_set (cand->insn);
 
   /* If the extension's source/destination registers are not the same
@@ -370,7 +370,7 @@ combine_set_extension (ext_cand *cand, rtx_insn *curr_insn, 
rtx *orig_set)
 {
   /* Only IF_THEN_ELSE of phi-type copies are combined.  Otherwise,
  in general, IF_THEN_ELSE should not be combined.  */
-  return false;
+  return true;
 }
   else
 {
@@ -713,12 +713,15 @@ merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, 
ext_state *state)
   if (sub_rtx == NULL)
 return false;
 
-  if (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
+  bool copy_needed
+= (REGNO (SET_DEST (cand->expr)) != REGNO (XEXP (SET_SRC (cand->expr), 
0)));
+
+  if (!copy_needed || (GET_MODE (SET_DEST (*sub_rtx)) == ext_src_mode
  || ((state->modified[INSN_UID (def_insn)].kind
-  == (cand->code == ZERO_EXTEND
+  == (cand->code == ZERO_EXTEND || cand->code == AND
   ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT))
  && state->modified[INSN_UID (def_insn)].mode
-== ext_src_mode))
+== ext_src_mode)))
 {
   if (GET_MODE_UNIT_SIZE (GET_MODE (SET_DEST (*sub_rtx)))
  >= GET_MODE_UNIT_SIZE (cand->mode))
@@ -744,7 +747,8 @@ merge_def_and_ext (ext_cand *cand, rtx_insn *def_insn, 
ext_state *state)
 static inline rtx
 get_extended_src_reg (rtx src)
 {
-  while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND)
+  while (GET_CODE (src) == SIGN_EXTEND || GET_CODE (src) == ZERO_EXTEND
+   || GET_CODE (src) == AND)
 src = XEXP (src, 0);
   gcc_assert (REG_P (src));
   return src;
@@ -993,7 +997,7 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
   machine_mode mode;
 
   if (state->modified[INSN_UID (cand->insn)].kind
- != (cand->code == ZERO_EXTEND
+ != (cand->code == ZERO_EXTEND || cand->code == AND
  ? EXT_MODIFIED_ZEXT : EXT_MODIFIED_SEXT)
  || state->modified[INSN_UID (cand->insn)].mode != cand->mode
  || (set == NULL_RTX))
@@ -1052,7 +1056,7 @@ combine_reaching_defs (ext_cand *cand, const_rtx set_pat, 
ext_state *state)
{
  ext_modified *modified = >modified[INSN_UID (def_insn)];
  if (modified->kind == EXT_MODIFIED_NONE)
-   modified->kind = (cand->code == ZERO_EXTEND ? EXT_MODIFIED_ZEXT
+   modified->kind = (cand->code == ZERO_EXTEND || cand->code == 
AND  ? EXT_MODIFIED_ZEXT
: 
EXT_MODIFIED_SEXT);
 
  if (copy_needed)
@@ -1106,7 +1110,7 @@ add_removable_extension (const_rtx expr, rtx_insn *insn,
   mode = GET_MODE (dest);
 
   if (REG_P (dest)
-  && (code == SIGN_EXTEND || code == ZERO_EXTEND)
+  && (code == SIGN_EXTEND || code == ZERO_EXTEND || code == AND)
   && REG_P (XEXP (src, 0)))
 {
   rtx reg = XEXP (src, 0);
-- 
2.31.1



Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-17 Thread Ajit Agarwal via Gcc-patches
Hello Jeff:

On 16/03/23 8:18 pm, Jeff Law wrote:
> 
> 
> On 3/16/23 04:11, Ajit Agarwal via Gcc-patches wrote:
>>
>> Hello Richard:
>>
>> On 16/03/23 3:22 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  wrote:
>>>>
>>>>
>>>>
>>>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  
>>>>> wrote:
>>>>>>
>>>>>> Hello Richard:
>>>>>>
>>>>>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>>>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>>>>>  wrote:
>>>>>>>>
>>>>>>>> Hello All:
>>>>>>>>
>>>>>>>>
>>>>>>>> This patch eliminates unnecessary zero extension instruction from 
>>>>>>>> power generated assembly.
>>>>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>>>>>
>>>>>>> What makes this so special that we cannot deal with it from generic 
>>>>>>> code?
>>>>>>> In particular we do have the REE pass, why is target specific
>>>>>>> knowledge neccessary
>>>>>>> to eliminate the extension?
>>>>>>>
>>>>>>
>>>>>> For returning bool values and comparision with integers generates the 
>>>>>> following by all the rtl passes.
>>>>>>
>>>>>> set compare (subreg)
>>>>>> set if_then_else
>>>>>> Convert SImode -> QImode
>>>>>> set zero_extend to SImode from QImode
>>>>>> set return value 0 in one path of cfg.
>>>>>> set return value 1 in other path of cfg.
>>>>>>
>>>>>> This pass replaces the above zero extension and conversion from QImode 
>>>>>> to DImode with copy operation to keep QImode in 64 bit registers in 
>>>>>> powerpc target.
>>>>>
>>>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>>>> cannot even try to see what the actual RTL
>>>>> looks like (without the pass).
>>>>>
>>>>
>>>> Here is the PR with bugzilla.
>>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>>>
>>>> I can add the attached testcase with this PR in the patch.
>>>
>>> I don't see any zero-extends there.
>>>
>>
>> Here is the testcase.
>>
>>
>> bool (int a, int b)
>> {
>>    if (a > 2)
>>    return false;
>>     if (b < 10)
>>     return true;
>>   return false;
>> }
>>
>> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
>>
>> Here is the rtl after cse.
>> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
>> (insn 15 12 16 3 (set (reg:CC 123)
>>  (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>>  (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
>>   (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>>  (nil)))
>> (insn 16 15 17 3 (set (reg:SI 124)
>>  (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
>>   (nil))
>> (insn 17 16 18 3 (set (reg:SI 122)
>>  (if_then_else:SI (gt (reg:CC 123)
>>  (const_int 0 [0]))
>>  (const_int 0 [0])
>>  (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
>>   (expr_list:REG_DEAD (reg:SI 124)
>>  (expr_list:REG_DEAD (reg:CC 123)
>>  (nil
>> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
>>  (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>>   (expr_list:REG_DEAD (reg:SI 122)
>>  (nil)))
>>    ; pc falls through to BB 5
>> (code_label 32 18 31 4 3 (nil) [1 uses])
>> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
>>  (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>>   (nil))
>> (code_label 19 5 20 5 2 (nil) [0 uses])
>> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
>> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
>>  (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 
>> {zero_extendqidi2}
>&

Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Ajit Agarwal via Gcc-patches



On 16/03/23 4:26 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 11:43 AM Ajit Agarwal  wrote:
>>
>>
>>
>> On 16/03/23 4:00 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 11:12 AM Ajit Agarwal  
>>> wrote:
>>>>
>>>>
>>>> Hello Richard:
>>>>
>>>> On 16/03/23 3:22 pm, Richard Biener wrote:
>>>>> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  
>>>>> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>>>>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  
>>>>>>> wrote:
>>>>>>>>
>>>>>>>> Hello Richard:
>>>>>>>>
>>>>>>>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>>>>>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>>>>>>>  wrote:
>>>>>>>>>>
>>>>>>>>>> Hello All:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> This patch eliminates unnecessary zero extension instruction from 
>>>>>>>>>> power generated assembly.
>>>>>>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>>>>>>>
>>>>>>>>> What makes this so special that we cannot deal with it from generic 
>>>>>>>>> code?
>>>>>>>>> In particular we do have the REE pass, why is target specific
>>>>>>>>> knowledge neccessary
>>>>>>>>> to eliminate the extension?
>>>>>>>>>
>>>>>>>>
>>>>>>>> For returning bool values and comparision with integers generates the 
>>>>>>>> following by all the rtl passes.
>>>>>>>>
>>>>>>>> set compare (subreg)
>>>>>>>> set if_then_else
>>>>>>>> Convert SImode -> QImode
>>>>>>>> set zero_extend to SImode from QImode
>>>>>>>> set return value 0 in one path of cfg.
>>>>>>>> set return value 1 in other path of cfg.
>>>>>>>>
>>>>>>>> This pass replaces the above zero extension and conversion from QImode 
>>>>>>>> to DImode with copy operation to keep QImode in 64 bit registers in 
>>>>>>>> powerpc target.
>>>>>>>
>>>>>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>>>>>> cannot even try to see what the actual RTL
>>>>>>> looks like (without the pass).
>>>>>>>
>>>>>>
>>>>>> Here is the PR with bugzilla.
>>>>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>>>>>
>>>>>> I can add the attached testcase with this PR in the patch.
>>>>>
>>>>> I don't see any zero-extends there.
>>>>>
>>>>
>>>> Here is the testcase.
>>>>
>>>>
>>>> bool (int a, int b)
>>>> {
>>>>   if (a > 2)
>>>>   return false;
>>>>if (b < 10)
>>>>return true;
>>>>  return false;
>>>> }
>>>>
>>>> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
>>>>
>>>> Here is the rtl after cse.
>>>> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
>>>> (insn 15 12 16 3 (set (reg:CC 123)
>>>> (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>>>> (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
>>>>  (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>>>> (nil)))
>>>> (insn 16 15 17 3 (set (reg:SI 124)
>>>> (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
>>>>  (nil))
>>>> (insn 17 16 18 3 (set (reg:SI 122)
>>>> (if_then_else:SI (gt (reg:CC 123)
>>>> (const_int 0 [0]))
>>>> (const_int 0 [0])
>>>> (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
>>>>  (expr_list:REG_DEAD (reg:SI 1

Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Ajit Agarwal via Gcc-patches



On 16/03/23 4:00 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 11:12 AM Ajit Agarwal  wrote:
>>
>>
>> Hello Richard:
>>
>> On 16/03/23 3:22 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  wrote:
>>>>
>>>>
>>>>
>>>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  
>>>>> wrote:
>>>>>>
>>>>>> Hello Richard:
>>>>>>
>>>>>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>>>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>>>>>  wrote:
>>>>>>>>
>>>>>>>> Hello All:
>>>>>>>>
>>>>>>>>
>>>>>>>> This patch eliminates unnecessary zero extension instruction from 
>>>>>>>> power generated assembly.
>>>>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>>>>>
>>>>>>> What makes this so special that we cannot deal with it from generic 
>>>>>>> code?
>>>>>>> In particular we do have the REE pass, why is target specific
>>>>>>> knowledge neccessary
>>>>>>> to eliminate the extension?
>>>>>>>
>>>>>>
>>>>>> For returning bool values and comparision with integers generates the 
>>>>>> following by all the rtl passes.
>>>>>>
>>>>>> set compare (subreg)
>>>>>> set if_then_else
>>>>>> Convert SImode -> QImode
>>>>>> set zero_extend to SImode from QImode
>>>>>> set return value 0 in one path of cfg.
>>>>>> set return value 1 in other path of cfg.
>>>>>>
>>>>>> This pass replaces the above zero extension and conversion from QImode 
>>>>>> to DImode with copy operation to keep QImode in 64 bit registers in 
>>>>>> powerpc target.
>>>>>
>>>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>>>> cannot even try to see what the actual RTL
>>>>> looks like (without the pass).
>>>>>
>>>>
>>>> Here is the PR with bugzilla.
>>>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>>>
>>>> I can add the attached testcase with this PR in the patch.
>>>
>>> I don't see any zero-extends there.
>>>
>>
>> Here is the testcase.
>>
>>
>> bool (int a, int b)
>> {
>>   if (a > 2)
>>   return false;
>>if (b < 10)
>>return true;
>>  return false;
>> }
>>
>> compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.
>>
>> Here is the rtl after cse.
>> (note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
>> (insn 15 12 16 3 (set (reg:CC 123)
>> (compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
>> (const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
>>  (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
>> (nil)))
>> (insn 16 15 17 3 (set (reg:SI 124)
>> (const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
>>  (nil))
>> (insn 17 16 18 3 (set (reg:SI 122)
>> (if_then_else:SI (gt (reg:CC 123)
>> (const_int 0 [0]))
>> (const_int 0 [0])
>> (reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
>>  (expr_list:REG_DEAD (reg:SI 124)
>> (expr_list:REG_DEAD (reg:CC 123)
>> (nil
>> (insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
>> (subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
>>  (expr_list:REG_DEAD (reg:SI 122)
>> (nil)))
>>   ; pc falls through to BB 5
>> (code_label 32 18 31 4 3 (nil) [1 uses])
>> (note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
>> (insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
>> (const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
>>  (nil))
>> (code_label 19 5 20 5 2 (nil) [0 uses])
>> (note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
>> (insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
>> (zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 
>> {zero_extendqidi2}
>>  (expr_list:REG_

Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Ajit Agarwal via Gcc-patches


Hello Richard:

On 16/03/23 3:22 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 9:19 AM Ajit Agarwal  wrote:
>>
>>
>>
>> On 16/03/23 1:44 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  wrote:
>>>>
>>>> Hello Richard:
>>>>
>>>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>>>  wrote:
>>>>>>
>>>>>> Hello All:
>>>>>>
>>>>>>
>>>>>> This patch eliminates unnecessary zero extension instruction from power 
>>>>>> generated assembly.
>>>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>>>
>>>>> What makes this so special that we cannot deal with it from generic code?
>>>>> In particular we do have the REE pass, why is target specific
>>>>> knowledge neccessary
>>>>> to eliminate the extension?
>>>>>
>>>>
>>>> For returning bool values and comparision with integers generates the 
>>>> following by all the rtl passes.
>>>>
>>>> set compare (subreg)
>>>> set if_then_else
>>>> Convert SImode -> QImode
>>>> set zero_extend to SImode from QImode
>>>> set return value 0 in one path of cfg.
>>>> set return value 1 in other path of cfg.
>>>>
>>>> This pass replaces the above zero extension and conversion from QImode to 
>>>> DImode with copy operation to keep QImode in 64 bit registers in powerpc 
>>>> target.
>>>
>>> Sorry, I can't parse that - as there's no testcase with the patch I
>>> cannot even try to see what the actual RTL
>>> looks like (without the pass).
>>>
>>
>> Here is the PR with bugzilla.
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784
>>
>> I can add the attached testcase with this PR in the patch.
> 
> I don't see any zero-extends there.
>

Here is the testcase.


bool (int a, int b)
{ 
  if (a > 2)
  return false;
   if (b < 10)
   return true;
 return false;
}

compiled with gcc -O3 -m64 testcase.cc -mcpu=power9 -save-temps.

Here is the rtl after cse.
(note 12 11 15 3 [bb 3] NOTE_INSN_BASIC_BLOCK)
(insn 15 12 16 3 (set (reg:CC 123)
(compare:CC (subreg/s/u:SI (reg/v:DI 120 [ b ]) 0)
(const_int 9 [0x9]))) "ext.cc":5:5 796 {*cmpsi_signed}
 (expr_list:REG_DEAD (reg/v:DI 120 [ b ])
(nil)))
(insn 16 15 17 3 (set (reg:SI 124)
(const_int 1 [0x1])) "ext.cc":5:5 555 {*movsi_internal1}
 (nil))
(insn 17 16 18 3 (set (reg:SI 122)
(if_then_else:SI (gt (reg:CC 123)
(const_int 0 [0]))
(const_int 0 [0])
(reg:SI 124))) "ext.cc":5:5 344 {isel_cc_si}
 (expr_list:REG_DEAD (reg:SI 124)
(expr_list:REG_DEAD (reg:CC 123)
(nil
(insn 18 17 32 3 (set (reg:QI 117 [ _1 ])
(subreg:QI (reg:SI 122) 0)) "ext.cc":5:5 562 {*movqi_internal}
 (expr_list:REG_DEAD (reg:SI 122)
(nil)))
  ; pc falls through to BB 5
(code_label 32 18 31 4 3 (nil) [1 uses])
(note 31 32 5 4 [bb 4] NOTE_INSN_BASIC_BLOCK)
(insn 5 31 19 4 (set (reg:QI 117 [ _1 ])
(const_int 0 [0])) "ext.cc":4:16 562 {*movqi_internal}
 (nil))
(code_label 19 5 20 5 2 (nil) [0 uses])
(note 20 19 21 5 [bb 5] NOTE_INSN_BASIC_BLOCK)
(insn 21 20 22 5 (set (reg:DI 126 [ _1 ])
(zero_extend:DI (reg:QI 117 [ _1 ]))) "ext.cc":8:1 5 {zero_extendqidi2}
 (expr_list:REG_DEAD (reg:QI 117 [ _1 ])
(nil)))
(insn 22 21 26 5 (set (reg:DI 118 [  ])
(reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
 (expr_list:REG_DEAD (reg:DI 126 [ _1 ])
(nil)))
(insn 26 22 27 5 (set (reg/i:DI 3 3)
(reg:DI 126 [ _1 ])) "ext.cc":8:1 681 {*movdi_internal64}
 (expr_list:REG_DEAD (reg:DI 118 [  ])
(nil)))
(insn 27 26 0 5 (use (reg/i:DI 3 3)) "ext.cc":8:1 -1
 (nil))


Thanks & Regards
Ajit
 
>> Thanks & Regards
>> Ajit
>>> Richard.
>>>
>>>> Thanks & Regards
>>>> Ajit
>>>>>> +  In cfgexpand pass QImode is generated with
>>>>>> +  bool register value and this pass uses QI
>>>>>> +  as 64 bit registers.
>>>>>> +
>>>>
>>>>>> rs6000: suboptimal code for returning bool value on target ppc.
>>>>>>
>>>>>> New pass to eliminate unnecessary zer

Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Ajit Agarwal via Gcc-patches



On 16/03/23 1:44 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 9:11 AM Ajit Agarwal  wrote:
>>
>> Hello Richard:
>>
>> On 16/03/23 1:10 pm, Richard Biener wrote:
>>> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>>>  wrote:
>>>>
>>>> Hello All:
>>>>
>>>>
>>>> This patch eliminates unnecessary zero extension instruction from power 
>>>> generated assembly.
>>>> Bootstrapped and regtested on powerpc64-linux-gnu.
>>>
>>> What makes this so special that we cannot deal with it from generic code?
>>> In particular we do have the REE pass, why is target specific
>>> knowledge necessary
>>> to eliminate the extension?
>>>
>>
>> For returning bool values and comparision with integers generates the 
>> following by all the rtl passes.
>>
>> set compare (subreg)
>> set if_then_else
>> Convert SImode -> QImode
>> set zero_extend to SImode from QImode
>> set return value 0 in one path of cfg.
>> set return value 1 in other path of cfg.
>>
>> This pass replaces the above zero extension and conversion from QImode to 
>> DImode with copy operation to keep QImode in 64 bit registers in powerpc 
>> target.
> 
> Sorry, I can't parse that - as there's no testcase with the patch I
> cannot even try to see what the actual RTL
> looks like (without the pass).
> 

Here is the PR with bugzilla. 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103784

I can add the attached testcase with this PR in the patch.

Thanks & Regards
Ajit 
> Richard.
> 
>> Thanks & Regards
>> Ajit
>>>> +  In cfgexpand pass QImode is generated with
>>>> +  bool register value and this pass uses QI
>>>> +  as 64 bit registers.
>>>> +
>>
>>>> rs6000: suboptimal code for returning bool value on target ppc.
>>>>
>>>> New pass to eliminate unnecessary zero extension. This pass
>>>> is registered after cse rtl pass.
>>>>
>>>> 2023-03-16  Ajit Kumar Agarwal  
>>>>
>>>> gcc/ChangeLog:
>>>>
>>>> * config/rs6000/rs6000-passes.def: Registered zero elimination
>>>> pass.
>>>> * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>>>> * config.gcc: Add new executable.
>>>> * config/rs6000/rs6000-protos.h: Add new prototype for zero
>>>> elimination pass.
>>>> * config/rs6000/rs6000.cc: Add new prototype for zero
>>>> elimination pass.
>>>> * config/rs6000/t-rs6000: Add new rule.
>>>> * expr.cc: Modified gcc assert.
>>>> * explow.cc: Modified gcc assert.
>>>> * optabs.cc: Modified gcc assert.
>>>> ---
>>>>  gcc/config.gcc|   4 +-
>>>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>>>>  gcc/config/rs6000/rs6000-protos.h |   1 +
>>>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++
>>>>  gcc/config/rs6000/rs6000.cc   |   2 +
>>>>  gcc/config/rs6000/t-rs6000|   5 +
>>>>  gcc/explow.cc |   3 +-
>>>>  gcc/expr.cc   |   4 +-
>>>>  gcc/optabs.cc |   3 +-
>>>>  9 files changed, 379 insertions(+), 6 deletions(-)
>>>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
>>>>
>>>> diff --git a/gcc/config.gcc b/gcc/config.gcc
>>>> index da3a6d3ba1f..e8ac9d882f0 100644
>>>> --- a/gcc/config.gcc
>>>> +++ b/gcc/config.gcc
>>>> @@ -503,7 +503,7 @@ or1k*-*-*)
>>>> ;;
>>>>  powerpc*-*-*)
>>>> cpu_type=rs6000
>>>> -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>>>> +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
>>>> rs6000-logue.o"
>>>> extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>>>> extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>>>> extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
>>>> @@ -538,7 +538,7 @@ riscv*)
>>>> ;;
>>>>  rs6000*-*-*)
>>>> extra_options="${extra_options} g.opt fused-madd.opt 
>>>> rs6000

Re: [PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-16 Thread Ajit Agarwal via Gcc-patches
Hello Richard:

On 16/03/23 1:10 pm, Richard Biener wrote:
> On Thu, Mar 16, 2023 at 6:21 AM Ajit Agarwal via Gcc-patches
>  wrote:
>>
>> Hello All:
>>
>>
>> This patch eliminates unnecessary zero extension instruction from power 
>> generated assembly.
>> Bootstrapped and regtested on powerpc64-linux-gnu.
> 
> What makes this so special that we cannot deal with it from generic code?
> In particular we do have the REE pass, why is target specific
> knowledge necessary
> to eliminate the extension?
>

For code that returns bool values and compares with integers, the RTL passes
generate the following:
 
set compare (subreg)
set if_then_else
Convert SImode -> QImode
set zero_extend to DImode from QImode
set return value 0 in one path of cfg.
set return value 1 in other path of cfg.

This pass replaces the above zero extension and conversion from QImode to
DImode with a copy operation, keeping the QImode value in 64-bit registers on
the powerpc target.

Thanks & Regards
Ajit
>> +  In cfgexpand pass QImode is generated with
>> +  bool register value and this pass uses QI
>> +  as 64 bit registers.
>> +

>> rs6000: suboptimal code for returning bool value on target ppc.
>>
>> New pass to eliminate unnecessary zero extension. This pass
>> is registered after cse rtl pass.
>>
>> 2023-03-16  Ajit Kumar Agarwal  
>>
>> gcc/ChangeLog:
>>
>> * config/rs6000/rs6000-passes.def: Registered zero elimination
>> pass.
>> * config/rs6000/rs6000-zext-elim.cc: Add new pass.
>> * config.gcc: Add new executable.
>> * config/rs6000/rs6000-protos.h: Add new prototype for zero
>> elimination pass.
>> * config/rs6000/rs6000.cc: Add new prototype for zero
>> elimination pass.
>> * config/rs6000/t-rs6000: Add new rule.
>> * expr.cc: Modified gcc assert.
>> * explow.cc: Modified gcc assert.
>> * optabs.cc: Modified gcc assert.
>> ---
>>  gcc/config.gcc|   4 +-
>>  gcc/config/rs6000/rs6000-passes.def   |   2 +
>>  gcc/config/rs6000/rs6000-protos.h |   1 +
>>  gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++
>>  gcc/config/rs6000/rs6000.cc   |   2 +
>>  gcc/config/rs6000/t-rs6000|   5 +
>>  gcc/explow.cc |   3 +-
>>  gcc/expr.cc   |   4 +-
>>  gcc/optabs.cc |   3 +-
>>  9 files changed, 379 insertions(+), 6 deletions(-)
>>  create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc
>>
>> diff --git a/gcc/config.gcc b/gcc/config.gcc
>> index da3a6d3ba1f..e8ac9d882f0 100644
>> --- a/gcc/config.gcc
>> +++ b/gcc/config.gcc
>> @@ -503,7 +503,7 @@ or1k*-*-*)
>> ;;
>>  powerpc*-*-*)
>> cpu_type=rs6000
>> -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>> +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
>> rs6000-logue.o"
>> extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>> extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
>> extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
>> @@ -538,7 +538,7 @@ riscv*)
>> ;;
>>  rs6000*-*-*)
>> extra_options="${extra_options} g.opt fused-madd.opt 
>> rs6000/rs6000-tables.opt"
>> -   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
>> +   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
>> rs6000-logue.o"
>> extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
>> target_gtfiles="$target_gtfiles 
>> \$(srcdir)/config/rs6000/rs6000-logue.cc 
>> \$(srcdir)/config/rs6000/rs6000-call.cc"
>> target_gtfiles="$target_gtfiles 
>> \$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
>> diff --git a/gcc/config/rs6000/rs6000-passes.def 
>> b/gcc/config/rs6000/rs6000-passes.def
>> index ca899d5f7af..d7500feddf1 100644
>> --- a/gcc/config/rs6000/rs6000-passes.def
>> +++ b/gcc/config/rs6000/rs6000-passes.def
>> @@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
>>   The power8 does not have instructions that automaticaly do the byte 
>> swaps
>>   for loads and stores.  */
>>INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
>> +  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
>> +
>>
>>  

[PATCH] rs6000: suboptimal code for returning bool value on target ppc

2023-03-15 Thread Ajit Agarwal via Gcc-patches
Hello All:


This patch eliminates unnecessary zero-extension instructions from the
assembly generated for Power.
Bootstrapped and regtested on powerpc64-linux-gnu.

Thanks & Regards
Ajit

rs6000: suboptimal code for returning bool value on target ppc.

New pass to eliminate unnecessary zero extension. This pass
is registered after cse rtl pass.

2023-03-16  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/rs6000-passes.def: Registered zero elimination
pass.
* config/rs6000/rs6000-zext-elim.cc: Add new pass.
* config.gcc: Add new executable.
* config/rs6000/rs6000-protos.h: Add new prototype for zero
elimination pass.
* config/rs6000/rs6000.cc: Add new prototype for zero
elimination pass.
* config/rs6000/t-rs6000: Add new rule.
* expr.cc: Modified gcc assert.
* explow.cc: Modified gcc assert.
* optabs.cc: Modified gcc assert.
---
 gcc/config.gcc|   4 +-
 gcc/config/rs6000/rs6000-passes.def   |   2 +
 gcc/config/rs6000/rs6000-protos.h |   1 +
 gcc/config/rs6000/rs6000-zext-elim.cc | 361 ++
 gcc/config/rs6000/rs6000.cc   |   2 +
 gcc/config/rs6000/t-rs6000|   5 +
 gcc/explow.cc |   3 +-
 gcc/expr.cc   |   4 +-
 gcc/optabs.cc |   3 +-
 9 files changed, 379 insertions(+), 6 deletions(-)
 create mode 100644 gcc/config/rs6000/rs6000-zext-elim.cc

diff --git a/gcc/config.gcc b/gcc/config.gcc
index da3a6d3ba1f..e8ac9d882f0 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -503,7 +503,7 @@ or1k*-*-*)
;;
 powerpc*-*-*)
cpu_type=rs6000
-   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
rs6000-logue.o"
extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
extra_objs="${extra_objs} rs6000-builtins.o rs6000-builtin.o"
extra_headers="ppc-asm.h altivec.h htmintrin.h htmxlintrin.h"
@@ -538,7 +538,7 @@ riscv*)
;;
 rs6000*-*-*)
extra_options="${extra_options} g.opt fused-madd.opt 
rs6000/rs6000-tables.opt"
-   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-logue.o"
+   extra_objs="rs6000-string.o rs6000-p8swap.o rs6000-zext-elim.o 
rs6000-logue.o"
extra_objs="${extra_objs} rs6000-call.o rs6000-pcrel-opt.o"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/rs6000/rs6000-logue.cc 
\$(srcdir)/config/rs6000/rs6000-call.cc"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/rs6000/rs6000-pcrel-opt.cc"
diff --git a/gcc/config/rs6000/rs6000-passes.def 
b/gcc/config/rs6000/rs6000-passes.def
index ca899d5f7af..d7500feddf1 100644
--- a/gcc/config/rs6000/rs6000-passes.def
+++ b/gcc/config/rs6000/rs6000-passes.def
@@ -28,6 +28,8 @@ along with GCC; see the file COPYING3.  If not see
  The power8 does not have instructions that automaticaly do the byte swaps
  for loads and stores.  */
   INSERT_PASS_BEFORE (pass_cse, 1, pass_analyze_swaps);
+  INSERT_PASS_AFTER (pass_cse, 1, pass_analyze_zext);
+
 
   /* Pass to do the PCREL_OPT optimization that combines the load of an
  external symbol's address along with a single load or store using that
diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index 1a4fc1df668..f6cf2d673d4 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -340,6 +340,7 @@ namespace gcc { class context; }
 class rtl_opt_pass;
 
 extern rtl_opt_pass *make_pass_analyze_swaps (gcc::context *);
+extern rtl_opt_pass *make_pass_analyze_zext (gcc::context *);
 extern rtl_opt_pass *make_pass_pcrel_opt (gcc::context *);
 extern bool rs6000_sum_of_two_registers_p (const_rtx expr);
 extern bool rs6000_quadword_masked_address_p (const_rtx exp);
diff --git a/gcc/config/rs6000/rs6000-zext-elim.cc 
b/gcc/config/rs6000/rs6000-zext-elim.cc
new file mode 100644
index 000..777c7a5a387
--- /dev/null
+++ b/gcc/config/rs6000/rs6000-zext-elim.cc
@@ -0,0 +1,361 @@
+/* Subroutine to eliminate redundant zero extend for power architecture.
+   Copyright (C) 1991-2023 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   .  */
+
+/* This pass remove unnecessary zero 

[PATCH] rs6000: Inline lrint and lrintf

2023-02-27 Thread Ajit Agarwal via Gcc-patches
Hello All:

Here is the patch to inline lrint and lrintf. Currently glibc doesn't
use __builtin_lrint, as it inlines lrint with the fctid/fctiw instructions.
With the changes below such inlines are not required and the lrint builtin
can be used.

Bootstrapped and regtested on powerpc64-linux-gnu.

rs6000: Inline lrint,lrintf

For hard-float powerpc, GCC should support inline code generation
for the lrint or lrintf built-in functions, subject only to
-fno-math-errno (the condition -fno-math-errno is already checked
in builtins.c:expand_builtin_int_roundingfn_2, so the back end's
lrint insn patterns do not need to check that condition).

TARGET_FPRND has nothing to do with fctid and fctiw.
Remove the TARGET_FPRND from lrintdi2 pattern.

2023-02-27  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/rs6000.md (lrintdi2): Remove TARGET_FPRND
condition from pattern.
---
 gcc/config/rs6000/rs6000.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 81bffb04ceb..65c851e11fb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -6654,7 +6654,7 @@ (define_insn "lrintdi2"
   [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
(unspec:DI [(match_operand:SFDF 1 "gpc_reg_operand" "")]
   UNSPEC_FCTID))]
-  "TARGET_HARD_FLOAT && TARGET_FPRND"
+  "TARGET_HARD_FLOAT"
   "fctid %0,%1"
   [(set_attr "type" "fp")])
 
-- 
2.31.1




[PATCH v2] rs6000: fmr gets used instead of faster xxlor [PR93571]

2023-02-25 Thread Ajit Agarwal via Gcc-patches
Hello All:

Here is the patch that uses xxlor instead of fmr where possible.
Performance results show that fmr is better on the power9 and
power10 architectures whereas xxlor is better on the power7 and
power8 architectures. fmr is the only option before p7.

Bootstrapped and regtested on powerpc64-linux-gnu

Thanks & Regards
Ajit

rs6000: Use xxlor instead of fmr where possible

Replaces fmr with xxlor instruction for power7 and power8
architectures whereas for power9 and power10 keep fmr
instruction.

Perf measurement results:

Power9 fmr:  201,847,661 cycles.
Power9 xxlor: 201,877,78 cycles.
Power8 fmr: 200,901,043 cycles.
Power8 xxlor: 201,020,518 cycles.
Power7 fmr: 201,059,524 cycles.
Power7 xxlor: 201,042,851 cycles.

2023-02-25  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/rs6000.md (*movdf_hardfloat64): Use xxlor for power7
and power8 and fmr for power9 and power10.
---
 gcc/config/rs6000/rs6000.md | 44 +++--
 1 file changed, 28 insertions(+), 16 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 81bffb04ceb..e101f7f5fc1 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -354,7 +354,7 @@ (define_attr "cpu"
   (const (symbol_ref "(enum attr_cpu) rs6000_tune")))
 
 ;; The ISA we implement.
-(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
+(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p7p8v,p9,p9v,p9kf,p9tf,p10"
   (const_string "any"))
 
 ;; Is this alternative enabled for the current CPU/ISA/etc.?
@@ -402,6 +402,11 @@ (define_attr "enabled" ""
  (and (eq_attr "isa" "p10")
  (match_test "TARGET_POWER10"))
  (const_int 1)
+  
+ (and (eq_attr "isa" "p7p8v")
+ (match_test "TARGET_VSX && !TARGET_P9_VECTOR"))
+ (const_int 1)
+
 ] (const_int 0)))
 
 ;; If this instruction is microcoded on the CELL processor
@@ -8436,27 +8441,29 @@ (define_insn "*mov_softfloat32"
 
 (define_insn "*mov_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
-   "=m,   d,  d,  ,   wY,
- ,Z,  ,  ,  !r,
+   "=m,   d,  ,  ,   wY,
+ ,Z,  wa, ,  !r,
  YZ,  r,  !r, *c*l,   !r,
-*h,   r,  ,   wa")
+*h,   r,  ,   d,  wn,
+wa")
(match_operand:FMOVE64 1 "input_operand"
-"d,   m,  d,  wY, ,
- Z,   ,   ,  ,  ,
+"d,   m,  ,  wY, ,
+ Z,   ,   wa, ,  ,
  r,   YZ, r,  r,  *h,
- 0,   ,   r,  eP"))]
+ 0,   ,   r,  d,  wn,
+ eP"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], mode)
|| gpc_reg_operand (operands[1], mode))"
   "@
stfd%U0%X0 %1,%0
lfd%U1%X1 %0,%1
-   fmr %0,%1
+   xxlor %x0,%x1,%x1
lxsd %0,%1
stxsd %1,%0
lxsdx %x0,%y1
stxsdx %x1,%y0
-   xxlor %x0,%x1,%x1
+   fmr %0,%1
xxlxor %x0,%x0,%x0
li %0,0
std%U0%X0 %1,%0
@@ -8467,23 +8474,28 @@ (define_insn "*mov_hardfloat64"
nop
mfvsrd %0,%x1
mtvsrd %x0,%1
+   fmr %0,%1
+   fmr %0,%1
#"
   [(set_attr "type"
-"fpstore, fpload, fpsimple,   fpload, fpstore,
+"fpstore, fpload, veclogical, fpload, fpstore,
  fpload,  fpstore,veclogical, veclogical, integer,
  store,   load,   *,  mtjmpr, mfjmpr,
- *,   mfvsr,  mtvsr,  vecperm")
+ *,   mfvsr,  mtvsr,  fpsimple,   fpsimple,
+ vecperm")
(set_attr "size" "64")
(set_attr "isa"
-"*,   *,  *,  p9v,p9v,
- p7v, p7v,*,  *,  *,
- *,   *,  *,  *,  *,
- *,   p8v,p8v,p10")
+"*,   *,  p7p8v,p9v,p9v,
+ p7v, p7v,*,   *,  *,
+ *,   *,  *,   *,  *,
+ *,   p8v,p8v, *,  *,
+ p10")
(set_attr "prefixed"
 "*,   *,  *,  *,  *,
  *,   *,  *,  *,  *,
  *,   *,  *,  *,  *,
- *,   *,  *,  *")])
+ *,   *,  *,  *,  *,
+ *")])
 
 ;;   STD  LD   MR  MT MF G-const
 ;;

Re: [PATCH] rs6000: fmr gets used instead of faster xxlor [PR93571]

2023-02-24 Thread Ajit Agarwal via Gcc-patches
Hello Segher:

On 24/02/23 8:41 pm, Segher Boessenkool wrote:
> Hi!
> 
> For future patches: please don't send patches as replies to existing
> threads.  Just start a new thread for a new patch (series).  You can
> mark it as [PATCH v2] in the subject, if you want.
> 
> On Fri, Feb 24, 2023 at 01:41:49PM +0530, Ajit Agarwal wrote:
>> Here is the patch that uses xxlor instead of fmr where possible.
>> Performance results shows that fmr is better in power9 and 
>> power10 architectures whereas xxlor is better in power7 and
>> power 8 architectures.
> 
> And fmr is the only option before p7.
> 
>>  rs6000: Use xxlor instead of fmr where possible
>>
>>  This patch replaces fmr with xxlor instruction for power7
>>  and power8 architectures whereas for power9 and power10
>>  replaces xxlor with fmr instruction.
> 
> Saying "this patch" in a commit message reads strangely.  Just "Replace
> fmr with" etc.?
> 

I will correct this.

> The second part is just wrong, you cannot replace xxlor by fmr in
> general.
> 
>>  Perf measurement results:
>>
>>  Power9 fmr:  201,847,661 cycles.
>>  Power9 xxlor: 201,877,78 cycles.
>>  Power8 fmr: 201,057,795 cycles.
>> Power8 xxlor: 201,004,671 cycles.
> 
> What is this measuring?  100M insns back-to-back, each dependent on the
> previous one?
> 
Yes.

> What are the results on p7 and p10?
> 
> These numbers show there is no difference on p8 either.  Did you paste
> the wrong numbers maybe?
>

I will measure it again and update with a new patch.
 
>>  * config/rs6000/rs6000.md (*movdf_hardfloat64): Use xxlor
>>  for power7 and power8 and fmr for power9 and power10.
> 
> Please don't break lines early.  Changelogs lines can be 80 columns
> wide, just like source code lines.
> 
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -354,7 +354,7 @@ (define_attr "cpu"
>>(const (symbol_ref "(enum attr_cpu) rs6000_tune")))
>>  
>>  ;; The ISA we implement.
>> -(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p10"
>> +(define_attr "isa" "any,p5,p6,p7,p7v,p8v,p9,p9v,p9kf,p9tf,p7p8,p10"
> 
> p78v, and sort it after p8v please.
> 
>> + (and (eq_attr "isa" "p7p8")
>> +  (match_test "TARGET_VSX && !TARGET_P9_VECTOR"))
>> + (const_int 1)
> 
> Okay.
> 
>>  (define_insn "*mov_hardfloat64"
>>[(set (match_operand:FMOVE64 0 "nonimmediate_operand"
>> -   "=m,   d,  d,  ,   wY,
>> - ,Z,  ,  ,  !r,
>> - YZ,  r,  !r, *c*l,   !r,
>> -*h,   r,  ,   wa")
>> +   "=m,   d,  ,  ,   wY,
>> +, Z,  wa, ,  !r,
>> +YZ,   r,  !r, *c*l,   !r,
>> +*h,   r,  ,   d,  wn,
>> +wa")
>>  (match_operand:FMOVE64 1 "input_operand"
> 
> (You posted this mail as wrapping.  That means the patch cannot be
> applied non-manually, and that replies to your mail will be mangled.
> Just get a Real mail client, and configure it correctly :-) )
>

I am using Thunderbird as mail client and the settings are all correct.
I have set the mailnews.wrapLength 0.

 
>> -"d,   m,  d,  wY, ,
>> - Z,   ,   ,  ,  ,
>> +"d,   m,  ,  wY, ,
>> + Z,   ,   wa, ,  ,
>>   r,   YZ, r,  r,  *h,
>> - 0,   ,   r,  eP"))]
>> + 0,   ,   r,  d,  wn,
>> + eP"))]
> 
> No.  It is impossible to figure out what you changed here by just
> reading it.
> 
> There is no requirement there should be exactly five alternatives per
> line, and/or that there should be the same number everywhere.
> 
> If the indentation was incorrect, and you want to fix that, do that in a
> separate *earlier* patch in the series, please.
> 

I will keep the indentation the same.
>>"TARGET_POWERPC64 && TARGET_HARD_FLOAT
>> && (gpc_reg_operand (operands[0], mode)
>> || gpc_reg_operand (operands[1], mode))"
>>"@
>> stfd%U0%X0 %1,%0
>> lfd%U1%X1 %0,%1
>> -   fmr %0,%1
>> +   xxlor %x0,%x1,%x1
>> lxsd %0,%1
>> stxsd %1,%0
>> lxsdx %x0,%y1
>> stxsdx %x1,%y0
>> -   xxlor %x0,%x1,%x1
>> +   fmr %0,%1
>> xxlxor %x0,%x0,%x0
>> li %0,0
>> std%U0%X0 %1,%0
>> @@ -8467,23 +8474,28 @@ (define_insn "*mov_hardfloat64"
>> nop
>> mfvsrd %0,%x1
>> mtvsrd %x0,%1
>> +   fmr %0,%1
>> +   fmr %0,%1
>> #"
>>[(set_attr "type"
>> -"fpstore, fpload, fpsimple,   fpload, fpstore,
>> +"fpstore, fpload, veclogical, fpload, fpstore,
>>   fpload,  fpstore,veclogical, veclogical, integer,
>>   store,   load,   *,  mtjmpr, mfjmpr,
>> -   

Re: [PATCH] rs6000: fmr gets used instead of faster xxlor [PR93571]

2023-02-24 Thread Ajit Agarwal via Gcc-patches
   *,  *,
  *,   *,      *,      *,  *,
  *,   *,  *,  *,  *,
- *,   *,  *,  *")])
+ *,   *,  *,  *,  *,
+ *")])
 
 ;;   STD  LD   MR  MT MF G-const
 ;;   H-const  F-const  Special
-- 
2.31.1



On 22/02/23 3:58 pm, Ajit Agarwal via Gcc-patches wrote:
> 
> 
> On 21/02/23 7:39 pm, Segher Boessenkool wrote:
>> On Tue, Feb 21, 2023 at 06:00:52PM +0530, Ajit Agarwal wrote:
>>> On 21/02/23 4:34 pm, Segher Boessenkool wrote:
>>>> Please don't use a switch, it isn't needed.  Instead use the "isa"
>>>> attribute (with p7v here), and put the preferred alternative first.
>>>
>>> I am not sure how this is possible without switch and using only "isa".
>>
>> You have the "p7v" "xxlor" alternative earlier than the "*" "fmr"
>> alternative.  You can have an "xxlor" for constraints "d", but probably
>> the best (and certainly the easiest) is to just move the existing
>> xxlor to before fmr.
>>
>> Oh, the existing xxlor alternative is implicitly isa p7v, the "wa"
>> constraint causes that.  It may be nicer to mark it explicitly p7v as
>> well, nicer for the reader.
>>
> 
> If I do the above, for power9 it selects xxlor instead of fmr.
> 
>> Btw, please update the other similar patterns at the same time?  There
>> are eight patterns with fmr in rs6000.md (the four in dfp.md should
>> probably not be touched); not all are similar so should be in separate
>> patches, if changed at all, but a bunch are completely analogous so
>> should not diverge.
>>
>> (It is fine to first do this one pattern only, until we have worked out
>> all kinks, but all should be committed at the same time).
>>
>> Thanks,
>>
>>
>> Segher


Re: [PATCH] rs6000: fmr gets used instead of faster xxlor [PR93571]

2023-02-22 Thread Ajit Agarwal via Gcc-patches



On 21/02/23 7:39 pm, Segher Boessenkool wrote:
> On Tue, Feb 21, 2023 at 06:00:52PM +0530, Ajit Agarwal wrote:
>> On 21/02/23 4:34 pm, Segher Boessenkool wrote:
>>> Please don't use a switch, it isn't needed.  Instead use the "isa"
>>> attribute (with p7v here), and put the preferred alternative first.
>>
>> I am not sure how this is possible without switch and using only "isa".
> 
> You have the "p7v" "xxlor" alternative earlier than the "*" "fmr"
> alternative.  You can have an "xxlor" for constraints "d", but probably
> the best (and certainly the easiest) is to just move the existing
> xxlor to before fmr.
> 
> Oh, the existing xxlor alternative is implicitly isa p7v, the "wa"
> constraint causes that.  It may be nicer to mark it explicitly p7v as
> well, nicer for the reader.
> 

If I do the above, for power9 it selects xxlor instead of fmr.

> Btw, please update the other similar patterns at the same time?  There
> are eight patterns with fmr in rs6000.md (the four in dfp.md should
> probably not be touched); not all are similar so should be in separate
> patches, if changed at all, but a bunch are completely analogous so
> should not diverge.
> 
> (It is fine to first do this one pattern only, until we have worked out
> all kinks, but all should be committed at the same time).
> 
> Thanks,
> 
> 
> Segher


Re: [PATCH] rs6000: fmr gets used instead of faster xxlor [PR93571]

2023-02-21 Thread Ajit Agarwal via Gcc-patches
 Hello Segher:

On 21/02/23 4:34 pm, Segher Boessenkool wrote:
> Hi!
> 
> On Tue, Feb 21, 2023 at 02:18:25PM +0530, Ajit Agarwal wrote:
>> This patch replaces fmr instruction 6 cycles with 2 cycles xxlor instruction
>> for p7 and p8 architecture.
>>
>> I have implemented with switch and cases otherwise it is difficult to 
>> accommodate
>> xxlor with p7 and p8 and fmr for other architectures.
> 
> Please don't use a switch, it isn't needed.  Instead use the "isa"
> attribute (with p7v here), and put the preferred alternative first.

I am not sure how this is possible without switch and using only "isa".
> 
>>  rs6000: fmr gets used instead of faster xxlor [PR93571]
> 
> rs6000: Use xxlor instead of fmr where possible
> 
>>  This patch replaces 6 cycles fmr instruction with xxlor
>>  2 cycles in p8 and p7 architecture.
> 
> No, it also does it on all later architectures.
> 
> Do you have any actual timings (i.e. from hardware, not documentation)?
> 
>>  * config/rs6000/rs6000.md (*movdf_hardfloat64): Replace fmr with xxlor 
>> instruction.
> 
> Line too long.  And, that is not what the patch does.  Changelog should
> be totally boring just saying what the patch changes.  If the patch
> changes things other than what the changelog says your reviewer will
> think something went missing somewhere :-)

I will correct this.
> 
>> -  "@
>> -   stfd%U0%X0 %1,%0
>> -   lfd%U1%X1 %0,%1
>> -   xxlor %0,%1,%1
> 
> That is not what is currently in trunk, so your patch cannot apply.
> 
>> +  switch (which_alternative) {
>> +case 0 :  return "stfd%U0%X0 %1,%0";
>> +case 1 :  return "lfd%U1%X1 %0,%1";
> 
> Formatting is all incorrect.  We don't need or want a switch at all, but
> correct would be:
>   switch (which_alternative)
> {
>   case 0:
>   return "stfd%U0%X0 %1,%0";
>   case 1:
>   return "lfd%U1%X1 %0,%1";
> 
> etc.

I will correct that.
> 
>> +case 2 : if ((TARGET_VSX || TARGET_P8_VECTOR)
>> +  && !TARGET_P9_VECTOR
>> +  && !TARGET_POWER10)
>> +   return "xxlor %0,%1,%1";
>> +  else
>> +return "fmr %0,%1";
> 
> Ah, so you are excluding p9 and p10 here.  Hrm.  That should be written
> TARGET_VSX && !TARGET_P9_VECTOR, none of the rest is needed; but is that
> a good idea at all?
> 
> Please use %xN for VSX arguments whenever possible.  If this alternative
> allows only the low numbered vector registers, that is a hint that you
> probably should write this differently (and %xN is harmless then).
> 
>> +   return "unreachable";
> 
> No, never do that.  There is "gcc_unreachable ()" if you need it.
> 

I will also correct this.

> So, let's first do actual timings, and see if it is better on p9 and
> p10 as well (or at least not worse).
> 
> 
> Segher

Thanks & Regards
Ajit


Re: [PATCH] rs6000: fmr gets used instead of faster xxlor [PR93571]

2023-02-21 Thread Ajit Agarwal via Gcc-patches
Hello All:

This patch replaces the 6-cycle fmr instruction with the 2-cycle xxlor
instruction on the p7 and p8 architectures.

I have implemented this with a switch statement, since otherwise it is
difficult to accommodate xxlor for p7 and p8 and fmr for the other
architectures.

Bootstrapped and regtested.

Thanks & Regards
Ajit


rs6000: fmr gets used instead of faster xxlor [PR93571]

This patch replaces the 6-cycle fmr instruction with the 2-cycle
xxlor instruction on the p8 and p7 architectures.

2023-02-21  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/rs6000.md (*movdf_hardfloat64): Replace fmr with xxlor 
instruction.
---
 gcc/config/rs6000/rs6000.md | 49 ++---
 1 file changed, 29 insertions(+), 20 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index dfd6c73ffcb..ef587033367 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8433,26 +8433,35 @@ (define_insn "*mov<mode>_hardfloat64"
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
-  "@
-   stfd%U0%X0 %1,%0
-   lfd%U1%X1 %0,%1
-   xxlor %0,%1,%1
-   lxsd %0,%1
-   stxsd %1,%0
-   lxsdx %x0,%y1
-   stxsdx %x1,%y0
-   xxlor %x0,%x1,%x1
-   xxlxor %x0,%x0,%x0
-   li %0,0
-   std%U0%X0 %1,%0
-   ld%U1%X1 %0,%1
-   mr %0,%1
-   mt%0 %1
-   mf%1 %0
-   nop
-   mfvsrd %0,%x1
-   mtvsrd %x0,%1
-   #"
+{
+  switch (which_alternative) {
+case 0 :  return "stfd%U0%X0 %1,%0";
+case 1 :  return "lfd%U1%X1 %0,%1";
+case 2 : if ((TARGET_VSX || TARGET_P8_VECTOR)
+  && !TARGET_P9_VECTOR
+  && !TARGET_POWER10)
+   return "xxlor %0,%1,%1";
+  else
+return "fmr %0,%1";
+
+ case 3 : return "lxsd %0,%1";
+ case 4 : return "stxsd %1,%0";
+ case 5 : return "lxsdx %x0,%y1";
+ case 6 : return "stxsdx %x1,%y0";
+ case 7 : return "xxlor %x0,%x1,%x1";
+ case 8 : return "xxlxor %x0,%x0,%x0";
+ case 9 : return "li %0,0";
+ case 10 : return "std%U0%X0 %1,%0";
+ case 11 : return "ld%U1%X1 %0,%1";
+ case 12 : return "mr %0,%1";
+ case 13 : return "mt%0 %1";
+ case 14 : return "mf%1 %0";
+ case 15 : return "nop";
+ case 16: return "mfvsrd %0,%x1";
+ case 17 : return "mtvsrd %x0,%1";
+   }
+   return "unreachable";
+}
   [(set_attr "type"
 "fpstore, fpload, fpsimple,   fpload, fpstore,
  fpload,  fpstore,veclogical, veclogical, integer,
-- 
2.31.1


On 17/02/23 10:53 pm, Segher Boessenkool wrote:
> Hi!
> 
> On Fri, Feb 17, 2023 at 10:28:41PM +0530, Ajit Agarwal wrote:
>> This patch replaces fmr instruction (6 cycles) with xxlor instruction
>> (2 cycles).
>> Bootstrapped and regtested on powerpc64-linux-gnu.
> 
> You tested this on a CPU that does have VSX.  It is incorrect on other
> (older) CPUs.
> 
>> --- a/gcc/config/rs6000/rs6000.md
>> +++ b/gcc/config/rs6000/rs6000.md
>> @@ -8436,7 +8436,7 @@
>>"@
>> stfd%U0%X0 %1,%0
>> lfd%U1%X1 %0,%1
>> -   fmr %0,%1
>> +   xxlor %0,%1,%1
>> lxsd %0,%1
>> stxsd %1,%0
>> lxsdx %x0,%y1
> 
> This is the *mov<mode>_hardfloat64 pattern.  You can add some magic to
> your Git config so that will show in the patch: in .git/config:
> 
> [diff "md"]
> xfuncname = "^\\(define.*$"
> 
> (As it says in .gitattributes:
>   # Make diff on MD files use "(define" as a function marker.
>   # Use together with git config diff.md.xfuncname '^\(define.*$'
>   # which is run by contrib/gcc-git-customization.sh too.
>   *.md diff=md
> )
> 
> The third alternative to this insn, the fmr one, has "d" as both input
> and output constraint, and has "*" as isa attribute, so it will be used
> on any CPU that has floating point registers.  The eighth alternative
> (the existing xxlor one) has "wa" constraints (via <f64_vsx>) so it
> implicitly requires VSX to be enabled.  You need to do something similar
> for what you want, but you also need to still allow fmr.
> 
> 
> Segher


[PATCH] rs6000: fmr gets used instead of faster xxlor [PR93571]

2023-02-17 Thread Ajit Agarwal via Gcc-patches

Hello All:

This patch replaces the fmr instruction (6 cycles) with the xxlor instruction
(2 cycles).
Bootstrapped and regtested on powerpc64-linux-gnu.

The copyright assignment form is still in the process of being sent.
 
Thanks & Regards
Ajit

rs6000: fmr gets used instead of faster xxlor [PR93571]

This patch replaces the 6-cycle fmr instruction with the 2-cycle xxlor
instruction.

2023-02-17  Ajit Kumar Agarwal  

gcc/ChangeLog:

* config/rs6000/rs6000.md (*movdf_hardfloat64): Replace fmr with xxlor 
instruction.
---
 gcc/config/rs6000/rs6000.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 4a7812fa592..dfd6c73ffcb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -8436,7 +8436,7 @@
   "@
stfd%U0%X0 %1,%0
lfd%U1%X1 %0,%1
-   fmr %0,%1
+   xxlor %0,%1,%1
lxsd %0,%1
stxsd %1,%0
lxsdx %x0,%y1
-- 
2.31.1