[PATCH] Add support for vector conditional not

2023-08-12 Thread Andrew Pinski via Gcc-patches
Like the support for conditional neg (r12-4470-g20dcda98ed376cb61c74b2c71),
this just adds conditional not too.
Also we should be able to turn `(a ? -1 : 0) ^ b` into a conditional
not.

OK? Bootstrapped and tested on x86_64-linux-gnu and aarch64-linux-gnu.

gcc/ChangeLog:

* internal-fn.def (COND_NOT): New internal function.
* match.pd (UNCOND_UNARY, COND_UNARY): Add bit_not/not
to the lists.
(`vec (a ? -1 : 0) ^ b`): New pattern to convert
into conditional not.
* optabs.def (cond_one_cmpl): New optab.
(cond_len_one_cmpl): Likewise.

gcc/testsuite/ChangeLog:

PR target/110986
* gcc.target/aarch64/sve/cond_unary_9.c: New test.
---
 gcc/internal-fn.def   |  2 ++
 gcc/match.pd  | 15 --
 gcc/optabs.def|  2 ++
 .../gcc.target/aarch64/sve/cond_unary_9.c | 20 +++
 4 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c

diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index b3c410f4b6a..3e8693dfddb 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -69,6 +69,7 @@ along with GCC; see the file COPYING3.  If not see
  lround2.
 
- cond_binary: a conditional binary optab, such as cond_add
+   - cond_unary: a conditional unary optab, such as cond_neg
- cond_ternary: a conditional ternary optab, such as cond_fma_rev
 
- fold_left: for scalar = FN (scalar, vector), keyed off the vector mode
@@ -276,6 +277,7 @@ DEF_INTERNAL_COND_FN (FNMA, ECF_CONST, fnma, ternary)
 DEF_INTERNAL_COND_FN (FNMS, ECF_CONST, fnms, ternary)
 
 DEF_INTERNAL_COND_FN (NEG, ECF_CONST, neg, unary)
+DEF_INTERNAL_COND_FN (NOT, ECF_CONST, one_cmpl, unary)
 
 DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
 
diff --git a/gcc/match.pd b/gcc/match.pd
index 6791060891d..2ee6d24ccee 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -84,9 +84,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* Unary operations and their associated IFN_COND_* function.  */
 (define_operator_list UNCOND_UNARY
-  negate)
+  negate bit_not)
 (define_operator_list COND_UNARY
-  IFN_COND_NEG)
+  IFN_COND_NEG IFN_COND_NOT)
 
 /* Binary operations and their associated IFN_COND_* function.  */
 (define_operator_list UNCOND_BINARY
@@ -8482,6 +8482,17 @@ and,
 && is_truth_type_for (op_type, TREE_TYPE (@0)))
  (cond_op (bit_not @0) @2 @1)
 
+/* `(a ? -1 : 0) ^ b` can be converted into a conditional not.  */
+(simplify
+ (bit_xor:c (vec_cond @0 uniform_integer_cst_p@1 uniform_integer_cst_p@2) @3)
+ (if (canonicalize_math_after_vectorization_p ()
+  && vectorized_internal_fn_supported_p (IFN_COND_NOT, type)
+  && is_truth_type_for (type, TREE_TYPE (@0)))
+ (if (integer_all_onesp (@1) && integer_zerop (@2))
+  (IFN_COND_NOT @0 @3 @3))
+  (if (integer_all_onesp (@2) && integer_zerop (@1))
+   (vec_cond (bit_not @0) @3 @3
+
 /* Simplify:
 
  a = a1 op a2
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 1ea1947b3b5..a58819bc665 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -254,6 +254,7 @@ OPTAB_D (cond_fms_optab, "cond_fms$a")
 OPTAB_D (cond_fnma_optab, "cond_fnma$a")
 OPTAB_D (cond_fnms_optab, "cond_fnms$a")
 OPTAB_D (cond_neg_optab, "cond_neg$a")
+OPTAB_D (cond_one_cmpl_optab, "cond_one_cmpl$a")
 OPTAB_D (cond_len_add_optab, "cond_len_add$a")
 OPTAB_D (cond_len_sub_optab, "cond_len_sub$a")
 OPTAB_D (cond_len_smul_optab, "cond_len_mul$a")
@@ -278,6 +279,7 @@ OPTAB_D (cond_len_fms_optab, "cond_len_fms$a")
 OPTAB_D (cond_len_fnma_optab, "cond_len_fnma$a")
 OPTAB_D (cond_len_fnms_optab, "cond_len_fnms$a")
 OPTAB_D (cond_len_neg_optab, "cond_len_neg$a")
+OPTAB_D (cond_len_one_cmpl_optab, "cond_len_one_cmpl$a")
 OPTAB_D (cmov_optab, "cmov$a6")
 OPTAB_D (cstore_optab, "cstore$a4")
 OPTAB_D (ctrap_optab, "ctrap$a4")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c 
b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c
new file mode 100644
index 000..d6bc0409630
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_9.c
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -moverride=sve_width=256 
-fdump-tree-optimized" } */
+
+/* This is a reduced version of cond_unary_5.c */
+
+void __attribute__ ((noipa))
+f (short *__restrict r,
+   short *__restrict a,
+   short *__restrict pred)
+{
+  for (int i = 0; i < 1024; ++i)
+r[i] = pred[i] != 0 ? ~(a[i]) : a[i];
+}
+
+/* { dg-final { scan-assembler-times {\tnot\tz[0-9]+\.h, p[0-7]/m,} 1 } } */
+
+/* { dg-final { scan-assembler-not {\teor\tz} } } */
+/* { dg-final { scan-assembler-not {\tmov\tz[0-9]+\.h, p[0-7]/m, #-1} } } */
+
+/* { dg-final { scan-tree-dump-times ".COND_NOT " 1 "optimized" } } */
-- 
2.31.1



[PATCH v1] c++: follow DR 2386 and update implementation of get_tuple_size [PR110216]

2023-08-12 Thread gnaggnoyil via Gcc-patches
DR 2386 updated the tuple_size requirements for structured binding and
it now requires tuple_size to be considered only if
std::tuple_size names a complete class type with member value. GCC
before this patch does not follow the updated requirements, and this
patch is intended to implement them.

DR 2386
PR c++/110216

gcc/cp/ChangeLog:

* decl.cc (get_tuple_size): Update implementation to follow DR 2386.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/decomp10.C: Update expected error message for DR 2386.
* g++.dg/cpp1z/pr110216.C: New test.

Signed-off-by: gnaggnoyil 
---

Hi,

As https://gcc.gnu.org/pipermail/gcc-patches/2023-August/626696.html mentioned,
I've updated the corresponding tests. I ran all tests in g++.dg and no
unexpected fail exists now.

 gcc/cp/decl.cc|  6 +-
 gcc/testsuite/g++.dg/cpp1z/decomp10.C |  2 +-
 gcc/testsuite/g++.dg/cpp1z/pr110216.C | 21 +
 3 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1z/pr110216.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 792ab330dd0..923b81a33b0 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -8940,10 +8940,14 @@ get_tuple_size (tree type)
 /*context*/std_node,
 /*entering_scope*/false, tf_none);
   inst = complete_type (inst);
-  if (inst == error_mark_node || !COMPLETE_TYPE_P (inst))
+  if (inst == error_mark_node
+  || !COMPLETE_TYPE_P (inst)
+  || !CLASS_TYPE_P (type))
 return NULL_TREE;
   tree val = lookup_qualified_name (inst, value_identifier,
LOOK_want::NORMAL, /*complain*/false);
+  if (val == error_mark_node)
+return NULL_TREE;
   if (VAR_P (val) || TREE_CODE (val) == CONST_DECL)
 val = maybe_constant_value (val);
   if (TREE_CODE (val) == INTEGER_CST)
diff --git a/gcc/testsuite/g++.dg/cpp1z/decomp10.C 
b/gcc/testsuite/g++.dg/cpp1z/decomp10.C
index f0723f8d85f..af83a79e781 100644
--- a/gcc/testsuite/g++.dg/cpp1z/decomp10.C
+++ b/gcc/testsuite/g++.dg/cpp1z/decomp10.C
@@ -7,7 +7,7 @@ namespace std {
 
 struct A1 { int i,j; } a1;
 template<> struct std::tuple_size {  };
-void f1() { auto [ x ] = a1; } // { dg-error "is not an integral constant 
expression" }
+void f1() { auto [ x ] = a1; } // { dg-error "only 1 name provided" }
 
 struct A2 { int i,j; } a2;
 template<> struct std::tuple_size { enum { value = 5 }; };
diff --git a/gcc/testsuite/g++.dg/cpp1z/pr110216.C 
b/gcc/testsuite/g++.dg/cpp1z/pr110216.C
new file mode 100644
index 000..be4fd5f7053
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/pr110216.C
@@ -0,0 +1,21 @@
+// DR 2386
+// PR c++/110216
+// { dg-do compile { target c++17 } }
+
+
+namespace std{
+  template  struct tuple_size;
+}
+
+struct A {
+  int x = 0;
+};
+
+template <> struct std::tuple_size <::A> {};
+
+auto [x] = A{};
+
+int
+main ()
+{
+}
-- 
2.41.0



[PATCH] RISC-V: Fix autovec_length_operand predicate[PR110989]

2023-08-12 Thread Juzhe-Zhong
Currently, an incorrect configuration of the autovec_length_operand predicate
is exposed by PR110989 in the following situation:

vect__6.24_107 = .MASK_LEN_LOAD (vectp.22_105, 32B, mask__49.21_99, 
POLY_INT_CST [2, 2], 0); ---> dummy length = VF.

The current autovec_length_operand predicate fails to recognize the VF dummy length.

-march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=scalable -Ofast 
-fno-schedule-insns -fno-schedule-insns2:

Before this patch:

srlia4,s0,2
addia4,a4,-3
srlis0,s0,3
vsetvli a5,zero,e64,m1,ta,ma
vid.v   v1
vmul.vx v1,v1,a4
addia4,s0,-2
vadd.vx v1,v1,a4
addia4,s0,-1
vslide1up.vxv2,v1,a4
vmv.v.x v1,a4
vand.vv v1,v2,v1
vl1re64.v   v3,0(t2)
vrgather.vv v2,v3,v1
vmv.v.i v1,0
vmfeq.vvv0,v2,v1
vsetvli zero,s0,e32,mf2,ta,ma---> s0 = POLY (2,2)
vle32.v v3,0(t3),v0.t
vsetvli a5,zero,e64,m1,ta,ma
vmfne.vvv0,v2,v1
vsetvli zero,zero,e32,mf2,ta,ma
vfwcvt.f.x.vv1,v3
vsetvli zero,zero,e64,m1,ta,ma
vmerge.vvm  v1,v1,v2,v0
vslidedown.vx   v1,v1,a4
vfmv.f.sfa5,v1
j   .L6

After this patch:

srlia4,s0,2
addia4,a4,-3
srlis0,s0,3
vsetvli a5,zero,e64,m1,ta,ma
vid.v   v1
vmul.vx v1,v1,a4
addia4,s0,-2
vadd.vx v1,v1,a4
addis0,s0,-1
vslide1up.vxv2,v1,s0
vmv.v.x v1,s0
vand.vv v1,v2,v1
vl1re64.v   v3,0(t2)
vrgather.vv v2,v3,v1
vmv.v.i v1,0
vmfeq.vvv0,v2,v1
vle32.v v3,0(t3),v0.t
vmfne.vvv0,v2,v1
vsetvli zero,zero,e32,mf2,ta,ma
vfwcvt.f.x.vv1,v3
vsetvli zero,zero,e64,m1,ta,ma
vmerge.vvm  v1,v1,v2,v0
vslidedown.vx   v1,v1,s0
vfmv.f.sfa5,v1
j   .L6

Two vsetvli insns are eliminated.

gcc/ChangeLog:

* config/riscv/predicates.md: Fix predicate.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/pr110989.c: Add vsetvli assembly check.

---
 gcc/config/riscv/predicates.md| 5 +
 gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110989.c | 7 ++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 9db28c2def7..b6ebdcf55de 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -282,10 +282,7 @@
 
 (define_special_predicate "autovec_length_operand"
   (ior (match_operand 0 "pmode_register_operand")
-   (ior (match_operand 0 "const_csr_operand")
-(match_test "rtx_equal_p (op, gen_int_mode
- (GET_MODE_NUNITS (GET_MODE (op)),
-   Pmode))"
+   (match_code "const_int,const_poly_int")))
 
 (define_predicate "reg_or_mem_operand"
   (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110989.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110989.c
index cf3b247e604..6e163a55c56 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110989.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/pr110989.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-march=rv64gcv -mabi=lp64d 
--param=riscv-autovec-preference=scalable -Ofast" } */
+/* { dg-options "-march=rv64gcv -mabi=lp64d 
--param=riscv-autovec-preference=scalable -Ofast -fno-schedule-insns 
-fno-schedule-insns2" } */
 
 int a, b, c;
 double *d;
@@ -9,3 +9,8 @@ void e() {
 f = *d ?: *(&a + c);
   b = f;
 }
+
+/* { dg-final { scan-assembler-times {vsetvli} 3 } }  */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+[a-x0-9]+,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*zero,\s*e64,\s*m1,\s*t[au],\s*m[au]} 1 } } */
+/* { dg-final { scan-assembler-times 
{vsetvli\s+zero,\s*zero,\s*e32,\s*mf2,\s*t[au],\s*m[au]} 1 } } */
-- 
2.36.1



[PATCH] RISC-V: Add the missed half floating-point mode patterns of local_pic_load/store when only use zfhmin

2023-08-12 Thread Lehua Ding
Hi,

There is a newly failing RISC-V testcase
(testsuite/gcc.target/riscv/rvv/autovec/vls/const-4.c)
on the current trunk branch when medany is used as the default cmodel.
The reason is that the load of the half floating-point immediate changes from
RTL 1 to RTL 2 when the cmodel is changed from medlow to medany. This change
lets insn 7 be combined with the @pred_broadcast pattern (insn 8) in the combine
pass. Insn 6 and insn 7 are combined for SF and DF modes, but not for HF mode,
and that failed combination leads to insn 7 and insn 8 being combined instead.
The combination fails because the local_pic_loadhf pattern doesn't exist when
only zfhmin (implied by zvfh) is enabled.

Therefore, when only zfhmin but not zfh is enabled, the define_insn of
*local_pic_load must also be able to produce the
*local_pic_loadhf pattern, since the zfhmin extension also includes the
half floating-point load/store instructions. So, I added an ANYLSF iterator
and applied it to the local_pic_load/store define_insns. I have checked other ANYF
usage scenarios and feel that this is the only place that needs to be corrected.
I may have missed something; please correct me if so. Thanks.

RTL 1:

(insn 6 3 7 2 (set (reg:DI 137)
(high:DI (symbol_ref/u:DI ("*.LC0") [flags 0x82]))) 
"/work/home/lding/open-source/riscv-gnu-toolchain-push/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/const-4.c":7:1
 discrim 3 179 {*movdi_64bit}
 (nil))
(insn 7 6 8 2 (set (reg:HF 136)
(mem/u/c:HF (lo_sum:DI (reg:DI 137)
(symbol_ref/u:DI ("*.LC0") [flags 0x82])) [0  S2 A16])) 
"/work/home/lding/open-source/riscv-gnu-toolchain-push/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/const-4.c":7:1
 discrim 3 126 {*movhf_hardfloat}
 (expr_list:REG_EQUAL (const_double:HF 8.8828125e+0 [0x0.8e2p+4])
(nil)))

RTL 2:

(insn 6 3 7 2 (set (reg/f:DI 137)
(symbol_ref/u:DI ("*.LC0") [flags 0x82])) 
"/work/home/lding/open-source/riscv-gnu-toolchain-push/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/const-4.c":7:1
 discrim 3 179 {*movdi_64bit}
 (nil))
(insn 7 6 8 2 (set (reg:HF 136)
(mem/u/c:HF (reg/f:DI 137) [0  S2 A16])) 
"/work/home/lding/open-source/riscv-gnu-toolchain-push/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/const-4.c":7:1
 discrim 3 126 {*movhf_hardfloat}
 (expr_list:REG_EQUAL (const_double:HF 8.8828125e+0 [0x0.8e2p+4])
(nil)))
(insn 8 7 9 2 (set (reg:V2HF 135)
(if_then_else:V2HF (unspec:V2BI [
(const_vector:V2BI [
(const_int 1 [0x1]) repeated x2
])
(const_int 2 [0x2]) repeated x3
(const_int 0 [0])
(reg:SI 66 vl)
(reg:SI 67 vtype)
] UNSPEC_VPREDICATE)
(vec_duplicate:V2HF (reg:HF 136))
(unspec:V2HF [
(reg:SI 0 zero)
] UNSPEC_VUNDEF))) 
"/work/home/lding/open-source/riscv-gnu-toolchain-push/gcc/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/const-4.c":6:1
 discrim 3 1389 {*pred_broadcastv2hf}
 (nil))

Best,
Lehua

gcc/ChangeLog:

* config/riscv/iterators.md (ANYLSF): New iterator.
* config/riscv/pic.md (*local_pic_load): Change ANYF.
(*local_pic_load): To ANYLSF.
(*local_pic_load_32d): Ditto.
(*local_pic_load_32d): Ditto.
(*local_pic_store): Ditto.
(*local_pic_store): Ditto.
(*local_pic_store_32d): Ditto.
(*local_pic_store_32d): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/_Float16-zfhmin-4.c: New test.

---
 gcc/config/riscv/iterators.md |  5 +++
 gcc/config/riscv/pic.md   | 34 +--
 .../gcc.target/riscv/_Float16-zfhmin-4.c  | 11 ++
 3 files changed, 33 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/_Float16-zfhmin-4.c

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index d374a10810c..39c2dd629a2 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -67,6 +67,11 @@
(DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
(HF "TARGET_ZFH || TARGET_ZHINX")])
 
+;; Iterator for hardware-supported load/store floating-point modes.
+(define_mode_iterator ANYLSF [(SF "TARGET_HARD_FLOAT || TARGET_ZFINX")
+ (DF "TARGET_DOUBLE_FLOAT || TARGET_ZDINX")
+ (HF "TARGET_ZFHMIN || TARGET_ZHINX")])
+
 ;; Iterator for floating-point modes that can be loaded into X registers.
 (define_mode_iterator SOFTF [SF (DF "TARGET_64BIT") (HF "TARGET_ZFHMIN")])
 
diff --git a/gcc/config/riscv/pic.md b/gcc/config/riscv/pic.md
index 9507850455a..da636e31619 100644
--- a/gcc/config/riscv/pic.md
+++ b/gcc/config/riscv/pic.md
@@ -43,17 +43,17 @@
   "u\t%0,%1"
   [(set (attr "length") (const_int 8))])
 
-;; We can support ANYF loads into X register if there is

Re: IRA update_equiv_regs for (was Re: ICE for interim fix for PR/110748)

2023-08-12 Thread Jivan Hakobyan via Gcc-patches
Yes, as Jeff mentioned, I have some work in that area.

The first is related to address computation when it has a large constant
part.
Suppose we have this code:

int  consume (void *);
int foo (void) {
   int x[1000];
   return consume (x);
}

before IRA we have the following sequence
19: r140:DI=0xf000
20: r136:DI=r140:DI+0x60
  REG_EQUAL 0xf060
8: a0:DI=frame:DI+r136:DI
  REG_DEAD r136:DI

but during IRA (eliminate_regs_in_insn) insn 8 transforms to
   8: a0:DI=r136:DI+0xfa0+frame:DI
REG_DEAD r136:DI

and in the end, we get the wrong sequence.
   21: r136:DI=0xf060
  REG_EQUIV 0xf060
   25: r143:DI=0x1000
   26: r142:DI=r143:DI-0x60
  REG_DEAD r143:DI
  REG_EQUAL 0xfa0
   27: r142:DI=r142:DI+r136:DI
  REG_DEAD r136:DI
   8: a0:DI=r142:DI+frame:DI
  REG_DEAD r142:DI

My changes prevent that transformation.
I have tested it on SPEC and did not see any regressions.
In addition, 40B fewer instructions were executed.

The second piece of work is related to hoisting loop-invariant code out of loops.
I have a test case where SP + const can be hoisted out.
..
.L3:
  call foo
  addi a5,sp,16
  sh3add a0,a0,a5
...

Before IRA that code is already out of the loop, but IRA moves it back in.
My approach was done in update_equiv_regs().
It prevents any move if its uses and defs are held in a single place, and
used in the loop.
Currently, that improvement is under evaluation.


On Sat, Aug 12, 2023 at 4:05 AM Jeff Law via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

>
>
> On 8/11/23 17:32, Vineet Gupta wrote:
> >
> > On 8/1/23 12:17, Vineet Gupta wrote:
> >> Hi Jeff,
> >>
> >> As discussed this morning, I'm sending over dumps for the optim of DF
> >> const -0.0 (PR/110748)  [1]
> >> For rv64gc_zbs build, IRA is undoing the split which eventually leads
> >> to ICE in final pass.
> >>
> >> [1] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110748#c15
> >>
> >> void znd(double *d) {  *d = -0.0;   }
> >>
> >>
> >> *split1*
> >>
> >> (insn 10 3 11 2 (set (reg:DI 136)
> >> (const_int [0x8000])) "neg.c":4:5 -1
> >>
> >> (insn 11 10 0 2 (set (mem:DF (reg:DI 135) [1 *d_2(D)+0 S8 A64])
> >> (subreg:DF (reg:DI 136) 0)) "neg.c":4:5 -1
> >>
> >> *ira*
> >>
> >> (insn 11 9 12 2 (set (mem:DF (reg:DI 135) [1 *d_2(D)+0 S8 A64])
> >> (const_double:DF -0.0 [-0x0.0p+0])) "neg.c":4:5 190
> >> {*movdf_hardfloat_rv64}
> >>  (expr_list:REG_DEAD (reg:DI 135)
> >>
> >>
> >> For the working case, the large const is not involved and not subject
> >> to IRA playing foul.
> >
> > I investigated this some more. So IRA update_equiv_regs () has code
> > identifying potential replacements: if a reg is referenced exactly
> > twice: set once and used once.
> >
> >if (REG_N_REFS (regno) == 2
> >&& (rtx_equal_p (replacement, src)
> >|| ! equiv_init_varies_p (src))
> >&& NONJUMP_INSN_P (insn)
> >&& equiv_init_movable_p (PATTERN (insn), regno))
> >  reg_equiv[regno].replace = 1;
> >  }
> >
> > And combine_and_move_insns () does the replacement, undoing the split1
> > above.
> Right.  This is as expected.  There was actually similar code that goes
> back even before the introduction of IRA -- like to the 80s and 90s.
>
> Conceptually the idea is a value with an equivalence that has a single
> set and single use isn't a good use of a hard register.  Better to
> narrow the live range to a single pair of instructions.
>
> It's not always a good tradeoff.  Consider if the equivalence was also a
> loop invariant and hoisted out of the loop and register pressure is low.
>
>
> >
> > In fact this is the reason for many more split1 being undone. See the
> > suboptimal codegen for large const for Andrew Pinski's test case [1]
> No doubt.  I think it's also a problem with some of Jivan's work.
>
>
> >
> > I'm wondering (naively) if there is some way to tune this - for a given
> > backend. In general it would make sense to do the replacement, but not
> > if the cost changes (e.g. consts could be embedded in x86 insn freely,
> > but not for RISC-V where this is costly and if something is split, it
> > might have been intentional).
> I'm not immediately aware of a way to tune.
>
> When it comes to tuning, the toplevel questions are do we have any of
> the info we need to tune at the point where the transformation occurs.
> The two most obvious pieces here would be loop info and register pressure.
>
> ie, do we have enough loop structure to know if the def is at a
> shallower loop nest than the use.  There's a reasonable chance we have
> this information as my recollection is this analysis is done fairly
> early in IRA.
>
> But that means we likely don't have any sense of register pressure at
> the points between the def and use.   So the most useful metric for
> tuning isn't really available.
>
> The one thing that stands out is we don't do this transfor

[PATCH v4] Mode-Switching: Fix SET_SRC ICE for create_pre_exit

2023-08-12 Thread Pan Li via Gcc-patches
From: Pan Li 

In some cases, like gcc/testsuite/gcc.dg/pr78148.c on RISC-V, the pattern
passed to SET_SRC in create_pre_exit has only 1 operand. For example:

(insn 13 9 14 2 (clobber (reg/i:TI 10 a0)) 
"gcc/testsuite/gcc.dg/pr78148.c":24:1 -1
  (expr_list:REG_UNUSED (reg/i:TI 10 a0)
(nil)))

Unfortunately, SET_SRC requires at least 2 operands, so a segmentation
fault occurs here. As for the SH4 part that results in the segmentation fault,
it looks like it is only valid when return_copy_pat is a load or something like
that. Thus, this patch fixes it by restricting the use of SET_SRC to SET insns.

Signed-off-by: Pan Li 

gcc/ChangeLog:

* mode-switching.cc (create_pre_exit): Add SET insn check.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/mode-switch-ice-1.c: New test.
---
 gcc/mode-switching.cc |  1 +
 .../gcc.target/riscv/mode-switch-ice-1.c  | 22 +++
 2 files changed, 23 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/mode-switch-ice-1.c

diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 64ae2bc29c3..f483c831c35 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -411,6 +411,7 @@ create_pre_exit (int n_entities, int *entity_map, const int 
*num_modes)
   conflict with address reloads.  */
if (copy_start >= ret_start
&& copy_start + copy_num <= ret_end
+   && GET_CODE (return_copy_pat) == SET
&& OBJECT_P (SET_SRC (return_copy_pat)))
  forced_late_switch = true;
break;
diff --git a/gcc/testsuite/gcc.target/riscv/mode-switch-ice-1.c 
b/gcc/testsuite/gcc.target/riscv/mode-switch-ice-1.c
new file mode 100644
index 000..1b34a471904
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/mode-switch-ice-1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+struct A { char e, f; };
+
+struct B
+{
+  int g;
+  struct A h[4];
+};
+
+extern void bar (int, int);
+
+struct B foo (void)
+{
+  bar (2, 1);
+}
+
+void baz ()
+{
+  foo ();
+}
-- 
2.34.1