[PATCH 3/4] [RISC-V] resolve conflict between zcmp multi push/pop and shrink-wrap-separate

2023-06-06 Thread Fei Gao
Disable zcmp multi push/pop if shrink-wrap-separate is active.

So in -Os that prefers smaller code size, by default shrink-wrap-separate
is disabled while zcmp multi push/pop is enabled.

And in -O2 and others that prefer speed, by default shrink-wrap-separate
is enabled while zcmp multi push/pop is disabled. To force enabling zcmp multi
push/pop in this case, -fno-shrink-wrap-separate has to be explicitly given.

The following TC shows the issues in -O2 before this patch with both
shrink-wrap-separate and zcmp multi push/pop active.
1. duplicated store of s regs.
2. cm.push pushes ra, s0-s11 in the reverse order of what the normal
   prologue does, causing stack corruption and failure to restore s regs.

TC: zcmp_shrink_wrap_separate.c included in this patch.

output asm before this patch:
calc_func:
cm.push {ra, s0-s3}, -32
...
beq a5,zero,.L2
...
.L2:
...
sw  s1,20(sp) //issue here
sw  s3,12(sp) //issue here
...
sw  s2,16(sp) //issue here

output asm after this patch:
calc_func:
addi sp,sp,-32
sw  s0,24(sp)
...
beq a5,zero,.L2
...
.L2:
...
sw  s1,20(sp)
sw  s3,12(sp)
...
sw  s2,16(sp)
gcc/ChangeLog:

* config/riscv/riscv.cc
(riscv_avoid_shrink_wrapping_separate): wrap the condition check in
riscv_avoid_shrink_wrapping_separate.
(riscv_avoid_multi_push): avoid multi push if shrink_wrapping_separate
  is active.
(riscv_get_separate_components): call 
riscv_avoid_shrink_wrapping_separate
* shrink-wrap.cc (try_shrink_wrapping_separate): call
  use_shrink_wrapping_separate.
(use_shrink_wrapping_separate):wrap the condition
  check in use_shrink_wrapping_separate 
* shrink-wrap.h (use_shrink_wrapping_separate): add to extern

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zcmp_shrink_wrap_separate.c: New test.
* gcc.target/riscv/zcmp_shrink_wrap_separate2.c: New test.

Signed-off-by: Fei Gao 
Co-Authored-By: Zhangjin Liao 
---
 gcc/config/riscv/riscv.cc | 19 +++-
 gcc/shrink-wrap.cc| 25 +++--
 gcc/shrink-wrap.h |  1 +
 .../riscv/zcmp_shrink_wrap_separate.c | 97 +++
 .../riscv/zcmp_shrink_wrap_separate2.c| 97 +++
 5 files changed, 228 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f60c241a526..b505cdeca34 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -64,6 +64,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cfghooks.h"
 #include "cfgloop.h"
 #include "cfgrtl.h"
+#include "shrink-wrap.h"
 #include "sel-sched.h"
 #include "fold-const.h"
 #include "gimple-iterator.h"
@@ -389,6 +390,7 @@ static const struct riscv_tune_param 
optimize_size_tune_info = {
   false,   /* use_divmod_expansion */
 };
 
+static bool riscv_avoid_shrink_wrapping_separate ();
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
 static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
 
@@ -4910,6 +4912,8 @@ riscv_avoid_multi_push(const struct riscv_frame_info 
*frame)
   || cfun->machine->interrupt_handler_p
   || cfun->machine->varargs_size != 0
   || crtl->args.pretend_args_size != 0
+  || (use_shrink_wrapping_separate ()
+  && !riscv_avoid_shrink_wrapping_separate ())
   || (frame->mask & ~ MULTI_PUSH_GPR_MASK))
 return true;
 
@@ -6077,6 +6081,17 @@ riscv_epilogue_uses (unsigned int regno)
   return false;
 }
 
+static bool
+riscv_avoid_shrink_wrapping_separate ()
+{
+  if (riscv_use_save_libcall (&cfun->machine->frame)
+  || cfun->machine->interrupt_handler_p
+  || !cfun->machine->frame.gp_sp_offset.is_constant ())
+return true;
+
+  return false;
+}
+
 /* Implement TARGET_SHRINK_WRAP_GET_SEPARATE_COMPONENTS.  */
 
 static sbitmap
@@ -6086,9 +6101,7 @@ riscv_get_separate_components (void)
   sbitmap components = sbitmap_alloc (FIRST_PSEUDO_REGISTER);
   bitmap_clear (components);
 
-  if (riscv_use_save_libcall (&cfun->machine->frame)
-  || cfun->machine->interrupt_handler_p
-  || !cfun->machine->frame.gp_sp_offset.is_constant ())
+  if (riscv_avoid_shrink_wrapping_separate ())
 return components;
 
   offset = cfun->machine->frame.gp_sp_offset.to_constant ();
diff --git a/gcc/shrink-wrap.cc b/gcc/shrink-wrap.cc
index b8d7b557130..d534964321a 100644
--- a/gcc/shrink-wrap.cc
+++ b/gcc/shrink-wrap.cc
@@ -1776,16 +1776,14 @@ insert_prologue_epilogue_for_components (sbitmap 
components)
   commit_edge_insertions ();
 }
 
-/* The main entry point to this 

[PATCH 4/4] [RISC-V] support cm.mva01s cm.mvsa01 in zcmp

2023-06-06 Thread Fei Gao
From: Die Li 

Signed-off-by: Die Li 
Co-Authored-By: Fei Gao 

gcc/ChangeLog:

* config/riscv/peephole.md: New pattern.
* config/riscv/predicates.md (a0a1_reg_operand): New predicate.
(zcmp_mv_sreg_operand): New predicate.
* config/riscv/riscv.md: New predicate.
* config/riscv/zc.md (*mva01s): New pattern.
(*mvsa01): New pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/cm_mv_rv32.c: New test.
---
 gcc/config/riscv/peephole.md| 28 +
 gcc/config/riscv/predicates.md  | 11 
 gcc/config/riscv/riscv.md   |  1 +
 gcc/config/riscv/zc.md  | 22 
 gcc/testsuite/gcc.target/riscv/cm_mv_rv32.c | 21 
 5 files changed, 83 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/cm_mv_rv32.c

diff --git a/gcc/config/riscv/peephole.md b/gcc/config/riscv/peephole.md
index 67e7046d7e6..e8cb1ba4838 100644
--- a/gcc/config/riscv/peephole.md
+++ b/gcc/config/riscv/peephole.md
@@ -94,3 +94,31 @@
 {
   th_mempair_order_operands (operands, true, SImode);
 })
+
+;; ZCMP
+(define_peephole2
+  [(set (match_operand:X 0 "a0a1_reg_operand")
+(match_operand:X 1 "zcmp_mv_sreg_operand"))
+   (set (match_operand:X 2 "a0a1_reg_operand")
+(match_operand:X 3 "zcmp_mv_sreg_operand"))]
+  "TARGET_ZCMP
+   && (REGNO (operands[2]) != REGNO (operands[0]))"
+  [(parallel [(set (match_dup 0)
+   (match_dup 1))
+  (set (match_dup 2)
+   (match_dup 3))])]
+)
+
+(define_peephole2
+  [(set (match_operand:X 0 "zcmp_mv_sreg_operand")
+(match_operand:X 1 "a0a1_reg_operand"))
+   (set (match_operand:X 2 "zcmp_mv_sreg_operand")
+(match_operand:X 3 "a0a1_reg_operand"))]
+  "TARGET_ZCMP
+   && (REGNO (operands[0]) != REGNO (operands[2]))
+   && (REGNO (operands[1]) != REGNO (operands[3]))"
+  [(parallel [(set (match_dup 0)
+   (match_dup 1))
+  (set (match_dup 2)
+   (match_dup 3))])]
+)
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index a1b9367b997..6d5e8630cb5 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -207,6 +207,17 @@
   (and (match_code "const_int")
(match_test "riscv_zcmp_valid_stack_adj_bytes_p (INTVAL (op), 13)")))
 
+;; ZCMP predicates
+(define_predicate "a0a1_reg_operand"
+  (and (match_operand 0 "register_operand")
+   (match_test "IN_RANGE (REGNO (op), A0_REGNUM, A1_REGNUM)")))
+
+(define_predicate "zcmp_mv_sreg_operand"
+  (and (match_operand 0 "register_operand")
+   (match_test "TARGET_RVE ? IN_RANGE (REGNO (op), S0_REGNUM, S1_REGNUM)
+: IN_RANGE (REGNO (op), S0_REGNUM, S1_REGNUM)
+|| IN_RANGE (REGNO (op), S2_REGNUM, S7_REGNUM)")))
+
 ;; Only use branch-on-bit sequences when the mask is not an ANDI immediate.
 (define_predicate "branch_on_bit_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 02802d2685d..25bc3e6ab4c 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -121,6 +121,7 @@
(S0_REGNUM  8)
(S1_REGNUM  9)
(A0_REGNUM  10)
+   (A1_REGNUM  11)
(S2_REGNUM  18)
(S3_REGNUM  19)
(S4_REGNUM  20)
diff --git a/gcc/config/riscv/zc.md b/gcc/config/riscv/zc.md
index 217e115035b..bb4975cd333 100644
--- a/gcc/config/riscv/zc.md
+++ b/gcc/config/riscv/zc.md
@@ -1433,3 +1433,25 @@
   "TARGET_ZCMP"
   "cm.push {ra, s0-s11}, %0"
 )
+
+;; ZCMP mv
+(define_insn "*mva01s"
+  [(set (match_operand:X 0 "a0a1_reg_operand" "=r")
+(match_operand:X 1 "zcmp_mv_sreg_operand" "r"))
+   (set (match_operand:X 2 "a0a1_reg_operand" "=r")
+(match_operand:X 3 "zcmp_mv_sreg_operand" "r"))]
+  "TARGET_ZCMP
+   && (REGNO (operands[2]) != REGNO (operands[0]))"
+  { return (REGNO (operands[0]) == 
A0_REGNUM)?"cm.mva01s\t%1,%3":"cm.mva01s\t%3,%1"; }
+  [(set_attr "mode" "")])
+
+(define_insn "*mvsa01"
+  [(set (match_operand:X 0 "zcmp_mv_sreg_operand" "=r")
+(match_operand:X 1 "a0a1_reg_operand" "r"))
+   (set (match_operand:X 2 "zcmp_mv_sreg_operand" "=r")
+(match_operand:X 3 "a0a1_reg_operand" "r"))]
+  "TARGET_ZCMP
+   && (REGNO (operands[0]) != REGNO (operands[2]))
+   && (REGNO (operands[1]) != REGNO (operands[3]))"
+  { return (REGNO (operands[1]) == 
A0_REGNUM)?"cm.mvsa01\t%0,%2":"cm.mvsa01\t%2,%0"; }
+  [(set_attr "mode" "")])
diff --git a/gcc/testsuite/gcc.target/riscv/cm_mv_rv32.c 
b/gcc/testsuite/gcc.target/riscv/cm_mv_rv32.c
new file mode 100644
index 000..49c94c01603
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/cm_mv_rv32.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options " -Os -march=rv32i_zca_zcmp -mabi=ilp32 " } */
+/* { dg-skip-if "" { *-*-* } {"-O0" 

[PATCH 1/4][V4][RISC-V] support cm.push cm.pop cm.popret in zcmp

2023-06-06 Thread Fei Gao
Zcmp can share the same logic as save-restore in stack allocation: 
pre-allocation
by cm.push, step 1 and step 2.

Please note that cm.push pushes ra, s0-s11 in the reverse order of what
save-restore does.
So adaptation has been done in the .cfi directives in my patch.

Signed-off-by: Fei Gao 

gcc/ChangeLog:

* config/riscv/iterators.md
slot0_offset: slot 0 offset in stack GPRs area in bytes
slot1_offset: slot 1 offset in stack GPRs area in bytes
slot2_offset: likewise
slot3_offset: likewise
slot4_offset: likewise
slot5_offset: likewise
slot6_offset: likewise
slot7_offset: likewise
slot8_offset: likewise
slot9_offset: likewise
slot10_offset: likewise
slot11_offset: likewise
slot12_offset: likewise
* config/riscv/predicates.md
(stack_push_up_to_ra_operand): predicates of stack adjust pushing ra
(stack_push_up_to_s0_operand): predicates of stack adjust pushing ra, s0
(stack_push_up_to_s1_operand): likewise
(stack_push_up_to_s2_operand): likewise
(stack_push_up_to_s3_operand): likewise
(stack_push_up_to_s4_operand): likewise
(stack_push_up_to_s5_operand): likewise
(stack_push_up_to_s6_operand): likewise
(stack_push_up_to_s7_operand): likewise
(stack_push_up_to_s8_operand): likewise
(stack_push_up_to_s9_operand): likewise
(stack_push_up_to_s11_operand): likewise
(stack_pop_up_to_ra_operand): predicates of stack adjust popping ra
(stack_pop_up_to_s0_operand): predicates of stack adjust popping ra, s0
(stack_pop_up_to_s1_operand): likewise
(stack_pop_up_to_s2_operand): likewise
(stack_pop_up_to_s3_operand): likewise
(stack_pop_up_to_s4_operand): likewise
(stack_pop_up_to_s5_operand): likewise
(stack_pop_up_to_s6_operand): likewise
(stack_pop_up_to_s7_operand): likewise
(stack_pop_up_to_s8_operand): likewise
(stack_pop_up_to_s9_operand): likewise
(stack_pop_up_to_s11_operand): likewise
* config/riscv/riscv-protos.h
(riscv_zcmp_valid_stack_adj_bytes_p):declaration
* config/riscv/riscv.cc (struct riscv_frame_info): comment change
(riscv_avoid_multi_push): helper function of riscv_use_multi_push
(riscv_use_multi_push): true if multi push is used
(riscv_multi_push_sregs_count): num of sregs in multi-push
(riscv_multi_push_regs_count): num of regs in multi-push
(riscv_16bytes_align): align to 16 bytes
(riscv_stack_align): moved to a better place
(riscv_save_libcall_count): no functional change
(riscv_compute_frame_info): add zcmp frame info
(riscv_adjust_multi_push_cfi_prologue): adjust cfi for cm.push
(riscv_gen_multi_push_pop_insn): gen function for multi push and pop
(riscv_expand_prologue): allocate stack by cm.push
(riscv_adjust_multi_pop_cfi_epilogue): adjust cfi for cm.pop[ret]
(riscv_expand_epilogue): allocate stack by cm.pop[ret]
(zcmp_base_adj): calculate stack adjustment base size
(zcmp_additional_adj): calculate stack adjustment additional size
(riscv_zcmp_valid_stack_adj_bytes_p): check if stack adjustment valid
* config/riscv/riscv.h (RETURN_ADDR_MASK): mask of ra
(S0_MASK): likewise
(S1_MASK): likewise
(S2_MASK): likewise
(S3_MASK): likewise
(S4_MASK): likewise
(S5_MASK): likewise
(S6_MASK): likewise
(S7_MASK): likewise
(S8_MASK): likewise
(S9_MASK): likewise
(S10_MASK): likewise
(S11_MASK): likewise
(MULTI_PUSH_GPR_MASK): GPR_MASK that cm.push can cover at most
(ZCMP_MAX_SPIMM): max spimm value
(ZCMP_SP_INC_STEP): zcmp sp increment step
(ZCMP_INVALID_S0S10_SREGS_COUNTS): num of s0-s10
(ZCMP_S0S11_SREGS_COUNTS): num of s0-s11
(ZCMP_MAX_GRP_SLOTS): max slots of pushing and popping in zcmp
* config/riscv/riscv.md: include zc.md
* config/riscv/zc.md: New file. machine description for zcmp

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rv32e_zcmp.c: New test.
* gcc.target/riscv/rv32i_zcmp.c: New test.
* gcc.target/riscv/zcmp_stack_alignment.c: New test.
---
 gcc/config/riscv/iterators.md |   15 +
 gcc/config/riscv/predicates.md|   96 ++
 gcc/config/riscv/riscv-protos.h   |1 +
 gcc/config/riscv/riscv.cc |  360 +-
 gcc/config/riscv/riscv.h  |   23 +
 gcc/config/riscv/riscv.md |2 +
 gcc/config/riscv/zc.md| 1042 +
 gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c   |  239 
 gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c   |  239 
 .../gcc.target/riscv/zcmp_stack_alignment.c   |   23 +
 10 files changed, 2000 insertions(+), 

[PATCH 2/4] [RISC-V] support cm.popretz in zcmp

2023-06-06 Thread Fei Gao
Generate cm.popretz instead of cm.popret if return value is 0.

Signed-off-by: Fei Gao 

gcc/ChangeLog:

* config/riscv/riscv.cc
(riscv_zcmp_can_use_popretz): true if popretz can be used
(riscv_gen_multi_pop_insn): interface to generate cm.pop[ret][z]
(riscv_expand_epilogue): expand cm.pop[ret][z] in epilogue
* config/riscv/riscv.md:
* config/riscv/zc.md
(@gpr_multi_popretz_up_to_ra_): md for popretz ra
(@gpr_multi_popretz_up_to_s0_): md for popretz ra, s0
(@gpr_multi_popretz_up_to_s1_): likewise
(@gpr_multi_popretz_up_to_s2_): likewise
(@gpr_multi_popretz_up_to_s3_): likewise
(@gpr_multi_popretz_up_to_s4_): likewise
(@gpr_multi_popretz_up_to_s5_): likewise
(@gpr_multi_popretz_up_to_s6_): likewise
(@gpr_multi_popretz_up_to_s7_): likewise
(@gpr_multi_popretz_up_to_s8_): likewise
(@gpr_multi_popretz_up_to_s9_): likewise
(@gpr_multi_popretz_up_to_s11_): likewise

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rv32e_zcmp.c: add testcase for cm.popretz in rv32e
* gcc.target/riscv/rv32i_zcmp.c: add testcase for cm.popretz in rv32i
---
 gcc/config/riscv/riscv.cc   | 114 --
 gcc/config/riscv/riscv.md   |   1 +
 gcc/config/riscv/zc.md  | 393 
 gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c |  12 +
 gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c |  12 +
 5 files changed, 508 insertions(+), 24 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index c476c699f4c..f60c241a526 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -435,6 +435,7 @@ typedef enum
   PUSH_IDX = 0,
   POP_IDX,
   POPRET_IDX,
+  POPRETZ_IDX,
   ZCMP_OP_NUM
 } riscv_zcmp_op_t;
 
@@ -5535,30 +5536,30 @@ riscv_emit_stack_tie (void)
 /*zcmp multi push and pop code_for_push_pop function ptr array  */
 const code_for_push_pop_t code_for_push_pop [ZCMP_MAX_GRP_SLOTS][ZCMP_OP_NUM] 
= {
   {code_for_gpr_multi_push_up_to_ra,code_for_gpr_multi_pop_up_to_ra,
-   code_for_gpr_multi_popret_up_to_ra},
+   code_for_gpr_multi_popret_up_to_ra,  code_for_gpr_multi_popretz_up_to_ra},
   {code_for_gpr_multi_push_up_to_s0,code_for_gpr_multi_pop_up_to_s0,
-   code_for_gpr_multi_popret_up_to_s0},
+   code_for_gpr_multi_popret_up_to_s0,  code_for_gpr_multi_popretz_up_to_s0},
   {code_for_gpr_multi_push_up_to_s1,code_for_gpr_multi_pop_up_to_s1,
-   code_for_gpr_multi_popret_up_to_s1},
+   code_for_gpr_multi_popret_up_to_s1,  code_for_gpr_multi_popretz_up_to_s1},
   {code_for_gpr_multi_push_up_to_s2,code_for_gpr_multi_pop_up_to_s2,
-   code_for_gpr_multi_popret_up_to_s2},
+   code_for_gpr_multi_popret_up_to_s2,  code_for_gpr_multi_popretz_up_to_s2},
   {code_for_gpr_multi_push_up_to_s3,code_for_gpr_multi_pop_up_to_s3,
-   code_for_gpr_multi_popret_up_to_s3},
+   code_for_gpr_multi_popret_up_to_s3,  code_for_gpr_multi_popretz_up_to_s3},
   {code_for_gpr_multi_push_up_to_s4,code_for_gpr_multi_pop_up_to_s4,
-   code_for_gpr_multi_popret_up_to_s4},
+   code_for_gpr_multi_popret_up_to_s4,  code_for_gpr_multi_popretz_up_to_s4},
   {code_for_gpr_multi_push_up_to_s5,code_for_gpr_multi_pop_up_to_s5,
-   code_for_gpr_multi_popret_up_to_s5},
+   code_for_gpr_multi_popret_up_to_s5,  code_for_gpr_multi_popretz_up_to_s5},
   {code_for_gpr_multi_push_up_to_s6,code_for_gpr_multi_pop_up_to_s6,
-   code_for_gpr_multi_popret_up_to_s6},
+   code_for_gpr_multi_popret_up_to_s6,  code_for_gpr_multi_popretz_up_to_s6},
   {code_for_gpr_multi_push_up_to_s7,code_for_gpr_multi_pop_up_to_s7,
-   code_for_gpr_multi_popret_up_to_s7},
+   code_for_gpr_multi_popret_up_to_s7,  code_for_gpr_multi_popretz_up_to_s7},
   {code_for_gpr_multi_push_up_to_s8,code_for_gpr_multi_pop_up_to_s8,
-   code_for_gpr_multi_popret_up_to_s8},
+   code_for_gpr_multi_popret_up_to_s8,  code_for_gpr_multi_popretz_up_to_s8},
   {code_for_gpr_multi_push_up_to_s9,code_for_gpr_multi_pop_up_to_s9,
-   code_for_gpr_multi_popret_up_to_s9},
-  {nullptr, nullptr, nullptr},
+   code_for_gpr_multi_popret_up_to_s9,  code_for_gpr_multi_popretz_up_to_s9},
+  {nullptr, nullptr, nullptr, nullptr},
   {code_for_gpr_multi_push_up_to_s11,   code_for_gpr_multi_pop_up_to_s11,
-   code_for_gpr_multi_popret_up_to_s11}};
+   code_for_gpr_multi_popret_up_to_s11, code_for_gpr_multi_popretz_up_to_s11}};
 
 static rtx
 riscv_gen_multi_push_pop_insn (riscv_zcmp_op_t op, HOST_WIDE_INT adj_size,
@@ -5747,6 +5748,80 @@ riscv_adjust_libcall_cfi_epilogue ()
   return dwarf;
 }
 
+/* return true if popretz pattern can be matched.
+   set (reg 10 a0) (const_int 0)
+   use (reg 10 a0)
+   NOTE_INSN_EPILOGUE_BEG  */
+static rtx_insn *
+riscv_zcmp_can_use_popretz(void)
+{
+  rtx_insn *insn = NULL, *use = NULL, *clear = NULL;
+
+  /* sequence stack for NOTE_INSN_EPILOGUE_BEG*/
+  struct sequence_stack * outer_seq = get_current_sequence ()->next;
+  

[PATCH 0/4] [RISC-V] support zcmp extension

2023-06-06 Thread Fei Gao
Please note that the series depends on the zcmp switch that Jiawei posted:
https://gcc.gnu.org/pipermail/gcc-patches/2023-April/615289.html

The 1st patch is a follow up on Kito's V3 review. 
Others are new.

Fei Gao (4):
  [RISC-V] support cm.push cm.pop cm.popret in zcmp
  [RISC-V] support cm.popretz in zcmp
  [RISC-V] resolve conflict between zcmp multi push/pop and shrink-wrap-separate
  [RISC-V] support cm.mva01s cm.mvsa01 in zcmp

 gcc/config/riscv/iterators.md |   15 +
 gcc/config/riscv/peephole.md  |   28 +
 gcc/config/riscv/predicates.md|  107 ++
 gcc/config/riscv/riscv-protos.h   |1 +
 gcc/config/riscv/riscv.cc |  445 -
 gcc/config/riscv/riscv.h  |   23 +
 gcc/config/riscv/riscv.md |4 +
 gcc/config/riscv/zc.md| 1457 +
 gcc/shrink-wrap.cc|   25 +-
 gcc/shrink-wrap.h |1 +
 gcc/testsuite/gcc.target/riscv/cm_mv_rv32.c   |   21 +
 gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c   |  251 +++
 gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c   |  251 +++
 .../riscv/zcmp_shrink_wrap_separate.c |   97 ++
 .../riscv/zcmp_shrink_wrap_separate2.c|   97 ++
 .../gcc.target/riscv/zcmp_stack_alignment.c   |   23 +
 16 files changed, 2795 insertions(+), 51 deletions(-)
 create mode 100644 gcc/config/riscv/zc.md
 create mode 100644 gcc/testsuite/gcc.target/riscv/cm_mv_rv32.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rv32e_zcmp.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rv32i_zcmp.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_shrink_wrap_separate2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zcmp_stack_alignment.c

-- 
2.17.1



[PATCH] rs6000: Remove redundant initialization [PR106907]

2023-06-06 Thread P Jeevitha via Gcc-patches
PR106907 reports a few warnings spotted by cppcheck. This patch addresses
the redundant initialization issue among them. Here the initialized value of
'new_addr' was overwritten before it was read. Updated the source by removing
the unnecessary initialization of 'new_addr'.

2023-06-07  Jeevitha Palanisamy  

gcc/
PR target/106907
* gcc/config/rs6000/rs6000.cc (rs6000_expand_vector_extract): Remove 
redundant
initialization of new_addr.


diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 42f49e4a56b..d994e004bd3 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -7660,12 +7660,11 @@ rs6000_expand_vector_extract (rtx target, rtx vec, rtx 
elt)
 {
   unsigned int ele_size = GET_MODE_SIZE (inner_mode);
   rtx num_ele_m1 = GEN_INT (GET_MODE_NUNITS (mode) - 1);
-  rtx new_addr = gen_reg_rtx (Pmode);
 
   elt = gen_rtx_AND (Pmode, elt, num_ele_m1);
   if (ele_size > 1)
elt = gen_rtx_MULT (Pmode, elt, GEN_INT (ele_size));
-  new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
+  rtx new_addr = gen_rtx_PLUS (Pmode, XEXP (mem, 0), elt);
   new_addr = change_address (mem, inner_mode, new_addr);
   emit_move_insn (target, new_addr);
 }



[Bug fortran/99350] [10/11/12/13/14 Regression] ICE in gfc_get_symbol_decl, at fortran/trans-decl.c:1869

2023-06-06 Thread pault at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99350

Paul Thomas  changed:

   What|Removed |Added

 Blocks|87477   |
 CC||pault at gcc dot gnu.org

--- Comment #5 from Paul Thomas  ---
program p
   character((0.)/0), target :: c = 'abc'
   print *, c
end

Is a sufficient reproducer and so this is not an associate bug.

I am removing it from the blocks PR87477 list but will keep at it. For whatever
reason, the parentheses block the identification of the divide by zero.

Removing the decl.cc chunk in the patch identified in comment #1 (thanks again
Martin!) fixes this problem but changes the error messages in the testcases.

Paul


Referenced Bugs:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87477
[Bug 87477] [meta-bug] [F03] issues concerning the ASSOCIATE statement

[Bug modula2/110126] Variables are reported as unused when only referenced by ASM statements

2023-06-06 Thread admin--- via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110126

--- Comment #2 from Thorsten Otto  ---
But even if i rewrite it like that, i still get the warning.

I avoided doing anything machine specific in the first example, but if i
actually do, and use something like:

PROCEDURE test;
BEGIN
  ASM("movl %1,%%eax; addl $1,%%eax; movl %%eax,%0" : "=r"(x) : "0"(x) :
"eax");
END test;

i still get the warning.

BTW, is there a reason why the string in the asm statement can't be
concatenated?

eg. especially if the statement becomes longer, you would usually want to write
it something like

  ASM("movl %1,%%eax;" +
  "addl $1,%%eax;" +
  "movl %%eax,%0"
 : "=r"(x) : "0"(x) : "eax");

Re: [committed] libstdc++: Update list of known symbol versions for abi-check

2023-06-06 Thread François Dumont via Gcc-patches



On 06/06/2023 17:59, Jonathan Wakely via Libstdc++ wrote:

Tested x86_64-linux and powerpc64le-linux. Pushed to trunk.

-- >8 --

Add the recently added CXXABI_1.3.15 version. Also remove two "frozen"
versions from the latestp list, as no more symbols should be added to
those now.

libstdc++-v3/ChangeLog:

* testsuite/util/testsuite_abi.cc (check_version): Add
CXXABI_1.3.15 symver and make it the latestp. Remove
GLIBCXX_IEEE128_3.4.31 and GLIBCXX_LDBL_3.4.31 from latestp.
---
  libstdc++-v3/testsuite/util/testsuite_abi.cc | 7 ++-
  1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/libstdc++-v3/testsuite/util/testsuite_abi.cc 
b/libstdc++-v3/testsuite/util/testsuite_abi.cc
index cea6c217433..59615dd701e 100644
--- a/libstdc++-v3/testsuite/util/testsuite_abi.cc
+++ b/libstdc++-v3/testsuite/util/testsuite_abi.cc
@@ -233,7 +233,7 @@ check_version(symbol& test, bool added)
known_versions.push_back("CXXABI_1.3.11");
known_versions.push_back("CXXABI_1.3.12");
known_versions.push_back("CXXABI_1.3.13");
-  known_versions.push_back("CXXABI_1.3.14");
+  known_versions.push_back("CXXABI_1.3.15");


Did you really want to remove CXXABI_1.3.14 here ? ChangeLog says you 
just add CXXABI_1.3.15.




Re: [PATCH v3] RISC-V: Refactor requirement of ZVFH and ZVFHMIN.

2023-06-06 Thread juzhe.zh...@rivai.ai
HI,  

+  (VNx1SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
+  (VNx2SF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx4SF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx8SF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx16SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
+  (VNx32SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")

Add TARGET_VECTOR_ELEN_FP_32 here: for FP16->FP32 conversion,
we need both ELEN_FP16 and ELEN_FP32 enabled.





juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-06-07 11:00
To: gcc-patches
CC: juzhe.zhong; kito.cheng; pan2.li; yanzhang.wang
Subject: [PATCH v3] RISC-V: Refactor requirement of ZVFH and ZVFHMIN.
From: Pan Li 
 
This patch would like to refactor the requirement of both the ZVFH
and ZVFHMIN. The related define_insn and iterator will take the
requirement based on the ZVFHMIN and ZVFH.
 
Please note that ZVFH covers the ZVFHMIN instructions. This patch
adds one test for this.
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/vector-iterators.md: Add requirement to VF,
VWEXTF and VWCONVERTI, add V_CONVERT_F and VCONVERTF.
* config/riscv/vector.md: Adjust FP convert to V_CONVERT_F
and VCONVERTF.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c: New test.
---
gcc/config/riscv/vector-iterators.md  | 68 +--
gcc/config/riscv/vector.md| 46 ++---
.../riscv/rvv/base/zvfh-over-zvfhmin.c| 25 +++
3 files changed, 97 insertions(+), 42 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c
 
diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index f4946d84449..1dc82bd44d3 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -296,13 +296,13 @@ (define_mode_iterator VWI_ZVE32 [
])
(define_mode_iterator VF [
-  (VNx1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
-  (VNx2HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx4HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx8HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx16HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
-  (VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
+  (VNx1HF "TARGET_ZVFH && TARGET_MIN_VLEN < 128")
+  (VNx2HF "TARGET_ZVFH")
+  (VNx4HF "TARGET_ZVFH")
+  (VNx8HF "TARGET_ZVFH")
+  (VNx16HF "TARGET_ZVFH")
+  (VNx32HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32")
+  (VNx64HF "TARGET_ZVFH && TARGET_MIN_VLEN >= 128")
   (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
   (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
@@ -497,12 +497,12 @@ (define_mode_iterator VWEXTI [
])
(define_mode_iterator VWEXTF [
-  (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
-  (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
-  (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
-  (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
-  (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
-  (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+  (VNx1SF "TARGET_ZVFH && TARGET_MIN_VLEN < 128")
+  (VNx2SF "TARGET_ZVFH")
+  (VNx4SF "TARGET_ZVFH")
+  (VNx8SF "TARGET_ZVFH")
+  (VNx16SF "TARGET_ZVFH && TARGET_MIN_VLEN > 32")
+  (VNx32SF "TARGET_ZVFH && TARGET_MIN_VLEN >= 128")
   (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
   (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
@@ -512,12 +512,12 @@ (define_mode_iterator VWEXTF [
])
(define_mode_iterator VWCONVERTI [
-  (VNx1SI "TARGET_MIN_VLEN < 128 && TARGET_VECTOR_ELEN_FP_16")
-  (VNx2SI "TARGET_VECTOR_ELEN_FP_16")
-  (VNx4SI "TARGET_VECTOR_ELEN_FP_16")
-  (VNx8SI "TARGET_VECTOR_ELEN_FP_16")
-  (VNx16SI "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_16")
-  (VNx32SI "TARGET_MIN_VLEN >= 128 && TARGET_VECTOR_ELEN_FP_16")
+  (VNx1SI "TARGET_ZVFH && TARGET_MIN_VLEN < 128")
+  (VNx2SI "TARGET_ZVFH")
+  (VNx4SI "TARGET_ZVFH")
+  (VNx8SI "TARGET_ZVFH")
+  (VNx16SI "TARGET_ZVFH && TARGET_MIN_VLEN > 32")
+  (VNx32SI "TARGET_ZVFH && TARGET_MIN_VLEN >= 128")
   (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && 
TARGET_MIN_VLEN < 128")
   (VNx2DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32")
@@ -526,6 +526,21 @@ (define_mode_iterator VWCONVERTI [
   (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && 
TARGET_MIN_VLEN >= 128")
])
+(define_mode_iterator VCONVERTF [
+  (VNx1SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
+  (VNx2SF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx4SF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx8SF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx16SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
+  (VNx32SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
+
+  (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
+  (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
+  (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
+  (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
+  (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+])
+
(define_mode_iterator VQEXTI [
   (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI 
"TARGET_MIN_VLEN > 32") (VNx32SI 

[PATCH v1] LoongArch:Change the default value of LARCH_CALL_RATIO to 6 on the LoongArch architecture.

2023-06-06 Thread chen xiaolong via Gcc-patches
During regression testing of GCC on the LoongArch architecture, it was
found that the tests in the pr90883.C file failed. The problem was investigated
and the error was found to be caused by setting the macro LARCH_CALL_RATIO to
too large a value. Taking the actual LoongArch architecture into account,
different thresholds meeting the test conditions were evaluated using the
engineering method (SPEC CPU 2006), and the results showed that the optimal
threshold should be set to 6.

gcc/ChangeLog:

* config/loongarch/loongarch.h (LARCH_CALL_RATIO):
---
 gcc/config/loongarch/loongarch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
index f8167875646..adea7333925 100644
--- a/gcc/config/loongarch/loongarch.h
+++ b/gcc/config/loongarch/loongarch.h
@@ -1055,7 +1055,7 @@ typedef struct {
 /* The base cost of a memcpy call, for MOVE_RATIO and friends.  These
values were determined experimentally by benchmarking with CSiBE.
 */
-#define LARCH_CALL_RATIO 8
+#define LARCH_CALL_RATIO 6
 
 /* Any loop-based implementation of cpymemsi will have at least
LARCH_MAX_MOVE_BYTES_STRAIGHT / UNITS_PER_WORD memory-to-memory
-- 
2.38.0



RE: [PATCH] RISCV: Add -m(no)-omit-leaf-frame-pointer support.

2023-06-06 Thread Wang, Yanzhang via Gcc-patches
Hi Jeff,

Thanks for your comments. I have a few questions about points I don't quite understand.

> One of the things that needs to be upstreamed is long jump support within
> a function.  Essentially once a function reaches 1M in size we have the
> real possibility that a direct jump may not reach its target.
> 
> To support this I expect that $ra is going to become a fixed register (ie,
> not available to the register allocator as a temporary).  It'll be used
> as a scratch register for long jump sequences.
> 
> One of the consequences of this is $ra will need to be saved in leaf
> functions that are near or over 1M in size.
> 
> Note that at the time when we have to lay out the stack, we do not know
> the precise length of the function.  So there's a degree of "fuzz" in the
> decision whether or not to save $ra in a function that is close to the 1M
> limit.

Do you mean that, long jump to more than 1M offset will need multiple jal
and each jal will save the $ra ?

If yes, I'm confused about how that $ra saving affects the
function prologue. We save fp+ra in the prologue, and the later $ra 
saving does not seem to modify the $ra that was already saved.

> I don't think you can reliably know if $ra is valid in an arbitrary leaf
> function or not.  You could implement some heuristics by looking at the
> symbol table (which I'm guessing you don't want to do) or by
> disassembling the prologue (again, I'm guessing you don't want to do that
> either).

I think the answer is yes ($ra is not valid) when we want to get the return
address to the parent function directly from $ra in the function body. But we can
get the right return address from fp plus an offset if we save them in the prologue, is that right?

> Meaning that what you really want is to be using -fno-omit-frame-pointer
> and for $ra to always be saved in the stack, even in a leaf function.

This is also another solution but will change the default behavior of
-fno-omit-frame-pointer.

> Presumably you're not suggesting any of these options be used in general
> -- they're going to be used for things like embedded devices or firmware?
> Also note there are low overhead unwinding schemes out there that are
> already supported in various tools -- ORC & SFRAME come
> immediately to mind.   Those may be better than building a bespoke
> solution for the embedded space.

Yes, you're right. I forgot to introduce the background of the requirement. It
will be used in firmware, where DWARF or unwinding information may not be acceptable.

Yanzhang

> -Original Message-
> From: Jeff Law 
> Sent: Wednesday, June 7, 2023 10:13 AM
> To: Wang, Yanzhang ; gcc-patches@gcc.gnu.org
> Cc: juzhe.zh...@rivai.ai; kito.ch...@sifive.com; Li, Pan2
> 
> Subject: Re: [PATCH] RISCV: Add -m(no)-omit-leaf-frame-pointer support.
> 
> 
> 
> On 6/4/23 20:49, Wang, Yanzhang wrote:
> > Hi Jeff,
> >
> > Yes, there's a requirement to support backtrace based on the fp+ra.
> > And the unwind/cfa is not acceptable because it will add additional
> > sections to the binary. Currently, -fno-omit-frame-pointer can not
> > save the ra for the leaf function. So we need to add another option
> > like ARM/X86 to support consistent fp+ra stack layout for the leaf and
> > non-leaf functions.
> One of the things that needs to be upstreamed is long jump support within
> a function.  Essentially once a function reaches 1M in size we have the
> real possibility that a direct jump may not reach its target.
> 
> To support this I expect that $ra is going to become a fixed register (ie,
> not available to the register allocator as a temporary).  It'll be used
> as a scratch register for long jump sequences.
> 
> One of the consequences of this is $ra will need to be saved in leaf
> functions that are near or over 1M in size.
> 
> Note that at the time when we have to lay out the stack, we do not know
> the precise length of the function.  So there's a degree of "fuzz" in the
> decision whether or not to save $ra in a function that is close to the 1M
> limit.
> 
> I don't think you can reliably know if $ra is valid in an arbitrary leaf
> function or not.  You could implement some heuristics by looking at the
> symbol table (which I'm guessing you don't want to do) or by
> disassembling the prologue (again, I'm guessing you don't want to do that
> either).
> 
> Meaning that what you really want is to be using -fno-omit-frame-pointer
> and for $ra to always be saved in the stack, even in a leaf function.
> 
> Presumably you're not suggesting any of these options be used in general
> -- they're going to be used for things like embedded devices or firmware?
> Also note there are low overhead unwinding schemes out there that are
> already supported in various tools -- ORC & SFRAME come
> immediately to mind.   Those may be better than building a bespoke
> solution for the embedded space.
> 
> 
> 
> Jeff


RE: [PATCH] Handle FMA friendly in reassoc pass

2023-06-06 Thread Di Zhao OS via Gcc-patches
Hello Lili Cui,

Since I'm also trying to improve this lately, I've tested your patch on
several aarch64 machines we have, including neoverse-n1 and ampere1
architectures. However, I haven't reproduced the 6.00% improvement of 
503.bwaves_r single copy run you mentioned. Could you share more information
about the aarch64 CPU and compile options you tested? The option
I'm using is "-Ofast", with or without "--param avoid-fma-max-bits=512".

Additionally, we found some spec2017 cases with regressions, including
4% on 527.cam4_r (neoverse-n1).

> -Original Message-
> From: Gcc-patches  bounces+dizhao=os.amperecomputing@gcc.gnu.org> On Behalf Of Cui, Lili via
> Gcc-patches
> Sent: Thursday, May 25, 2023 7:30 AM
> To: gcc-patches@gcc.gnu.org
> Cc: richard.guent...@gmail.com; li...@linux.ibm.com; Lili Cui
> 
> Subject: [PATCH] Handle FMA friendly in reassoc pass
> 
> From: Lili Cui 
> 
> Make some changes in reassoc pass to make it more friendly to fma pass later.
> Using FMA instead of mult + add reduces register pressure and the number of
> instructions retired.
> 
> There are mainly two changes
> 1. Put no-mult ops and mult ops alternately at the end of the queue, which is
> conducive to generating more fma and reducing the loss of FMA when breaking
> the chain.
> 2. Rewrite the rewrite_expr_tree_parallel function to try to build parallel
> chains according to the given correlation width, keeping the FMA chance as
> much as possible.
> 
> With the patch applied
> 
> On ICX:
> 507.cactuBSSN_r: Improved by 1.7% for multi-copy .
> 503.bwaves_r   : Improved by  0.60% for single copy .
> 507.cactuBSSN_r: Improved by  1.10% for single copy .
> 519.lbm_r  : Improved by  2.21% for single copy .
> no measurable changes for other benchmarks.
> 
> On aarch64
> 507.cactuBSSN_r: Improved by 1.7% for multi-copy.
> 503.bwaves_r   : Improved by 6.00% for single-copy.
> no measurable changes for other benchmarks.
> 
> TEST1:
> 
> float
> foo (float a, float b, float c, float d, float *e)
> {
>return  *e  + a * b + c * d ;
> }
> 
> For "-Ofast -mfpmath=sse -mfma" GCC generates:
> vmulss  %xmm3, %xmm2, %xmm2
> vfmadd132ss %xmm1, %xmm2, %xmm0
> vaddss  (%rdi), %xmm0, %xmm0
> ret
> 
> With this patch GCC generates:
> vfmadd213ss   (%rdi), %xmm1, %xmm0
> vfmadd231ss   %xmm2, %xmm3, %xmm0
> ret
> 
> TEST2:
> 
> for (int i = 0; i < N; i++)
> {
>   a[i] += b[i]* c[i] + d[i] * e[i] + f[i] * g[i] + h[i] * j[i] + k[i] * l[i]
> + m[i]* o[i] + p[i];
> }
> 
> For "-Ofast -mfpmath=sse -mfma"  GCC generates:
>   vmovapd e(%rax), %ymm4
>   vmulpd  d(%rax), %ymm4, %ymm3
>   addq$32, %rax
>   vmovapd c-32(%rax), %ymm5
>   vmovapd j-32(%rax), %ymm6
>   vmulpd  h-32(%rax), %ymm6, %ymm2
>   vmovapd a-32(%rax), %ymm6
>   vaddpd  p-32(%rax), %ymm6, %ymm0
>   vmovapd g-32(%rax), %ymm7
>   vfmadd231pd b-32(%rax), %ymm5, %ymm3
>   vmovapd o-32(%rax), %ymm4
>   vmulpd  m-32(%rax), %ymm4, %ymm1
>   vmovapd l-32(%rax), %ymm5
>   vfmadd231pd f-32(%rax), %ymm7, %ymm2
>   vfmadd231pd k-32(%rax), %ymm5, %ymm1
>   vaddpd  %ymm3, %ymm0, %ymm0
>   vaddpd  %ymm2, %ymm0, %ymm0
>   vaddpd  %ymm1, %ymm0, %ymm0
>   vmovapd %ymm0, a-32(%rax)
>   cmpq$8192, %rax
>   jne .L4
>   vzeroupper
>   ret
> 
> with this patch applied GCC breaks the chain with width = 2 and generates 6
> fma:
> 
>   vmovapd a(%rax), %ymm2
>   vmovapd c(%rax), %ymm0
>   addq$32, %rax
>   vmovapd e-32(%rax), %ymm1
>   vmovapd p-32(%rax), %ymm5
>   vmovapd g-32(%rax), %ymm3
>   vmovapd j-32(%rax), %ymm6
>   vmovapd l-32(%rax), %ymm4
>   vmovapd o-32(%rax), %ymm7
>   vfmadd132pd b-32(%rax), %ymm2, %ymm0
>   vfmadd132pd d-32(%rax), %ymm5, %ymm1
>   vfmadd231pd f-32(%rax), %ymm3, %ymm0
>   vfmadd231pd h-32(%rax), %ymm6, %ymm1
>   vfmadd231pd k-32(%rax), %ymm4, %ymm0
>   vfmadd231pd m-32(%rax), %ymm7, %ymm1
>   vaddpd  %ymm1, %ymm0, %ymm0
>   vmovapd %ymm0, a-32(%rax)
>   cmpq$8192, %rax
>   jne .L2
>   vzeroupper
>   ret
> 
> gcc/ChangeLog:
> 
>   PR gcc/98350
>   * tree-ssa-reassoc.cc
>   (rewrite_expr_tree_parallel): Rewrite this function.
>   (rank_ops_for_fma): New.
>   (reassociate_bb): Handle new function.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR gcc/98350
>   * gcc.dg/pr98350-1.c: New test.
>   * gcc.dg/pr98350-2.c: Ditto.
> ---
>  gcc/testsuite/gcc.dg/pr98350-1.c |  31 
>  gcc/testsuite/gcc.dg/pr98350-2.c |  11 ++
>  gcc/tree-ssa-reassoc.cc  | 256 +--
>  3 files changed, 215 insertions(+), 83 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr98350-1.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr98350-2.c
> 
> diff --git a/gcc/testsuite/gcc.dg/pr98350-1.c b/gcc/testsuite/gcc.dg/pr98350-
> 1.c
> new file mode 100644
> 

Re: [PATCH] LoongArch: Change jumptable's register constraint to 'q' [PR110136]

2023-06-06 Thread Lulu Cheng



在 2023/6/7 上午11:26, WANG Xuerui 写道:

Hi,

On 2023/6/7 10:31, Lulu Cheng wrote:
If the $ra register is modified during the jump to the jump table, the hardware
branch prediction function will be broken, resulting in a significant increase
in the branch false prediction rate and affecting performance.


Thanks for the insight! This is the kind of improvement that will 
probably become a lot harder to even *sight* without uarch details 
available.


However, I think it's better to also include a minimized test case to 
ensure the compiled code doesn't regress. (Comparison of relevant 
statistics, e.g. output of perf stat, would be even nicer to have!)


There was no way I could find a small test case that would replicate 
this problem. This occurs when compiling spec2006 400.perlbench. And it 
only appears when '-flto' is added.:-(


But I have provided a reproducer program and the compilation steps in 
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110136.




gcc/ChangeLog:

* config/loongarch/loongarch.md: Change register constraint to 'q'.
---
  gcc/config/loongarch/loongarch.md | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md

index 816a943d155..f9b64173104 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2926,9 +2926,11 @@ (define_expand "tablejump"
    DONE;
  })
  +;; Jump to the jump table Avoid using the $r1 register to prevent
+;; affecting hardware branch prediction.
  (define_insn "@tablejump"
    [(set (pc)
-    (match_operand:P 0 "register_operand" "r"))
+    (match_operand:P 0 "register_operand" "q"))
 (use (label_ref (match_operand 1 "" "")))]
    ""
    "jr\t%0"




Re: [PATCH] LoongArch: Change jumptable's register constraint to 'q' [PR110136]

2023-06-06 Thread WANG Xuerui

Hi,

On 2023/6/7 10:31, Lulu Cheng wrote:

If the $ra register is modified during the jump to the jump table, the hardware
branch prediction function will be broken, resulting in a significant increase
in the branch false prediction rate and affecting performance.


Thanks for the insight! This is the kind of improvement that will 
probably become a lot harder to even *sight* without uarch details 
available.


However, I think it's better to also include a minimized test case to 
ensure the compiled code doesn't regress. (Comparison of relevant 
statistics, e.g. output of perf stat, would be even nicer to have!)




gcc/ChangeLog:

* config/loongarch/loongarch.md: Change register constraint to 'q'.
---
  gcc/config/loongarch/loongarch.md | 4 +++-
  1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 816a943d155..f9b64173104 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2926,9 +2926,11 @@ (define_expand "tablejump"
DONE;
  })
  
+;; Jump to the jump table Avoid using the $r1 register to prevent

+;; affecting hardware branch prediction.
  (define_insn "@tablejump"
[(set (pc)
-   (match_operand:P 0 "register_operand" "r"))
+   (match_operand:P 0 "register_operand" "q"))
 (use (label_ref (match_operand 1 "" "")))]
""
"jr\t%0"


Re: Re: [PATCH] RISC-V: Support RVV VLA SLP auto-vectorization

2023-06-06 Thread juzhe.zh...@rivai.ai
Hi, Thanks kito..

I have added comments as you suggested.

>> Do we have check builder.npatterns () must be power of 2 in somewhere?
I also added:
  /* We don't enable SLP for non-power of 2 NPATTERNS.  */
  if (!pow2p_hwi (d->perm.encoding().npatterns ()))
return false;

too.

This makes sure we won't break and cause an ICE.

Will commit soon.


juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-06-07 10:38
To: juzhe.zh...@rivai.ai
CC: gcc-patches; Kito.cheng; palmer; palmer; jeffreyalaw; Robin Dapp; pan2.li
Subject: Re: [PATCH] RISC-V: Support RVV VLA SLP auto-vectorization
A few comments, but all of them are asking for more comments :P
 
> @@ -398,6 +410,48 @@ rvv_builder::get_merge_scalar_mask (unsigned int 
> index_in_pattern) const
>return gen_int_mode (mask, inner_int_mode ());
>  }
>
> +/* Return true if the variable-length vector is single step.  */
> +bool
> +rvv_builder::single_step_npatterns_p () const
 
What is single_step_npatterns? Could you add more comments?
 
> +{
> +  if (nelts_per_pattern () != 3)
> +return false;
> +
> +  poly_int64 step
> += rtx_to_poly_int64 (elt (npatterns ())) - rtx_to_poly_int64 (elt (0));
> +  for (unsigned int i = 0; i < npatterns (); i++)
> +{
> +  poly_int64 ele0 = rtx_to_poly_int64 (elt (i));
> +  poly_int64 ele1 = rtx_to_poly_int64 (elt (npatterns () + i));
> +  poly_int64 ele2 = rtx_to_poly_int64 (elt (npatterns () * 2 + i));
> +  poly_int64 diff1 = ele1 - ele0;
> +  poly_int64 diff2 = ele2 - ele1;
> +  if (maybe_ne (step, diff1) || maybe_ne (step, diff2))
> +   return false;
> +}
> +  return true;
> +}
> +
> +/* Return true if all elements of NPATTERNS are equal.
> +
> +   E.g. NPATTERNS = 4:
> + { 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, ... }
> +   E.g. NPATTERNS = 8:
> + { 2, 2, 2, 2, 2, 2, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, ... }
> +*/
> +bool
> +rvv_builder::npatterns_all_equal_p () const
> +{
> +  poly_int64 ele0 = rtx_to_poly_int64 (elt (0));
> +  for (unsigned int i = 1; i < npatterns (); i++)
> +{
> +  poly_int64 ele = rtx_to_poly_int64 (elt (i));
> +  if (!known_eq (ele, ele0))
> +   return false;
> +}
> +  return true;
> +}
> +
>  static unsigned
>  get_sew (machine_mode mode)
>  {
> @@ -425,7 +479,7 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT 
> minval,
> future.  */
>
>  static bool
> -const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, HOST_WIDE_INT 
> maxval)
> +const_vec_all_in_range_p (rtx vec, poly_int64 minval, poly_int64 maxval)
>  {
>if (!CONST_VECTOR_P (vec)
>|| GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
> @@ -440,8 +494,10 @@ const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, 
> HOST_WIDE_INT maxval)
>for (int i = 0; i < nunits; i++)
>  {
>rtx vec_elem = CONST_VECTOR_ELT (vec, i);
> -  if (!CONST_INT_P (vec_elem)
> - || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
> +  poly_int64 value;
> +  if (!poly_int_rtx_p (vec_elem, &value)
> + || maybe_lt (value, minval)
> + || maybe_gt (value, maxval))
> return false;
>  }
>return true;
> @@ -453,7 +509,7 @@ const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, 
> HOST_WIDE_INT maxval)
> future.  */
>
>  static rtx
> -gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
> +gen_const_vector_dup (machine_mode mode, poly_int64 val)
>  {
>rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
>return gen_const_vec_duplicate (mode, c);
> @@ -727,7 +783,10 @@ emit_vlmax_gather_insn (rtx target, rtx op, rtx sel)
>rtx elt;
>insn_code icode;
>machine_mode data_mode = GET_MODE (target);
> -  if (const_vec_duplicate_p (sel, &elt))
> +  machine_mode sel_mode = GET_MODE (sel);
> +  if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
> +icode = code_for_pred_gatherei16 (data_mode);
> +  else if (const_vec_duplicate_p (sel, &elt))
>  {
>icode = code_for_pred_gather_scalar (data_mode);
>sel = elt;
> @@ -744,7 +803,10 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, 
> rtx sel, rtx mask)
>rtx elt;
>insn_code icode;
>machine_mode data_mode = GET_MODE (target);
> -  if (const_vec_duplicate_p (sel, &elt))
> +  machine_mode sel_mode = GET_MODE (sel);
> +  if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
> +icode = code_for_pred_gatherei16 (data_mode);
> +  else if (const_vec_duplicate_p (sel, &elt))
>  {
>icode = code_for_pred_gather_scalar (data_mode);
>sel = elt;
> @@ -895,11 +957,130 @@ expand_const_vector (rtx target, rtx src)
>return;
>  }
>
> -  /* TODO: We only support const duplicate vector for now. More cases
> - will be supported when we support auto-vectorization:
> +  /* Handle variable-length vector.  */
> +  unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
> +  unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
> +  rvv_builder builder (mode, npatterns, 

[PATCH V2] RISC-V: Support RVV VLA SLP auto-vectorization

2023-06-06 Thread juzhe . zhong
From: Juzhe-Zhong 

This patch enables basic VLA SLP auto-vectorization.
Consider the following case:
void
f (uint8_t *restrict a, uint8_t *restrict b)
{
  for (int i = 0; i < 100; ++i)
{
  a[i * 8 + 0] = b[i * 8 + 7] + 1;
  a[i * 8 + 1] = b[i * 8 + 7] + 2;
  a[i * 8 + 2] = b[i * 8 + 7] + 8;
  a[i * 8 + 3] = b[i * 8 + 7] + 4;
  a[i * 8 + 4] = b[i * 8 + 7] + 5;
  a[i * 8 + 5] = b[i * 8 + 7] + 6;
  a[i * 8 + 6] = b[i * 8 + 7] + 7;
  a[i * 8 + 7] = b[i * 8 + 7] + 3;
}
}

To enable VLA SLP auto-vectorization, we need to be able to handle the 
following const vectors:

1. NPATTERNS = 8, NELTS_PER_PATTERN = 3.
{ 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 
16, ... }

2. NPATTERNS = 8, NELTS_PER_PATTERN = 1. 
{ 1, 2, 8, 4, 5, 6, 7, 3, ... }

These vectors can be generated in the prologue.

After this patch, we end up with the following codegen:

Prologue:
...
vsetvli a7,zero,e16,m2,ta,ma
vid.v   v4
vsrl.vi v4,v4,3
li  a3,8
vmul.vx v4,v4,a3  ===> v4 = { 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 
8, 8, 16, 16, 16, 16, 16, 16, 16, 16, ... }
...
li  t1,67633152
addit1,t1,513
li  a3,50790400
addia3,a3,1541
sllia3,a3,32
add a3,a3,t1
vsetvli t1,zero,e64,m1,ta,ma
vmv.v.x v3,a3   ===> v3 = { 1, 2, 8, 4, 5, 6, 7, 3, ... }
...
LoopBody:
...
min a3,...
vsetvli zero,a3,e8,m1,ta,ma
vle8.v  v2,0(a6)
vsetvli a7,zero,e8,m1,ta,ma
vrgatherei16.vv v1,v2,v4
vadd.vv v1,v1,v3
vsetvli zero,a3,e8,m1,ta,ma
vse8.v  v1,0(a2)
add a6,a6,a4
add a2,a2,a4
mv  a3,a5
add a5,a5,t1
bgtua3,a4,.L3
...

Note: we need to use "vrgatherei16.vv" instead of "vrgather.vv" for SEW = 8 
since "vrgatherei16.vv" can cover a larger
  range of indices than "vrgather.vv" (whose maximum element index is only 255).
Epilogue:
lbu a5,799(a1)
addiw   a4,a5,1
sb  a4,792(a0)
addiw   a4,a5,2
sb  a4,793(a0)
addiw   a4,a5,8
sb  a4,794(a0)
addiw   a4,a5,4
sb  a4,795(a0)
addiw   a4,a5,5
sb  a4,796(a0)
addiw   a4,a5,6
sb  a4,797(a0)
addiw   a4,a5,7
sb  a4,798(a0)
addiw   a5,a5,3
sb  a5,799(a0)
ret

One last thing we still need to do is the "Epilogue auto-vectorization", 
which needs VLS modes support.
I will support VLS modes for "Epilogue auto-vectorization" in the future.

gcc/ChangeLog:

* config/riscv/riscv-protos.h (expand_vec_perm_const): New function.
* config/riscv/riscv-v.cc 
(rvv_builder::can_duplicate_repeating_sequence_p): Support POLY handling.
(rvv_builder::single_step_npatterns_p): New function.
(rvv_builder::npatterns_all_equal_p): Ditto.
(const_vec_all_in_range_p): Support POLY handling.
(gen_const_vector_dup): Ditto.
(emit_vlmax_gather_insn): Add vrgatherei16.
(emit_vlmax_masked_gather_mu_insn): Ditto.
(expand_const_vector): Add VLA SLP const vector support.
(expand_vec_perm): Support POLY.
(struct expand_vec_perm_d): New struct.
(shuffle_generic_patterns): New function.
(expand_vec_perm_const_1): Ditto.
(expand_vec_perm_const): Ditto.
* config/riscv/riscv.cc (riscv_vectorize_vec_perm_const): Ditto.
(TARGET_VECTORIZE_VEC_PERM_CONST): New targethook.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/autovec/scalable-1.c: Adapt testcase for VLA 
vectorizer.
* gcc.target/riscv/rvv/autovec/v-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64d-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64f-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/partial/slp-1.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-2.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-3.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-4.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-5.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-6.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-7.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-1.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-2.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-3.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp_run-4.c: New test.
* 

Re: [PATCH] Fortran: add Fortran 2018 IEEE_{MIN,MAX} functions

2023-06-06 Thread Steve Kargl via Gcc-patches
On Tue, Jun 06, 2023 at 09:35:26PM +0200, FX Coudert wrote:
> Hi Steve,
> 
> I am not subscribed to the list (too little time, sadly), please keep me in 
> CC of your responses.
> 

Unfortunately, fx is using a gmail account.  Emails from my
system to @gmail.com users are routinely and silently rejected.


> > 1. You added fmin, fmax, and friends.  Are these used 
> >internally by gfortran in support of the IEEE_*
> >functions or are these exposed to the user?
> 
> The math builtins are added to the front-end, and use for
> code generation.  In conv_intrinsic_ieee_minmax(), you can
> see we find the right function using things like:
> builtin_decl_explicit (BUILT_IN_ISSIGNALING)


This answers my question 1.  I think the patch can be committed
after you've given time for Harald to answer your question about
REAL(17).  I think it isn't important, as all of the other ieee
testcases would be broken on powerpc.

-- 
Steve


[Bug target/97784] Expressions evaluated as long chain instead of as tree or the like

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=97784

Andrew Pinski  changed:

   What|Removed |Added

 Status|UNCONFIRMED |NEW
 Ever confirmed|0   |1
   Last reconfirmed||2023-06-07

[Bug rtl-optimization/68274] __builtin_unreachable pessimizes code

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68274

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |13.0
 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #4 from Andrew Pinski  ---
Fixed in GCC 13 by vrp2 removing the __builtin_unreachable at that point and
allowing a later phiopt to do the job.

[Bug middle-end/110117] [14 Regression] ICE on valid code at -O1 with "-ftree-vrp -fno-tree-ccp -fno-tree-forwprop": in as_a, at machmode.h:381

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110117

Andrew Pinski  changed:

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

--- Comment #10 from Andrew Pinski  ---
Fixed.

[Bug middle-end/110117] [14 Regression] ICE on valid code at -O1 with "-ftree-vrp -fno-tree-ccp -fno-tree-forwprop": in as_a, at machmode.h:381

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110117

--- Comment #9 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:3f085e45755643f13d4fa45a12a6ade45be98f95

commit r14-1601-g3f085e45755643f13d4fa45a12a6ade45be98f95
Author: Andrew Pinski 
Date:   Sun Jun 4 19:42:08 2023 -0700

Handle const_int in expand_single_bit_test

After expanding directly to rtl instead of
creating a tree, we could end up with
a const_int which is not ready to be handled
by extract_bit_field.
So we need to do the constant folding here instead.

OK? bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR middle-end/110117

gcc/ChangeLog:

* expr.cc (expand_single_bit_test): Handle
const_int from expand_expr.

gcc/testsuite/ChangeLog:

* gcc.dg/pr110117-1.c: New test.
* gcc.dg/pr110117-2.c: New test.

[Bug middle-end/110117] [14 Regression] ICE on valid code at -O1 with "-ftree-vrp -fno-tree-ccp -fno-tree-forwprop": in as_a, at machmode.h:381

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110117

--- Comment #8 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:e60593f3881c72a96a3fa4844d73e8a2cd14f670

commit r14-1600-ge60593f3881c72a96a3fa4844d73e8a2cd14f670
Author: Andrew Pinski 
Date:   Sun Jun 4 19:21:05 2023 -0700

Improve do_store_flag for single bit when there is no non-zero bits

In r14-1534-g908e5ab5c11c, I forgot you could turn off CCP or
turn off the bit tracking part of CCP so we would lose out
what TER was able to do before hand. This moves around the
TER code so that it is used instead of just the nonzerobits.
It also makes it easier to remove the TER part of the code
later on too.

OK? Bootstrapped and tested on x86_64-linux-gnu.

Note it reintroduces PR 110117 (which was accidentally fixed after
r14-1534-g908e5ab5c11c). The next patch in series will fix that.

gcc/ChangeLog:

* expr.cc (do_store_flag): Rearrange the
TER code so that it overrides the nonzero bits
info if we had `a & POW2`.

RE: [PATCH v1] RISC-V: Refactor ZVFHMIN to separated iterator and pattern

2023-06-06 Thread Li, Pan2 via Gcc-patches
Updated to PATCH v3, with rvv.exp/riscv.exp all passing, as below.

https://gcc.gnu.org/pipermail/gcc-patches/2023-June/620855.html

Pan

-Original Message-
From: Gcc-patches  On Behalf 
Of Li, Pan2 via Gcc-patches
Sent: Tuesday, June 6, 2023 11:34 PM
To: 钟居哲 ; gcc-patches 
Cc: kito.cheng ; Wang, Yanzhang 
Subject: RE: [PATCH v1] RISC-V: Refactor ZVFHMIN to separated iterator and 
pattern

Updated to PATCH V2, as below.

https://gcc.gnu.org/pipermail/gcc-patches/2023-June/620787.html

Pan

From: Li, Pan2
Sent: Tuesday, June 6, 2023 10:34 PM
To: 钟居哲 ; gcc-patches 
Cc: kito.cheng ; Wang, Yanzhang 
Subject: RE: [PATCH v1] RISC-V: Refactor ZVFHMIN to separated iterator and 
pattern

IMO, TARGET_ZVFH || TARGET_ZVFHMIN may be a little more readable compared to FP_16, 
unless there is some context I missed.
Anyway as we discussed offline, will refine this part and add zvfh part in V2.

Pan

From: 钟居哲 <juzhe.zh...@rivai.ai>
Sent: Tuesday, June 6, 2023 10:07 PM
To: Li, Pan2 <pan2...@intel.com>; gcc-patches 
<gcc-patches@gcc.gnu.org>
Cc: kito.cheng <kito.ch...@sifive.com>; Li, Pan2 
<pan2...@intel.com>; Wang, Yanzhang 
<yanzhang.w...@intel.com>
Subject: Re: [PATCH v1] RISC-V: Refactor ZVFHMIN to separated iterator and 
pattern

+(define_mode_iterator V_ZVFHMIN_SF [
+  (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || TARGET_ZVFHMIN) 
+&& TARGET_MIN_VLEN < 128")
+  (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || 
+TARGET_ZVFHMIN)")
+  (VNx4SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || 
+TARGET_ZVFHMIN)")
+  (VNx8SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || 
+TARGET_ZVFHMIN)")
+  (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || TARGET_ZVFHMIN) 
+&& TARGET_MIN_VLEN > 32")
+  (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || TARGET_ZVFHMIN) 
+&& TARGET_MIN_VLEN >= 128")
+])

why not just use "TARGET_VECTOR_ELEN_FP_16"
instead of TARGET_ZVFH || TARGET_ZVFHMIN ?



juzhe.zh...@rivai.ai

From: pan2.li
Date: 2023-06-06 20:36
To: gcc-patches
CC: juzhe.zhong; 
kito.cheng; pan2.li; 
yanzhang.wang
Subject: [PATCH v1] RISC-V: Refactor ZVFHMIN to separated iterator and pattern
From: Pan Li <pan2...@intel.com>

This patch refactors the ZVFHMIN implementation into a separate 
iterator and pattern. Thus, we can tell the ZVFHMIN and ZVFH sub-extensions 
apart.

Please note that ZVFH covers the ZVFHMIN instructions. This patch adds one 
test for this.

Signed-off-by: Pan Li <pan2...@intel.com>

gcc/ChangeLog:

* config/riscv/vector-iterators.md: Move ZVFHMIN related items to separated 
iterators.
* config/riscv/vector.md (@pred_extend): New pattern for the ZVFHMIN 
instruction.
(@pred_trunc): Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c: New test.
---
gcc/config/riscv/vector-iterators.md  | 28 -
gcc/config/riscv/vector.md| 40 +++
.../riscv/rvv/base/zvfh-over-zvfhmin.c| 25 
3 files changed, 83 insertions(+), 10 deletions(-) create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c

diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index f4946d84449..986195489f2 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -497,13 +497,6 @@ (define_mode_iterator VWEXTI [
])
(define_mode_iterator VWEXTF [
-  (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
-  (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
-  (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
-  (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
-  (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
-  (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
-
   (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
   (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
   (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
@@ -511,6 +504,15 @@ (define_mode_iterator VWEXTF [
   (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
])
+(define_mode_iterator V_ZVFHMIN_SF [
+  (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || TARGET_ZVFHMIN) 
+&& TARGET_MIN_VLEN < 128")
+  (VNx2SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || 
+TARGET_ZVFHMIN)")
+  (VNx4SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || 
+TARGET_ZVFHMIN)")
+  (VNx8SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || 
+TARGET_ZVFHMIN)")
+  (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || TARGET_ZVFHMIN) 
+&& TARGET_MIN_VLEN > 32")
+  (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && (TARGET_ZVFH || TARGET_ZVFHMIN) 
+&& TARGET_MIN_VLEN >= 128")
+])
+
(define_mode_iterator VWCONVERTI [
   (VNx1SI "TARGET_MIN_VLEN < 128 && TARGET_VECTOR_ELEN_FP_16")
   (VNx2SI "TARGET_VECTOR_ELEN_FP_16")
@@ -1175,12 +1177,19 @@ (define_mode_attr 

Re: [RFA] Improve strcmp expansion when one input is a constant string.

2023-06-06 Thread Jeff Law via Gcc-patches




On 6/6/23 00:47, Richard Biener wrote:



I wonder if there's some more generic target macro we can key the
behavior off - SLOW_BYTE_ACCESS isn't a good fit, WORD_REGISTER_OPERATIONS
is maybe closer but its exact implications are unknown to me.  Maybe
there's something else as well ...
LOAD_EXTEND_OP might help here, at least on some targets.  Though not on 
x86.




The point about OPTAB_WIDEN above was that I wonder why we
extend 'op0' and 'op1' before emitting the binop when we allow WIDEN
anyway. 
Ahh.  I misunderstood.  However, I think dropping the pre-widening will 
result in byte ops on x86 which may not be wise given the partial 
register stall problem that exists on some variants.




 Yes, we want the result in 'mode' (but why?  As you say we

can extend at the end) and there's likely no way to tell expand_simple_binop
to "expand as needed and not narrow the result".  So I wonder if we should
emulate that somehow (also taking into consideration the compare).
That's what I felt I was starting to build.  Essentially looking at 
costing (and probably other stuff eventually, like the ability to 
compare/branch on narrower modes) to make a determination about whether 
or not to do the operations in narrow or wider modes.  With the costing 
so mucked up on x86 though, I'm hesitant to pursue this path further at 
this time.


Jeff


[Bug tree-optimization/14753] [tree-ssa] some missed forward propagation opportunities

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=14753
Bug 14753 depends on bug 110134, which changed state.

Bug 110134 Summary: [10/11/12/13/14 Regression] (-unsigned1) != CST is not 
optimized to unsigned1 != CST at the gimple level
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110134

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

[Bug tree-optimization/110134] [10/11/12/13/14 Regression] (-unsigned1) != CST is not optimized to unsigned1 != CST at the gimple level

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110134

Andrew Pinski  changed:

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
   Target Milestone|10.5|14.0
 Resolution|--- |FIXED

--- Comment #6 from Andrew Pinski  ---
Fixed on the trunk for GCC 14. I doubt the performance regression due to this
bug is noticeable, since it has not been noticed since GCC 6 (well, PR 14753
noticed it, but it was similar to this one; it has not been noticed in real
code yet).

[PATCH v3] RISC-V: Refactor requirement of ZVFH and ZVFHMIN.

2023-06-06 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch refactors the requirements of both ZVFH and ZVFHMIN.
The related define_insn patterns and iterators will take their
requirements based on ZVFHMIN and ZVFH.

Please note that ZVFH covers the ZVFHMIN instructions. This patch
adds one test for this.

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/vector-iterators.md: Add requirement to VF,
VWEXTF and VWCONVERTI, add V_CONVERT_F and VCONVERTF.
* config/riscv/vector.md: Adjust FP convert to V_CONVERT_F
and VCONVERTF.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c: New test.
---
 gcc/config/riscv/vector-iterators.md  | 68 +--
 gcc/config/riscv/vector.md| 46 ++---
 .../riscv/rvv/base/zvfh-over-zvfhmin.c| 25 +++
 3 files changed, 97 insertions(+), 42 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvfh-over-zvfhmin.c

diff --git a/gcc/config/riscv/vector-iterators.md 
b/gcc/config/riscv/vector-iterators.md
index f4946d84449..1dc82bd44d3 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -296,13 +296,13 @@ (define_mode_iterator VWI_ZVE32 [
 ])
 
 (define_mode_iterator VF [
-  (VNx1HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
-  (VNx2HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx4HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx8HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx16HF "TARGET_VECTOR_ELEN_FP_16")
-  (VNx32HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
-  (VNx64HF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
+  (VNx1HF "TARGET_ZVFH && TARGET_MIN_VLEN < 128")
+  (VNx2HF "TARGET_ZVFH")
+  (VNx4HF "TARGET_ZVFH")
+  (VNx8HF "TARGET_ZVFH")
+  (VNx16HF "TARGET_ZVFH")
+  (VNx32HF "TARGET_ZVFH && TARGET_MIN_VLEN > 32")
+  (VNx64HF "TARGET_ZVFH && TARGET_MIN_VLEN >= 128")
 
   (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
   (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
@@ -497,12 +497,12 @@ (define_mode_iterator VWEXTI [
 ])
 
 (define_mode_iterator VWEXTF [
-  (VNx1SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN < 128")
-  (VNx2SF "TARGET_VECTOR_ELEN_FP_32")
-  (VNx4SF "TARGET_VECTOR_ELEN_FP_32")
-  (VNx8SF "TARGET_VECTOR_ELEN_FP_32")
-  (VNx16SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN > 32")
-  (VNx32SF "TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+  (VNx1SF "TARGET_ZVFH && TARGET_MIN_VLEN < 128")
+  (VNx2SF "TARGET_ZVFH")
+  (VNx4SF "TARGET_ZVFH")
+  (VNx8SF "TARGET_ZVFH")
+  (VNx16SF "TARGET_ZVFH && TARGET_MIN_VLEN > 32")
+  (VNx32SF "TARGET_ZVFH && TARGET_MIN_VLEN >= 128")
 
   (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
   (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
@@ -512,12 +512,12 @@ (define_mode_iterator VWEXTF [
 ])
 
 (define_mode_iterator VWCONVERTI [
-  (VNx1SI "TARGET_MIN_VLEN < 128 && TARGET_VECTOR_ELEN_FP_16")
-  (VNx2SI "TARGET_VECTOR_ELEN_FP_16")
-  (VNx4SI "TARGET_VECTOR_ELEN_FP_16")
-  (VNx8SI "TARGET_VECTOR_ELEN_FP_16")
-  (VNx16SI "TARGET_MIN_VLEN > 32 && TARGET_VECTOR_ELEN_FP_16")
-  (VNx32SI "TARGET_MIN_VLEN >= 128 && TARGET_VECTOR_ELEN_FP_16")
+  (VNx1SI "TARGET_ZVFH && TARGET_MIN_VLEN < 128")
+  (VNx2SI "TARGET_ZVFH")
+  (VNx4SI "TARGET_ZVFH")
+  (VNx8SI "TARGET_ZVFH")
+  (VNx16SI "TARGET_ZVFH && TARGET_MIN_VLEN > 32")
+  (VNx32SI "TARGET_ZVFH && TARGET_MIN_VLEN >= 128")
 
   (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && 
TARGET_MIN_VLEN < 128")
   (VNx2DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32")
@@ -526,6 +526,21 @@ (define_mode_iterator VWCONVERTI [
   (VNx16DI "TARGET_VECTOR_ELEN_64 && TARGET_VECTOR_ELEN_FP_32 && 
TARGET_MIN_VLEN >= 128")
 ])
 
+(define_mode_iterator VCONVERTF [
+  (VNx1SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN < 128")
+  (VNx2SF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx4SF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx8SF "TARGET_VECTOR_ELEN_FP_16")
+  (VNx16SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN > 32")
+  (VNx32SF "TARGET_VECTOR_ELEN_FP_16 && TARGET_MIN_VLEN >= 128")
+
+  (VNx1DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN < 128")
+  (VNx2DF "TARGET_VECTOR_ELEN_FP_64")
+  (VNx4DF "TARGET_VECTOR_ELEN_FP_64")
+  (VNx8DF "TARGET_VECTOR_ELEN_FP_64")
+  (VNx16DF "TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+])
+
 (define_mode_iterator VQEXTI [
   (VNx1SI "TARGET_MIN_VLEN < 128") VNx2SI VNx4SI VNx8SI (VNx16SI 
"TARGET_MIN_VLEN > 32") (VNx32SI "TARGET_MIN_VLEN >= 128")
   (VNx1DI "TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN < 128") (VNx2DI 
"TARGET_VECTOR_ELEN_64")
@@ -1181,6 +1196,21 @@ (define_mode_attr V_DOUBLE_TRUNC [
   (VNx16DF "VNx16SF")
 ])
 
+(define_mode_attr V_CONVERT_F [
+  (VNx1SF "VNx1HF")
+  (VNx2SF "VNx2HF")
+  (VNx4SF "VNx4HF")
+  (VNx8SF "VNx8HF")
+  (VNx16SF "VNx16HF")
+  (VNx32SF "VNx32HF")
+
+  (VNx1DF "VNx1SF")
+  (VNx2DF "VNx2SF")
+  (VNx4DF "VNx4SF")
+  (VNx8DF "VNx8SF")
+  (VNx16DF "VNx16SF")
+])
+
 (define_mode_attr V_QUAD_TRUNC [
   (VNx1SI "VNx1QI") (VNx2SI "VNx2QI") (VNx4SI "VNx4QI") (VNx8SI 

[Bug tree-optimization/110134] [10/11/12/13/14 Regression] (-unsigned1) != CST is not optimized to unsigned1 != CST at the gimple level

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110134

--- Comment #5 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:cc155ff9c38848a8e6a7125dd0b66ac0aef47880

commit r14-1599-gcc155ff9c38848a8e6a7125dd0b66ac0aef47880
Author: Andrew Pinski 
Date:   Mon Jun 5 19:12:43 2023 -0700

For the `-A CMP -B -> B CMP A` pattern allow EQ/NE for all integer types

I noticed while looking at some code generation issue, that forwprop
was not handling `-a == 0` for unsigned types and I was confused why
it was not.
r6-1814-g66e1cacf608045 removed these from fold because they
were supposed to be already handled by the match.pd patterns
but it was missed that the match.pd patterns checked
TYPE_OVERFLOW_UNDEFINED while fold didn't do that for NE/EQ.
This patch removes the restriction on NE/EQ on TYPE_OVERFLOW_UNDEFINED.

OK? Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/110134
* match.pd (-A CMP -B -> B CMP A): Allow EQ/NE for all integer
types.
(-A CMP CST -> B CMP (-CST)): Likewise.

gcc/testsuite/ChangeLog:

PR tree-optimization/110134
* gcc.dg/tree-ssa/negneq-1.c: New test.
* gcc.dg/tree-ssa/negneq-2.c: New test.
* gcc.dg/tree-ssa/negneq-3.c: New test.
* gcc.dg/tree-ssa/negneq-4.c: New test.

Re: [PATCH 2/2] Handle const_int in expand_single_bit_test

2023-06-06 Thread Jeff Law via Gcc-patches




On 6/4/23 23:53, Andrew Pinski via Gcc-patches wrote:

After expanding directly to rtl instead of
creating a tree, we could end up with
a const_int which is not ready to be handled
by extract_bit_field.
So we need to do the constant folding here instead.

OK? bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR middle-end/110117

gcc/ChangeLog:

* expr.cc (expand_single_bit_test): Handle
const_int from expand_expr.

gcc/testsuite/ChangeLog:

* gcc.dg/pr110117-1.c: New test.
* gcc.dg/pr110117-2.c: New test.
---
  gcc/expr.cc   | 10 +++---
  gcc/testsuite/gcc.dg/pr110117-1.c | 31 +++
  gcc/testsuite/gcc.dg/pr110117-2.c |  7 +++
  3 files changed, 45 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/gcc.dg/pr110117-1.c
  create mode 100644 gcc/testsuite/gcc.dg/pr110117-2.c

diff --git a/gcc/expr.cc b/gcc/expr.cc
index ca008cd453e..868d812eb1a 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -12958,12 +12958,16 @@ expand_single_bit_test (location_t loc, enum 
tree_code code,
  
rtx inner0 = expand_expr (inner, NULL_RTX, VOIDmode, EXPAND_NORMAL);
  
+  if (CONST_SCALAR_INT_P (inner0))

+{
+  wide_int t = rtx_mode_t (inner0, operand_mode);
+  bool setp = (wi::lrshift(t, bitnum) & 1) != 0;

Formatting nit.  Space before the open paren for wi::lrshift's args.

OK with that change.
jeff


[Bug tree-optimization/110151] New: warning: 'strncpy' output truncated copying 10 bytes from a string of length 26 [-Wstringop-truncation]

2023-06-06 Thread yinyuefengyi at gmail dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110151

Bug ID: 110151
   Summary: warning: 'strncpy' output truncated copying 10 bytes
from a string of length 26 [-Wstringop-truncation]
   Product: gcc
   Version: 14.0
Status: UNCONFIRMED
  Severity: normal
  Priority: P3
 Component: tree-optimization
  Assignee: unassigned at gcc dot gnu.org
  Reporter: yinyuefengyi at gmail dot com
  Target Milestone: ---

For the two cases below (https://godbolt.org/z/5rbMTeqW9), the warnings seem
to be unnecessary false positives, since:

for foo1:
 memset has cleared the memory;

for foo2:
 though 'dest[11] = '\0';' is not the statement immediately following strncpy,
it does set the last element to NUL.

#include <stdio.h>
#include <string.h>

int foo1 () {
char src[40];
char dest[12];

memset(dest, '\0', sizeof(dest));
strcpy(src, "This is tutorialspoint.com");
strncpy(dest, src, 10);

printf("%s", dest);
return(0);
}

char a;
int foo2 () {
char src[40];
char dest[12];

strcpy(src, "This is tutorialspoint.com");
strncpy(dest, src, 10);
a = dest[0];
dest[11] = '\0';

printf("%s", dest);
return(0);
}

Re: [PATCH 1/2] Improve do_store_flag for single bit when there is no non-zero bits

2023-06-06 Thread Jeff Law via Gcc-patches




On 6/4/23 23:53, Andrew Pinski via Gcc-patches wrote:

In r14-1534-g908e5ab5c11c, I forgot you could turn off CCP or
turn off the bit tracking part of CCP so we would lose out on
what TER was able to do beforehand. This moves around the
TER code so that it is used instead of just the nonzerobits.
It also makes it easier to remove the TER part of the code
later on too.

Given that we want to kill TER, that seems like a good idea :-)



OK? Bootstrapped and tested on x86_64-linux-gnu.

Note it reintroduces PR 110117 (which was accidentally fixed after
r14-1534-g908e5ab5c11c). The next patch in series will fix that.

gcc/ChangeLog:

* expr.cc (do_store_flag): Rearrange the
TER code so that it overrides the nonzero bits
info if we had `a & POW2`.

OK.
jeff


Re: [PATCH] For the `-A CMP -B -> B CMP A` pattern allow EQ/NE for all integer types

2023-06-06 Thread Jeff Law via Gcc-patches




On 6/6/23 15:07, Andrew Pinski via Gcc-patches wrote:

I noticed while looking at some code generation issue, that forwprop
was not handling `-a == 0` for unsigned types and I was confused why
it was not.
r6-1814-g66e1cacf608045 removed these from fold because they
were supposed to be already handled by the match.pd patterns
but it was missed that the match.pd patterns checked
TYPE_OVERFLOW_UNDEFINED while fold didn't do that for NE/EQ.
This patch removes the restriction on NE/EQ on TYPE_OVERFLOW_UNDEFINED.

OK? Bootstrapped and tested on x86_64-linux-gnu.

gcc/ChangeLog:

PR tree-optimization/110134
* match.pd (-A CMP -B -> B CMP A): Allow EQ/NE for all integer
types.
(-A CMP CST -> B CMP (-CST)): Likewise.

gcc/testsuite/ChangeLog:

PR tree-optimization/110134
* gcc.dg/tree-ssa/negneq-1.c: New test.
* gcc.dg/tree-ssa/negneq-2.c: New test.
* gcc.dg/tree-ssa/negneq-3.c: New test.
* gcc.dg/tree-ssa/negneq-4.c: New test.

OK.
jeff


Re: [PATCH] libiberty: writeargv: Simplify function error mode.

2023-06-06 Thread Jeff Law via Gcc-patches




On 6/6/23 02:44, Costas Argyris wrote:

You are right, this is also a remnant of the old function design
that I completely missed.    Here is the follow-up patch for that.

Thanks for pointing it out.

Costas

On Tue, 6 Jun 2023 at 04:12, Jeff Law wrote:




On 6/5/23 08:37, Costas Argyris via Gcc-patches wrote:
 > writeargv can be simplified by getting rid of the error exit mode
 > that was only relevant many years ago when the function used
 > to open the file descriptor internally.
[ ... ]
Thanks.  I've pushed this to the trunk.

You could (as a follow-up) simplify it even further.  There's no need
for the status variable as far as I can tell.  You could just have the
final return be "return 0;" instead of "return status;".

Jeff


0001-libiberty-writeargv-Remove-unnecessary-status-variab.patch

 From 13fdfea60eeac64e028315392614b955e998487d Mon Sep 17 00:00:00 2001
From: Costas Argyris
Date: Tue, 6 Jun 2023 09:15:48 +0100
Subject: [PATCH] libiberty: writeargv: Remove unnecessary status variable.

Follow-up from 4d1e4ce986f pointed out by jlaw.

Signed-off-by: Costas Argyris
Thanks.  I created a ChangeLog entry and committed this change to the trunk.

Jeff


[Bug tree-optimization/105903] Missed optimization for __synth3way

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105903

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|ASSIGNED|RESOLVED
   Target Milestone|--- |14.0

--- Comment #8 from Andrew Pinski  ---
Fixed.

[Bug tree-optimization/105903] Missed optimization for __synth3way

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105903
Bug 105903 depends on bug 89263, which changed state.

Bug 89263 Summary: Simplify bool expression to OR
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89263

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

[Bug tree-optimization/94898] Failure to optimize compare plus sub of same operands into compare

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94898
Bug 94898 depends on bug 89263, which changed state.

Bug 89263 Summary: Simplify bool expression to OR
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89263

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

[Bug tree-optimization/20083] Missed optimization with conditional and basically ||

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=20083
Bug 20083 depends on bug 89263, which changed state.

Bug 89263 Summary: Simplify bool expression to OR
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89263

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

[Bug tree-optimization/89263] Simplify bool expression to OR

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89263

Andrew Pinski  changed:

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED
   Target Milestone|--- |14.0

--- Comment #12 from Andrew Pinski  ---
Fixed.

[Bug tree-optimization/20083] Missed optimization with conditional and basically ||

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=20083

Andrew Pinski  changed:

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED
   Target Milestone|--- |14.0

--- Comment #9 from Andrew Pinski  ---
Fixed finally (after 18 years of messing around with PHI-OPT) :).

[Bug middle-end/19987] [meta-bug] fold missing optimizations in general

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=19987
Bug 19987 depends on bug 94898, which changed state.

Bug 94898 Summary: Failure to optimize compare plus sub of same operands into 
compare
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94898

   What|Removed |Added

 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

[Bug tree-optimization/94898] Failure to optimize compare plus sub of same operands into compare

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94898

Andrew Pinski  changed:

   What|Removed |Added

   Target Milestone|--- |14.0
 Status|ASSIGNED|RESOLVED
 Resolution|--- |FIXED

--- Comment #7 from Andrew Pinski  ---
Fixed.

[Bug tree-optimization/94898] Failure to optimize compare plus sub of same operands into compare

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94898

--- Comment #6 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:64d90d06d2db43538c8a45adbb3d74842f7868ae

commit r14-1597-g64d90d06d2db43538c8a45adbb3d74842f7868ae
Author: Andrew Pinski 
Date:   Wed May 24 07:08:45 2023 +

Add match patterns for `a ? onezero : onezero` where one of the two
operands are constant

This adds match patterns for boolean values
that optimize `a ? onezero : 0` to `a & onezero` and
`a ? 1 : onezero` to `a | onezero`.

This was reported a few times and I thought I would finally
add the match pattern for this.

This hits a few times in GCC itself too.

Notes on the testcases:
* phi-opt-2.c: This now is optimized to `a & b` in phiopt rather than
ifcombine
* phi-opt-25b.c: The test part that was failing was parity which now gets
`x & y` treatment.
* ssa-thread-21.c: there is no longer a threading opportunity, so need to
disable phiopt.
  Note PR 109957 is filed for the now missing optimization in that testcase
too.

gcc/ChangeLog:

PR tree-optimization/89263
PR tree-optimization/99069
PR tree-optimization/20083
PR tree-optimization/94898
* match.pd: Add patterns to optimize `a ? onezero : onezero` with
one of the operands are constant.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-2.c: Adjust the testcase.
* gcc.dg/tree-ssa/phi-opt-25b.c: Adjust the testcase.
* gcc.dg/tree-ssa/ssa-thread-21.c: Disable phiopt.
* gcc.dg/tree-ssa/phi-opt-27.c: New test.
* gcc.dg/tree-ssa/phi-opt-28.c: New test.
* gcc.dg/tree-ssa/phi-opt-29.c: New test.
* gcc.dg/tree-ssa/phi-opt-30.c: New test.
* gcc.dg/tree-ssa/phi-opt-31.c: New test.
* gcc.dg/tree-ssa/phi-opt-32.c: New test.

[Bug tree-optimization/20083] Missed optimization with conditional and basically ||

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=20083

--- Comment #8 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:64d90d06d2db43538c8a45adbb3d74842f7868ae

commit r14-1597-g64d90d06d2db43538c8a45adbb3d74842f7868ae
Author: Andrew Pinski 
Date:   Wed May 24 07:08:45 2023 +

Add match patterns for `a ? onezero : onezero` where one of the two
operands are constant

This adds match patterns for boolean values
that optimize `a ? onezero : 0` to `a & onezero` and
`a ? 1 : onezero` to `a | onezero`.

This was reported a few times and I thought I would finally
add the match pattern for this.

This hits a few times in GCC itself too.

Notes on the testcases:
* phi-opt-2.c: This now is optimized to `a & b` in phiopt rather than
ifcombine
* phi-opt-25b.c: The test part that was failing was parity which now gets
`x & y` treatment.
* ssa-thread-21.c: there is no longer a threading opportunity, so need to
disable phiopt.
  Note PR 109957 is filed for the now missing optimization in that testcase
too.

gcc/ChangeLog:

PR tree-optimization/89263
PR tree-optimization/99069
PR tree-optimization/20083
PR tree-optimization/94898
* match.pd: Add patterns to optimize `a ? onezero : onezero` with
one of the operands are constant.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-2.c: Adjust the testcase.
* gcc.dg/tree-ssa/phi-opt-25b.c: Adjust the testcase.
* gcc.dg/tree-ssa/ssa-thread-21.c: Disable phiopt.
* gcc.dg/tree-ssa/phi-opt-27.c: New test.
* gcc.dg/tree-ssa/phi-opt-28.c: New test.
* gcc.dg/tree-ssa/phi-opt-29.c: New test.
* gcc.dg/tree-ssa/phi-opt-30.c: New test.
* gcc.dg/tree-ssa/phi-opt-31.c: New test.
* gcc.dg/tree-ssa/phi-opt-32.c: New test.

[Bug tree-optimization/109957] Missing loop PHI optimization

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109957

--- Comment #2 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:64d90d06d2db43538c8a45adbb3d74842f7868ae

commit r14-1597-g64d90d06d2db43538c8a45adbb3d74842f7868ae
Author: Andrew Pinski 
Date:   Wed May 24 07:08:45 2023 +

Add match patterns for `a ? onezero : onezero` where one of the two
operands are constant

This adds match patterns for boolean values
that optimize `a ? onezero : 0` to `a & onezero` and
`a ? 1 : onezero` to `a | onezero`.

This was reported a few times and I thought I would finally
add the match pattern for this.

This hits a few times in GCC itself too.

Notes on the testcases:
* phi-opt-2.c: This now is optimized to `a & b` in phiopt rather than
ifcombine
* phi-opt-25b.c: The test part that was failing was parity which now gets
`x & y` treatment.
* ssa-thread-21.c: there is no longer a threading opportunity, so need to
disable phiopt.
  Note PR 109957 is filed for the now missing optimization in that testcase
too.

gcc/ChangeLog:

PR tree-optimization/89263
PR tree-optimization/99069
PR tree-optimization/20083
PR tree-optimization/94898
* match.pd: Add patterns to optimize `a ? onezero : onezero` with
one of the operands are constant.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-2.c: Adjust the testcase.
* gcc.dg/tree-ssa/phi-opt-25b.c: Adjust the testcase.
* gcc.dg/tree-ssa/ssa-thread-21.c: Disable phiopt.
* gcc.dg/tree-ssa/phi-opt-27.c: New test.
* gcc.dg/tree-ssa/phi-opt-28.c: New test.
* gcc.dg/tree-ssa/phi-opt-29.c: New test.
* gcc.dg/tree-ssa/phi-opt-30.c: New test.
* gcc.dg/tree-ssa/phi-opt-31.c: New test.
* gcc.dg/tree-ssa/phi-opt-32.c: New test.

[Bug tree-optimization/99069] Failure to optimize bool selection pattern

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99069

--- Comment #4 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:64d90d06d2db43538c8a45adbb3d74842f7868ae

commit r14-1597-g64d90d06d2db43538c8a45adbb3d74842f7868ae
Author: Andrew Pinski 
Date:   Wed May 24 07:08:45 2023 +

Add match patterns for `a ? onezero : onezero` where one of the two
operands are constant

This adds match patterns for boolean values
that optimize `a ? onezero : 0` to `a & onezero` and
`a ? 1 : onezero` to `a | onezero`.

This was reported a few times and I thought I would finally
add the match pattern for this.

This hits a few times in GCC itself too.

Notes on the testcases:
* phi-opt-2.c: This now is optimized to `a & b` in phiopt rather than
ifcombine
* phi-opt-25b.c: The test part that was failing was parity which now gets
`x & y` treatment.
* ssa-thread-21.c: there is no longer a threading opportunity, so need to
disable phiopt.
  Note PR 109957 is filed for the now missing optimization in that testcase
too.

gcc/ChangeLog:

PR tree-optimization/89263
PR tree-optimization/99069
PR tree-optimization/20083
PR tree-optimization/94898
* match.pd: Add patterns to optimize `a ? onezero : onezero` with
one of the operands are constant.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-2.c: Adjust the testcase.
* gcc.dg/tree-ssa/phi-opt-25b.c: Adjust the testcase.
* gcc.dg/tree-ssa/ssa-thread-21.c: Disable phiopt.
* gcc.dg/tree-ssa/phi-opt-27.c: New test.
* gcc.dg/tree-ssa/phi-opt-28.c: New test.
* gcc.dg/tree-ssa/phi-opt-29.c: New test.
* gcc.dg/tree-ssa/phi-opt-30.c: New test.
* gcc.dg/tree-ssa/phi-opt-31.c: New test.
* gcc.dg/tree-ssa/phi-opt-32.c: New test.

[Bug tree-optimization/89263] Simplify bool expression to OR

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89263

--- Comment #11 from CVS Commits  ---
The trunk branch has been updated by Andrew Pinski :

https://gcc.gnu.org/g:64d90d06d2db43538c8a45adbb3d74842f7868ae

commit r14-1597-g64d90d06d2db43538c8a45adbb3d74842f7868ae
Author: Andrew Pinski 
Date:   Wed May 24 07:08:45 2023 +

Add match patterns for `a ? onezero : onezero` where one of the two
operands are constant

This adds match patterns for boolean values
that optimize `a ? onezero : 0` to `a & onezero` and
`a ? 1 : onezero` to `a | onezero`.

This was reported a few times and I thought I would finally
add the match pattern for this.

This hits a few times in GCC itself too.

Notes on the testcases:
* phi-opt-2.c: This now is optimized to `a & b` in phiopt rather than
ifcombine
* phi-opt-25b.c: The test part that was failing was parity which now gets
`x & y` treatment.
* ssa-thread-21.c: there is no longer a threading opportunity, so need to
disable phiopt.
  Note PR 109957 is filed for the now missing optimization in that testcase
too.

gcc/ChangeLog:

PR tree-optimization/89263
PR tree-optimization/99069
PR tree-optimization/20083
PR tree-optimization/94898
* match.pd: Add patterns to optimize `a ? onezero : onezero` with
one of the operands are constant.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-2.c: Adjust the testcase.
* gcc.dg/tree-ssa/phi-opt-25b.c: Adjust the testcase.
* gcc.dg/tree-ssa/ssa-thread-21.c: Disable phiopt.
* gcc.dg/tree-ssa/phi-opt-27.c: New test.
* gcc.dg/tree-ssa/phi-opt-28.c: New test.
* gcc.dg/tree-ssa/phi-opt-29.c: New test.
* gcc.dg/tree-ssa/phi-opt-30.c: New test.
* gcc.dg/tree-ssa/phi-opt-31.c: New test.
* gcc.dg/tree-ssa/phi-opt-32.c: New test.

Re: [PATCH] riscv: Fix scope for memory model calculation

2023-06-06 Thread Andrew Pinski via Gcc-patches
On Mon, Jun 5, 2023 at 9:52 PM Dimitar Dimitrov  wrote:
>
> During libgcc configure stage for riscv32-none-elf, when
> "--enable-checking=yes,rtl" has been activated, the following error
> is observed:
>
>   configure:3814: 
> /home/dinux/projects/pru/local-workspace/riscv32-gcc-build/./gcc/xgcc 
> -B/home/dinux/projects/pru/local-workspace/riscv32-gcc-build/./gcc/ 
> -B/mnt/nvme/dinux/local-workspace/riscv32-opt/riscv32-none-elf/bin/ 
> -B/mnt/nvme/dinux/local-workspace/riscv32-opt/riscv32-none-elf/lib/ -isystem 
> /mnt/nvme/dinux/local-workspace/riscv32-opt/riscv32-none-elf/include -isystem 
> /mnt/nvme/dinux/local-workspace/riscv32-opt/riscv32-none-elf/sys-include
> -c -g -O2  conftest.c >&5
>   during RTL pass: final
>   conftest.c: In function 'main':
>   conftest.c:16:1: internal compiler error: RTL check: expected code 
> 'const_int', have 'reg' in riscv_print_operand, at config/riscv/riscv.cc:4462

Note this is recorded as https://gcc.gnu.org/PR109725 .

Thanks,
Andrew Pinski

>  16 | }
> | ^
>   0x843c4d rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int, 
> char const*)
>   /mnt/nvme/dinux/local-workspace/gcc/gcc/rtl.cc:916
>   0x8ea823 riscv_print_operand
>   /mnt/nvme/dinux/local-workspace/gcc/gcc/config/riscv/riscv.cc:4462
>   0xde84b5 output_operand(rtx_def*, int)
>   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:3632
>   0xde8ef8 output_asm_insn(char const*, rtx_def**)
>   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:3544
>   0xded33b output_asm_insn(char const*, rtx_def**)
>   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:3421
>   0xded33b final_scan_insn_1
>   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:2841
>   0xded6cb final_scan_insn(rtx_insn*, _IO_FILE*, int, int, int*)
>   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:2887
>   0xded8b7 final_1
>   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:1979
>   0xdee518 rest_of_handle_final
>   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:4240
>   0xdee518 execute
>   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:4318
>
> Fix by moving the calculation of memmodel to the cases where it is used.
>
> Regression tested for riscv32-none-elf. No changes in gcc.sum and
> g++.sum.  I don't have a setup to test riscv64.
>
> gcc/ChangeLog:
>
> * config/riscv/riscv.cc (riscv_print_operand): Calculate
> memmodel only when it is valid.
>
> Signed-off-by: Dimitar Dimitrov 
> ---
>  gcc/config/riscv/riscv.cc | 13 +
>  1 file changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
> index c15da1d0e30..fa4bc3e1f7e 100644
> --- a/gcc/config/riscv/riscv.cc
> +++ b/gcc/config/riscv/riscv.cc
> @@ -4459,7 +4459,6 @@ riscv_print_operand (FILE *file, rtx op, int letter)
>  }
>machine_mode mode = GET_MODE (op);
>enum rtx_code code = GET_CODE (op);
> -  const enum memmodel model = memmodel_base (INTVAL (op));
>
>switch (letter)
>  {
> @@ -4596,7 +4595,8 @@ riscv_print_operand (FILE *file, rtx op, int letter)
>fputs (GET_RTX_NAME (code), file);
>break;
>
> -case 'A':
> +case 'A': {
> +  const enum memmodel model = memmodel_base (INTVAL (op));
>if (riscv_memmodel_needs_amo_acquire (model)
>   && riscv_memmodel_needs_amo_release (model))
> fputs (".aqrl", file);
> @@ -4605,18 +4605,23 @@ riscv_print_operand (FILE *file, rtx op, int letter)
>else if (riscv_memmodel_needs_amo_release (model))
> fputs (".rl", file);
>break;
> +}
>
> -case 'I':
> +case 'I': {
> +  const enum memmodel model = memmodel_base (INTVAL (op));
>if (model == MEMMODEL_SEQ_CST)
> fputs (".aqrl", file);
>else if (riscv_memmodel_needs_amo_acquire (model))
> fputs (".aq", file);
>break;
> +}
>
> -case 'J':
> +case 'J': {
> +  const enum memmodel model = memmodel_base (INTVAL (op));
>if (riscv_memmodel_needs_amo_release (model))
> fputs (".rl", file);
>break;
> +}
>
>  case 'i':
>if (code != REG)
> --
> 2.40.1
>


Re: [PATCH] riscv: Fix insn cost calculation

2023-06-06 Thread Jeff Law via Gcc-patches




On 6/5/23 22:51, Dimitar Dimitrov wrote:

When building riscv32-none-elf with "--enable-checking=yes,rtl", the
following ICE is observed:

   cc1: internal compiler error: RTL check: expected code 'const_int', have 
'const_double' in riscv_const_insns, at config/riscv/riscv.cc:1313
   0x843c4d rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int, 
char const*)
   /mnt/nvme/dinux/local-workspace/gcc/gcc/rtl.cc:916
   0x8eab61 riscv_const_insns(rtx_def*)
   /mnt/nvme/dinux/local-workspace/gcc/gcc/config/riscv/riscv.cc:1313
   0x15443bb riscv_legitimate_constant_p
   /mnt/nvme/dinux/local-workspace/gcc/gcc/config/riscv/riscv.cc:826
   0xdd3c71 emit_move_insn(rtx_def*, rtx_def*)
   /mnt/nvme/dinux/local-workspace/gcc/gcc/expr.cc:4310
   0x15f28e5 run_const_vector_selftests
   
/mnt/nvme/dinux/local-workspace/gcc/gcc/config/riscv/riscv-selftests.cc:285
   0x15f37bd selftest::riscv_run_selftests()
   
/mnt/nvme/dinux/local-workspace/gcc/gcc/config/riscv/riscv-selftests.cc:364
   0x1f6fba9 selftest::run_tests()
   /mnt/nvme/dinux/local-workspace/gcc/gcc/selftest-run-tests.cc:111
   0x11d1f39 toplev::run_self_tests()
   /mnt/nvme/dinux/local-workspace/gcc/gcc/toplev.cc:2185

Fix by following the spirit of the adjacent comment, and using the
dedicated riscv_const_insns() function to calculate cost for loading a
constant element.  Infinite recursion is not possible because the first
invocation is on a CONST_VECTOR, whereas the second is on a single
element of the vector (e.g. CONST_INT or CONST_DOUBLE).

Regression tested for riscv32-none-elf. No changes in gcc.sum and
g++.sum.  I don't have setup to test riscv64.
I think most folks are using QEMU to test.  Though I think most are 
doing it in a rv64 environment.




gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_const_insns): Recursively call
for constant element of a vector.
OK for the trunk.  I don't think this code is in gcc-13, but also OK as 
a backport if I'm wrong and it is in gcc-13.


jeff


Re: [PATCH] RISC-V: Support RVV VLA SLP auto-vectorization

2023-06-06 Thread Kito Cheng via Gcc-patches
A few comments, but all of them are asking for more comments :P

> @@ -398,6 +410,48 @@ rvv_builder::get_merge_scalar_mask (unsigned int 
> index_in_pattern) const
>return gen_int_mode (mask, inner_int_mode ());
>  }
>
> +/* Return true if the variable-length vector is single step.  */
> +bool
> +rvv_builder::single_step_npatterns_p () const

What is single_step_npatterns? Could you add more comments?

> +{
> +  if (nelts_per_pattern () != 3)
> +return false;
> +
> +  poly_int64 step
> += rtx_to_poly_int64 (elt (npatterns ())) - rtx_to_poly_int64 (elt (0));
> +  for (unsigned int i = 0; i < npatterns (); i++)
> +{
> +  poly_int64 ele0 = rtx_to_poly_int64 (elt (i));
> +  poly_int64 ele1 = rtx_to_poly_int64 (elt (npatterns () + i));
> +  poly_int64 ele2 = rtx_to_poly_int64 (elt (npatterns () * 2 + i));
> +  poly_int64 diff1 = ele1 - ele0;
> +  poly_int64 diff2 = ele2 - ele1;
> +  if (maybe_ne (step, diff1) || maybe_ne (step, diff2))
> +   return false;
> +}
> +  return true;
> +}
> +
> +/* Return true if all elements of NPATTERNS are equal.
> +
> +   E.g. NPATTERNS = 4:
> + { 2, 2, 2, 2, 4, 4, 4, 4, 8, 8, 8, 8, 16, 16, 16, 16, ... }
> +   E.g. NPATTERNS = 8:
> + { 2, 2, 2, 2, 2, 2, 2, 2, 8, 8, 8, 8, 8, 8, 8, 8, ... }
> +*/
> +bool
> +rvv_builder::npatterns_all_equal_p () const
> +{
> +  poly_int64 ele0 = rtx_to_poly_int64 (elt (0));
> +  for (unsigned int i = 1; i < npatterns (); i++)
> +{
> +  poly_int64 ele = rtx_to_poly_int64 (elt (i));
> +  if (!known_eq (ele, ele0))
> +   return false;
> +}
> +  return true;
> +}
> +
>  static unsigned
>  get_sew (machine_mode mode)
>  {
> @@ -425,7 +479,7 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT 
> minval,
> future.  */
>
>  static bool
> -const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, HOST_WIDE_INT 
> maxval)
> +const_vec_all_in_range_p (rtx vec, poly_int64 minval, poly_int64 maxval)
>  {
>if (!CONST_VECTOR_P (vec)
>|| GET_MODE_CLASS (GET_MODE (vec)) != MODE_VECTOR_INT)
> @@ -440,8 +494,10 @@ const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, 
> HOST_WIDE_INT maxval)
>for (int i = 0; i < nunits; i++)
>  {
>rtx vec_elem = CONST_VECTOR_ELT (vec, i);
> -  if (!CONST_INT_P (vec_elem)
> - || !IN_RANGE (INTVAL (vec_elem), minval, maxval))
> +  poly_int64 value;
> +  if (!poly_int_rtx_p (vec_elem, &value)
> + || maybe_lt (value, minval)
> + || maybe_gt (value, maxval))
> return false;
>  }
>return true;
> @@ -453,7 +509,7 @@ const_vec_all_in_range_p (rtx vec, HOST_WIDE_INT minval, 
> HOST_WIDE_INT maxval)
> future.  */
>
>  static rtx
> -gen_const_vector_dup (machine_mode mode, HOST_WIDE_INT val)
> +gen_const_vector_dup (machine_mode mode, poly_int64 val)
>  {
>rtx c = gen_int_mode (val, GET_MODE_INNER (mode));
>return gen_const_vec_duplicate (mode, c);
> @@ -727,7 +783,10 @@ emit_vlmax_gather_insn (rtx target, rtx op, rtx sel)
>rtx elt;
>insn_code icode;
>machine_mode data_mode = GET_MODE (target);
> -  if (const_vec_duplicate_p (sel, &elt))
> +  machine_mode sel_mode = GET_MODE (sel);
> +  if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
> +icode = code_for_pred_gatherei16 (data_mode);
> +  else if (const_vec_duplicate_p (sel, &elt))
>  {
>icode = code_for_pred_gather_scalar (data_mode);
>sel = elt;
> @@ -744,7 +803,10 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, 
> rtx sel, rtx mask)
>rtx elt;
>insn_code icode;
>machine_mode data_mode = GET_MODE (target);
> -  if (const_vec_duplicate_p (sel, &elt))
> +  machine_mode sel_mode = GET_MODE (sel);
> +  if (maybe_ne (GET_MODE_SIZE (data_mode), GET_MODE_SIZE (sel_mode)))
> +icode = code_for_pred_gatherei16 (data_mode);
> +  else if (const_vec_duplicate_p (sel, &elt))
>  {
>icode = code_for_pred_gather_scalar (data_mode);
>sel = elt;
> @@ -895,11 +957,130 @@ expand_const_vector (rtx target, rtx src)
>return;
>  }
>
> -  /* TODO: We only support const duplicate vector for now. More cases
> - will be supported when we support auto-vectorization:
> +  /* Handle variable-length vector.  */
> +  unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (src);
> +  unsigned int npatterns = CONST_VECTOR_NPATTERNS (src);
> +  rvv_builder builder (mode, npatterns, nelts_per_pattern);
> +  for (unsigned int i = 0; i < nelts_per_pattern; i++)
> +{
> +  for (unsigned int j = 0; j < npatterns; j++)
> +   builder.quick_push (CONST_VECTOR_ELT (src, i * npatterns + j));
> +}
> +  builder.finalize ();
>
> -   1. multiple elts duplicate vector.
> -   2. multiple patterns with multiple elts.  */
> +  if (CONST_VECTOR_DUPLICATE_P (src))


I thought it was a predicate for a vector with the same value in every element,
like [a, a, a, a, ...], when I read the check, but it seems that is not the case?
Could you add more comments for that?

> +{
> +  if 

Re: [PATCH] riscv: Fix scope for memory model calculation

2023-06-06 Thread Jeff Law via Gcc-patches




On 6/5/23 22:51, Dimitar Dimitrov wrote:

During libgcc configure stage for riscv32-none-elf, when
"--enable-checking=yes,rtl" has been activated, the following error
is observed:

   configure:3814: 
/home/dinux/projects/pru/local-workspace/riscv32-gcc-build/./gcc/xgcc 
-B/home/dinux/projects/pru/local-workspace/riscv32-gcc-build/./gcc/ 
-B/mnt/nvme/dinux/local-workspace/riscv32-opt/riscv32-none-elf/bin/ 
-B/mnt/nvme/dinux/local-workspace/riscv32-opt/riscv32-none-elf/lib/ -isystem 
/mnt/nvme/dinux/local-workspace/riscv32-opt/riscv32-none-elf/include -isystem 
/mnt/nvme/dinux/local-workspace/riscv32-opt/riscv32-none-elf/sys-include -c -g -O2  
conftest.c >&5
   during RTL pass: final
   conftest.c: In function 'main':
   conftest.c:16:1: internal compiler error: RTL check: expected code 
'const_int', have 'reg' in riscv_print_operand, at config/riscv/riscv.cc:4462
  16 | }
 | ^
   0x843c4d rtl_check_failed_code1(rtx_def const*, rtx_code, char const*, int, 
char const*)
   /mnt/nvme/dinux/local-workspace/gcc/gcc/rtl.cc:916
   0x8ea823 riscv_print_operand
   /mnt/nvme/dinux/local-workspace/gcc/gcc/config/riscv/riscv.cc:4462
   0xde84b5 output_operand(rtx_def*, int)
   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:3632
   0xde8ef8 output_asm_insn(char const*, rtx_def**)
   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:3544
   0xded33b output_asm_insn(char const*, rtx_def**)
   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:3421
   0xded33b final_scan_insn_1
   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:2841
   0xded6cb final_scan_insn(rtx_insn*, _IO_FILE*, int, int, int*)
   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:2887
   0xded8b7 final_1
   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:1979
   0xdee518 rest_of_handle_final
   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:4240
   0xdee518 execute
   /mnt/nvme/dinux/local-workspace/gcc/gcc/final.cc:4318

Fix by moving the calculation of memmodel to the cases where it is used.

Regression tested for riscv32-none-elf. No changes in gcc.sum and
g++.sum.  I don't have setup to test riscv64.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_print_operand): Calculate
memmodel only when it is valid.
Good to see you poking around in the RISC-V world Dimitar!  Are you 
still poking at the PRU as well?


Anyway, this is fine for the trunk and for backporting to gcc-13 if the 
problem exists there as well.


jeff


Re: [PATCH 2/2] Add match patterns for `a ? onezero : onezero` where one of the two operands are constant

2023-06-06 Thread Jeff Law via Gcc-patches




On 6/6/23 18:17, Andrew Pinski via Gcc-patches wrote:

This adds match patterns for boolean values
that optimize `a ? onezero : 0` to `a & onezero` and
`a ? 1 : onezero` to `a | onezero`.

This was reported a few times and I thought I would finally
add the match pattern for this.

This hits a few times in GCC itself too.

Notes on the testcases:
* phi-opt-2.c: This now is optimized to `a & b` in phiopt rather than ifcombine
* phi-opt-25b.c: The test part that was failing was parity which now gets `x & 
y` treatment.
* ssa-thread-21.c: there is no longer a threading opportunity, so need to 
disable phiopt.
   Note PR 109957 is filed for the now missing optimization in that testcase 
too.

gcc/ChangeLog:

PR tree-optimization/89263
PR tree-optimization/99069
PR tree-optimization/20083
PR tree-optimization/94898
* match.pd: Add patterns to optimize `a ? onezero : onezero` where
one of the operands is constant.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-2.c: Adjust the testcase.
* gcc.dg/tree-ssa/phi-opt-25b.c: Adjust the testcase.
* gcc.dg/tree-ssa/ssa-thread-21.c: Disable phiopt.
* gcc.dg/tree-ssa/phi-opt-27.c: New test.
* gcc.dg/tree-ssa/phi-opt-28.c: New test.
* gcc.dg/tree-ssa/phi-opt-29.c: New test.
* gcc.dg/tree-ssa/phi-opt-30.c: New test.
* gcc.dg/tree-ssa/phi-opt-31.c: New test.
* gcc.dg/tree-ssa/phi-opt-32.c: New test.

OK.

tbird rendered the bool0^1 as bool0 to the first power, so I was briefly 
confused before I realized what was going on.


jeff


[PATCH] LoongArch: Change jumptable's register constraint to 'q' [PR110136]

2023-06-06 Thread Lulu Cheng
If the $ra register is modified during the jump to the jump table, hardware
branch prediction will be broken, resulting in a significant increase in the
branch misprediction rate and hurting performance.

gcc/ChangeLog:

* config/loongarch/loongarch.md: Change register constraint to 'q'.
---
 gcc/config/loongarch/loongarch.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 816a943d155..f9b64173104 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2926,9 +2926,11 @@ (define_expand "tablejump"
   DONE;
 })
 
+;; Jump to the jump table.  Avoid using the $r1 register to prevent
+;; affecting hardware branch prediction.
 (define_insn "@tablejump"
   [(set (pc)
-   (match_operand:P 0 "register_operand" "r"))
+   (match_operand:P 0 "register_operand" "q"))
(use (label_ref (match_operand 1 "" "")))]
   ""
   "jr\t%0"
-- 
2.31.1



RE: Re: [PATCH] RISC-V: Fix ICE when include riscv_vector.h with rv64gcv

2023-06-06 Thread Li, Pan2 via Gcc-patches
Committed, thanks Kito and Juzhe.

Pan

From: juzhe.zh...@rivai.ai 
Sent: Wednesday, June 7, 2023 10:26 AM
To: kito.cheng ; Li, Pan2 
Cc: gcc-patches ; Kito.cheng ; 
Wang, Yanzhang ; Robin Dapp 
Subject: Re: Re: [PATCH] RISC-V: Fix ICE when include riscv_vector.h with 
rv64gcv

LGTM.


juzhe.zh...@rivai.ai

From: Kito Cheng
Date: 2023-06-07 10:22
To: pan2.li
CC: gcc-patches; 
juzhe.zhong; 
kito.cheng; 
yanzhang.wang; 
rdapp.gcc
Subject: Re: [PATCH] RISC-V: Fix ICE when include riscv_vector.h with rv64gcv
lgtm, thanks for fixing this :)

On Wed, Jun 7, 2023 at 10:19 AM Pan Li via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> From: Pan Li <pan2...@intel.com>
>
> This patch would like to fix the incorrect requirement of the vector
> builtin types for the ZVFH/ZVFHMIN extension. The incorrect requirement
> will result in the ops mismatch with iterators, and then ICE will be
> triggered if ZVFH/ZVFHMIN is not given.
>
> Sorry for the inconvenience.
>
> Signed-off-by: Pan Li <pan2...@intel.com>
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-types.def
> (vfloat32mf2_t): Take RVV_REQUIRE_ELEN_FP_16 as requirement.
> (vfloat32m1_t): Ditto.
> (vfloat32m2_t): Ditto.
> (vfloat32m4_t): Ditto.
> (vfloat32m8_t): Ditto.
> (vint16mf4_t): Ditto.
> (vint16mf2_t): Ditto.
> (vint16m1_t): Ditto.
> (vint16m2_t): Ditto.
> (vint16m4_t): Ditto.
> (vint16m8_t): Ditto.
> (vuint16mf4_t): Ditto.
> (vuint16mf2_t): Ditto.
> (vuint16m1_t): Ditto.
> (vuint16m2_t): Ditto.
> (vuint16m4_t): Ditto.
> (vuint16m8_t): Ditto.
> (vint32mf2_t): Ditto.
> (vint32m1_t): Ditto.
> (vint32m2_t): Ditto.
> (vint32m4_t): Ditto.
> (vint32m8_t): Ditto.
> (vuint32mf2_t): Ditto.
> (vuint32m1_t): Ditto.
> (vuint32m2_t): Ditto.
> (vuint32m4_t): Ditto.
> (vuint32m8_t): Ditto.
> ---
>  .../riscv/riscv-vector-builtins-types.def | 66 +--
>  1 file changed, 33 insertions(+), 33 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def 
> b/gcc/config/riscv/riscv-vector-builtins-types.def
> index bd3deae8340..589ea532727 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-types.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-types.def
> @@ -518,23 +518,23 @@ DEF_RVV_FULL_V_U_OPS (vuint64m2_t, RVV_REQUIRE_FULL_V)
>  DEF_RVV_FULL_V_U_OPS (vuint64m4_t, RVV_REQUIRE_FULL_V)
>  DEF_RVV_FULL_V_U_OPS (vuint64m8_t, RVV_REQUIRE_FULL_V)
>
> -DEF_RVV_WEXTF_OPS (vfloat32mf2_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32 | 
> RVV_REQUIRE_MIN_VLEN_64)
> -DEF_RVV_WEXTF_OPS (vfloat32m1_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> -DEF_RVV_WEXTF_OPS (vfloat32m2_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> -DEF_RVV_WEXTF_OPS (vfloat32m4_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> -DEF_RVV_WEXTF_OPS (vfloat32m8_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> +DEF_RVV_WEXTF_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_16 | 
> RVV_REQUIRE_MIN_VLEN_64)
> +DEF_RVV_WEXTF_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_WEXTF_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_WEXTF_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_WEXTF_OPS (vfloat32m8_t, RVV_REQUIRE_ELEN_FP_16)
>
>  DEF_RVV_WEXTF_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64)
>  DEF_RVV_WEXTF_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64)
>  DEF_RVV_WEXTF_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64)
>  DEF_RVV_WEXTF_OPS (vfloat64m8_t, RVV_REQUIRE_ELEN_FP_64)
>
> -DEF_RVV_CONVERT_I_OPS (vint16mf4_t, TARGET_ZVFH | RVV_REQUIRE_MIN_VLEN_64)
> -DEF_RVV_CONVERT_I_OPS (vint16mf2_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m1_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m2_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m4_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m8_t, TARGET_ZVFH)
> +DEF_RVV_CONVERT_I_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
> RVV_REQUIRE_MIN_VLEN_64)
> +DEF_RVV_CONVERT_I_OPS (vint16mf2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m1_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m4_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m8_t, RVV_REQUIRE_ELEN_FP_16)
>
>  DEF_RVV_CONVERT_I_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
>  DEF_RVV_CONVERT_I_OPS (vint32m1_t, 0)
> @@ -546,12 +546,12 @@ DEF_RVV_CONVERT_I_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64)
>  DEF_RVV_CONVERT_I_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64)
>  DEF_RVV_CONVERT_I_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64)
>
> -DEF_RVV_CONVERT_U_OPS (vuint16mf4_t, TARGET_ZVFH | RVV_REQUIRE_MIN_VLEN_64)
> -DEF_RVV_CONVERT_U_OPS 

Re: [PATCH 1/2] Match: zero_one_valued_p should match 0 constants too

2023-06-06 Thread Jeff Law via Gcc-patches




On 6/6/23 18:17, Andrew Pinski via Gcc-patches wrote:

While working on `bool0 ? bool1 : bool2` I noticed that
zero_one_valued_p does not match on the constant zero
as in that case tree_nonzero_bits will return 0 and
that is different from 1.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* match.pd (zero_one_valued_p): Match 0 integer constant
too.

Presumably "1" is matched by the tree_nonzero_bits (@0) == 1.  So OK.

jeff


[Bug c++/98105] constexpr and unnamed namespace yields relocation R_X86_64_PC32 against undefined symbol ... can not be used when making a shared object; recompile with -fPIC

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98105

--- Comment #3 from Andrew Pinski  ---
Reduced:
```
void f(const int&);

namespace {
  struct myFactory
  {
static constexpr int s_sList = 1;
  };
}
//constexpr int myFactory::s_sList;
void sdi_register_model()
{
  f(myFactory::s_sList);
}
```

If we uncomment the definition, it works.

I think this is correct behavior; we should just produce a better error message
saying that only the declaration, not the definition, is present here.

Re: Re: [PATCH] RISC-V: Fix ICE when include riscv_vector.h with rv64gcv

2023-06-06 Thread juzhe.zh...@rivai.ai
LGTM.



juzhe.zh...@rivai.ai
 
From: Kito Cheng
Date: 2023-06-07 10:22
To: pan2.li
CC: gcc-patches; juzhe.zhong; kito.cheng; yanzhang.wang; rdapp.gcc
Subject: Re: [PATCH] RISC-V: Fix ICE when include riscv_vector.h with rv64gcv
lgtm, thanks for fixing this :)
 
On Wed, Jun 7, 2023 at 10:19 AM Pan Li via Gcc-patches
 wrote:
>
> From: Pan Li 
>
> This patch would like to fix the incorrect requirement of the vector
> builtin types for the ZVFH/ZVFHMIN extension. The incorrect requirement
> will result in the ops mismatch with iterators, and then ICE will be
> triggered if ZVFH/ZVFHMIN is not given.
>
> Sorry for the inconvenience.
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-types.def
> (vfloat32mf2_t): Take RVV_REQUIRE_ELEN_FP_16 as requirement.
> (vfloat32m1_t): Ditto.
> (vfloat32m2_t): Ditto.
> (vfloat32m4_t): Ditto.
> (vfloat32m8_t): Ditto.
> (vint16mf4_t): Ditto.
> (vint16mf2_t): Ditto.
> (vint16m1_t): Ditto.
> (vint16m2_t): Ditto.
> (vint16m4_t): Ditto.
> (vint16m8_t): Ditto.
> (vuint16mf4_t): Ditto.
> (vuint16mf2_t): Ditto.
> (vuint16m1_t): Ditto.
> (vuint16m2_t): Ditto.
> (vuint16m4_t): Ditto.
> (vuint16m8_t): Ditto.
> (vint32mf2_t): Ditto.
> (vint32m1_t): Ditto.
> (vint32m2_t): Ditto.
> (vint32m4_t): Ditto.
> (vint32m8_t): Ditto.
> (vuint32mf2_t): Ditto.
> (vuint32m1_t): Ditto.
> (vuint32m2_t): Ditto.
> (vuint32m4_t): Ditto.
> (vuint32m8_t): Ditto.
> ---
>  .../riscv/riscv-vector-builtins-types.def | 66 +--
>  1 file changed, 33 insertions(+), 33 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def 
> b/gcc/config/riscv/riscv-vector-builtins-types.def
> index bd3deae8340..589ea532727 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-types.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-types.def
> @@ -518,23 +518,23 @@ DEF_RVV_FULL_V_U_OPS (vuint64m2_t, RVV_REQUIRE_FULL_V)
>  DEF_RVV_FULL_V_U_OPS (vuint64m4_t, RVV_REQUIRE_FULL_V)
>  DEF_RVV_FULL_V_U_OPS (vuint64m8_t, RVV_REQUIRE_FULL_V)
>
> -DEF_RVV_WEXTF_OPS (vfloat32mf2_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32 | 
> RVV_REQUIRE_MIN_VLEN_64)
> -DEF_RVV_WEXTF_OPS (vfloat32m1_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> -DEF_RVV_WEXTF_OPS (vfloat32m2_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> -DEF_RVV_WEXTF_OPS (vfloat32m4_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> -DEF_RVV_WEXTF_OPS (vfloat32m8_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> +DEF_RVV_WEXTF_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_16 | 
> RVV_REQUIRE_MIN_VLEN_64)
> +DEF_RVV_WEXTF_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_WEXTF_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_WEXTF_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_WEXTF_OPS (vfloat32m8_t, RVV_REQUIRE_ELEN_FP_16)
>
>  DEF_RVV_WEXTF_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64)
>  DEF_RVV_WEXTF_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64)
>  DEF_RVV_WEXTF_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64)
>  DEF_RVV_WEXTF_OPS (vfloat64m8_t, RVV_REQUIRE_ELEN_FP_64)
>
> -DEF_RVV_CONVERT_I_OPS (vint16mf4_t, TARGET_ZVFH | RVV_REQUIRE_MIN_VLEN_64)
> -DEF_RVV_CONVERT_I_OPS (vint16mf2_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m1_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m2_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m4_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m8_t, TARGET_ZVFH)
> +DEF_RVV_CONVERT_I_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
> RVV_REQUIRE_MIN_VLEN_64)
> +DEF_RVV_CONVERT_I_OPS (vint16mf2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m1_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m4_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m8_t, RVV_REQUIRE_ELEN_FP_16)
>
>  DEF_RVV_CONVERT_I_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
>  DEF_RVV_CONVERT_I_OPS (vint32m1_t, 0)
> @@ -546,12 +546,12 @@ DEF_RVV_CONVERT_I_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64)
>  DEF_RVV_CONVERT_I_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64)
>  DEF_RVV_CONVERT_I_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64)
>
> -DEF_RVV_CONVERT_U_OPS (vuint16mf4_t, TARGET_ZVFH | RVV_REQUIRE_MIN_VLEN_64)
> -DEF_RVV_CONVERT_U_OPS (vuint16mf2_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_U_OPS (vuint16m1_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_U_OPS (vuint16m2_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_U_OPS (vuint16m4_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_U_OPS (vuint16m8_t, TARGET_ZVFH)
> +DEF_RVV_CONVERT_U_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
> RVV_REQUIRE_MIN_VLEN_64)
> +DEF_RVV_CONVERT_U_OPS (vuint16mf2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_U_OPS (vuint16m1_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_U_OPS (vuint16m2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_U_OPS (vuint16m4_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_U_OPS (vuint16m8_t, 

Re: [PATCH] RISC-V: Fix ICE when include riscv_vector.h with rv64gcv

2023-06-06 Thread Kito Cheng via Gcc-patches
lgtm, thanks for fixing this :)

On Wed, Jun 7, 2023 at 10:19 AM Pan Li via Gcc-patches
 wrote:
>
> From: Pan Li 
>
> This patch would like to fix the incorrect requirement of the vector
> builtin types for the ZVFH/ZVFHMIN extension. The incorrect requirement
> will result in the ops mismatch with iterators, and then ICE will be
> triggered if ZVFH/ZVFHMIN is not given.
>
> Sorry for the inconvenience.
>
> Signed-off-by: Pan Li 
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-vector-builtins-types.def
> (vfloat32mf2_t): Take RVV_REQUIRE_ELEN_FP_16 as requirement.
> (vfloat32m1_t): Ditto.
> (vfloat32m2_t): Ditto.
> (vfloat32m4_t): Ditto.
> (vfloat32m8_t): Ditto.
> (vint16mf4_t): Ditto.
> (vint16mf2_t): Ditto.
> (vint16m1_t): Ditto.
> (vint16m2_t): Ditto.
> (vint16m4_t): Ditto.
> (vint16m8_t): Ditto.
> (vuint16mf4_t): Ditto.
> (vuint16mf2_t): Ditto.
> (vuint16m1_t): Ditto.
> (vuint16m2_t): Ditto.
> (vuint16m4_t): Ditto.
> (vuint16m8_t): Ditto.
> (vint32mf2_t): Ditto.
> (vint32m1_t): Ditto.
> (vint32m2_t): Ditto.
> (vint32m4_t): Ditto.
> (vint32m8_t): Ditto.
> (vuint32mf2_t): Ditto.
> (vuint32m1_t): Ditto.
> (vuint32m2_t): Ditto.
> (vuint32m4_t): Ditto.
> (vuint32m8_t): Ditto.
> ---
>  .../riscv/riscv-vector-builtins-types.def | 66 +--
>  1 file changed, 33 insertions(+), 33 deletions(-)
>
> diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def 
> b/gcc/config/riscv/riscv-vector-builtins-types.def
> index bd3deae8340..589ea532727 100644
> --- a/gcc/config/riscv/riscv-vector-builtins-types.def
> +++ b/gcc/config/riscv/riscv-vector-builtins-types.def
> @@ -518,23 +518,23 @@ DEF_RVV_FULL_V_U_OPS (vuint64m2_t, RVV_REQUIRE_FULL_V)
>  DEF_RVV_FULL_V_U_OPS (vuint64m4_t, RVV_REQUIRE_FULL_V)
>  DEF_RVV_FULL_V_U_OPS (vuint64m8_t, RVV_REQUIRE_FULL_V)
>
> -DEF_RVV_WEXTF_OPS (vfloat32mf2_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32 | 
> RVV_REQUIRE_MIN_VLEN_64)
> -DEF_RVV_WEXTF_OPS (vfloat32m1_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> -DEF_RVV_WEXTF_OPS (vfloat32m2_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> -DEF_RVV_WEXTF_OPS (vfloat32m4_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> -DEF_RVV_WEXTF_OPS (vfloat32m8_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
> +DEF_RVV_WEXTF_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_16 | 
> RVV_REQUIRE_MIN_VLEN_64)
> +DEF_RVV_WEXTF_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_WEXTF_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_WEXTF_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_WEXTF_OPS (vfloat32m8_t, RVV_REQUIRE_ELEN_FP_16)
>
>  DEF_RVV_WEXTF_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64)
>  DEF_RVV_WEXTF_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64)
>  DEF_RVV_WEXTF_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64)
>  DEF_RVV_WEXTF_OPS (vfloat64m8_t, RVV_REQUIRE_ELEN_FP_64)
>
> -DEF_RVV_CONVERT_I_OPS (vint16mf4_t, TARGET_ZVFH | RVV_REQUIRE_MIN_VLEN_64)
> -DEF_RVV_CONVERT_I_OPS (vint16mf2_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m1_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m2_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m4_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_I_OPS (vint16m8_t, TARGET_ZVFH)
> +DEF_RVV_CONVERT_I_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
> RVV_REQUIRE_MIN_VLEN_64)
> +DEF_RVV_CONVERT_I_OPS (vint16mf2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m1_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m4_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_I_OPS (vint16m8_t, RVV_REQUIRE_ELEN_FP_16)
>
>  DEF_RVV_CONVERT_I_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
>  DEF_RVV_CONVERT_I_OPS (vint32m1_t, 0)
> @@ -546,12 +546,12 @@ DEF_RVV_CONVERT_I_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64)
>  DEF_RVV_CONVERT_I_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64)
>  DEF_RVV_CONVERT_I_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64)
>
> -DEF_RVV_CONVERT_U_OPS (vuint16mf4_t, TARGET_ZVFH | RVV_REQUIRE_MIN_VLEN_64)
> -DEF_RVV_CONVERT_U_OPS (vuint16mf2_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_U_OPS (vuint16m1_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_U_OPS (vuint16m2_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_U_OPS (vuint16m4_t, TARGET_ZVFH)
> -DEF_RVV_CONVERT_U_OPS (vuint16m8_t, TARGET_ZVFH)
> +DEF_RVV_CONVERT_U_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
> RVV_REQUIRE_MIN_VLEN_64)
> +DEF_RVV_CONVERT_U_OPS (vuint16mf2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_U_OPS (vuint16m1_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_U_OPS (vuint16m2_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_U_OPS (vuint16m4_t, RVV_REQUIRE_ELEN_FP_16)
> +DEF_RVV_CONVERT_U_OPS (vuint16m8_t, RVV_REQUIRE_ELEN_FP_16)
>
>  DEF_RVV_CONVERT_U_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
>  DEF_RVV_CONVERT_U_OPS (vuint32m1_t, 0)
> @@ -563,22 +563,22 @@ DEF_RVV_CONVERT_U_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64)
>  DEF_RVV_CONVERT_U_OPS 

[PATCH] RISC-V: Fix ICE when include riscv_vector.h with rv64gcv

2023-06-06 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch would like to fix the incorrect requirement of the vector
builtin types for the ZVFH/ZVFHMIN extension. The incorrect requirement
will result in the ops mismatch with iterators, and then ICE will be
triggered if ZVFH/ZVFHMIN is not given.

Sorry for the inconvenience.

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-types.def
(vfloat32mf2_t): Take RVV_REQUIRE_ELEN_FP_16 as requirement.
(vfloat32m1_t): Ditto.
(vfloat32m2_t): Ditto.
(vfloat32m4_t): Ditto.
(vfloat32m8_t): Ditto.
(vint16mf4_t): Ditto.
(vint16mf2_t): Ditto.
(vint16m1_t): Ditto.
(vint16m2_t): Ditto.
(vint16m4_t): Ditto.
(vint16m8_t): Ditto.
(vuint16mf4_t): Ditto.
(vuint16mf2_t): Ditto.
(vuint16m1_t): Ditto.
(vuint16m2_t): Ditto.
(vuint16m4_t): Ditto.
(vuint16m8_t): Ditto.
(vint32mf2_t): Ditto.
(vint32m1_t): Ditto.
(vint32m2_t): Ditto.
(vint32m4_t): Ditto.
(vint32m8_t): Ditto.
(vuint32mf2_t): Ditto.
(vuint32m1_t): Ditto.
(vuint32m2_t): Ditto.
(vuint32m4_t): Ditto.
(vuint32m8_t): Ditto.
---
 .../riscv/riscv-vector-builtins-types.def | 66 +--
 1 file changed, 33 insertions(+), 33 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def 
b/gcc/config/riscv/riscv-vector-builtins-types.def
index bd3deae8340..589ea532727 100644
--- a/gcc/config/riscv/riscv-vector-builtins-types.def
+++ b/gcc/config/riscv/riscv-vector-builtins-types.def
@@ -518,23 +518,23 @@ DEF_RVV_FULL_V_U_OPS (vuint64m2_t, RVV_REQUIRE_FULL_V)
 DEF_RVV_FULL_V_U_OPS (vuint64m4_t, RVV_REQUIRE_FULL_V)
 DEF_RVV_FULL_V_U_OPS (vuint64m8_t, RVV_REQUIRE_FULL_V)
 
-DEF_RVV_WEXTF_OPS (vfloat32mf2_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32 | 
RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_WEXTF_OPS (vfloat32m1_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
-DEF_RVV_WEXTF_OPS (vfloat32m2_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
-DEF_RVV_WEXTF_OPS (vfloat32m4_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
-DEF_RVV_WEXTF_OPS (vfloat32m8_t, TARGET_ZVFH | RVV_REQUIRE_ELEN_FP_32)
+DEF_RVV_WEXTF_OPS (vfloat32mf2_t, RVV_REQUIRE_ELEN_FP_16 | 
RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_WEXTF_OPS (vfloat32m1_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_WEXTF_OPS (vfloat32m2_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_WEXTF_OPS (vfloat32m4_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_WEXTF_OPS (vfloat32m8_t, RVV_REQUIRE_ELEN_FP_16)
 
 DEF_RVV_WEXTF_OPS (vfloat64m1_t, RVV_REQUIRE_ELEN_FP_64)
 DEF_RVV_WEXTF_OPS (vfloat64m2_t, RVV_REQUIRE_ELEN_FP_64)
 DEF_RVV_WEXTF_OPS (vfloat64m4_t, RVV_REQUIRE_ELEN_FP_64)
 DEF_RVV_WEXTF_OPS (vfloat64m8_t, RVV_REQUIRE_ELEN_FP_64)
 
-DEF_RVV_CONVERT_I_OPS (vint16mf4_t, TARGET_ZVFH | RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_CONVERT_I_OPS (vint16mf2_t, TARGET_ZVFH)
-DEF_RVV_CONVERT_I_OPS (vint16m1_t, TARGET_ZVFH)
-DEF_RVV_CONVERT_I_OPS (vint16m2_t, TARGET_ZVFH)
-DEF_RVV_CONVERT_I_OPS (vint16m4_t, TARGET_ZVFH)
-DEF_RVV_CONVERT_I_OPS (vint16m8_t, TARGET_ZVFH)
+DEF_RVV_CONVERT_I_OPS (vint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_CONVERT_I_OPS (vint16mf2_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_CONVERT_I_OPS (vint16m1_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_CONVERT_I_OPS (vint16m2_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_CONVERT_I_OPS (vint16m4_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_CONVERT_I_OPS (vint16m8_t, RVV_REQUIRE_ELEN_FP_16)
 
 DEF_RVV_CONVERT_I_OPS (vint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_CONVERT_I_OPS (vint32m1_t, 0)
@@ -546,12 +546,12 @@ DEF_RVV_CONVERT_I_OPS (vint64m2_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_CONVERT_I_OPS (vint64m4_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_CONVERT_I_OPS (vint64m8_t, RVV_REQUIRE_ELEN_64)
 
-DEF_RVV_CONVERT_U_OPS (vuint16mf4_t, TARGET_ZVFH | RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_CONVERT_U_OPS (vuint16mf2_t, TARGET_ZVFH)
-DEF_RVV_CONVERT_U_OPS (vuint16m1_t, TARGET_ZVFH)
-DEF_RVV_CONVERT_U_OPS (vuint16m2_t, TARGET_ZVFH)
-DEF_RVV_CONVERT_U_OPS (vuint16m4_t, TARGET_ZVFH)
-DEF_RVV_CONVERT_U_OPS (vuint16m8_t, TARGET_ZVFH)
+DEF_RVV_CONVERT_U_OPS (vuint16mf4_t, RVV_REQUIRE_ELEN_FP_16 | 
RVV_REQUIRE_MIN_VLEN_64)
+DEF_RVV_CONVERT_U_OPS (vuint16mf2_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_CONVERT_U_OPS (vuint16m1_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_CONVERT_U_OPS (vuint16m2_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_CONVERT_U_OPS (vuint16m4_t, RVV_REQUIRE_ELEN_FP_16)
+DEF_RVV_CONVERT_U_OPS (vuint16m8_t, RVV_REQUIRE_ELEN_FP_16)
 
 DEF_RVV_CONVERT_U_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64)
 DEF_RVV_CONVERT_U_OPS (vuint32m1_t, 0)
@@ -563,22 +563,22 @@ DEF_RVV_CONVERT_U_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_CONVERT_U_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64)
 DEF_RVV_CONVERT_U_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_64)
 
-DEF_RVV_WCONVERT_I_OPS (vint32mf2_t, TARGET_ZVFH | RVV_REQUIRE_MIN_VLEN_64)
-DEF_RVV_WCONVERT_I_OPS (vint32m1_t, TARGET_ZVFH)
-DEF_RVV_WCONVERT_I_OPS (vint32m2_t, TARGET_ZVFH)
-DEF_RVV_WCONVERT_I_OPS 

Re: [PATCH] RISCV: Add -m(no)-omit-leaf-frame-pointer support.

2023-06-06 Thread Jeff Law via Gcc-patches




On 6/4/23 20:49, Wang, Yanzhang wrote:

Hi Jeff,

Yes, there's a requirement to support backtrace based on the fp+ra.
And the unwind/cfa is not acceptable because it will add additional
sections to the binary. Currently, -fno-omit-frame-pointer can not
save the ra for the leaf function. So we need to add another option
like ARM/X86 to support consistent fp+ra stack layout for the leaf
and non-leaf functions.
One of the things that needs to be upstreamed is long jump support 
within a function.  Essentially once a function reaches 1M in size we 
have the real possibility that a direct jump may not reach its target.


To support this I expect that $ra is going to become a fixed register 
(ie, not available to the register allocator as a temporary).  It'll be 
used as a scratch register for long jump sequences.


One of the consequences of this is $ra will need to be saved in leaf 
functions that are near or over 1M in size.


Note that at the time when we have to lay out the stack, we do not know 
the precise length of the function.  So there's a degree of "fuzz" in 
the decision whether or not to save $ra in a function that is close to 
the 1M limit.


I don't think you can reliably know if $ra is valid in an arbitrary leaf 
function or not.  You could implement some heuristics by looking at the 
symbol table (which I'm guessing you don't want to do) or by 
disassembling the prologue (again, I'm guessing you don't want to do 
that either).


Meaning that what you really want is to be using -fno-omit-frame-pointer 
and for $ra to always be saved in the stack, even in a leaf function.


Presumably you're not suggesting any of these options be used in general 
-- they're going to be used for things like embedded devices or 
firmware?  Also note there are low overhead unwinding schemes out there 
that are already supported in various tools -- ORC & SFRAME come 
immediately to mind.   Those may be better than building a bespoke 
solution for the embedded space.




Jeff


[Bug debug/86257] Program compiled with fPIC crashes while stepping over thread-local variable GDB

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86257

Andrew Pinski  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
   Target Milestone|--- |9.0
 Resolution|--- |FIXED

--- Comment #9 from Andrew Pinski  ---
Fixed.

[Bug target/104271] [12 Regression] 538.imagick_r run-time at -Ofast -march=native regressed by 26% on Intel Cascade Lake server CPU

2023-06-06 Thread lili.cui at intel dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104271

--- Comment #14 from cuilili  ---
This regression has been fixed with the commit below and we can close this
ticket.

https://gcc.gnu.org/g:1b9a5cc9ec08e9f239dd2096edcc447b7a72f64a

[Bug target/58208] deque 32-bit "-O3" bug

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58208

--- Comment #16 from Andrew Pinski  ---
(In reply to Andrew Pinski from comment #15)
> This was a glibc issue.
> 
> Fixed by glibc commit 5e49c52efdb37eb0aa315156f39056f25ff9ddaa (
> https://sourceware.org/git/?p=glibc.git;a=commit;
> h=5e49c52efdb37eb0aa315156f39056f25ff9ddaa )

Which was included in glibc-2.13 (and above).

[Bug target/58208] deque 32-bit "-O3" bug

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58208

Andrew Pinski  changed:

   What|Removed |Added

 Status|UNCONFIRMED |RESOLVED
 Resolution|--- |MOVED

--- Comment #15 from Andrew Pinski  ---
This was a glibc issue.

Fixed by glibc commit 5e49c52efdb37eb0aa315156f39056f25ff9ddaa (
https://sourceware.org/git/?p=glibc.git;a=commit;h=5e49c52efdb37eb0aa315156f39056f25ff9ddaa
)

[Bug target/58208] deque 32-bit "-O3" bug

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58208

--- Comment #14 from Andrew Pinski  ---
movdqa  %xmm1, 16(%esp)
movdqa  %xmm0, 32(%esp)

Maybe stack alignment for destructors.

[Bug tree-optimization/110038] [14 Regression] ICE: in rewrite_expr_tree_parallel, at tree-ssa-reassoc.cc:5522 with --param=tree-reassoc-width=2147483647

2023-06-06 Thread lili.cui at intel dot com via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110038

--- Comment #5 from cuilili  ---
(In reply to Martin Jambor from comment #4)
> So is this now fixed?

Yes, the attachment case has been fixed.

[Bug target/84377] gcc-7.3.0 miscompiles truncf128@@GLIBC_2.26 in libm.so

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84377

Andrew Pinski  changed:

   What|Removed |Added

 Resolution|--- |DUPLICATE
 Status|UNCONFIRMED |RESOLVED

--- Comment #6 from Andrew Pinski  ---
Dup of bug 81763 in the end.

*** This bug has been marked as a duplicate of bug 81763 ***

[Bug target/81763] Issues with BMI on 32bit x86 apps on GCC 7.1+

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81763

Andrew Pinski  changed:

   What|Removed |Added

 CC||dilfridge at gentoo dot org

--- Comment #51 from Andrew Pinski  ---
*** Bug 84377 has been marked as a duplicate of this bug. ***

[Bug target/84377] gcc-7.3.0 miscompiles truncf128@@GLIBC_2.26 in libm.so

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84377

--- Comment #5 from Andrew Pinski  ---
The gimple level is the same:
```
  _5 = 281474976710655 >> j0_20;
  _6 = ~_5;
  _7 = (long long unsigned int) _6;
  _8 = _7 & _35;
  qw_u.parts64.msw = _8;
  qw_u.parts64.lsw = 0;
```

The place where it changes first between GCC 7.3 and 7.4 is IRA.

[pushed] c++: NRV and goto [PR92407]

2023-06-06 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

Here our named return value optimization was breaking the required
destructor when the goto takes 'a' out of scope.  The simplest fix is to
disable the optimization in the presence of user labels.

We could do better by disabling the optimization only if there is a backward
goto across the variable declaration, but we don't currently track that.

PR c++/92407

gcc/cp/ChangeLog:

* typeck.cc (check_return_expr): Prevent NRV in the presence of
named labels.

gcc/testsuite/ChangeLog:

* g++.dg/opt/nrv22.C: New test.
---
 gcc/cp/typeck.cc |  3 +++
 gcc/testsuite/g++.dg/opt/nrv22.C | 30 ++
 2 files changed, 33 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/opt/nrv22.C

diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 11fcc7fcd3b..6618c6a2021 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -11155,6 +11155,9 @@ check_return_expr (tree retval, bool *no_warning)
   if (fn_returns_value_p && flag_elide_constructors)
 {
   if (named_return_value_okay_p
+ /* The current NRV implementation breaks if a backward goto needs to
+destroy the object (PR92407).  */
+ && !cp_function_chain->x_named_labels
   && (current_function_return_value == NULL_TREE
  || current_function_return_value == bare_retval))
current_function_return_value = bare_retval;
diff --git a/gcc/testsuite/g++.dg/opt/nrv22.C b/gcc/testsuite/g++.dg/opt/nrv22.C
new file mode 100644
index 000..eb889fa615b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/nrv22.C
@@ -0,0 +1,30 @@
+// PR c++/92407
+// { dg-do run }
+
+struct A
+{
+  A () { a++; }
+  A (const A &) { a++; }
+  ~A () { a--; }
+  static int a;
+};
+int A::a = 0;
+
+A
+foo ()
+{
+  int cnt = 10;
+lab:
+  A a;
+  if (cnt--)
+goto lab;
+  return a;
+}
+
+int
+main ()
+{
+  foo ();
+  if (A::a)
+__builtin_abort ();
+}

base-commit: 29c82c6ca929e0f5eccfe038dea71177d814c6b7
prerequisite-patch-id: aed53cdac161144c31cb1433282e1ad1d49d3770
prerequisite-patch-id: 1098cb4457a5eff90fa8176f9b0d8d2e9477596e
prerequisite-patch-id: 823f2ce422455c6c7ccbaa9938b670a600b376df
-- 
2.31.1



[Bug c++/51571] No named return value optimization while adding a dummy scope

2023-06-06 Thread jason at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51571

Jason Merrill  changed:

   What|Removed |Added

   Target Milestone|--- |14.0

--- Comment #10 from Jason Merrill  ---
Fixed for GCC 14.

[Bug c++/92407] Destruction of objects returned from functions skipped by goto

2023-06-06 Thread jason at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92407

--- Comment #9 from Jason Merrill  ---
Fixed for GCC 14 so far.

[Bug c++/92407] Destruction of objects returned from functions skipped by goto

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92407

--- Comment #8 from CVS Commits  ---
The trunk branch has been updated by Jason Merrill :

https://gcc.gnu.org/g:7e0b65b239c3a0d68ce94896b236b03de666ffd6

commit r14-1593-g7e0b65b239c3a0d68ce94896b236b03de666ffd6
Author: Jason Merrill 
Date:   Sun Jun 4 12:09:11 2023 -0400

c++: enable NRVO from inner block [PR51571]

Our implementation of the named return value optimization has been limited
to variables declared in the outermost block of the function, to avoid
needing to handle the case where the variable needs to be destroyed due to
going out of scope.  PR92407 pointed out a case we were missing, where the
variable goes out of scope due to a goto and we were failing to destroy it.

It occurred to me that this problem is the flip side of PR33799, where we
need to be sure to destroy the return value if a cleanup throws on return;
here we want to avoid destroying the return value when exiting the
variable's scope on return.  We can use the same flag to indicate to both
cleanups that we're returning.

This implements the guaranteed copy elision specified by P2025 (which is not
yet part of the draft standard).

PR c++/51571
PR c++/92407

gcc/cp/ChangeLog:

* decl.cc (finish_function): Simplify NRV handling.
* except.cc (maybe_set_retval_sentinel): Also set if NRV.
(maybe_splice_retval_cleanup): Don't add the cleanup region
if we don't need it.
* semantics.cc (nrv_data): Add simple field.
(finalize_nrv): Set it.
(finalize_nrv_r): Check it and retval sentinel.
* cp-tree.h (finalize_nrv): Adjust declaration.
* typeck.cc (check_return_expr): Remove named_labels check.

gcc/testsuite/ChangeLog:

* g++.dg/opt/nrv23.C: New test.

[Bug c++/51571] No named return value optimization while adding a dummy scope

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=51571

--- Comment #9 from CVS Commits  ---
The trunk branch has been updated by Jason Merrill :

https://gcc.gnu.org/g:7e0b65b239c3a0d68ce94896b236b03de666ffd6

commit r14-1593-g7e0b65b239c3a0d68ce94896b236b03de666ffd6
Author: Jason Merrill 
Date:   Sun Jun 4 12:09:11 2023 -0400

c++: enable NRVO from inner block [PR51571]

Our implementation of the named return value optimization has been limited
to variables declared in the outermost block of the function, to avoid
needing to handle the case where the variable needs to be destroyed due to
going out of scope.  PR92407 pointed out a case we were missing, where the
variable goes out of scope due to a goto and we were failing to destroy it.

It occurred to me that this problem is the flip side of PR33799, where we
need to be sure to destroy the return value if a cleanup throws on return;
here we want to avoid destroying the return value when exiting the
variable's scope on return.  We can use the same flag to indicate to both
cleanups that we're returning.

This implements the guaranteed copy elision specified by P2025 (which is not
yet part of the draft standard).

PR c++/51571
PR c++/92407

gcc/cp/ChangeLog:

* decl.cc (finish_function): Simplify NRV handling.
* except.cc (maybe_set_retval_sentinel): Also set if NRV.
(maybe_splice_retval_cleanup): Don't add the cleanup region
if we don't need it.
* semantics.cc (nrv_data): Add simple field.
(finalize_nrv): Set it.
(finalize_nrv_r): Check it and retval sentinel.
* cp-tree.h (finalize_nrv): Adjust declaration.
* typeck.cc (check_return_expr): Remove named_labels check.

gcc/testsuite/ChangeLog:

* g++.dg/opt/nrv23.C: New test.

[Bug c++/92407] Destruction of objects returned from functions skipped by goto

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92407

--- Comment #7 from CVS Commits  ---
The trunk branch has been updated by Jason Merrill :

https://gcc.gnu.org/g:b192e2007e1c98b548f4aa878523b485968d24a4

commit r14-1592-gb192e2007e1c98b548f4aa878523b485968d24a4
Author: Jason Merrill 
Date:   Sun Jun 4 12:00:55 2023 -0400

c++: NRV and goto [PR92407]

Here our named return value optimization was breaking the required
destructor when the goto takes 'a' out of scope.  The simplest fix is to
disable the optimization in the presence of user labels.

We could do better by disabling the optimization only if there is a backward
goto across the variable declaration, but we don't currently track that.

PR c++/92407

gcc/cp/ChangeLog:

* typeck.cc (check_return_expr): Prevent NRV in the presence of
named labels.

gcc/testsuite/ChangeLog:

* g++.dg/opt/nrv22.C: New test.

[Bug c++/58487] Missed return value optimization

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58487

--- Comment #6 from CVS Commits  ---
The trunk branch has been updated by Jason Merrill :

https://gcc.gnu.org/g:2ae5384d457b9c67586de012816dfc71a6943164

commit r14-1594-g2ae5384d457b9c67586de012816dfc71a6943164
Author: Jason Merrill 
Date:   Tue Jun 6 12:46:26 2023 -0400

c++: Add -Wnrvo

While looking at PRs about cases where we don't perform the named return
value optimization, it occurred to me that it might be useful to have a
warning for that.

This does not fix PR58487, but might be interesting to people watching it.

PR c++/58487

gcc/c-family/ChangeLog:

* c.opt: Add -Wnrvo.

gcc/ChangeLog:

* doc/invoke.texi: Document it.

gcc/cp/ChangeLog:

* typeck.cc (want_nrvo_p): New.
(check_return_expr): Handle -Wnrvo.

gcc/testsuite/ChangeLog:

* g++.dg/opt/nrv25.C: New test.

[Bug c++/33799] Return value's destructor not executed when a local variable's destructor throws

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=33799

--- Comment #22 from CVS Commits  ---
The trunk branch has been updated by Jason Merrill :

https://gcc.gnu.org/g:08cea4e56a094ff9cc7c65fdc9ce8c4d7aff64be

commit r14-1591-g08cea4e56a094ff9cc7c65fdc9ce8c4d7aff64be
Author: Jason Merrill 
Date:   Tue Jun 6 15:31:23 2023 -0400

c++: fix throwing cleanup with label

While looking at PR92407 I noticed that the expectations of
maybe_splice_retval_cleanup weren't being met; an sk_cleanup level was
confusing its attempt to recognize the outer block of the function.  And
even if I fixed the detection, it failed to actually wrap the body of the
function because the STATEMENT_LIST it got only had the label, not anything
after it.  So I moved the call after poplevel does pop_stmt_list on all the
sk_cleanup levels.

PR c++/33799

gcc/cp/ChangeLog:

* except.cc (maybe_splice_retval_cleanup): Change
recognition of function body and try scopes.
* semantics.cc (do_poplevel): Call it after poplevel.
(at_try_scope): New.
* cp-tree.h (maybe_splice_retval_cleanup): Adjust.

gcc/testsuite/ChangeLog:

* g++.dg/eh/return1.C: Add label cases.

[Bug c++/58050] No return value optimization when calling static function through unnamed temporary

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58050

--- Comment #3 from CVS Commits  ---
The trunk branch has been updated by Jason Merrill :

https://gcc.gnu.org/g:4fe84e2a4c0b600d2bc01f171b3b9dd1f4357208

commit r14-1589-g4fe84e2a4c0b600d2bc01f171b3b9dd1f4357208
Author: Jason Merrill 
Date:   Tue Jun 6 12:01:23 2023 -0400

c++: add NRV testcase [PR58050]

This was fixed in GCC 10.

PR c++/58050

gcc/testsuite/ChangeLog:

* g++.dg/opt/nrv24.C: New test.

[pushed] c++: Add -Wnrvo

2023-06-06 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

While looking at PRs about cases where we don't perform the named return
value optimization, it occurred to me that it might be useful to have a
warning for that.

This does not fix PR58487, but might be interesting to people watching it.

PR c++/58487

gcc/c-family/ChangeLog:

* c.opt: Add -Wnrvo.

gcc/ChangeLog:

* doc/invoke.texi: Document it.

gcc/cp/ChangeLog:

* typeck.cc (want_nrvo_p): New.
(check_return_expr): Handle -Wnrvo.

gcc/testsuite/ChangeLog:

* g++.dg/opt/nrv25.C: New test.
---
 gcc/doc/invoke.texi  | 19 +++
 gcc/c-family/c.opt   |  4 
 gcc/cp/typeck.cc | 25 +++--
 gcc/testsuite/g++.dg/opt/nrv25.C | 15 +++
 4 files changed, 61 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/opt/nrv25.C

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 9130104af22..6d08229ce40 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -6678,6 +6678,25 @@ is only active when 
@option{-fdelete-null-pointer-checks} is active,
 which is enabled by optimizations in most targets.  The precision of
 the warnings depends on the optimization options used.
 
+@opindex Wnrvo
+@opindex Wno-nrvo
+@item -Wnrvo @r{(C++ and Objective-C++ only)}
+Warn if the compiler does not elide the copy from a local variable to
+the return value of a function in a context where it is allowed by
+[class.copy.elision].  This elision is commonly known as the Named
+Return Value Optimization.  For instance, in the example below the
+compiler cannot elide copies from both v1 and v2, so it elides neither.
+
+@smallexample
+std::vector<int> f()
+@{
+  std::vector<int> v1, v2;
+  // ...
+  if (cond) return v1;
+  else return v2; // warning: not eliding copy
+@}
+@end smallexample
+
 @opindex Winfinite-recursion
 @opindex Wno-infinite-recursion
 @item -Winfinite-recursion
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index cddeece..cead1995561 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -923,6 +923,10 @@ Wnamespaces
 C++ ObjC++ Var(warn_namespaces) Warning
 Warn on namespace definition.
 
+Wnrvo
+C++ ObjC++ Var(warn_nrvo)
+Warn if the named return value optimization is not performed although it is allowed.
+
 Wpacked-not-aligned
 C ObjC C++ ObjC++ Var(warn_packed_not_aligned) Warning LangEnabledBy(C ObjC C++ ObjC++,Wall)
 Warn when fields in a struct with the packed attribute are misaligned.
diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
index 11fcc7fcd3b..6b5705e806d 100644
--- a/gcc/cp/typeck.cc
+++ b/gcc/cp/typeck.cc
@@ -10670,6 +10670,16 @@ can_do_nrvo_p (tree retval, tree functype)
  && !TYPE_VOLATILE (TREE_TYPE (retval)));
 }
 
+/* True if we would like to perform NRVO, i.e. can_do_nrvo_p is true and we
+   would otherwise return in memory.  */
+
+static bool
+want_nrvo_p (tree retval, tree functype)
+{
+  return (can_do_nrvo_p (retval, functype)
+ && aggregate_value_p (functype, current_function_decl));
+}
+
 /* Like can_do_nrvo_p, but we check if we're trying to move a class
prvalue.  */
 
@@ -11151,7 +11161,7 @@ check_return_expr (tree retval, bool *no_warning)
   bare_retval = tree_strip_any_location_wrapper (retval);
 }
 
-  bool named_return_value_okay_p = can_do_nrvo_p (bare_retval, functype);
+  bool named_return_value_okay_p = want_nrvo_p (bare_retval, functype);
   if (fn_returns_value_p && flag_elide_constructors)
 {
   if (named_return_value_okay_p
@@ -11159,7 +11169,18 @@ check_return_expr (tree retval, bool *no_warning)
  || current_function_return_value == bare_retval))
current_function_return_value = bare_retval;
   else
-   current_function_return_value = error_mark_node;
+   {
+ if ((named_return_value_okay_p
+  || (current_function_return_value
+  && current_function_return_value != error_mark_node))
+ && !warning_suppressed_p (current_function_decl, OPT_Wnrvo))
+   {
+ warning (OPT_Wnrvo, "not eliding copy on return in %qD",
+  current_function_decl);
+ suppress_warning (current_function_decl, OPT_Wnrvo);
+   }
+ current_function_return_value = error_mark_node;
+   }
 }
 
   /* We don't need to do any conversions when there's nothing being
diff --git a/gcc/testsuite/g++.dg/opt/nrv25.C b/gcc/testsuite/g++.dg/opt/nrv25.C
new file mode 100644
index 000..35c4a88a088
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/nrv25.C
@@ -0,0 +1,15 @@
+// PR c++/58487
+// { dg-additional-options -Wnrvo }
+
+struct A {
+  A() {}
+  A(const A&);
+};
+
+A test() {
+  A a, b;
+  if (true)
+return a;
+  else
+return b;  // { dg-warning Wnrvo }
+}

base-commit: 29c82c6ca929e0f5eccfe038dea71177d814c6b7
prerequisite-patch-id: aed53cdac161144c31cb1433282e1ad1d49d3770

[pushed] c++: enable NRVO from inner block [PR51571]

2023-06-06 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

Our implementation of the named return value optimization has been limited
to variables declared in the outermost block of the function, to avoid
needing to handle the case where the variable needs to be destroyed due to
going out of scope.  PR92407 pointed out a case we were missing, where the
variable goes out of scope due to a goto and we were failing to destroy it.

It occurred to me that this problem is the flip side of PR33799, where we
need to be sure to destroy the return value if a cleanup throws on return;
here we want to avoid destroying the return value when exiting the
variable's scope on return.  We can use the same flag to indicate to both
cleanups that we're returning.

This implements the guaranteed copy elision specified by P2025 (which is not
yet part of the draft standard).

PR c++/51571
PR c++/92407

gcc/cp/ChangeLog:

* decl.cc (finish_function): Simplify NRV handling.
* except.cc (maybe_set_retval_sentinel): Also set if NRV.
(maybe_splice_retval_cleanup): Don't add the cleanup region
if we don't need it.
* semantics.cc (nrv_data): Add simple field.
(finalize_nrv): Set it.
(finalize_nrv_r): Check it and retval sentinel.
* cp-tree.h (finalize_nrv): Adjust declaration.
* typeck.cc (check_return_expr): Remove named_labels check.

gcc/testsuite/ChangeLog:

* g++.dg/opt/nrv23.C: New test.
---
 gcc/cp/cp-tree.h |  2 +-
 gcc/cp/decl.cc   | 19 +++
 gcc/cp/except.cc | 12 ++--
 gcc/cp/semantics.cc  | 31 ++-
 gcc/cp/typeck.cc |  3 ---
 gcc/testsuite/g++.dg/opt/nrv23.C | 23 +++
 6 files changed, 63 insertions(+), 27 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/opt/nrv23.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 834fdd12ef8..87572e3574d 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7714,7 +7714,7 @@ extern bool check_accessibility_of_qualified_id (tree, 
tree, tree, tsubst_flags_
 extern tree finish_qualified_id_expr   (tree, tree, bool, bool,
 bool, bool, tsubst_flags_t);
 extern void simplify_aggr_init_expr(tree *);
-extern void finalize_nrv   (tree *, tree, tree);
+extern void finalize_nrv   (tree, tree);
 extern tree omp_reduction_id   (enum tree_code, tree, tree);
 extern tree cp_remove_omp_priv_cleanup_stmt(tree *, int *, void *);
 extern bool cp_check_omp_declare_reduction (tree);
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 3985c6d2d1f..c07a4a8d58d 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -18236,23 +18236,10 @@ finish_function (bool inline_p)
 
   /* Set up the named return value optimization, if we can.  Candidate
  variables are selected in check_return_expr.  */
-  if (current_function_return_value)
+  if (tree r = current_function_return_value)
 {
-  tree r = current_function_return_value;
-  tree outer;
-
-  if (r != error_mark_node
- /* This is only worth doing for fns that return in memory--and
-simpler, since we don't have to worry about promoted modes.  */
- && aggregate_value_p (TREE_TYPE (TREE_TYPE (fndecl)), fndecl)
- /* Only allow this for variables declared in the outer scope of
-the function so we know that their lifetime always ends with a
-return; see g++.dg/opt/nrv6.C.  We could be more flexible if
-we were to do this optimization in tree-ssa.  */
- && (outer = outer_curly_brace_block (fndecl))
- && chain_member (r, BLOCK_VARS (outer)))
-   finalize_nrv (&DECL_SAVED_TREE (fndecl), r, DECL_RESULT (fndecl));
-
+  if (r != error_mark_node)
+   finalize_nrv (fndecl, r);
   current_function_return_value = NULL_TREE;
 }
 
diff --git a/gcc/cp/except.cc b/gcc/cp/except.cc
index 28106dadf1e..6c0f0815424 100644
--- a/gcc/cp/except.cc
+++ b/gcc/cp/except.cc
@@ -1280,7 +1280,9 @@ build_noexcept_spec (tree expr, tsubst_flags_t complain)
 /* If the current function has a cleanup that might throw, and the return value
has a non-trivial destructor, return a MODIFY_EXPR to set
current_retval_sentinel so that we know that the return value needs to be
-   destroyed on throw.  Otherwise, returns NULL_TREE.  */
+   destroyed on throw.  Do the same if the current function might use the
+   named return value optimization, so we don't destroy it on return.
+   Otherwise, returns NULL_TREE.  */
 
 tree
 maybe_set_retval_sentinel ()
@@ -1290,7 +1292,9 @@ maybe_set_retval_sentinel ()
   tree retval = DECL_RESULT (current_function_decl);
   if (!TYPE_HAS_NONTRIVIAL_DESTRUCTOR (TREE_TYPE (retval)))
 return NULL_TREE;
-  if (!cp_function_chain->throwing_cleanup)
+  if 

[pushed] c++: fix throwing cleanup with label

2023-06-06 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

While looking at PR92407 I noticed that the expectations of
maybe_splice_retval_cleanup weren't being met; an sk_cleanup level was
confusing its attempt to recognize the outer block of the function.  And
even if I fixed the detection, it failed to actually wrap the body of the
function because the STATEMENT_LIST it got only had the label, not anything
after it.  So I moved the call after poplevel does pop_stmt_list on all the
sk_cleanup levels.

PR c++/33799

gcc/cp/ChangeLog:

* except.cc (maybe_splice_retval_cleanup): Change
recognition of function body and try scopes.
* semantics.cc (do_poplevel): Call it after poplevel.
(at_try_scope): New.
* cp-tree.h (maybe_splice_retval_cleanup): Adjust.

gcc/testsuite/ChangeLog:

* g++.dg/eh/return1.C: Add label cases.
---
 gcc/cp/cp-tree.h  |  2 +-
 gcc/cp/except.cc  | 25 -
 gcc/cp/semantics.cc   | 16 +++-
 gcc/testsuite/g++.dg/eh/return1.C | 21 +
 4 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 101da35d322..834fdd12ef8 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -7047,7 +7047,7 @@ extern tree begin_eh_spec_block   (void);
 extern void finish_eh_spec_block   (tree, tree);
 extern tree build_eh_type_type (tree);
 extern tree cp_protect_cleanup_actions (void);
-extern void maybe_splice_retval_cleanup(tree);
+extern void maybe_splice_retval_cleanup(tree, bool);
 extern tree maybe_set_retval_sentinel  (void);
 
 extern tree template_parms_to_args (tree);
diff --git a/gcc/cp/except.cc b/gcc/cp/except.cc
index b04eb00d220..28106dadf1e 100644
--- a/gcc/cp/except.cc
+++ b/gcc/cp/except.cc
@@ -1312,21 +1312,20 @@ maybe_set_retval_sentinel ()
on throw.  */
 
 void
-maybe_splice_retval_cleanup (tree compound_stmt)
+maybe_splice_retval_cleanup (tree compound_stmt, bool is_try)
 {
-  /* If we need a cleanup for the return value, add it in at the same level as
- pushdecl_outermost_localscope.  And also in try blocks.  */
-  const bool function_body
-= (current_binding_level->level_chain
-   && current_binding_level->level_chain->kind == sk_function_parms
-  /* When we're processing a default argument, c_f_d may not have been
-set.  */
-   && current_function_decl);
+  if (!current_function_decl || !cfun
+  || DECL_CONSTRUCTOR_P (current_function_decl)
+  || DECL_DESTRUCTOR_P (current_function_decl)
+  || !current_retval_sentinel)
+return;
 
-  if ((function_body || current_binding_level->kind == sk_try)
-  && !DECL_CONSTRUCTOR_P (current_function_decl)
-  && !DECL_DESTRUCTOR_P (current_function_decl)
-  && current_retval_sentinel)
+  /* If we need a cleanup for the return value, add it in at the same level as
+ pushdecl_outermost_localscope.  And also in try blocks.  */
+  cp_binding_level *b = current_binding_level;
+  const bool function_body = b->kind == sk_function_parms;
+
+  if (function_body || is_try)
 {
   location_t loc = DECL_SOURCE_LOCATION (current_function_decl);
   tree_stmt_iterator iter = tsi_start (compound_stmt);
diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index c94ea090a99..a13c16f34a3 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -628,6 +628,17 @@ set_cleanup_locs (tree stmts, location_t loc)
   set_cleanup_locs (stmt, loc);
 }
 
+/* True iff the innermost block scope is a try block.  */
+
+static bool
+at_try_scope ()
+{
+  cp_binding_level *b = current_binding_level;
+  while (b && b->kind == sk_cleanup)
+b = b->level_chain;
+  return b && b->kind == sk_try;
+}
+
 /* Finish a scope.  */
 
 tree
@@ -635,11 +646,14 @@ do_poplevel (tree stmt_list)
 {
   tree block = NULL;
 
-  maybe_splice_retval_cleanup (stmt_list);
+  bool was_try = at_try_scope ();
 
   if (stmts_are_full_exprs_p ())
 block = poplevel (kept_level_p (), 1, 0);
 
+  /* This needs to come after poplevel merges sk_cleanup statement_lists.  */
+  maybe_splice_retval_cleanup (stmt_list, was_try);
+
   stmt_list = pop_stmt_list (stmt_list);
 
   /* input_location is the last token of the scope, usually a }.  */
diff --git a/gcc/testsuite/g++.dg/eh/return1.C 
b/gcc/testsuite/g++.dg/eh/return1.C
index ac2225405da..e22d674ae9a 100644
--- a/gcc/testsuite/g++.dg/eh/return1.C
+++ b/gcc/testsuite/g++.dg/eh/return1.C
@@ -33,6 +33,13 @@ X f()
   return X(false);
 }
 
+X f2()
+{
+ foo:
+  X x(true);
+  return X(false);
+}
+
 X g()
 {
   return X(true),X(false);
@@ -54,6 +61,16 @@ X i()
   return X(false);
 }
 
+X i2()
+{
+  try {
+  foo:
+X x(true);
+return X(false);
+  } catch(...) {}
+  return X(false);
+}
+
 X j()
 {
   try {
@@ -84,6 +101,8 @@ int main()
   try { f(); }
   catch (...) {}
 
+  try { 

[pushed] c++: fix contracts with NRV

2023-06-06 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

The NRV implementation was blindly replacing the operand of RETURN_EXPR,
clobbering anything that check_return_expr might have added on to the actual
initialization, such as checking the postcondition.

gcc/cp/ChangeLog:

* semantics.cc (finalize_nrv_r): [RETURN_EXPR]: Only replace the
INIT_EXPR.

gcc/testsuite/ChangeLog:

* g++.dg/contracts/contracts-post7.C: New test.
---
 gcc/cp/semantics.cc   | 12 ++--
 .../g++.dg/contracts/contracts-post7.C| 29 +++
 2 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/contracts/contracts-post7.C

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index c04514679f0..c94ea090a99 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -4940,9 +4940,17 @@ finalize_nrv_r (tree* tp, int* walk_subtrees, void* data)
 *walk_subtrees = 0;
   /* Change all returns to just refer to the RESULT_DECL; this is a nop,
  but differs from using NULL_TREE in that it indicates that we care
- about the value of the RESULT_DECL.  */
+ about the value of the RESULT_DECL.  But preserve anything appended
+ by check_return_expr.  */
   else if (TREE_CODE (*tp) == RETURN_EXPR)
-TREE_OPERAND (*tp, 0) = dp->result;
+{
+  tree *p = &TREE_OPERAND (*tp, 0);
+  while (TREE_CODE (*p) == COMPOUND_EXPR)
+   p = &TREE_OPERAND (*p, 0);
+  gcc_checking_assert (TREE_CODE (*p) == INIT_EXPR
+  && TREE_OPERAND (*p, 0) == dp->result);
+  *p = dp->result;
+}
   /* Change all cleanups for the NRV to only run when an exception is
  thrown.  */
   else if (TREE_CODE (*tp) == CLEANUP_STMT
diff --git a/gcc/testsuite/g++.dg/contracts/contracts-post7.C 
b/gcc/testsuite/g++.dg/contracts/contracts-post7.C
new file mode 100644
index 000..1c33181b5e6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/contracts/contracts-post7.C
@@ -0,0 +1,29 @@
+// { dg-do run }
+// { dg-options "-std=c++2a -fcontracts" }
+
+#include <experimental/contract>
+
+using std::experimental::contract_violation;
+void handle_contract_violation(const contract_violation &)
+{
+  __builtin_exit (0);
+}
+
+struct A {
+  int i;
+  A(): i(42) {}
+  A(const A&);
+};
+
+A f()
+  [[ post r: r.i == 24 ]]
+{
+  A a;
+  return a;
+}
+
+int main()
+{
+  f();
+  return -1;
+}

base-commit: 29c82c6ca929e0f5eccfe038dea71177d814c6b7
prerequisite-patch-id: aed53cdac161144c31cb1433282e1ad1d49d3770
-- 
2.31.1



[pushed] c++: add NRV testcase [PR58050]

2023-06-06 Thread Jason Merrill via Gcc-patches
Tested x86_64-pc-linux-gnu, applying to trunk.

-- 8< --

This was fixed in GCC 10.

PR c++/58050

gcc/testsuite/ChangeLog:

* g++.dg/opt/nrv24.C: New test.
---
 gcc/testsuite/g++.dg/opt/nrv24.C | 18 ++
 1 file changed, 18 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/opt/nrv24.C

diff --git a/gcc/testsuite/g++.dg/opt/nrv24.C b/gcc/testsuite/g++.dg/opt/nrv24.C
new file mode 100644
index 000..7a7a59b26f7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/nrv24.C
@@ -0,0 +1,18 @@
+// PR c++/58050
+// { dg-do link }
+
+struct B {
+  B() { }
+  B(const B&); // not defined, link error on unnecessary copy
+  ~B() { }
+};
+struct A {
+  static B make() { return B(); }
+} a;
+A *ap() { return &a; }
+int main () {
+  {B b = A::make();}
+  {B B = a.make();}
+  {B b = ap()->make();}
+  {B b = A().make();}
+}

base-commit: 29c82c6ca929e0f5eccfe038dea71177d814c6b7
-- 
2.31.1



Re: [PATCH V5] Use reg mode to move sub blocks for parameters and returns

2023-06-06 Thread guojiufu via Gcc-patches

Hi,

On 2023-06-05 00:59, Jeff Law wrote:

On 5/9/23 07:43, Jiufu Guo wrote:


Thanks for pointing this out!  Yes, a BLKmode rtx may not always be a MEM.
MEM_SIZE is only OK for a MEM after its known size has been computed.
Here MEM_SIZE is fine just because it is a stack rtx corresponding
to the type of the parameter or return, which has already been computed.

I updated the patch to resolve the conflicts with the trunk, reran
bootstrap, and am posting the result as a new version.

This version passes bootstrap and regtest on ppc64{,le} and x86_64.

The major change is that 'move_sub_blocks' only handles the case when
the block size can be moved using a single submode, that is, when
(size % sub_size) is 0.  If there are no objections, I will commit the new version.

BR,
Jeff (Jiufu)

gcc/ChangeLog:

	* cfgexpand.cc (expand_used_vars): Update to mark 
DECL_USEDBY_RETURN_P

for returns.
* expr.cc (move_sub_blocks): New function.
(expand_assignment): Update assignment code about returns/parameters.
* function.cc (assign_parm_setup_block): Update to mark
DECL_REGS_TO_STACK_P for parameter.
* tree-core.h (struct tree_decl_common): Add comment.
* tree.h (DECL_USEDBY_RETURN_P): New define.
(DECL_REGS_TO_STACK_P): New define.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr65421-1.c: New test.
* gcc.target/powerpc/pr65421-2.c: New test.

I don't think this was ever explicitly ACK'd.  OK for the trunk.


Thanks so much! And sorry for the late reply.
I'm investigating another patch that may fix other PRs and could also
handle this issue.  So I may put this one on hold in favor of the new patch.


BR,
Jeff (Jiufu Guo)



jeff


[Bug target/84377] gcc-7.3.0 miscompiles truncf128@@GLIBC_2.26 in libm.so

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84377

--- Comment #4 from Andrew Pinski  ---
GCC 7.3.0:
```
.loc 1 42 0 discriminator 2
movl    $65535, %edi
movl    $-1, %esi
movl    $0, 48(%esp)
shrdl   %edi, %esi
testb   $32, %cl
sarx    %ecx, %edi, %edi
movl    $0, 52(%esp)
cmovne  %edi, %esi
andn    %eax, %esi, %esi
andn    %edx, %esi, %edi
movl    %esi, 56(%esp)
movl    %edi, 60(%esp)
vmovdqa 48(%esp), %xmm3
vmovaps %xmm3, (%esp)
```

GCC 7.4.0:
```
.loc 1 42 0 discriminator 2
movl    $65535, %edi
movl    $-1, %esi
movl    $0, 48(%esp)
shrdl   %edi, %esi
sarx    %ecx, %edi, %edi
movl    %edi, %ebx
movl    $0, 52(%esp)
sarl    $31, %ebx
testb   $32, %cl
cmovne  %edi, %esi
cmovne  %ebx, %edi
andn    %eax, %esi, %esi
andn    %edx, %edi, %edi
movl    %esi, 56(%esp)
movl    %edi, 60(%esp)
vmovdqa 48(%esp), %xmm3
vmovaps %xmm3, (%esp)
```

[Bug target/84377] gcc-7.3.0 miscompiles truncf128@@GLIBC_2.26 in libm.so

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84377

Andrew Pinski  changed:

   What|Removed |Added

  Known to work||7.4.0, 7.5.0, 8.1.0
  Known to fail||7.3.0

--- Comment #3 from Andrew Pinski  ---
So I can reproduce it with GCC 7.3.0 with:
```
-xc -march=skylake -mmmx -mno-3dnow -msse -msse2 -msse3 -mssse3 -mno-sse4a
-mcx16 -msahf -mmovbe -maes -mno-sha -mpclmul -mpopcnt -mabm -mno-lwp -mfma
-mno-fma4 -mno-xop -mbmi -msgx -mbmi2 -mno-tbm -mavx -mavx2 -msse4.2 -msse4.1
-mlzcnt -mrtm -mhle -mrdrnd -mf16c -mfsgsbase -mrdseed -mprfchw -madx -mfxsr
-mxsave -mxsaveopt -mno-avx512f -mno-avx512er -mno-avx512cd -mno-avx512pf
-mno-prefetchwt1 -mclflushopt -mxsavec -mxsaves -mno-avx512dq -mno-avx512bw
-mno-avx512vl -mno-avx512ifma -mno-avx512vbmi -mno-avx5124fmaps
-mno-avx5124vnniw -mno-clwb -mno-mwaitx -mno-clzero -mno-pku -mno-rdpid -m32
-O2 -std=gnu11 -fgnu89-inline -fmerge-all-constants -fno-strict-aliasing
-frounding-math -fstack-protector-all -mpreferred-stack-boundary=4  -fPIC
```
But it passes with GCC 7.4.0.

[Bug tree-optimization/14753] [tree-ssa] some missed forward propagation opportunities

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=14753

Andrew Pinski  changed:

   What|Removed |Added

 Depends on||85234, 110134

--- Comment #13 from Andrew Pinski  ---
(In reply to Andrew Pinski from comment #12)
> Summary of the ones still need to be done:
> comment #0:
> * foo
PR 85234 (I think)

> comment #3:
> * rshift_gt
PR 85234 (I think)
> * rshift_eq
PR 85234 (I think)
> * mask_gt
I don't think this has a bug #

> * neg_eq_cst
> * neg_eq_var
PR 110134 (just submitted a patch for that)

> 
> comment #4:
> * minus_cst
I don't think this has a bug #


Referenced Bugs:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85234
[Bug 85234] missed optimisation opportunity for (x >> CST)!=0 is not optimized
to   (((unsigned)x) >=  (1

Re: [PATCH] RISC-V: Support RVV VLA SLP auto-vectorization

2023-06-06 Thread juzhe.zh...@rivai.ai
Ping for this patch.  OK for trunk?
The following patches are blocked by it.



juzhe.zh...@rivai.ai
 
From: juzhe.zhong
Date: 2023-06-06 12:16
To: gcc-patches
CC: kito.cheng; kito.cheng; palmer; palmer; jeffreyalaw; rdapp.gcc; pan2.li; 
Juzhe-Zhong
Subject: [PATCH] RISC-V: Support RVV VLA SLP auto-vectorization
From: Juzhe-Zhong 
 
This patch enables basic VLA SLP auto-vectorization.
Consider the following case:
void
f (uint8_t *restrict a, uint8_t *restrict b)
{
  for (int i = 0; i < 100; ++i)
{
  a[i * 8 + 0] = b[i * 8 + 7] + 1;
  a[i * 8 + 1] = b[i * 8 + 7] + 2;
  a[i * 8 + 2] = b[i * 8 + 7] + 8;
  a[i * 8 + 3] = b[i * 8 + 7] + 4;
  a[i * 8 + 4] = b[i * 8 + 7] + 5;
  a[i * 8 + 5] = b[i * 8 + 7] + 6;
  a[i * 8 + 6] = b[i * 8 + 7] + 7;
  a[i * 8 + 7] = b[i * 8 + 7] + 3;
}
}
 
To enable VLA SLP auto-vectorization, we should be able to handle the 
following const vector:
 
1. NPATTERNS = 8, NELTS_PER_PATTERN = 3.
{ 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 
16, ... }
 
2. NPATTERNS = 8, NELTS_PER_PATTERN = 1. 
{ 1, 2, 8, 4, 5, 6, 7, 3, ... }
 
And these vectors can be generated in the prologue.
 
After this patch, we end up with the following codegen:
 
Prologue:
...
vsetvli a7,zero,e16,m2,ta,ma
vid.v   v4
vsrl.vi v4,v4,3
li  a3,8
vmul.vx v4,v4,a3  ===> v4 = { 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 
8, 8, 16, 16, 16, 16, 16, 16, 16, 16, ... }
...
li  t1,67633152
addi    t1,t1,513
li  a3,50790400
addi    a3,a3,1541
slli    a3,a3,32
add a3,a3,t1
vsetvli t1,zero,e64,m1,ta,ma
vmv.v.x v3,a3   ===> v3 = { 1, 2, 8, 4, 5, 6, 7, 3, ... }
...
LoopBody:
...
min a3,...
vsetvli zero,a3,e8,m1,ta,ma
vle8.v  v2,0(a6)
vsetvli a7,zero,e8,m1,ta,ma
vrgatherei16.vv v1,v2,v4
vadd.vv v1,v1,v3
vsetvli zero,a3,e8,m1,ta,ma
vse8.v  v1,0(a2)
add a6,a6,a4
add a2,a2,a4
mv  a3,a5
add a5,a5,t1
bgtu    a3,a4,.L3
...
 
Note: we need to use "vrgatherei16.vv" instead of "vrgather.vv" for SEW = 8,
since "vrgatherei16.vv" can cover a larger index range than "vrgather.vv"
(whose maximum element index is only 255).
Epilogue:
lbu a5,799(a1)
addiw   a4,a5,1
sb  a4,792(a0)
addiw   a4,a5,2
sb  a4,793(a0)
addiw   a4,a5,8
sb  a4,794(a0)
addiw   a4,a5,4
sb  a4,795(a0)
addiw   a4,a5,5
sb  a4,796(a0)
addiw   a4,a5,6
sb  a4,797(a0)
addiw   a4,a5,7
sb  a4,798(a0)
addiw   a5,a5,3
sb  a5,799(a0)
ret
 
One last thing remains to be done: "Epilogue auto-vectorization", 
which needs VLS modes support.
I will support VLS modes for "Epilogue auto-vectorization" in the future.
 
gcc/ChangeLog:
 
* config/riscv/riscv-protos.h (expand_vec_perm_const): New function.
* config/riscv/riscv-v.cc 
(rvv_builder::can_duplicate_repeating_sequence_p): Support POLY handling.
(rvv_builder::single_step_npatterns_p): New function.
(rvv_builder::npatterns_all_equal_p): Ditto.
(const_vec_all_in_range_p): Support POLY handling.
(gen_const_vector_dup): Ditto.
(emit_vlmax_gather_insn): Add vrgatherei16.
(emit_vlmax_masked_gather_mu_insn): Ditto.
(expand_const_vector): Add VLA SLP const vector support.
(expand_vec_perm): Support POLY.
(struct expand_vec_perm_d): New struct.
(shuffle_generic_patterns): New function.
(expand_vec_perm_const_1): Ditto.
(expand_vec_perm_const): Ditto.
* config/riscv/riscv.cc (riscv_vectorize_vec_perm_const): Ditto.
(TARGET_VECTORIZE_VEC_PERM_CONST): New targethook.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/autovec/scalable-1.c: Adapt testcase for VLA 
vectorizer.
* gcc.target/riscv/rvv/autovec/v-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve32f_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve32x_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64d-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64d_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64f-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64f_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/zve64x_zvl128b-1.c: Ditto.
* gcc.target/riscv/rvv/autovec/partial/slp-1.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-2.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-3.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-4.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-5.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-6.c: New test.
* gcc.target/riscv/rvv/autovec/partial/slp-7.c: New test.
 

Re: [PATCH] Fold _mm{, 256, 512}_abs_{epi8, epi16, epi32, epi64} into gimple ABSU_EXPR + VCE.

2023-06-06 Thread Hongtao Liu via Gcc-patches
On Tue, Jun 6, 2023 at 10:36 PM Uros Bizjak  wrote:
>
> On Tue, Jun 6, 2023 at 1:42 PM Hongtao Liu  wrote:
> >
> > On Tue, Jun 6, 2023 at 5:11 PM Uros Bizjak  wrote:
> > >
> > > On Tue, Jun 6, 2023 at 6:33 AM liuhongt via Gcc-patches
> > >  wrote:
> > > >
> > > > r14-1145 fold the intrinsics into gimple ABS_EXPR which has UB for
> > > > TYPE_MIN, but PABSB will store unsigned result into dst. The patch
> > > > uses ABSU_EXPR + VCE instead of ABS_EXPR.
> > > >
> > > > Also don't fold _mm_abs_{pi8,pi16,pi32} w/o TARGET_64BIT since 64-bit
> > > > vector absm2 is guarded with TARGET_MMX_WITH_SSE.
> > >
> > >This should be !TARGET_MMX_WITH_SSE. TARGET_64BIT is not enough, see
> > >the definition of T_M_W_S in i386.h. OTOH, these builtins are
> > >available for TARGET_MMX, so I'm not sure if the above check is needed
> > >at all.
> > BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, 0,
> > CODE_FOR_ssse3_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB,
> > UNKNOWN, (int) V8QI_FTYPE_V8QI)
> >
> > ISA requirement(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX) will be
> > checked by ix86_check_builtin_isa_match which is at the beginning of
> > ix86_gimple_fold_builtin.
> > Here, we're folding those builtins into gimple ABSU_EXPR, and
> > ABSU_EXPR will be lowered by the vec_lower pass when the backend
> > doesn't support the corresponding absm2_optab; that's why I only check
> > TARGET_64BIT here.
> >
> > > Please note that we are using builtins here, so we should not fold to
> > > absm2, but to ssse3_absm2, which is also available with TARGET_MMX.
> > Yes, that's exactly why I checked TARGET_64BIT here: w/ TARGET_64BIT, the
> > backend supports absm2_optab, which exactly matches ssse3_absm2.
> > w/o TARGET_64BIT, the builtin shouldn't be folded into gimple ABSU_EXPR,
> > but left for the backend to expand to ssse3_absm2.
>
> Thanks for the explanation, but for consistency, I'd recommend
> checking TARGET_MMX_WITH_SSE (= TARGET_64BIT && TARGET_SSE2) here. The
> macro is self-explanatory, while the usage of TARGET_64BIT is not that
> descriptive.
Sure.
>
> Uros.



-- 
BR,
Hongtao


[Bug tree-optimization/94898] Failure to optimize compare plus sub of same operands into compare

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94898

--- Comment #5 from Andrew Pinski  ---
Patch submitted:
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/620829.html

[Bug tree-optimization/20083] Missed optimization with conditional and basically ||

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=20083

--- Comment #7 from Andrew Pinski  ---
Patch submitted:
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/620829.html

[Bug tree-optimization/89263] Simplify bool expression to OR

2023-06-06 Thread pinskia at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89263

Andrew Pinski  changed:

   What|Removed |Added

URL||https://gcc.gnu.org/piperma
   ||il/gcc-patches/2023-June/62
   ||0829.html

--- Comment #10 from Andrew Pinski  ---
Patch submitted:
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/620829.html

[Bug modula2/110019] Reported line numbers ar off-by-1 when preprocessing source files

2023-06-06 Thread gaius at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110019

Gaius Mulley  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|ASSIGNED|RESOLVED

--- Comment #4 from Gaius Mulley  ---
Closing now that the patch has been applied.

[Bug modula2/110019] Reported line numbers ar off-by-1 when preprocessing source files

2023-06-06 Thread cvs-commit at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110019

--- Comment #3 from CVS Commits  ---
The master branch has been updated by Gaius Mulley :

https://gcc.gnu.org/g:29c82c6ca929e0f5eccfe038dea71177d814c6b7

commit r14-1588-g29c82c6ca929e0f5eccfe038dea71177d814c6b7
Author: Gaius Mulley 
Date:   Wed Jun 7 01:21:19 2023 +0100

PR modula2/110019 Reported line numbers off by 1 when cpp invoked.

Fix off by one in m2.flex when the line number is set via cpp.

gcc/m2/ChangeLog:

PR modula2/110019
* gm2-compiler/SymbolKey.mod (SearchAndDo): Reformatted.
(ForeachNodeDo): Reformatted.
* gm2-compiler/SymbolTable.mod (AddListify): Join list
with "," or "and" if more than one word is in the list.
* m2.flex: Remove -1 from atoi(yytext) line number.

gcc/testsuite/ChangeLog:

PR modula2/110019
* gm2/cpp/fail/cpp-fail.exp: New test.
* gm2/cpp/fail/foocpp.mod: New test.

Signed-off-by: Gaius Mulley 

[Bug modula2/110019] Reported line numbers ar off-by-1 when preprocessing source files

2023-06-06 Thread gaius at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110019

Gaius Mulley  changed:

   What|Removed |Added

 CC||gaius at gcc dot gnu.org

--- Comment #2 from Gaius Mulley  ---
Created attachment 55276
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=55276&action=edit
Proposed fix

Here is a proposed patch:

[PATCH 1/2] Match: zero_one_valued_p should match 0 constants too

2023-06-06 Thread Andrew Pinski via Gcc-patches
While working on `bool0 ? bool1 : bool2` I noticed that
zero_one_valued_p does not match on the constant zero
as in that case tree_nonzero_bits will return 0 and
that is different from 1.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* match.pd (zero_one_valued_p): Match 0 integer constant
too.
---
 gcc/match.pd | 5 +
 1 file changed, 5 insertions(+)

diff --git a/gcc/match.pd b/gcc/match.pd
index f9cbd757752..f97ff7ef760 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1983,11 +1983,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (bit_not (bit_not @0))
   @0)
 
+/* zero_one_valued_p will match when a value is known to be either
+   0 or 1 including the constant 0. */
 (match zero_one_valued_p
  @0
  (if (INTEGRAL_TYPE_P (type) && tree_nonzero_bits (@0) == 1)))
 (match zero_one_valued_p
  truth_valued_p@0)
+(match zero_one_valued_p
+ integer_zerop@0
+ (if (INTEGRAL_TYPE_P (type))))
 
 /* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 }.  */
 (simplify
-- 
2.31.1



[PATCH 2/2] Add match patterns for `a ? onezero : onezero` where one of the two operands are constant

2023-06-06 Thread Andrew Pinski via Gcc-patches
This adds match patterns for boolean values
that optimize `a ? onezero : 0` to `a & onezero` and
`a ? 1 : onezero` to `a | onezero`.

This was reported a few times and I thought I would finally
add the match pattern for this.

This hits a few times in GCC itself too.

Notes on the testcases:
* phi-opt-2.c: This now is optimized to `a & b` in phiopt rather than ifcombine
* phi-opt-25b.c: The test part that was failing was parity which now gets `x & 
y` treatment.
* ssa-thread-21.c: there is no longer a threading opportunity, so need to 
disable phiopt.
  Note PR 109957 is filed for the now missing optimization in that testcase too.

gcc/ChangeLog:

PR tree-optimization/89263
PR tree-optimization/99069
PR tree-optimization/20083
PR tree-optimization/94898
* match.pd: Add patterns to optimize `a ? onezero : onezero` where
one of the operands is constant.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/phi-opt-2.c: Adjust the testcase.
* gcc.dg/tree-ssa/phi-opt-25b.c: Adjust the testcase.
* gcc.dg/tree-ssa/ssa-thread-21.c: Disable phiopt.
* gcc.dg/tree-ssa/phi-opt-27.c: New test.
* gcc.dg/tree-ssa/phi-opt-28.c: New test.
* gcc.dg/tree-ssa/phi-opt-29.c: New test.
* gcc.dg/tree-ssa/phi-opt-30.c: New test.
* gcc.dg/tree-ssa/phi-opt-31.c: New test.
* gcc.dg/tree-ssa/phi-opt-32.c: New test.
---
 gcc/match.pd  | 18 ++
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-2.c | 12 ++--
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25b.c   |  8 ++-
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-27.c| 14 +
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-28.c| 14 +
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-29.c| 25 +
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-30.c| 55 +++
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-31.c| 15 +
 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-32.c| 12 
 gcc/testsuite/gcc.dg/tree-ssa/ssa-thread-21.c |  3 +-
 10 files changed, 165 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-27.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-28.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-29.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-30.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-31.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/phi-opt-32.c

diff --git a/gcc/match.pd b/gcc/match.pd
index f97ff7ef760..dc36927cd0f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4721,6 +4721,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  )
 )
 
+(simplify
+ (cond @0 zero_one_valued_p@1 zero_one_valued_p@2)
+ (switch
+  /* bool0 ? bool1 : 0 -> bool0 & bool1 */
+  (if (integer_zerop (@2))
+   (bit_and (convert @0) @1))
+  /* bool0 ? 0 : bool2 -> (bool0^1) & bool2 */
+  (if (integer_zerop (@1))
+   (bit_and (bit_xor (convert @0) { build_one_cst (type); } ) @2))
+  /* bool0 ? 1 : bool2 -> bool0 | bool2 */
+  (if (integer_onep (@1))
+   (bit_ior (convert @0) @2))
+  /* bool0 ? bool1 : 1 -> (bool0^1) | bool1 */
+  (if (integer_onep (@2))
+   (bit_ior (bit_xor (convert @0) @2) @1))
+ )
+)
+
 /* Optimize
# x_5 in range [cst1, cst2] where cst2 = cst1 + 1
x_5 ? cstN ? cst4 : cst3
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-2.c
index 5c7815e2c1a..006e8e83052 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-2.c
@@ -14,10 +14,8 @@ _Bool f1(_Bool a, _Bool b)
 }
 
 
-/* There should be only one if, the outer one; the inner one
-   should have been changed to straight line code with the
-   value of b (except that we don't fold ! (b != 0) into b
-   which can be fixed in a different patch).
-   Test this only when known to be !LOGICAL_OP_NON_SHORT_CIRCUIT,
-   otherwise ifcombine may convert this into return a & b;.  */
-/* { dg-final { scan-tree-dump-times "if" 1 "optimized" } } */
+/* There should be no if statements and be fold into just return a & b.
+   This can be done without ifcombine but in phiopt where a ? b : 0 is
+   converted into a & b. */
+/* { dg-final { scan-tree-dump-not "if" "optimized" } } */
+/* { dg-final { scan-tree-dump-times " & " 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25b.c 
b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25b.c
index 698a20f7a56..7298da0c96e 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25b.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/phi-opt-25b.c
@@ -65,6 +65,8 @@ int test_popcountll(unsigned long long x, unsigned long long 
y)
   return x ? __builtin_popcountll(y) : 0;
 }
 
-/* 4 types of functions, each with 3 types and there are 2 goto each */
-/* { dg-final { scan-tree-dump-times "goto " 24 "optimized" } } */
-
+/* 3 types of functions (not including parity), each with 3 types and there 
are 2 goto each */
+/* { dg-final { scan-tree-dump-times "goto " 18 "optimized" } } */
+/* { dg-final 

RE: [x86_64 PATCH] PR target/110104: Missing peephole2 for addcarry.

2023-06-06 Thread Roger Sayle


Hi Jakub,
Jakub Jelinek wrote:
> Seems to be pretty much the same as one of the 12 define_peephole2
patterns I've posted in
https://gcc.gnu.org/pipermail/gcc-patches/2023-June/620821.html

Doh!  Impressive work.  I need to study how you handle constant carry flags.
Fingers-crossed that patches that touch both the middle-end and a backend
don't get delayed too long in the review/approval process.

> The testcase will be useful though (but I'd go with including the intrin
header and using the intrinsic rather than builtin).

I find the use of intrin headers a pain when running cc1 under gdb,
requiring additional paths to be
specified with -I etc.  Perhaps there's a trick that I'm missing?
__builtins are more free-standing,
and therefore work with cross-compilers to targets/development environments
that I don't have.

I withdraw my patch.  Please feel free to assign PR 110104 to yourself in
Bugzilla.

Cheers (and thanks),
Roger




[Bug target/54089] [SH] Refactor shift patterns

2023-06-06 Thread olegendo at gcc dot gnu.org via Gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54089

--- Comment #67 from Oleg Endo  ---
(In reply to Alexander Klepikov from comment #66)
> (In reply to Alexander Klepikov from comment #65)
> > > I'm thinking of something else.
> > 
> > During kernel compile I got few internal errors in different passes. That
> > is, additional loop optimization pass patch is no good at all.
> 
> I am sorry, I am, as always, panicking ahead of time. I think I found what's
> wrong and do additional tests.

Don't worry.  I know what you're going through.  Been there myself ;)
Take your time.

[x86 PATCH] PR target/31985: Improve memory operand use with doubleword add.

2023-06-06 Thread Roger Sayle

This patch addresses the last remaining issue with PR target/31985, that
GCC could make better use of memory addressing modes when implementing
double word addition.  This is achieved by adding a define_insn_and_split
that combines an *add<dwi>3_doubleword with a *concat<mode><dwi>3, so
that the components of the concat can be used directly, without first
being loaded into a double word register.

For test_c in the bugzilla PR:

Before:
pushl   %ebx
subl    $16, %esp
movl    28(%esp), %eax
movl    36(%esp), %ecx
movl    32(%esp), %ebx
movl    24(%esp), %edx
addl    %ecx, %eax
adcl    %ebx, %edx
movl    %eax, 8(%esp)
movl    %edx, 12(%esp)
addl    $16, %esp
popl    %ebx
ret

After:
test_c:
subl    $20, %esp
movl    36(%esp), %eax
movl    32(%esp), %edx
addl    28(%esp), %eax
adcl    24(%esp), %edx
movl    %eax, 8(%esp)
movl    %edx, 12(%esp)
addl    $20, %esp
ret


If this approach is considered acceptable, similar splitters can be
used for other doubleword operations.

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32}
with no new failures.  Ok for mainline?

2023-06-07  Roger Sayle  

gcc/ChangeLog
PR target/31985
* config/i386/i386.md (*add<dwi>3_doubleword_concat): New
define_insn_and_split combine *add<dwi>3_doubleword with a
*concat<mode><dwi>3 for more efficient lowering after reload.

gcc/testsuite/ChangeLog
PR target/31985
* gcc.target/i386/pr31985.c: New test case.


Roger
--

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e6ebc46..3592249 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -6124,6 +6124,36 @@
  (clobber (reg:CC FLAGS_REG))])]
  "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[3]);")
 
+(define_insn_and_split "*add<dwi>3_doubleword_concat"
+  [(set (match_operand:<DWI> 0 "register_operand" "=r")
+   (plus:<DWI>
+ (any_or_plus:<DWI>
+   (ashift:<DWI>
+ (zero_extend:<DWI>
+   (match_operand:DWIH 2 "nonimmediate_operand" "rm"))
+ (match_operand:<DWI> 3 "const_int_operand"))
+   (zero_extend:<DWI>
+ (match_operand:DWIH 4 "nonimmediate_operand" "rm")))
+ (match_operand:<DWI> 1 "register_operand" "0")))
+   (clobber (reg:CC FLAGS_REG))]
+  "INTVAL (operands[3]) == <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (reg:CCC FLAGS_REG)
+  (compare:CCC
+(plus:DWIH (match_dup 1) (match_dup 4))
+(match_dup 1)))
+ (set (match_dup 0)
+  (plus:DWIH (match_dup 1) (match_dup 4)))])
+   (parallel [(set (match_dup 5)
+  (plus:DWIH
+(plus:DWIH
+  (ltu:DWIH (reg:CC FLAGS_REG) (const_int 0))
+  (match_dup 6))
+(match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])]
+ "split_double_mode (<DWI>mode, &operands[0], 2, &operands[0], &operands[5]);")
+
 (define_insn "*add_1"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
(plus:SWI48
diff --git a/gcc/testsuite/gcc.target/i386/pr31985.c 
b/gcc/testsuite/gcc.target/i386/pr31985.c
new file mode 100644
index 000..a6de1b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr31985.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target ia32 } } */
+/* { dg-options "-O2" } */
+
+void test_c (unsigned int a, unsigned int b, unsigned int c, unsigned int d)
+{
+  volatile unsigned int x, y;
+  unsigned long long __a = b | ((unsigned long long)a << 32);
+  unsigned long long __b = d | ((unsigned long long)c << 32);
+  unsigned long long __c = __a + __b;
+  x = (unsigned int)(__c & 0xffffffff);
+  y = (unsigned int)(__c >> 32);
+}
+
+/* { dg-final { scan-assembler-times "movl" 4 } } */


  1   2   3   4   >