Re: [PATCH 2/3] RISC-V: Part-2: Save/Restore vector registers which need to be preserved

2023-08-06 Thread Kito Cheng via Gcc-patches
> +  machine_mode m1_mode = TARGET_VECTOR_ELEN_64
> +  ? (TARGET_MIN_VLEN >= 128 ? VNx2DImode : 
> VNx1DImode)
> +  : VNx1SImode;

This should be updated, since JuZhe has updated the mode system :P

> @@ -5907,7 +6057,7 @@ riscv_expand_epilogue (int style)
>   Start off by assuming that no registers need to be restored.  */
>struct riscv_frame_info *frame = &cfun->machine->frame;
>unsigned mask = frame->mask;
> -  HOST_WIDE_INT step2 = 0;
> +  poly_int64 step2 = 0;

I saw we check `step2.to_constant () > 0` later, does it mean step2 is
always a scalar rather than a poly number?
If so, I would suggest keeping HOST_WIDE_INT if possible.


> @@ -6058,10 +6218,10 @@ riscv_expand_epilogue (int style)
>  riscv_emit_stack_tie ();
>
>/* Deallocate the final bit of the frame.  */
> -  if (step2 > 0)
> +  if (step2.to_constant () > 0)
>  {
>insn = emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
> -  GEN_INT (step2)));
> +  GEN_INT (step2.to_constant ())));
>
>rtx dwarf = NULL_RTX;
>rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx,


Re: RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode intrinsic

2023-08-06 Thread Kito Cheng via Gcc-patches
A buildable patch is attached; again, it's based on your patch :)

On Mon, Aug 7, 2023 at 11:46 AM Li, Pan2 via Gcc-patches
 wrote:
>
> I am not quite sure I understand it correctly, but I bet the enums below are
> required by RISC-V mode switching, e.g. FRM_MODE_DYN in entry, or
> FRM_MODE_CALL/EXIT in emit.
>
> > ;; Defines rounding mode of an floating-point operation.
> > -(define_attr "frm_mode" "rne,rtz,rdn,rup,rmm,dyn,dyn_exit,dyn_call,none"
> > +(define_attr "frm_mode" ""
> >  (cond [(eq_attr "type" "vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfdiv,vfwmul")
> > -(const_string "dyn")]
> > +(const_string "FRM_DYN")]
> >(const_string "none")))
>
> Pan
>
> -Original Message-
> From: Kito Cheng 
> Sent: Monday, August 7, 2023 11:27 AM
> To: Li, Pan2 
> Cc: juzhe.zh...@rivai.ai; gcc-patches ; Wang, 
> Yanzhang 
> Subject: Re: RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding 
> mode intrinsic
>
> What about using a similar approach to vlmul?
>
>
> # NOTE: diff is based on your patch.
> [kitoc@hsinchu02 riscv]$ git diff
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 33f7cb1d670..3cb5c23cb09 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -345,6 +345,7 @@ enum floating_point_rounding_mode
>   FRM_DYN = 7, /* Aka 0b111.  */
>   FRM_STATIC_MIN = FRM_RNE,
>   FRM_STATIC_MAX = FRM_RMM,
> +  FRM_NONE = 8,
> };
>
> opt_machine_mode vectorize_related_mode (machine_mode, scalar_mode,
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index d5fb8611d6e..3d5dc0c11be 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -112,6 +112,7 @@ public:
>   {
> m_has_fp_rounding_mode_p = true;
> m_fp_rounding_mode = mode;
> +gcc_assert (mode != FRM_NONE);
>   }
>
>   void add_output_operand (rtx x, machine_mode mode)
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index f966f1ba769..c1a7650fe85 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -865,9 +865,9 @@ (define_attr "vxrm_mode" "rnu,rne,rdn,rod,none"
> (const_string "none")))
>
> ;; Defines rounding mode of an floating-point operation.
> -(define_attr "frm_mode" "rne,rtz,rdn,rup,rmm,dyn,dyn_exit,dyn_call,none"
> +(define_attr "frm_mode" ""
>   (cond [(eq_attr "type" "vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfdiv,vfwmul")
> -(const_string "dyn")]
> +(const_string "FRM_DYN")]
>(const_string "none")))
>
> ;; -
From 29bfcda510cd86f6b4804e0ea2178b2ce8e6671d Mon Sep 17 00:00:00 2001
From: Kito Cheng 
Date: Mon, 7 Aug 2023 14:34:12 +0800
Subject: [PATCH] f

---
 gcc/config/riscv/riscv-protos.h |  5 ++-
 gcc/config/riscv/riscv-v.cc | 19 
 gcc/config/riscv/riscv.cc   | 38 
 gcc/config/riscv/riscv.h|  2 +-
 gcc/config/riscv/vector.md  | 80 -
 5 files changed, 74 insertions(+), 70 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 33f7cb1d670..395f056f8d2 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -236,7 +236,6 @@ bool check_builtin_call (location_t, vec, unsigned int,
 			   tree, unsigned int, tree *);
 bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 bool legitimize_move (rtx, rtx);
-int get_frm_mode (rtx);
 void emit_vlmax_vsetvl (machine_mode, rtx);
 void emit_hard_vlmax_vsetvl (machine_mode, rtx);
 void emit_vlmax_insn (unsigned, int, rtx *, rtx = 0);
@@ -345,8 +344,12 @@ enum floating_point_rounding_mode
   FRM_DYN = 7, /* Aka 0b111.  */
   FRM_STATIC_MIN = FRM_RNE,
   FRM_STATIC_MAX = FRM_RMM,
+  FRM_NONE = 8,
+  FRM_DYN_EXIT = 9,
+  FRM_DYN_CALL = 10,
 };
 
+enum floating_point_rounding_mode get_frm_mode (rtx);
 opt_machine_mode vectorize_related_mode (machine_mode, scalar_mode,
 	 poly_uint64);
 unsigned int autovectorize_vector_modes (vec *, bool);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index d5fb8611d6e..9ab6ae17d33 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -112,6 +112,7 @@ public:
   {
 m_has_fp_rounding_mode_p = true;
 m_fp_rounding_mode = mode;
+gcc_assert (mode <= FRM_DYN);
   }
 
   void add_output_operand (rtx x, machine_mode mode)
@@ -1514,8 +1515,8 @@ expand_const_vector (rtx target, rtx src)
 }
 
 /* Get the frm mode with given CONST_INT rtx, the default mode is
-   FRM_MODE_DYN.  */
-int
+   FRM_DYN.  */
+enum floating_point_rounding_mode
 get_frm_mode (rtx operand)
 {
   gcc_assert (CONST_INT_P (operand));
@@ -1523,19 +1524,19 @@ get_frm_mode (rtx operand)
   switch (INTVAL (operand))
 {
 case FRM_RNE:
-  return FRM_MODE_RNE;
+  return FRM_RNE;
 case FRM_RTZ:
-  return FRM_MODE_RTZ;
+  return FRM_RTZ;
 case FRM_RDN:
-  return FRM_MODE_R

[PATCH] MATCH: [PR109959] `(uns <= 1) & uns` could be optimized to `uns == 1`

2023-08-06 Thread Andrew Pinski via Gcc-patches
I noticed while looking into some code generation of bitmap_single_bit_set_p,
that sometimes:
```
  if (uns > 1)
return 0;
  return uns == 1;
```
Would not optimize down to just:
```
return uns == 1;
```

In this case, VRP likes to change `a == 1` into `(bool)a` if
a has a range of [0,1] due to `a <= 1` side of the branch.
We might end up with this similar code even without VRP,
in the case of builtin-sprintf-warn-23.c (and Wrestrict.c), we had:
```
if (s < 0 || 1 < s)
  s = 0;
```
Which is the same as `s = ((unsigned)s) <= 1 ? s : 0`;
So we should be able to catch that also.

This adds 2 patterns to catch `(uns <= 1) & uns` and
`(uns > 1) ? 0 : uns` and convert those into:
`(convert) uns == 1`.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/109959

gcc/ChangeLog:

* match.pd (`(a > 1) ? 0 : (cast)a`, `(a <= 1) & (cast)a`):
New patterns.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/builtin-sprintf-warn-23.c: Remove xfail.
* c-c++-common/Wrestrict.c: Update test and remove some xfail.
* gcc.dg/tree-ssa/cmpeq-1.c: New test.
* gcc.dg/tree-ssa/cmpeq-2.c: New test.
* gcc.dg/tree-ssa/cmpeq-3.c: New test.
---
 gcc/match.pd  | 20 +++
 gcc/testsuite/c-c++-common/Wrestrict.c| 11 +++---
 .../gcc.dg/tree-ssa/builtin-sprintf-warn-23.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/cmpeq-1.c   | 36 +++
 gcc/testsuite/gcc.dg/tree-ssa/cmpeq-2.c   | 32 +
 gcc/testsuite/gcc.dg/tree-ssa/cmpeq-3.c   | 22 
 6 files changed, 117 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpeq-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpeq-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/cmpeq-3.c

diff --git a/gcc/match.pd b/gcc/match.pd
index de54b17abba..9b4819e5be7 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4902,6 +4902,26 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  )
 )
 
+/* (a > 1) ? 0 : (cast)a is the same as (cast)(a == 1)
+   for unsigned types. */
+(simplify
+ (cond (gt @0 integer_onep@1) integer_zerop (convert? @2))
+ (if (TYPE_UNSIGNED (TREE_TYPE (@0))
+  && bitwise_equal_p (@0, @2))
+  (convert (eq @0 @1))
+ )
+)
+
+/* (a <= 1) & (cast)a is the same as (cast)(a == 1)
+   for unsigned types. */
+(simplify
+ (bit_and:c (convert1? (le @0 integer_onep@1)) (convert2? @2))
+ (if (TYPE_UNSIGNED (TREE_TYPE (@0))
+  && bitwise_equal_p (@0, @2))
+  (convert (eq @0 @1))
+ )
+)
+
 (simplify
  (cond @0 zero_one_valued_p@1 zero_one_valued_p@2)
  (switch
diff --git a/gcc/testsuite/c-c++-common/Wrestrict.c 
b/gcc/testsuite/c-c++-common/Wrestrict.c
index 9eb02bdbfcb..4d005a618b3 100644
--- a/gcc/testsuite/c-c++-common/Wrestrict.c
+++ b/gcc/testsuite/c-c++-common/Wrestrict.c
@@ -681,7 +681,7 @@ void test_strcpy_range (void)
   ptrdiff_t r;
 
   r = SR (0, 1);
-  T (8, "0", a + r, a);   /* { dg-warning "accessing between 1 and 2 bytes at 
offsets \\\[0, 1] and 0 overlaps up to 2 bytes at offset \\\[0, 1]" "strcpy" { 
xfail *-*-*} } */
+  T (8, "0", a + r, a);   /* { dg-warning "accessing 2 bytes at offsets \\\[0, 
1] and 0 overlaps between 1 and 2 bytes at offset \\\[0, 1]" "strcpy" } */
 
   r = SR (2, 5);
   T (8, "01",  a + r, a);/* { dg-warning "accessing 3 bytes at 
offsets \\\[2, 5] and 0 may overlap 1 byte at offset 2" } */
@@ -860,10 +860,11 @@ void test_strncpy_range (char *d, size_t n)
 
   i = SR (0, 1);
   T ("0123", a, a + i, 0);
-  T ("0123", a, a + i, 1);
-  /* Offset in the range [0, i] is represented as a PHI (&a, &a + i)
- that the implementation isn't equipped to handle yet.  */
-  T ("0123", a, a + i, 2);   /* { dg-warning "accessing 2 bytes at offsets 0 
and \\\[0, 1] may overlap 1 byte at offset 1" "strncpy" { xfail *-*-* } } */
+  T ("0123", a, a + i, 1); /* { dg-warning "accessing 1 byte at offsets 0 and 
\\\[0, 1] may overlap 1 byte at offset 0" } */
+  /* When i == 1 the following overlaps at least 1 byte: the nul at a[1]
+ (if a + 1 is the empty string).  If a + 1 is not empty then it overlaps
+ it plus as many non-nul characters after it, up to the total of 2.  */
+  T ("0123", a, a + i, 2);   /* { dg-warning "accessing 2 bytes at offsets 0 
and \\\[0, 1] overlaps between 1 and 2 bytes at offset \\\[0, 1]" "strncpy" } */
 
   i = SR (1, 5);
   T ("0123", a, a + i, 0);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-23.c 
b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-23.c
index 112b08afc44..051c58892e6 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-23.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/builtin-sprintf-warn-23.c
@@ -719,5 +719,5 @@ void test_overlap_with_precision (char *d, int i, int j)
   T (d, "%.*s", i, d + 0);/* { dg-warning "may overlap" } */
   T (d, "%.*s", i, d + 1);/* { dg-warning "may overlap" } */
   T (d, "%.*s", i, d + 2);
-  T (d, "%.*s", i, d + i);/* { dg-warn

Re: [PATCH v1] LoongArch:Implement 128-bit floating point functions in gcc.

2023-08-06 Thread Xi Ruoyao via Gcc-patches
On Mon, 2023-08-07 at 12:01 +0800, chenxiaolong wrote:
> +/* Count the number of functions with "q" as the suffix */
> +static int MATHQ_NUMS=(int)LARCH_MAX_FTYPE_MAX-(int)LARCH_BUILTIN_HUGE_VALQ;

This is obviously not the GCC coding standard...  It should have some
white spaces:

static int MATHQ_NUMS = (int)LARCH_MAX_FTYPE_MAX - (int)LARCH_BUILTIN_HUGE_VALQ;

And I guess this variable should be declared const.

> +/* Define an float to do funciton huge_valq*/
> +#define FLOAT_BUILTIN_HUGE(INSN, FUNCTION_TYPE)   \
> +{ CODE_FOR_ ## INSN,   \
> +"__builtin_" #INSN,  LARCH_BUILTIN_HUGE_DIRECT,\
> +FUNCTION_TYPE, loongarch_builtin_avail_default }

/* snip */

> +/* Define an float to do funciton nansq*/
> +#define FLOAT_BUILTIN_NANSQ(INSN, FUNCTION_TYPE)  \
> +{ CODE_FOR_ ## INSN,   \
> +"__builtin_" #INSN,  LARCH_BUILTIN_NANSQ_DIRECT,   \
> +FUNCTION_TYPE, loongarch_builtin_avail_default }

What's the point of defining these macros when each is only used once?

> +  tree type,ftype;
> +  tree const_string_type
> + 
> =build_pointer_type(build_qualified_type(char_type_node,TYPE_QUAL_CONST));

Really bad format.  In GNU coding standard you should have a white space
after '=', and before '(', etc.  Please fix the formatting everywhere.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH v1] LoongArch:Implement 128-bit floating point functions in gcc.

2023-08-06 Thread chenxiaolong
From: Xiaolong Chen 

In this implementation, the "q"-suffixed functions are
re-registered and the "__float128" type is associated with the
"long double" type so that the compiler can handle the
corresponding functions correctly. The functions implemented
include __builtin_{huge_valq, infq, fabsq, copysignq, nanq, nansq}.

gcc/ChangeLog:

* config/loongarch/loongarch-builtins.cc (DEF_LARCH_FTYPE):
(MATHQ_NUMS=):Add the type of the builtin(q) function.
(enum loongarch_builtin_type):Add the type of the function.
(FLOAT_BUILTIN_HUGE):
(FLOAT_BUILTIN_INFQ):
(FLOAT_BUILTIN_FABSQ):
(FLOAT_BUILTIN_COPYSIGNQ):
(FLOAT_BUILTIN_NANQ):
(FLOAT_BUILTIN_NANSQ):
(loongarch_init_builtins):
(loongarch_fold_builtin):
(loongarch_expand_builtin):
* config/loongarch/loongarch-protos.h (loongarch_fold_builtin):
(loongarch_c_mode_for_suffix):Add the declaration of the function.
* config/loongarch/loongarch.cc (loongarch_c_mode_for_suffix):
Add the definition of the function.
(TARGET_FOLD_BUILTIN):
(TARGET_C_MODE_FOR_SUFFIX):
* config/loongarch/loongarch.md (infq):
():Add an instruction template to the machine
description file to generate information such as the icode used
by the function and the constructor.

libgcc/ChangeLog:

* config/loongarch/t-softfp-tf:
* config/loongarch/tf-signs.c: New file.
---
 gcc/config/loongarch/loongarch-builtins.cc | 196 -
 gcc/config/loongarch/loongarch-protos.h|   2 +
 gcc/config/loongarch/loongarch.cc  |  14 ++
 gcc/config/loongarch/loongarch.md  |  25 +++
 libgcc/config/loongarch/t-softfp-tf|   3 +
 libgcc/config/loongarch/tf-signs.c |  99 +++
 6 files changed, 337 insertions(+), 2 deletions(-)
 create mode 100644 libgcc/config/loongarch/tf-signs.c

diff --git a/gcc/config/loongarch/loongarch-builtins.cc 
b/gcc/config/loongarch/loongarch-builtins.cc
index b929f224dfa..cb7f0e60674 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -36,6 +36,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "fold-const.h"
 #include "expr.h"
 #include "langhooks.h"
+#include "calls.h"
+#include "explow.h"
 
 /* Macros to create an enumeration identifier for a function prototype.  */
 #define LARCH_FTYPE_NAME1(A, B) LARCH_##A##_FTYPE_##B
@@ -48,9 +50,18 @@ enum loongarch_function_type
 #define DEF_LARCH_FTYPE(NARGS, LIST) LARCH_FTYPE_NAME##NARGS LIST,
 #include "config/loongarch/loongarch-ftypes.def"
 #undef DEF_LARCH_FTYPE
+  LARCH_BUILTIN_HUGE_VALQ,
+  LARCH_BUILTIN_INFQ, 
+  LARCH_BUILTIN_FABSQ,
+  LARCH_BUILTIN_COPYSIGNQ,
+  LARCH_BUILTIN_NANQ,
+  LARCH_BUILTIN_NANSQ,
   LARCH_MAX_FTYPE_MAX
 };
 
+/* Count the number of functions with "q" as the suffix */
+static int MATHQ_NUMS=(int)LARCH_MAX_FTYPE_MAX-(int)LARCH_BUILTIN_HUGE_VALQ;
+
 /* Specifies how a built-in function should be converted into rtl.  */
 enum loongarch_builtin_type
 {
@@ -62,7 +73,25 @@ enum loongarch_builtin_type
   /* The function corresponds directly to an .md pattern.  There is no return
  value and the arguments are mapped to operands 0 and above.  */
   LARCH_BUILTIN_DIRECT_NO_TARGET,
+  
+ /*The function corresponds to an __builtin_huge_valq */
+  LARCH_BUILTIN_HUGE_DIRECT ,
+
+  /*Define the type of the __builtin_infq function */
+  LARCH_BUILTIN_INFQ_DIRECT ,
+
+  /*Define the type of the __builtin_fabsq function*/
+  LARCH_BUILTIN_FABSQ_DIRECT ,
 
+  /*Define the type of the __builtin_copysignq function */
+  LARCH_BUILTIN_COPYSIGNQ_DIRECT ,
+
+
+  /*Define the type of the __builtin_copysignq function */
+  LARCH_BUILTIN_NANQ_DIRECT ,
+  
+  /*Define the type of the __builtin_copysignq function */
+  LARCH_BUILTIN_NANSQ_DIRECT ,
 };
 
 /* Declare an availability predicate for built-in functions that require
@@ -135,6 +164,41 @@ AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI)
 #define DIRECT_NO_TARGET_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \
   LARCH_BUILTIN (INSN, #INSN, LARCH_BUILTIN_DIRECT_NO_TARGET, \
 FUNCTION_TYPE, AVAIL)
+/* Define an float to do funciton huge_valq*/
+#define FLOAT_BUILTIN_HUGE(INSN, FUNCTION_TYPE)   \
+{ CODE_FOR_ ## INSN,   \
+"__builtin_" #INSN,  LARCH_BUILTIN_HUGE_DIRECT,\
+FUNCTION_TYPE, loongarch_builtin_avail_default }
+
+/* Define an float to do funciton infq*/
+#define FLOAT_BUILTIN_INFQ(INSN, FUNCTION_TYPE)   \
+{ CODE_FOR_ ## INSN,   \
+"__builtin_" #INSN,  LARCH_BUILTIN_INFQ_DIRECT,\
+FUNCTION_TYPE, loongarch_builtin_avail_default }
+
+/* Define an float to do funciton fabsq*/
+#define FLOAT_BUILTIN_FABSQ(INSN, FUNCTION_TYPE)  \
+{ CODE_FOR_ #

RE: RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode intrinsic

2023-08-06 Thread Li, Pan2 via Gcc-patches
I am not quite sure I understand it correctly, but I bet the enums below are
required by RISC-V mode switching, e.g. FRM_MODE_DYN in entry, or
FRM_MODE_CALL/EXIT in emit.

> ;; Defines rounding mode of an floating-point operation.
> -(define_attr "frm_mode" "rne,rtz,rdn,rup,rmm,dyn,dyn_exit,dyn_call,none"
> +(define_attr "frm_mode" ""
>  (cond [(eq_attr "type" "vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfdiv,vfwmul")
> -(const_string "dyn")]
> +(const_string "FRM_DYN")]
>(const_string "none")))

Pan

-Original Message-
From: Kito Cheng  
Sent: Monday, August 7, 2023 11:27 AM
To: Li, Pan2 
Cc: juzhe.zh...@rivai.ai; gcc-patches ; Wang, Yanzhang 

Subject: Re: RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding 
mode intrinsic

What about using a similar approach to vlmul?


# NOTE: diff is based on your patch.
[kitoc@hsinchu02 riscv]$ git diff
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 33f7cb1d670..3cb5c23cb09 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -345,6 +345,7 @@ enum floating_point_rounding_mode
  FRM_DYN = 7, /* Aka 0b111.  */
  FRM_STATIC_MIN = FRM_RNE,
  FRM_STATIC_MAX = FRM_RMM,
+  FRM_NONE = 8,
};

opt_machine_mode vectorize_related_mode (machine_mode, scalar_mode,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index d5fb8611d6e..3d5dc0c11be 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -112,6 +112,7 @@ public:
  {
m_has_fp_rounding_mode_p = true;
m_fp_rounding_mode = mode;
+gcc_assert (mode != FRM_NONE);
  }

  void add_output_operand (rtx x, machine_mode mode)
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index f966f1ba769..c1a7650fe85 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -865,9 +865,9 @@ (define_attr "vxrm_mode" "rnu,rne,rdn,rod,none"
(const_string "none")))

;; Defines rounding mode of an floating-point operation.
-(define_attr "frm_mode" "rne,rtz,rdn,rup,rmm,dyn,dyn_exit,dyn_call,none"
+(define_attr "frm_mode" ""
  (cond [(eq_attr "type" "vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfdiv,vfwmul")
-(const_string "dyn")]
+(const_string "FRM_DYN")]
   (const_string "none")))

;; -


Re: RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode intrinsic

2023-08-06 Thread Kito Cheng via Gcc-patches
What about using a similar approach to vlmul?


# NOTE: diff is based on your patch.
[kitoc@hsinchu02 riscv]$ git diff
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 33f7cb1d670..3cb5c23cb09 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -345,6 +345,7 @@ enum floating_point_rounding_mode
  FRM_DYN = 7, /* Aka 0b111.  */
  FRM_STATIC_MIN = FRM_RNE,
  FRM_STATIC_MAX = FRM_RMM,
+  FRM_NONE = 8,
};

opt_machine_mode vectorize_related_mode (machine_mode, scalar_mode,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index d5fb8611d6e..3d5dc0c11be 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -112,6 +112,7 @@ public:
  {
m_has_fp_rounding_mode_p = true;
m_fp_rounding_mode = mode;
+gcc_assert (mode != FRM_NONE);
  }

  void add_output_operand (rtx x, machine_mode mode)
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index f966f1ba769..c1a7650fe85 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -865,9 +865,9 @@ (define_attr "vxrm_mode" "rnu,rne,rdn,rod,none"
(const_string "none")))

;; Defines rounding mode of an floating-point operation.
-(define_attr "frm_mode" "rne,rtz,rdn,rup,rmm,dyn,dyn_exit,dyn_call,none"
+(define_attr "frm_mode" ""
  (cond [(eq_attr "type" "vfalu,vfwalu,vfmul,vfdiv,vfwmul,vfdiv,vfwmul")
-(const_string "dyn")]
+(const_string "FRM_DYN")]
   (const_string "none")))

;; -


Re: [PATCH V5 2/2] Optimize '(X - N * M) / N' to 'X / N - M' if valid

2023-08-06 Thread guojiufu via Gcc-patches



Hi,

Gentle ping...

On 2023-07-18 22:05, Jiufu Guo wrote:

Hi,

Integer expression "(X - N * M) / N" can be optimized to "X / N - M"
if there is no wrap/overflow/underflow and "X - N * M" has the same
sign as "X".

Compared with the previous version:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624067.html
- APIs: overflow, nonnegative_p and nonpositive_p are moved close
  to value range.
- Use above APIs in match.pd.

Bootstrap & regtest pass on ppc64{,le} and x86_64.
Is this patch ok for trunk?

BR,
Jeff (Jiufu Guo)

PR tree-optimization/108757

gcc/ChangeLog:

* match.pd ((X - N * M) / N): New pattern.
((X + N * M) / N): New pattern.
((X + C) div_rshift N): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/pr108757-1.c: New test.
* gcc.dg/pr108757-2.c: New test.
* gcc.dg/pr108757.h: New test.

---
 gcc/match.pd  |  85 +++
 gcc/testsuite/gcc.dg/pr108757-1.c |  18 +++
 gcc/testsuite/gcc.dg/pr108757-2.c |  19 +++
 gcc/testsuite/gcc.dg/pr108757.h   | 233 ++
 4 files changed, 355 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr108757-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr108757-2.c
 create mode 100644 gcc/testsuite/gcc.dg/pr108757.h

diff --git a/gcc/match.pd b/gcc/match.pd
index 8543f777a28..39dbb0567dc 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -942,6 +942,91 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 #endif


+#if GIMPLE
+(for div (trunc_div exact_div)
+ /* Simplify (t + M*N) / N -> t / N + M.  */
+ (simplify
+  (div (plus:c@4 @0 (mult:c@3 @1 @2)) @2)
+  (with {value_range vr0, vr1, vr2, vr3, vr4;}
+  (if (INTEGRAL_TYPE_P (type)
+   && get_range_query (cfun)->range_of_expr (vr1, @1)
+   && get_range_query (cfun)->range_of_expr (vr2, @2)
+   && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
+   && get_range_query (cfun)->range_of_expr (vr0, @0)
+   && get_range_query (cfun)->range_of_expr (vr3, @3)
+   && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr3)
+   && get_range_query (cfun)->range_of_expr (vr4, @4)
+   && (TYPE_UNSIGNED (type)
+  || (vr0.nonnegative_p () && vr4.nonnegative_p ())
+  || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
+  (plus (div @0 @2) @1))))
+
+ /* Simplify (t - M*N) / N -> t / N - M.  */
+ (simplify
+  (div (minus@4 @0 (mult:c@3 @1 @2)) @2)
+  (with {value_range vr0, vr1, vr2, vr3, vr4;}
+  (if (INTEGRAL_TYPE_P (type)
+   && get_range_query (cfun)->range_of_expr (vr1, @1)
+   && get_range_query (cfun)->range_of_expr (vr2, @2)
+   && range_op_handler (MULT_EXPR).overflow_free_p (vr1, vr2)
+   && get_range_query (cfun)->range_of_expr (vr0, @0)
+   && get_range_query (cfun)->range_of_expr (vr3, @3)
+   && range_op_handler (MINUS_EXPR).overflow_free_p (vr0, vr3)
+   && get_range_query (cfun)->range_of_expr (vr4, @4)
+   && (TYPE_UNSIGNED (type)
+  || (vr0.nonnegative_p () && vr4.nonnegative_p ())
+  || (vr0.nonpositive_p () && vr4.nonpositive_p ())))
+  (minus (div @0 @2) @1)))))
+
+/* Simplify
+   (t + C) / N -> t / N + C / N where C is multiple of N.
+   (t + C) >> N -> t >> N + C>>N if low N bits of C is 0.  */
+(for op (trunc_div exact_div rshift)
+ (simplify
+  (op (plus@3 @0 INTEGER_CST@1) INTEGER_CST@2)
+   (with
+{
+  wide_int c = wi::to_wide (@1);
+  wide_int n = wi::to_wide (@2);
+  bool is_rshift = op == RSHIFT_EXPR;
+  bool neg_c = false;
+  bool ok = false;
+  value_range vr0;
+  if (INTEGRAL_TYPE_P (type)
+ && get_range_query (cfun)->range_of_expr (vr0, @0))
+{
+ ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
+: wi::multiple_of_p (c, n, TYPE_SIGN (type));
+ value_range vr1, vr3;
+ ok = ok && get_range_query (cfun)->range_of_expr (vr1, @1)
+  && range_op_handler (PLUS_EXPR).overflow_free_p (vr0, vr1)
+  && get_range_query (cfun)->range_of_expr (vr3, @3)
+  && (TYPE_UNSIGNED (type)
+  || (vr0.nonnegative_p () && vr3.nonnegative_p ())
+  || (vr0.nonpositive_p () && vr3.nonpositive_p ()));
+
+ /* Try check 'X + C' as 'X - -C' for unsigned.  */
+ if (!ok && TYPE_UNSIGNED (type) && c.sign_mask () < 0)
+   {
+ neg_c = true;
+ c = -c;
+ ok = is_rshift ? wi::ctz (c) >= n.to_shwi ()
+: wi::multiple_of_p (c, n, UNSIGNED);
+ ok = ok && wi::geu_p (vr0.lower_bound (), c);
+   }
+   }
+}
+   (if (ok)
+   (with
+{
+  wide_int m;
+  m = is_rshift ? wi::rshift (c, n, TYPE_SIGN (type))
+   : wi::div_trunc (c, n, TYPE_SIGN (type));
+  m = neg_c ? -m : m;
+}
+   (plus (op @0 @2) { wide_int_to_tree(type, m); }))))))
+#endif
+
 (for op (negate abs)
  /* Simplify cos(-x) and cos(|x|) -> cos(x).  Similarly for cosh.  */
  (for cos

RE: RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode intrinsic

2023-08-06 Thread Li, Pan2 via Gcc-patches
Sure thing, let’s wait for Kito’s comment on this.

Pan

From: juzhe.zh...@rivai.ai 
Sent: Monday, August 7, 2023 9:31 AM
To: Li, Pan2 ; gcc-patches 
Cc: Wang, Yanzhang ; kito.cheng 
Subject: Re: RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding 
mode intrinsic

I have no idea. I would prefer that Kito make the decision here.



juzhe.zh...@rivai.ai

From: Li, Pan2
Date: 2023-08-07 09:22
To: juzhe.zh...@rivai.ai; 
gcc-patches
CC: Wang, Yanzhang; 
kito.cheng
Subject: RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode 
intrinsic
We will get the error below if there is no cast here.

../gcc/config/riscv/vector.md:6134:36: error: invalid conversion from ‘int’ to 
‘attr_frm_mode’ [-fpermissive]

Or we can return attr_frm_mode in get_frm_mode but it requires some additional 
header files. Is there any guidance here in GCC coding style?

Pan

From: juzhe.zh...@rivai.ai <juzhe.zh...@rivai.ai>
Sent: Monday, August 7, 2023 8:46 AM
To: Li, Pan2 <pan2...@intel.com>; gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2...@intel.com>; Wang, Yanzhang <yanzhang.w...@intel.com>; kito.cheng <kito.ch...@gmail.com>
Subject: Re: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode 
intrinsic


+   (set (attr "frm_mode")

+  (symbol_ref "(enum attr_frm_mode) riscv_vector::get_frm_mode 
(operands[8])"))])

I don't think we need "(enum attr_frm_mode)"



juzhe.zh...@rivai.ai

From: pan2.li
Date: 2023-08-06 11:36
To: gcc-patches
CC: juzhe.zhong; 
pan2.li; 
yanzhang.wang; 
kito.cheng
Subject: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode 
intrinsic
From: Pan Li <pan2...@intel.com>

The frm_mode attr has some assumptions for each define insn as below.

1. The define insn has at least 9 operands.
2. The operands[9] must be frm reg.
3. The operands[9] must be const int.

Actually, the frm operand can be operands[8], operands[9] or
operands[10], and not all the define insn has frm operands.

This patch would like to refactor frm and eliminate the above
assumptions, as well as unblock the underlying rounding mode intrinsic
API support.

After the refactor, the default frm_mode will be none, and for the selected
insn types it will be dyn. For the floating-point insns which honor the frm,
we will set the frm_mode attr explicitly in define_insn.

Passed both the riscv.exp and rvv.exp for rv32/rv64 tests.

Signed-off-by: Pan Li <pan2...@intel.com>

gcc/ChangeLog:

* config/riscv/riscv-protos.h (get_frm_mode): Remove operand
assumptions.
* config/riscv/riscv-v.cc (get_frm_mode): New function.
* config/riscv/riscv-vector-builtins.cc
(function_expander::use_ternop_insn):
* config/riscv/vector.md: Set frm_mode attr explicitly.
---
gcc/config/riscv/riscv-protos.h   |   1 +
gcc/config/riscv/riscv-v.cc   |  28 
gcc/config/riscv/riscv-vector-builtins.cc |  22 ++-
gcc/config/riscv/vector.md| 170 ++
4 files changed, 159 insertions(+), 62 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 324991e2619..33f7cb1d670 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -236,6 +236,7 @@ bool check_builtin_call (location_t, vec, 
unsigned int,
   tree, unsigned int, tree *);
bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
bool legitimize_move (rtx, rtx);
+int get_frm_mode (rtx);
void emit_vlmax_vsetvl (machine_mode, rtx);
void emit_hard_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_insn (unsigned, int, rtx *, rtx = 0);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 278452b9e05..d5fb8611d6e 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1513,6 +1513,34 @@ expand_const_vector (rtx target, rtx src)
 gcc_unreachable ();
}
+/* Get the frm mode with given CONST_INT rtx, the default mode is
+   FRM_MODE_DYN.  */
+int
+get_frm_mode (rtx operand)
+{
+  gcc_assert (CONST_INT_P (operand));
+
+  switch (INTVAL (operand))
+{
+case FRM_RNE:
+  return FRM_MODE_RNE;
+case FRM_RTZ:
+  return FRM_MODE_RTZ;
+case FRM_RDN:
+  return FRM_MODE_RDN;
+case FRM_RUP:
+  return FRM_MODE_RUP;
+case FRM_RMM:
+  return FRM_MODE_RMM;
+case FRM_DYN:
+  return FRM_MODE_DYN;
+default:
+  return FRM_MODE_DYN;
+}
+
+  gcc_unreachable ();
+}
+
/* Expand a pre-RA RVV data move from SRC to DEST.
It expands move for RVV fractional vector modes.

Re: RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode intrinsic

2023-08-06 Thread juzhe.zh...@rivai.ai
I have no idea. I would prefer that Kito make the decision here.




juzhe.zh...@rivai.ai
 
From: Li, Pan2
Date: 2023-08-07 09:22
To: juzhe.zh...@rivai.ai; gcc-patches
CC: Wang, Yanzhang; kito.cheng
Subject: RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode 
intrinsic
We will get the error below if there is no cast here.
 
../gcc/config/riscv/vector.md:6134:36: error: invalid conversion from ‘int’ to 
‘attr_frm_mode’ [-fpermissive]
 
Or we can return attr_frm_mode in get_frm_mode but it requires some additional 
header files. Is there any guidance here in GCC coding style?
 
Pan
 
From: juzhe.zh...@rivai.ai  
Sent: Monday, August 7, 2023 8:46 AM
To: Li, Pan2 ; gcc-patches 
Cc: Li, Pan2 ; Wang, Yanzhang ; 
kito.cheng 
Subject: Re: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode 
intrinsic
 
+   (set (attr "frm_mode")
+   (symbol_ref "(enum attr_frm_mode) riscv_vector::get_frm_mode (operands[8])"))])
 
I don't think we need "(enum attr_frm_mode)"




juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-08-06 11:36
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode 
intrinsic
From: Pan Li 
 
The frm_mode attr has some assumptions for each define insn as below.
 
1. The define insn has at least 9 operands.
2. The operands[9] must be frm reg.
3. The operands[9] must be const int.
 
Actually, the frm operand can be operands[8], operands[9] or
operands[10], and not all the define insn has frm operands.
 
This patch would like to refactor frm and eliminate the above
assumptions, as well as unblock the underlying rounding mode intrinsic
API support.
 
After the refactor, the default frm_mode will be none, and for the selected
insn types it will be dyn. For the floating-point insns which honor the frm,
we will set the frm_mode attr explicitly in define_insn.
 
Passed both the riscv.exp and rvv.exp for rv32/rv64 tests.
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv-protos.h (get_frm_mode): Remove operand
assumptions.
* config/riscv/riscv-v.cc (get_frm_mode): New function.
* config/riscv/riscv-vector-builtins.cc
(function_expander::use_ternop_insn):
* config/riscv/vector.md: Set frm_mode attr explicitly.
---
gcc/config/riscv/riscv-protos.h   |   1 +
gcc/config/riscv/riscv-v.cc   |  28 
gcc/config/riscv/riscv-vector-builtins.cc |  22 ++-
gcc/config/riscv/vector.md| 170 ++
4 files changed, 159 insertions(+), 62 deletions(-)
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 324991e2619..33f7cb1d670 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -236,6 +236,7 @@ bool check_builtin_call (location_t, vec, 
unsigned int,
   tree, unsigned int, tree *);
bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
bool legitimize_move (rtx, rtx);
+int get_frm_mode (rtx);
void emit_vlmax_vsetvl (machine_mode, rtx);
void emit_hard_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_insn (unsigned, int, rtx *, rtx = 0);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 278452b9e05..d5fb8611d6e 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1513,6 +1513,34 @@ expand_const_vector (rtx target, rtx src)
 gcc_unreachable ();
}
+/* Get the frm mode with given CONST_INT rtx, the default mode is
+   FRM_MODE_DYN.  */
+int
+get_frm_mode (rtx operand)
+{
+  gcc_assert (CONST_INT_P (operand));
+
+  switch (INTVAL (operand))
+{
+case FRM_RNE:
+  return FRM_MODE_RNE;
+case FRM_RTZ:
+  return FRM_MODE_RTZ;
+case FRM_RDN:
+  return FRM_MODE_RDN;
+case FRM_RUP:
+  return FRM_MODE_RUP;
+case FRM_RMM:
+  return FRM_MODE_RMM;
+case FRM_DYN:
+  return FRM_MODE_DYN;
+default:
+  return FRM_MODE_DYN;
+}
+
+  gcc_unreachable ();
+}
+
/* Expand a pre-RA RVV data move from SRC to DEST.
It expands move for RVV fractional vector modes.  */
bool
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 528dca7ae85..abab06c00ed 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -3730,17 +3730,29 @@ function_expander::use_ternop_insn (bool vd_accum_p, 
insn_code icode)
 }
   for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
-add_input_operand (argno);
+{
+  if (base->has_rounding_mode_operand_p ()
+   && argno == call_expr_nargs (exp) - 2)
+ {
+   /* Since the rounding mode argument position is not consistent with
+  the instruction pattern, we need to skip rounding mode argument
+  here.  */
+   continue;
+ }
+  add_input_operand (argno);
+}
   add_input_operand (Pmode, get_tail_policy_for_pred (pred));
   add_input_operand (Pmode, get_mask_policy_for_pred (pred));
   add_input_operand (Pmode, get_avl_type_rtx (avl_type::NONVLMA

RE: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode intrinsic

2023-08-06 Thread Li, Pan2 via Gcc-patches
We will have below error if there is no cast here.

../gcc/config/riscv/vector.md:6134:36: error: invalid conversion from 'int' to 
'attr_frm_mode' [-fpermissive]

Or we can return attr_frm_mode in get_frm_mode but it requires some additional 
header files. Is there any guidance here in GCC coding style?

Pan

From: juzhe.zh...@rivai.ai 
Sent: Monday, August 7, 2023 8:46 AM
To: Li, Pan2 ; gcc-patches 
Cc: Li, Pan2 ; Wang, Yanzhang ; 
kito.cheng 
Subject: Re: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode 
intrinsic


+   (set (attr "frm_mode")

+   (symbol_ref "(enum attr_frm_mode) riscv_vector::get_frm_mode 
(operands[8])"))])

I don't think we need "(enum attr_frm_mode)"



juzhe.zh...@rivai.ai

From: pan2.li
Date: 2023-08-06 11:36
To: gcc-patches
CC: juzhe.zhong; 
pan2.li; 
yanzhang.wang; 
kito.cheng
Subject: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode 
intrinsic
From: Pan Li <pan2...@intel.com>

The frm_mode attr has some assumptions for each define insn as below.

1. The define insn has at least 9 operands.
2. The operands[9] must be frm reg.
3. The operands[9] must be const int.

Actually, the frm operand can be operands[8], operands[9] or
operands[10], and not all define insns have frm operands.

This patch would like to refactor frm and eliminate the above
assumptions, as well as unblock the underlying rounding mode intrinsic
API support.

After refactor, the default frm will be none, and the selected insn type
will be dyn. For the floating point which honors the frm, we will
set the frm_mode attr explicitly in define_insn.

Passed both the riscv.exp and rvv.exp for rv32/rv64 tests.

Signed-off-by: Pan Li <pan2...@intel.com>

gcc/ChangeLog:

* config/riscv/riscv-protos.h (get_frm_mode): Remove operand
assumptions.
* config/riscv/riscv-v.cc (get_frm_mode): New function.
* config/riscv/riscv-vector-builtins.cc
(function_expander::use_ternop_insn):
* config/riscv/vector.md: Set frm_mode attr explicitly.
---
gcc/config/riscv/riscv-protos.h   |   1 +
gcc/config/riscv/riscv-v.cc   |  28 
gcc/config/riscv/riscv-vector-builtins.cc |  22 ++-
gcc/config/riscv/vector.md| 170 ++
4 files changed, 159 insertions(+), 62 deletions(-)

diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 324991e2619..33f7cb1d670 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -236,6 +236,7 @@ bool check_builtin_call (location_t, vec, 
unsigned int,
   tree, unsigned int, tree *);
bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
bool legitimize_move (rtx, rtx);
+int get_frm_mode (rtx);
void emit_vlmax_vsetvl (machine_mode, rtx);
void emit_hard_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_insn (unsigned, int, rtx *, rtx = 0);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 278452b9e05..d5fb8611d6e 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1513,6 +1513,34 @@ expand_const_vector (rtx target, rtx src)
 gcc_unreachable ();
}
+/* Get the frm mode with given CONST_INT rtx, the default mode is
+   FRM_MODE_DYN.  */
+int
+get_frm_mode (rtx operand)
+{
+  gcc_assert (CONST_INT_P (operand));
+
+  switch (INTVAL (operand))
+{
+case FRM_RNE:
+  return FRM_MODE_RNE;
+case FRM_RTZ:
+  return FRM_MODE_RTZ;
+case FRM_RDN:
+  return FRM_MODE_RDN;
+case FRM_RUP:
+  return FRM_MODE_RUP;
+case FRM_RMM:
+  return FRM_MODE_RMM;
+case FRM_DYN:
+  return FRM_MODE_DYN;
+default:
+  return FRM_MODE_DYN;
+}
+
+  gcc_unreachable ();
+}
+
/* Expand a pre-RA RVV data move from SRC to DEST.
It expands move for RVV fractional vector modes.  */
bool
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 528dca7ae85..abab06c00ed 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -3730,17 +3730,29 @@ function_expander::use_ternop_insn (bool vd_accum_p, 
insn_code icode)
 }
   for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
-add_input_operand (argno);
+{
+  if (base->has_rounding_mode_operand_p ()
+   && argno == call_expr_nargs (exp) - 2)
+ {
+   /* Since the rounding mode argument position is not consistent with
+  the instruction pattern, we need to skip rounding mode argument
+  here.  */
+   continue;
+ }
+  add_input_operand (argno);
+}
   add_input_operand (Pmode, get_tail_policy_for_pred (pred));
   add_input_operand (Pmode, get_mask_policy_for_pred (pred));
   add_input_operand (Pmode, get_avl_type_rtx (avl_type::NONVLMAX));
- 

Re: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode intrinsic

2023-08-06 Thread juzhe.zh...@rivai.ai
+   (set (attr "frm_mode")
+   (symbol_ref "(enum attr_frm_mode) riscv_vector::get_frm_mode 
(operands[8])"))])

I don't think we need "(enum attr_frm_mode)"



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-08-06 11:36
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Refactor RVV frm_mode attr for rounding mode 
intrinsic
From: Pan Li 
 
The frm_mode attr has some assumptions for each define insn as below.
 
1. The define insn has at least 9 operands.
2. The operands[9] must be frm reg.
3. The operands[9] must be const int.
 
Actually, the frm operand can be operands[8], operands[9] or
operands[10], and not all define insns have frm operands.
 
This patch would like to refactor frm and eliminate the above
assumptions, as well as unblock the underlying rounding mode intrinsic
API support.
 
After refactor, the default frm will be none, and the selected insn type
will be dyn. For the floating point which honors the frm, we will
set the frm_mode attr explicitly in define_insn.
 
Passed both the riscv.exp and rvv.exp for rv32/rv64 tests.
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv-protos.h (get_frm_mode): Remove operand
assumptions.
* config/riscv/riscv-v.cc (get_frm_mode): New function.
* config/riscv/riscv-vector-builtins.cc
(function_expander::use_ternop_insn):
* config/riscv/vector.md: Set frm_mode attr explicitly.
---
gcc/config/riscv/riscv-protos.h   |   1 +
gcc/config/riscv/riscv-v.cc   |  28 
gcc/config/riscv/riscv-vector-builtins.cc |  22 ++-
gcc/config/riscv/vector.md| 170 ++
4 files changed, 159 insertions(+), 62 deletions(-)
 
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 324991e2619..33f7cb1d670 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -236,6 +236,7 @@ bool check_builtin_call (location_t, vec, 
unsigned int,
   tree, unsigned int, tree *);
bool const_vec_all_same_in_range_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
bool legitimize_move (rtx, rtx);
+int get_frm_mode (rtx);
void emit_vlmax_vsetvl (machine_mode, rtx);
void emit_hard_vlmax_vsetvl (machine_mode, rtx);
void emit_vlmax_insn (unsigned, int, rtx *, rtx = 0);
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 278452b9e05..d5fb8611d6e 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1513,6 +1513,34 @@ expand_const_vector (rtx target, rtx src)
 gcc_unreachable ();
}
+/* Get the frm mode with given CONST_INT rtx, the default mode is
+   FRM_MODE_DYN.  */
+int
+get_frm_mode (rtx operand)
+{
+  gcc_assert (CONST_INT_P (operand));
+
+  switch (INTVAL (operand))
+{
+case FRM_RNE:
+  return FRM_MODE_RNE;
+case FRM_RTZ:
+  return FRM_MODE_RTZ;
+case FRM_RDN:
+  return FRM_MODE_RDN;
+case FRM_RUP:
+  return FRM_MODE_RUP;
+case FRM_RMM:
+  return FRM_MODE_RMM;
+case FRM_DYN:
+  return FRM_MODE_DYN;
+default:
+  return FRM_MODE_DYN;
+}
+
+  gcc_unreachable ();
+}
+
/* Expand a pre-RA RVV data move from SRC to DEST.
It expands move for RVV fractional vector modes.  */
bool
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index 528dca7ae85..abab06c00ed 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -3730,17 +3730,29 @@ function_expander::use_ternop_insn (bool vd_accum_p, 
insn_code icode)
 }
   for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
-add_input_operand (argno);
+{
+  if (base->has_rounding_mode_operand_p ()
+   && argno == call_expr_nargs (exp) - 2)
+ {
+   /* Since the rounding mode argument position is not consistent with
+  the instruction pattern, we need to skip rounding mode argument
+  here.  */
+   continue;
+ }
+  add_input_operand (argno);
+}
   add_input_operand (Pmode, get_tail_policy_for_pred (pred));
   add_input_operand (Pmode, get_mask_policy_for_pred (pred));
   add_input_operand (Pmode, get_avl_type_rtx (avl_type::NONVLMAX));
-  /* TODO: Currently, we don't support intrinsic that is modeling rounding 
mode.
- We add default rounding mode for the intrinsics that didn't model rounding
- mode yet.  */
+  if (base->has_rounding_mode_operand_p ())
+add_input_operand (call_expr_nargs (exp) - 2);
+
+  /* The RVV floating-point only support dynamic rounding mode in the
+ FRM register.  */
   if (opno != insn_data[icode].n_generator_args)
-add_input_operand (Pmode, const0_rtx);
+add_input_operand (Pmode, gen_int_mode (riscv_vector::FRM_DYN, Pmode));
   return generate_insn (icode);
}
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 750b2de8df9..db3ee105ef4 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -867,26 +867,8 @@ (define_attr "vxrm_mode" "rnu,rne,rdn,rod,no

Re: [PATCH][RFC] tree-optimization/92335 - Improve sinking heuristics for vectorization

2023-08-06 Thread Prathamesh Kulkarni via Gcc-patches
On Thu, 3 Aug 2023 at 17:48, Richard Biener  wrote:
>
> On Thu, 3 Aug 2023, Richard Biener wrote:
>
> > On Thu, 3 Aug 2023, Richard Biener wrote:
> >
> > > On Thu, 3 Aug 2023, Prathamesh Kulkarni wrote:
> > >
> > > > On Wed, 2 Aug 2023 at 14:17, Richard Biener via Gcc-patches
> > > >  wrote:
> > > > >
> > > > > On Mon, 31 Jul 2023, Jeff Law wrote:
> > > > >
> > > > > >
> > > > > >
> > > > > > On 7/28/23 01:05, Richard Biener via Gcc-patches wrote:
> > > > > > > The following delays sinking of loads within the same innermost
> > > > > > > loop when it was unconditional before.  That's a not uncommon
> > > > > > > issue preventing vectorization when masked loads are not 
> > > > > > > available.
> > > > > > >
> > > > > > > Bootstrapped and tested on x86_64-unknown-linux-gnu.
> > > > > > >
> > > > > > > I have a followup patch improving sinking that without this would
> > > > > > > cause more of the problematic sinking - now that we have a second
> > > > > > > sink pass after loop opts this looks like a reasonable approach?
> > > > > > >
> > > > > > > OK?
> > > > > > >
> > > > > > > Thanks,
> > > > > > > Richard.
> > > > > > >
> > > > > > >  PR tree-optimization/92335
> > > > > > >  * tree-ssa-sink.cc (select_best_block): Before loop
> > > > > > >  optimizations avoid sinking unconditional loads/stores
> > > > > > >  in innermost loops to conditional executed places.
> > > > > > >
> > > > > > >  * gcc.dg/tree-ssa/ssa-sink-10.c: Disable vectorizing.
> > > > > > >  * gcc.dg/tree-ssa/predcom-9.c: Clone from ssa-sink-10.c,
> > > > > > >  expect predictive commoning to happen instead of sinking.
> > > > > > >  * gcc.dg/vect/pr65947-3.c: Adjust.
> > > > > > I think it's reasonable -- there's probably going to be cases where 
> > > > > > it's not
> > > > > > great, but more often than not I think it's going to be a reasonable
> > > > > > heuristic.
> > > > > >
> > > > > > If there is undesirable fallout, better to find it over the coming 
> > > > > > months than
> > > > > > next spring.  So I'd suggest we go forward now to give more time to 
> > > > > > find any
> > > > > > pathological cases (if they exist).
> > > > >
> > > > > Agreed, I've pushed this now.
> > > > Hi Richard,
> > > > After this patch (committed in 
> > > > 399c8dd44ff44f4b496223c7cc980651c4d6f6a0),
> > > > pr65947-7.c "failed" for aarch64-linux-gnu:
> > > > FAIL: gcc.dg/vect/pr65947-7.c scan-tree-dump-not vect "LOOP VECTORIZED"
> > > > FAIL: gcc.dg/vect/pr65947-7.c -flto -ffat-lto-objects
> > > > scan-tree-dump-not vect "LOOP VECTORIZED"
> > > >
> > > > /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target {
> > > > ! vect_fold_extract_last } } } } */
> > > >
> > > > With your commit, condition_reduction in pr65947-7.c gets vectorized
> > > > regardless of vect_fold_extract_last,
> > > > which gates the above test (which is an improvement, because the
> > > > function didn't get vectorized before the commit).
> > > >
> > > > The attached patch thus removes the gating on vect_fold_extract_last,
> > > > and the test passes again.
> > > > OK to commit ?
> > >
> > > OK.
> >
> > Or wait - the loop doesn't vectorize on x86_64, so I guess one
> > critical target condition is missing.  Can you figure out which?
>
> I see
>
> /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/pr65947-7.c:18:21:
> note:   vect_is_simple_use: operand last_19 = PHI ,
> type of def: reduction
> /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/pr65947-7.c:18:21:
> note:   vect_is_simple_use: vectype vector(4) int
> /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/pr65947-7.c:18:21:
> missed:   multiple types in double reduction or condition reduction or
> fold-left reduction.
> /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/pr65947-7.c:13:1:
> missed:   not vectorized: relevant phi not supported: last_19 = PHI
> 
> /space/rguenther/src/gcc/gcc/testsuite/gcc.dg/vect/pr65947-7.c:18:21:
> missed:  bad operation or unsupported loop bound.
Hi Richard,
Looking at the aarch64 vect dump, it seems the loop in
condition_reduction gets vectorized with V4HI mode,
while it fails for other modes in vectorizable_condition:

  if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
  && ncopies > 1)
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
 "multiple types in double reduction or condition "
 "reduction or fold-left reduction.\n");
  return false;
}

From the dump:
foo.c:9:21: note:   === vect_analyze_loop_operations ===
foo.c:9:21: note:   examining phi: last_19 = PHI 
foo.c:9:21: note:   vect_is_simple_use: operand (int) aval_13, type of
def: internal
foo.c:9:21: note:   vect_is_simple_use: vectype vector(4) int
foo.c:9:21: note:   vect_is_simple_use: operand last_19 = PHI
, type of def: reduction
foo.c:9:21: note:   vect_is_simple_use: vectype vector(4) int

For V8HI, VF = 8, and vectype_in = vector(4) int.
Thus ncopies = VF / length(vectype_in) = 2, 

Fix profile update after versioning ifconverted loop

2023-08-06 Thread Jan Hubicka via Gcc-patches
Hi,
If a loop is if-converted and later versioned by the vectorizer, the vectorizer
will reuse the scalar loop produced by ifconvert.  Curiously enough, it does not
seem to do so for versions produced by loop distribution, although for loop
distribution this matters (since both ldist versions survive to final code),
while after ifcvt it does not (since we remove the non-vectorized path).

This patch fixes the associated profile update.  Here it is necessary to scale
both arms of the conditional according to the runtime checks inserted.  We got
the loop body partly right, but not the preheader block and the block after the
exit.  The first is particularly bad since it changes loop iteration estimates.

So we now turn 4 original loops:
  loop 1: iterations by profile: 473.497707 (reliable) entry count:84821 
(precise, freq 0.9979)
  loop 2: iterations by profile: 100.00 (reliable) entry count:39848881 
(precise, freq 468.8104)
  loop 3: iterations by profile: 100.00 (reliable) entry count:39848881 
(precise, freq 468.8104)
  loop 4: iterations by profile: 100.999596 (reliable) entry count:84167 
(precise, freq 0.9902)

Into following loops
  iterations by profile: 5.312499 (unreliable, maybe flat) entry count:12742188 
(guessed, freq 149.9081)
 vectorized and split loop 1, peeled
  iterations by profile: 0.009496 (unreliable, maybe flat) entry count:374798 
(guessed, freq 4.4094)
 split loop 1 (last iteration), peeled
  iterations by profile: 100.08 (unreliable) entry count:3945039 (guessed, 
freq 46.4122)
 scalar version of loop 1
  iterations by profile: 100.07 (unreliable) entry count:7101070 (guessed, 
freq 83.5420)
 redundant scalar version of loop 1 which we could eliminate if vectorizer 
understood ldist
  iterations by profile: 100.00 (unreliable) entry count:35505353 (guessed, 
freq 417.7100)
 unvectorized loop 2
  iterations by profile: 5.312500 (unreliable) entry count:25563855 (guessed, 
freq 300.7512)
 vectorized loop 2, not peeled (hits max-peel-insns)
  iterations by profile: 100.07 (unreliable) entry count:7101070 (guessed, 
freq 83.5420)
 unvectorized loop 3
  iterations by profile: 5.312500 (unreliable) entry count:25563855 (guessed, 
freq 300.7512)
 vectorized loop 3, not peeled (hits max-peel-insns)
  iterations by profile: 473.497707 (reliable) entry count:84821 (precise, freq 
0.9979)
 loop 1
  iterations by profile: 100.999596 (reliable) entry count:84167 (precise, freq 
0.9902)
 loop 4

With this change we are at 0 profile errors on the hmmer benchmark:

Pass dump id |dynamic mismatch  |overall  |
 |in count  |size|time|
172t ch_vect |0 |  996   | 385812023346   |
173t ifcvt   | 71010686+71010686| 1021  +2.5%| 468361969416 +21.4%|
174t vect|210830784   +139820098| 1497 +46.6%| 216073467874 -53.9%|
175t dce |210830784 | 1387  -7.3%| 205273170281  -5.0%|
176t pcom|210830784 | 1387   | 201722634966  -1.7%|
177t cunroll |0   -210830784| 1443  +4.0%| 180441501289 -10.5%|
182t ivopts  |0 | 1385  -4.0%| 136412345683 -24.4%|
183t lim |0 | 1389  +0.3%| 135093950836  -1.0%|
192t reassoc |0 | 1381  -0.6%| 134778347700  -0.2%|
193t slsr|0 | 1380  -0.1%| 134738100330  -0.0%|
195t tracer  |0 | 1521 +10.2%| 134738179146  +0.0%|
196t fre |  2680654 +2680654| 1489  -2.1%| 134659672725  -0.1%|
198t dom |  5361308 +2680654| 1473  -1.1%| 134449553658  -0.2%|
201t vrp |  5361308 | 1474  +0.1%| 134489004050  +0.0%|
202t ccp |  5361308 | 1472  -0.1%| 134440752274  -0.0%|
204t dse |  5361308 | 1444  -1.9%| 133802300525  -0.5%|
206t forwprop|  5361308 | 1433  -0.8%| 133542828370  -0.2%|
207t sink|  5361308 | 1431  -0.1%| 133542658728  -0.0%|
211t store-me|  5361308 | 1430  -0.1%| 133542573728  -0.0%|
212t cddce   |  5361308 | 1428  -0.1%| 133541776728  -0.0%|
258r expand  |  5361308 |||
260r into_cfg|  5361308 | 9334  -0.8%| 885820707913  -0.6%|
261r jump|  5361308 | 9330  -0.0%| 885820367913  -0.0%|
265r fwprop1 |  5361308 | 9206  -1.3%| 876756504385  -1.0%|
267r rtl pre |  5361308 | 9210  +0.0%| 876914305953  +0.0%|
269r cprop   |  5361308 | 9202  -0.1%| 876756165101  -0.0%|
271r cse_loca|  5361308 | 9198  -0.0%| 876727760821  -0.0%|
272r ce1 |  5361308 | 9126  -0.8%| 875726815885  -0.1%|
276r loop2_in|  5361308 | 916

[Committed] Avoid FAIL of gcc.target/i386/pr110792.c

2023-08-06 Thread Roger Sayle

My apologies (again), I managed to mess up the 64-bit version of the
test case for PR 110792.  Unlike the 32-bit version, the 64-bit case
contains exactly the same load instructions, just in a different order
making the correct and incorrect behaviours impossible to distinguish
with a scan-assembler-not.  Somewhere between checking that this test
failed in a clean tree without the patch, and getting the escaping
correct, I'd failed to notice that this also FAILs in the patched tree.
Doh!  Instead of removing the test completely, I've left it as a
compilation test.

The original fix is tested by the 32-bit test case.

Committed to mainline as obvious.  Sorry for the inconvenience.


2023-08-06  Roger Sayle  

gcc/testsuite/ChangeLog
PR target/110792
* gcc.target/i386/pr110792.c: Remove dg-final scan-assembler-not.


diff --git a/gcc/testsuite/gcc.target/i386/pr110792.c 
b/gcc/testsuite/gcc.target/i386/pr110792.c
index b65125c48b6..eea4e1877db 100644
--- a/gcc/testsuite/gcc.target/i386/pr110792.c
+++ b/gcc/testsuite/gcc.target/i386/pr110792.c
@@ -15,4 +15,3 @@ unsigned __int128 whirl(unsigned char x0)
asm("":::"memory");
return tt;
 }
-/* { dg-final { scan-assembler-not "movq\tWHIRL_S\\+8\\(%rdi\\), %rdi" } } */


Fix profile update after peeled epilogues

2023-08-06 Thread Jan Hubicka via Gcc-patches
Hi,
Epilogue peeling expects the scalar loop to have the same number of executions as
the vector loop, which is true at the beginning of vectorization.  However, if the
epilogues are vectorized, this is no longer the case.  In this situation the
loop preheader is replaced by new guard code with a correct profile, but the
loop body is left unscaled.  This leads to a loop that exits more often than
it is entered.

This patch adds logic to scale the frequencies down and also to fix the profile
of the original preheader where necessary.

Bootstrapped/regtested x86_64-linux, committed.

gcc/ChangeLog:

* tree-vect-loop-manip.cc (vect_do_peeling): Fix profile update of 
peeled epilogues.

gcc/testsuite/ChangeLog:

* gcc.dg/vect/vect-bitfield-read-1.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-read-2.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-read-3.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-read-4.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-read-5.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-read-6.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-read-7.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-write-1.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-write-2.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-write-3.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-write-4.c: Check profile consistency.
* gcc.dg/vect/vect-bitfield-write-5.c: Check profile consistency.
* gcc.dg/vect/vect-epilogues-2.c: Check profile consistency.
* gcc.dg/vect/vect-epilogues.c: Check profile consistency.
* gcc.dg/vect/vect-mask-store-move-1.c: Check profile consistency.

diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c 
b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
index 42e50d9f0c8..147c959568d 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
 /* { dg-require-effective-target vect_int } */
 /* { dg-require-effective-target vect_shift } */
 
@@ -39,3 +40,4 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c 
b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
index a9aeefcd72c..982e6a7967b 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
 /* { dg-require-effective-target vect_shift } */
 /* { dg-require-effective-target vect_long_long } */
 
@@ -42,3 +43,4 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c 
b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c
index c7d0fd26bad..f2a43c39f50 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-3.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
 /* { dg-require-effective-target vect_int } */
 /* { dg-require-effective-target vect_shift } */
 
@@ -43,3 +44,4 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c 
b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c
index 6a3ed8c0c6f..9f6f0220664 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-4.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
 /* { dg-require-effective-target vect_shift } */
 /* { dg-require-effective-target vect_long_long } */
 
@@ -44,3 +45,4 @@ int main (void)
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
 
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c 
b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c
index b2889df8a0a..662aed104cf 100644
--- a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-5.c
@@ -1,3 +1,4 @@
+/* { dg-additional-options "-fdump-tree-optimized-details-blocks" } */
 /* { dg-require-effective-target vect_int } */
 /* { dg-require-effective-target vect_shift } */
 
@@ -41,3 +42,4 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-not "Invalid sum" "optimized" } } */
diff --git a/gcc/testsui

Re: [PATCH] Add -Wdisabled-optimization warning for not optimizing sibling calls

2023-08-06 Thread Bradley Lucier via Gcc-patches

On 8/5/23 5:53 PM, David Malcolm wrote:

...but the warning branch uses "warning", which implicitly uses the
input_location global variable.  Is the warning reported at the correct
place?  It's better to use warning_at and pass it the location at which
the warning should be emitted.


Thanks, I changed the patch to follow your suggestion.

I built and ran make check with the patch; there were no changes to the 
test results.


As a test, I again built GCC with

../../gcc-mainline/configure CXX="/pkgs/gcc-mainline-new-new/bin/g++ 
-Wdisabled-optimization" --enable-languages=c --disable-multilib 
--prefix=/pkgs/gcc-mainline-test-test --disable-werror --disable-bootstrap


I found no changes to the warning messages.

Brad

diff --git a/gcc/calls.cc b/gcc/calls.cc
index 1f3a6d5c450..de293ac51bb 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -1242,10 +1242,12 @@ void
 maybe_complain_about_tail_call (tree call_expr, const char *reason)
 {
   gcc_assert (TREE_CODE (call_expr) == CALL_EXPR);
-  if (!CALL_EXPR_MUST_TAIL_CALL (call_expr))
-return;
-
-  error_at (EXPR_LOCATION (call_expr), "cannot tail-call: %s", reason);
+  if (CALL_EXPR_MUST_TAIL_CALL (call_expr))
+error_at (EXPR_LOCATION (call_expr), "cannot tail-call: %s", reason);
+  else if (flag_optimize_sibling_calls)
+warning_at (EXPR_LOCATION (call_expr), OPT_Wdisabled_optimization,
+"cannot apply sibling-call optimization: %s", reason);
+  return;
 }

 /* Fill in ARGS_SIZE and ARGS array based on the parameters found in


[PATCH] c++: follow DR 2386 and update implementation of get_tuple_size [PR110216]

2023-08-06 Thread gnaggnoyil via Gcc-patches
DR 2386 updated the tuple_size requirements for structured binding and
it now requires tuple_size to be considered only if
std::tuple_size names a complete class type with member value. GCC
before this patch does not follow the updated requirements, and this
patch is intended to implement them.

DR 2386
PR c++/110216

gcc/cp/ChangeLog:

* decl.cc (get_tuple_size): Update implementation to follow DR 2386.

gcc/testsuite/ChangeLog:

* g++.dg/cpp1z/pr110216.C: New test.

Signed-off-by: Yonggang Li 
---
 gcc/cp/decl.cc|  6 +-
 gcc/testsuite/g++.dg/cpp1z/pr110216.C | 21 +
 2 files changed, 26 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp1z/pr110216.C

diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 792ab330dd0..923b81a33b0 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -8940,10 +8940,14 @@ get_tuple_size (tree type)
 /*context*/std_node,
 /*entering_scope*/false, tf_none);
   inst = complete_type (inst);
-  if (inst == error_mark_node || !COMPLETE_TYPE_P (inst))
+  if (inst == error_mark_node
+  || !COMPLETE_TYPE_P (inst)
+  || !CLASS_TYPE_P (type))
 return NULL_TREE;
   tree val = lookup_qualified_name (inst, value_identifier,
LOOK_want::NORMAL, /*complain*/false);
+  if (val == error_mark_node)
+return NULL_TREE;
   if (VAR_P (val) || TREE_CODE (val) == CONST_DECL)
 val = maybe_constant_value (val);
   if (TREE_CODE (val) == INTEGER_CST)
diff --git a/gcc/testsuite/g++.dg/cpp1z/pr110216.C 
b/gcc/testsuite/g++.dg/cpp1z/pr110216.C
new file mode 100644
index 000..be4fd5f7053
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/pr110216.C
@@ -0,0 +1,21 @@
+// DR 2386
+// PR c++/110216
+// { dg-do compile { target c++17 } }
+
+
+namespace std{
+  template  struct tuple_size;
+}
+
+struct A {
+  int x = 0;
+};
+
+template <> struct std::tuple_size <::A> {};
+
+auto [x] = A{};
+
+int
+main ()
+{
+}
-- 
2.41.0



Re: [PATCH V2] rs6000: Don't allow AltiVec address in movoo & movxo pattern [PR110411]

2023-08-06 Thread Peter Bergner via Gcc-patches
On 7/19/23 11:46 AM, jeevitha via Gcc-patches wrote:
> gcc/
>   PR target/110411
>   * config/rs6000/mma.md (define_insn_and_split movoo): Disallow
>   AltiVec address in movoo and movxo pattern.

No need to mention movxo here, since the next change covers movxo.
And maybe better as "Disallow AltiVec address operands."?


>   (define_insn_and_split movxo): Likewise.

Fine.


>   *config/rs6000/predicates.md (vsx_quad_dform_memory_operand):Remove
 ^   ^
Need a space in the two spots above.

I cannot approve it, but it looks good to me with the above bits fixed.

Peter




[PATCH 6/9] LoongArch: Fix 64-bit immediate move for loongarch32 target

2023-08-06 Thread Jiajie Chen via Gcc-patches
loongarch_move_integer does not support splitting 64-bit integer into
two 32-bit ones. Thus, define_split is removed from movdi_32bit and
TARGET_64BIT is added to the split condition of movdi_64bit to avoid
using it for loongarch32.

gcc/ChangeLog:

* config/loongarch/loongarch.md (movdi_32bit): Remove not
  working split, use existing loongarch_split_move instead.
  (movdi_64bit): Add TARGET_64BIT to split condition.
---
 gcc/config/loongarch/loongarch.md | 13 ++---
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 9eb6bb75c35..c611a8a822a 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1777,22 +1777,13 @@
 DONE;
 })
 
-(define_insn_and_split "*movdi_32bit"
+(define_insn "*movdi_32bit"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,w,*f,*f,*r,*m")
(match_operand:DI 1 "move_operand" "r,i,w,r,*J*r,*m,*f,*f"))]
   "!TARGET_64BIT
&& (register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode))"
   { return loongarch_output_move (operands[0], operands[1]); }
-  "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
-  (operands[0]))"
-  [(const_int 0)]
-  "
-{
-  loongarch_move_integer (operands[0], operands[0], INTVAL (operands[1]));
-  DONE;
-}
-  "
   [(set_attr "move_type" "move,const,load,store,mgtf,fpload,mftg,fpstore")
(set_attr "mode" "DI")])
 
@@ -1804,7 +1795,7 @@
|| reg_or_0_operand (operands[1], DImode))"
   { return loongarch_output_move (operands[0], operands[1]); }
   "CONST_INT_P (operands[1]) && REG_P (operands[0]) && GP_REG_P (REGNO
-  (operands[0]))"
+  (operands[0])) && TARGET_64BIT"
   [(const_int 0)]
   "
 {
-- 
2.41.0



[PATCH 9/9] LoongArch: Add: Add -march=loongarch64 to tests with -mabi=lp64d

2023-08-06 Thread Jiajie Chen via Gcc-patches
The compiler emits a warning if the current target (-march=loongarch32)
mismatches with the ABI (-mabi=lp64d). Adding -march=loongarch64
explicitly fixes the tests.

gcc/testsuite/ChangeLog:

* g++.target/loongarch/bytepick.C: Add -march=loongarch64
* g++.target/loongarch/pr106828.C: Add -march=loongarch64
* gcc.target/loongarch/add-const.c: Add -march=loongarch64
* gcc.target/loongarch/arch-1.c: Add -march=loongarch64
* gcc.target/loongarch/attr-model-1.c: Add -march=loongarch64
* gcc.target/loongarch/attr-model-2.c: Add -march=loongarch64
* gcc.target/loongarch/flt-abi-isa-1.c: Add -march=loongarch64
* gcc.target/loongarch/fscaleb.c: Add -march=loongarch64
* gcc.target/loongarch/ftint-no-inexact.c: Add
  -march=loongarch64
* gcc.target/loongarch/ftint.c: Add -march=loongarch64
* gcc.target/loongarch/func-call-1.c: Add -march=loongarch64
* gcc.target/loongarch/func-call-2.c: Add -march=loongarch64
* gcc.target/loongarch/func-call-3.c: Add -march=loongarch64
* gcc.target/loongarch/func-call-4.c: Add -march=loongarch64
* gcc.target/loongarch/func-call-5.c: Add -march=loongarch64
* gcc.target/loongarch/func-call-6.c: Add -march=loongarch64
* gcc.target/loongarch/func-call-7.c: Add -march=loongarch64
* gcc.target/loongarch/func-call-8.c: Add -march=loongarch64
* gcc.target/loongarch/func-call-extreme-1.c: Add
  -march=loongarch64
* gcc.target/loongarch/func-call-extreme-2.c: Add
  -march=loongarch64
* gcc.target/loongarch/func-call-medium-1.c: Add
  -march=loongarch64
* gcc.target/loongarch/func-call-medium-2.c: Add
  -march=loongarch64
* gcc.target/loongarch/func-call-medium-3.c: Add
  -march=loongarch64
* gcc.target/loongarch/func-call-medium-4.c: Add
  -march=loongarch64
* gcc.target/loongarch/func-call-medium-5.c: Add
  -march=loongarch64
* gcc.target/loongarch/func-call-medium-6.c: Add
  -march=loongarch64
* gcc.target/loongarch/func-call-medium-7.c: Add
  -march=loongarch64
* gcc.target/loongarch/func-call-medium-8.c: Add
  -march=loongarch64
* gcc.target/loongarch/imm-load.c: Add -march=loongarch64
* gcc.target/loongarch/imm-load1.c: Add -march=loongarch64
* gcc.target/loongarch/mulw_d_w.c: Add -march=loongarch64
* gcc.target/loongarch/pr109465-1.c: Add -march=loongarch64
* gcc.target/loongarch/pr109465-2.c: Add -march=loongarch64
* gcc.target/loongarch/pr109465-3.c: Add -march=loongarch64
* gcc.target/loongarch/prolog-opt.c: Add -march=loongarch64
* gcc.target/loongarch/relocs-symbol-noaddend.c: Add
  -march=loongarch64
* gcc.target/loongarch/zero-size-field-pass.c: Add
  -march=loongarch64
* gcc.target/loongarch/zero-size-field-ret.c: Add
  -march=loongarch64
---
 gcc/testsuite/g++.target/loongarch/bytepick.C   | 2 +-
 gcc/testsuite/g++.target/loongarch/pr106828.C   | 2 +-
 gcc/testsuite/gcc.target/loongarch/add-const.c  | 2 +-
 gcc/testsuite/gcc.target/loongarch/arch-1.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/attr-model-1.c   | 2 +-
 gcc/testsuite/gcc.target/loongarch/attr-model-2.c   | 2 +-
 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-1.c  | 2 +-
 gcc/testsuite/gcc.target/loongarch/fscaleb.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c   | 2 +-
 gcc/testsuite/gcc.target/loongarch/ftint.c  | 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-1.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-2.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-3.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-4.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-5.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-6.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-7.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-8.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c| 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-medium-1.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-medium-2.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-medium-3.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-medium-4.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c | 2 +-
 gcc/testsuite/gcc.target/loongarch/imm-load.c   |

[PATCH 5/9] LoongArch: Fix 64-bit move for loongarch32 target

2023-08-06 Thread Jiajie Chen via Gcc-patches
Bring back 64-bit move splitting for loongarch32. The code was removed
in commit 16fc26d4e7a (`LoongArch: Support split symbol.`) for an unknown
reason.

gcc/ChangeLog:

* config/loongarch/loongarch.md: Handle move splitting for
  64-bit operands.
---
 gcc/config/loongarch/loongarch.md | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 93d8bf5bcca..9eb6bb75c35 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1965,6 +1965,16 @@
   [(set_attr "move_type" "move,load,store")
(set_attr "mode" "DF")])
 
+(define_split
+  [(set (match_operand:MOVE64 0 "nonimmediate_operand")
+   (match_operand:MOVE64 1 "move_operand"))]
+  "reload_completed && loongarch_split_move_p (operands[0], operands[1])"
+  [(const_int 0)]
+{
+  loongarch_split_move (operands[0], operands[1], curr_insn);
+  DONE;
+})
+
 ;; Emit a doubleword move in which exactly one of the operands is
 ;; a floating-point register.  We can't just emit two normal moves
 ;; because of the constraints imposed by the FPU register model;
-- 
2.41.0



[PATCH 1/9] LoongArch: Introduce loongarch32 target

2023-08-06 Thread Jiajie Chen via Gcc-patches
Introduce loongarch32 target and ilp32 abi variants. The ilp32d abi
variant is selected as the default abi of loongarch32 target.

contrib/ChangeLog:

* config-list.mk: Add loongarch32-linux-gnu*.

gcc/ChangeLog:

* config.gcc: Add target triple loongarch32-*-*-* and
  corresponding abi ilp32f, ilp32d and ilp32s.
* config/loongarch/genopts/loongarch-strings: Add strings for
  loongarch32 and ilp32 abi variants.
* config/loongarch/genopts/loongarch.opt.in: Add
  -march=loongarch32 and -mabi=ilp32d/ilp32f/ilp32s.
* config/loongarch/gnu-user.h: Add ilp32 abi variants to spec.
* config/loongarch/linux.h: Add ABI_LIBDIR for ilp32 abi
  variants.
* config/loongarch/loongarch-c.cc (loongarch_cpu_cpp_builtins):
  Add builtin definitions for loongarch32 target.
* config/loongarch/loongarch-def.c: Add loongarch32 and ilp32
  definitions.
* config/loongarch/loongarch-def.h: Add loongarch32 and ilp32
  definitions.
* config/loongarch/loongarch-driver.h: Add ilp32 abi variants to
  spec.
* config/loongarch/loongarch-opts.cc: Handle ilp32 abi variants.
* config/loongarch/loongarch-opts.h: Add loongarch32 case to
  macros.
* config/loongarch/loongarch-str.h: Add loongarch32 and ilp32
  strings.
* config/loongarch/loongarch.cc: Disable -fpcc-struct-return for
  ilp32.
* config/loongarch/loongarch.opt: Add -march=loongarch32 and
  -mabi=ilp32d/ilp32f/ilp32s.
* config/loongarch/t-linux: Add ilp32 abi variants to multilib.
---
 contrib/config-list.mk|  1 +
 gcc/config.gcc| 61 ---
 .../loongarch/genopts/loongarch-strings   |  5 ++
 gcc/config/loongarch/genopts/loongarch.opt.in | 12 
 gcc/config/loongarch/gnu-user.h   |  3 +
 gcc/config/loongarch/linux.h  |  8 ++-
 gcc/config/loongarch/loongarch-c.cc   | 12 
 gcc/config/loongarch/loongarch-def.c  | 33 ++
 gcc/config/loongarch/loongarch-def.h  | 25 +---
 gcc/config/loongarch/loongarch-driver.h   |  4 ++
 gcc/config/loongarch/loongarch-opts.cc| 22 ++-
 gcc/config/loongarch/loongarch-opts.h | 20 --
 gcc/config/loongarch/loongarch-str.h  |  5 ++
 gcc/config/loongarch/loongarch.cc |  2 +-
 gcc/config/loongarch/loongarch.opt| 12 
 gcc/config/loongarch/t-linux  | 16 -
 16 files changed, 210 insertions(+), 31 deletions(-)

diff --git a/contrib/config-list.mk b/contrib/config-list.mk
index e570b13c71b..3c00ce5410a 100644
--- a/contrib/config-list.mk
+++ b/contrib/config-list.mk
@@ -57,6 +57,7 @@ LIST = aarch64-elf aarch64-freebsd13 aarch64-linux-gnu 
aarch64-rtems \
   i686-cygwinOPT-enable-threads=yes i686-mingw32crt ia64-elf \
   ia64-linux ia64-hpux ia64-hp-vms iq2000-elf lm32-elf \
   lm32-rtems lm32-uclinux \
+  loongarch32-linux-gnuf64 loongarch32-linux-gnuf32 loongarch32-linux-gnusf \
   loongarch64-linux-gnuf64 loongarch64-linux-gnuf32 loongarch64-linux-gnusf \
   m32c-elf m32r-elf m32rle-elf \
   m68k-elf m68k-netbsdelf \
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 415e0e1ebc5..45e69b24b44 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4901,10 +4901,24 @@ case "${target}" in
arch_pattern arch_default   \
fpu_pattern  fpu_default\
tune_pattern tune_default   \
-   triplet_os   triplet_abi
+   triplet_os   triplet_abi\
+   triplet_cpu
 
# Infer ABI from the triplet.
case ${target} in
+   loongarch32-*-*-*f64)
+   abi_pattern="ilp32d"
+   ;;
+   loongarch32-*-*-*f32)
+   abi_pattern="ilp32f"
+   ;;
+   loongarch32-*-*-*sf)
+   abi_pattern="ilp32s"
+   ;;
+   loongarch32-*-*-*)
+   abi_pattern="ilp32[dfs]"
+   abi_default="ilp32d"
+   ;;
loongarch64-*-*-*f64)
abi_pattern="lp64d"
;;
@@ -4939,7 +4953,7 @@ case "${target}" in
 
# Perform initial sanity checks on --with-* options.
case ${with_arch} in
-   "" | loongarch64 | la464) ;; # OK, append here.
+   "" | loongarch32 | loongarch64 | la464) ;; # OK, append here.
native)
if test x${host} != x${target}; then
echo "--with-arch=native is illegal for 
cross-compiler." 1>&2
@@ -4958,7 +4972,7 @@ case "${target}" in
esac
 
case ${with_abi} in
- 

[PATCH 8/9] LoongArch: Do not emit SF/DF <-> DI conversion in loongarch32

2023-08-06 Thread Jiajie Chen via Gcc-patches
In loongarch32 target, conversions between SF/DF and DI are not
supported.

gcc/ChangeLog:

* config/loongarch/loongarch.md: Check TARGET_64BIT in insns
  regarding SF/DF <-> DI conversion.
---
 gcc/config/loongarch/loongarch.md | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index c611a8a822a..bced4b08569 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1504,7 +1504,7 @@
 (define_insn "floatdidf2"
   [(set (match_operand:DF 0 "register_operand" "=f")
(float:DF (match_operand:DI 1 "register_operand" "f")))]
-  "TARGET_DOUBLE_FLOAT"
+  "TARGET_DOUBLE_FLOAT && TARGET_64BIT"
   "ffint.d.l\t%0,%1"
   [(set_attr "type" "fcvt")
(set_attr "mode" "DF")
@@ -1522,7 +1522,7 @@
 (define_insn "floatdisf2"
   [(set (match_operand:SF 0 "register_operand" "=f")
(float:SF (match_operand:DI 1 "register_operand" "f")))]
-  "TARGET_DOUBLE_FLOAT"
+  "TARGET_DOUBLE_FLOAT && TARGET_64BIT"
   "ffint.s.l\t%0,%1"
   [(set_attr "type" "fcvt")
(set_attr "mode" "SF")
@@ -1576,7 +1576,7 @@
 (define_expand "fixuns_truncdfdi2"
   [(set (match_operand:DI 0 "register_operand")
(unsigned_fix:DI (match_operand:DF 1 "register_operand")))]
-  "TARGET_DOUBLE_FLOAT"
+  "TARGET_DOUBLE_FLOAT && TARGET_64BIT"
 {
   rtx reg1 = gen_reg_rtx (DFmode);
   rtx reg2 = gen_reg_rtx (DFmode);
@@ -1658,7 +1658,7 @@
 (define_expand "fixuns_truncsfdi2"
   [(set (match_operand:DI 0 "register_operand")
(unsigned_fix:DI (match_operand:SF 1 "register_operand")))]
-  "TARGET_DOUBLE_FLOAT"
+  "TARGET_DOUBLE_FLOAT && TARGET_64BIT"
 {
   rtx reg1 = gen_reg_rtx (SFmode);
   rtx reg2 = gen_reg_rtx (SFmode);
-- 
2.41.0



[PATCH 7/9] LoongArch: Fix signed 32-bit overflow for loongarch32 target

2023-08-06 Thread Jiajie Chen via Gcc-patches
When rhs equals 0x7fffffff, adding 1 to rhs overflows SI, generating
an invalid const_int.

gcc/ChangeLog:

* config/loongarch/loongarch.cc (loongarch_emit_int_compare):
  Call trunc_int_for_mode to ensure valid rhs.
---
 gcc/config/loongarch/loongarch.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/loongarch/loongarch.cc 
b/gcc/config/loongarch/loongarch.cc
index ff7904d49d5..5a9c99afc99 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -4283,6 +4283,7 @@ loongarch_emit_int_compare (enum rtx_code *code, rtx 
*op0, rtx *op1)
break;
 
  new_rhs = rhs + (increment ? 1 : -1);
+ new_rhs = trunc_int_for_mode (new_rhs, GET_MODE (*op0));
  if (loongarch_integer_cost (new_rhs)
< loongarch_integer_cost (rhs))
{
-- 
2.41.0



[PATCH 2/9] LoongArch: Fix default ISA setting

2023-08-06 Thread Jiajie Chen via Gcc-patches
When loongarch_config_target is called, la_target has not been
initialized, thus the macro LARCH_ACTUAL_ARCH always equals zero.

This commit fixes this by expanding the macro and reading the latest value.
It permits -march=loongarch64 when the default target is loongarch32 and
vice versa.

gcc/ChangeLog:

* config/loongarch/loongarch-opts.cc (loongarch_config_target):
  Fix -march detection.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/arch-1.c: New test.
* gcc.target/loongarch/arch-2.c: New test.
* gcc.target/loongarch/arch-3.c: New test.
---
 gcc/config/loongarch/loongarch-opts.cc  | 5 -
 gcc/testsuite/gcc.target/loongarch/arch-1.c | 5 +
 gcc/testsuite/gcc.target/loongarch/arch-2.c | 5 +
 gcc/testsuite/gcc.target/loongarch/arch-3.c | 6 ++
 4 files changed, 20 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/arch-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/arch-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/arch-3.c

diff --git a/gcc/config/loongarch/loongarch-opts.cc 
b/gcc/config/loongarch/loongarch-opts.cc
index 9fc0bbbcb6e..29c0c4468bb 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -246,7 +246,10 @@ loongarch_config_target (struct loongarch_target *target,
 config_target_isa:
 
   /* Get default ISA from "-march" or its default value.  */
-  t.isa = loongarch_cpu_default_isa[LARCH_ACTUAL_ARCH];
+  if (t.cpu_arch == TARGET_ARCH_NATIVE)
+t.isa = loongarch_cpu_default_isa[t.cpu_native];
+  else
+t.isa = loongarch_cpu_default_isa[t.cpu_arch];
 
   /* Apply incremental changes.  */
   /* "-march=native" overrides the default FPU type.  */
diff --git a/gcc/testsuite/gcc.target/loongarch/arch-1.c 
b/gcc/testsuite/gcc.target/loongarch/arch-1.c
new file mode 100644
index 000..379036ec76f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/arch-1.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=loongarch64 -mabi=lp64d" } */
+int foo()
+{
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/arch-2.c 
b/gcc/testsuite/gcc.target/loongarch/arch-2.c
new file mode 100644
index 000..55d646902a6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/arch-2.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-march=loongarch32 -mabi=ilp32d" } */
+int foo()
+{
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/arch-3.c 
b/gcc/testsuite/gcc.target/loongarch/arch-3.c
new file mode 100644
index 000..543b93883bd
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/arch-3.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-march=loongarch64 -mabi=ilp32d" } */
+int foo()
+{
+}
+/* { dg-error "unable to implement ABI 'ilp32d' with instruction set 
'la64/fpu64'" "" { target *-*-* } 0 } */
-- 
2.41.0



[PATCH 3/9] LoongArch: Fix SI division for loongarch32 target

2023-08-06 Thread Jiajie Chen via Gcc-patches
Add TARGET_64BIT check for loongarch64-only handling of SI division. It
shall not promote SI to DI before division in loongarch32 target.

gcc/ChangeLog:

* config/loongarch/loongarch.md: Add TARGET_64BIT check for
  loongarch64-only handling of SI division.
---
 gcc/config/loongarch/loongarch.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index b37e070660f..95c5b25d22a 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -851,7 +851,7 @@
 (match_operand:GPR 2 "register_operand")))]
   ""
 {
- if (GET_MODE (operands[0]) == SImode)
+ if (GET_MODE (operands[0]) == SImode && TARGET_64BIT)
   {
 rtx reg1 = gen_reg_rtx (DImode);
 rtx reg2 = gen_reg_rtx (DImode);
-- 
2.41.0



[PATCH 4/9] LoongArch: Fix movgr2frh.w operand order

2023-08-06 Thread Jiajie Chen via Gcc-patches
The operand order of movgr2frh.w was wrong. The correct order should be
`movgr2frh.w fd, rj`.

gcc/ChangeLog:

* config/loongarch/loongarch.md (movgr2frh): Correct
  movgr2frh.w operand order.
---
 gcc/config/loongarch/loongarch.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/loongarch.md 
b/gcc/config/loongarch/loongarch.md
index 95c5b25d22a..93d8bf5bcca 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -2297,7 +2297,7 @@
(match_operand:SPLITF 2 "register_operand" "0")]
UNSPEC_MOVGR2FRH))]
   "TARGET_DOUBLE_FLOAT"
-  "movgr2frh.w\t%z1,%0"
+  "movgr2frh.w\t%0,%z1"
   [(set_attr "move_type" "mgtf")
(set_attr "mode" "")])
 
-- 
2.41.0



[committed][_GLIBCXX_INLINE_VERSION] Add __cxa_call_terminate symbol export

2023-08-06 Thread François Dumont via Gcc-patches

libstdc++: [_GLIBCXX_INLINE_VERSION] Add __cxa_call_terminate symbol export

libstdc++-v3/ChangeLog:

    * config/abi/pre/gnu-versioned-namespace.ver: Add __cxa_call_terminate
    symbol export.
diff --git a/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver b/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver
index d7ef127cf02..267ab8fc719 100644
--- a/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver
+++ b/libstdc++-v3/config/abi/pre/gnu-versioned-namespace.ver
@@ -164,6 +164,7 @@ CXXABI_2.0 {
 __cxa_begin_catch;
 __cxa_begin_cleanup;
 __cxa_call_unexpected;
+__cxa_call_terminate;
 __cxa_current_exception_type;
 __cxa_deleted_virtual;
 __cxa_demangle;


Re: [RFC] [v2] Extend fold_vec_perm to handle VLA vectors

2023-08-06 Thread Prathamesh Kulkarni via Gcc-patches
On Fri, 4 Aug 2023 at 20:36, Richard Sandiford
 wrote:
>
> Full review this time, sorry for skipping the tests earlier.
Thanks for the detailed review! Please find my responses inline below.
>
> Prathamesh Kulkarni  writes:
> > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
> > index 7e5494dfd39..680d0e54fd4 100644
> > --- a/gcc/fold-const.cc
> > +++ b/gcc/fold-const.cc
> > @@ -85,6 +85,10 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "vec-perm-indices.h"
> >  #include "asan.h"
> >  #include "gimple-range.h"
> > +#include <algorithm>
>
> This should be included by defining INCLUDE_ALGORITHM instead.
Done. Just curious, why do we use this macro instead of directly
including <algorithm>?
>
> > +#include "tree-pretty-print.h"
> > +#include "gimple-pretty-print.h"
> > +#include "print-tree.h"
>
> Are these still needed, or were they for debugging?
Just for debugging, removed.
>
> >
> >  /* Nonzero if we are folding constants inside an initializer or a C++
> > manifestly-constant-evaluated context; zero otherwise.
> > @@ -10494,15 +10498,9 @@ fold_mult_zconjz (location_t loc, tree type, tree 
> > expr)
> >  static bool
> >  vec_cst_ctor_to_array (tree arg, unsigned int nelts, tree *elts)
> >  {
> > -  unsigned HOST_WIDE_INT i, nunits;
> > +  unsigned HOST_WIDE_INT i;
> >
> > -  if (TREE_CODE (arg) == VECTOR_CST
> > -  && VECTOR_CST_NELTS (arg).is_constant (&nunits))
> > -{
> > -  for (i = 0; i < nunits; ++i)
> > - elts[i] = VECTOR_CST_ELT (arg, i);
> > -}
> > -  else if (TREE_CODE (arg) == CONSTRUCTOR)
> > +  if (TREE_CODE (arg) == CONSTRUCTOR)
> >  {
> >constructor_elt *elt;
> >
> > @@ -10520,6 +10518,192 @@ vec_cst_ctor_to_array (tree arg, unsigned int 
> > nelts, tree *elts)
> >return true;
> >  }
> >
> > +/* Helper routine for fold_vec_perm_cst to check if SEL is a suitable
> > +   mask for VLA vec_perm folding.
> > +   REASON if specified, will contain the reason why SEL is not suitable.
> > +   Used only for debugging and unit-testing.
> > +   VERBOSE if enabled is used for debugging output.  */
> > +
> > +static bool
> > +valid_mask_for_fold_vec_perm_cst_p (tree arg0, tree arg1,
> > + const vec_perm_indices &sel,
> > + const char **reason = NULL,
> > + ATTRIBUTE_UNUSED bool verbose = false)
>
> Since verbose is no longer needed (good!), I think we should just remove it.
Done.
>
> > +{
> > +  unsigned sel_npatterns = sel.encoding ().npatterns ();
> > +  unsigned sel_nelts_per_pattern = sel.encoding ().nelts_per_pattern ();
> > +
> > +  if (!(pow2p_hwi (sel_npatterns)
> > + && pow2p_hwi (VECTOR_CST_NPATTERNS (arg0))
> > + && pow2p_hwi (VECTOR_CST_NPATTERNS (arg1))))
> > +{
> > +  if (reason)
> > + *reason = "npatterns is not power of 2";
> > +  return false;
> > +}
> > +
> > +  /* We want to avoid cases where sel.length is not a multiple of 
> > npatterns.
> > + For eg: sel.length = 2 + 2x, and sel npatterns = 4.  */
> > +  poly_uint64 esel;
> > +  if (!multiple_p (sel.length (), sel_npatterns, &esel))
> > +{
> > +  if (reason)
> > + *reason = "sel.length is not multiple of sel_npatterns";
> > +  return false;
> > +}
> > +
> > +  if (sel_nelts_per_pattern < 3)
> > +return true;
> > +
> > +  for (unsigned pattern = 0; pattern < sel_npatterns; pattern++)
> > +{
> > +  poly_uint64 a1 = sel[pattern + sel_npatterns];
> > +  poly_uint64 a2 = sel[pattern + 2 * sel_npatterns];
> > +  HOST_WIDE_INT S;
>
> Trailing whitespace.  The convention is to use lowercase variable
> names, so please call this "step".
Fixed, thanks.
>
> > +  if (!poly_int64 (a2 - a1).is_constant (&S))
> > + {
> > +   if (reason)
> > + *reason = "step is not constant";
> > +   return false;
> > + }
> > +  // FIXME: Punt on S < 0 for now, revisit later.
> > +  if (S < 0)
> > + return false;
> > +  if (S == 0)
> > + continue;
> > +
> > +  if (!pow2p_hwi (S))
> > + {
> > +   if (reason)
> > + *reason = "step is not power of 2";
> > +   return false;
> > + }
> > +
> > +  /* Ensure that stepped sequence of the pattern selects elements
> > +  only from the same input vector if it's VLA.  */
>
> s/ if it's VLA//
Oops sorry, that was a relic of something else I was trying :)
Fixed, thanks.
>
> > +  uint64_t q1, qe;
> > +  poly_uint64 r1, re;
> > +  poly_uint64 ae = a1 + (esel - 2) * S;
> > +  poly_uint64 arg_len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> > +
> > +  if (!(can_div_trunc_p (a1, arg_len, &q1, &r1)
> > + && can_div_trunc_p (ae, arg_len, &qe, &re)
> > + && q1 == qe))
> > + {
> > +   if (reason)
> > + *reason = "crossed input vectors";
> > +   return false;
> > + }
> > +
>
> Probably worth a comment above the following code too:
>
>   /* Ensure that the stepped sequence always selects f