Re: [PATCH V4] VECT: Support loop len control on EXTRACT_LAST vectorization

2023-08-13 Thread Richard Biener via Gcc-patches
On Fri, 11 Aug 2023, juzhe.zh...@rivai.ai wrote:

> From: Ju-Zhe Zhong 
> 
> Hi, Richard and Richi.
> 
> This patch adds support for live vectorization by VEC_EXTRACT for LEN loop control.

OK.

Thanks,
Richard.

> Consider the following case:
> 
> #include <stdint.h>
> 
> #define EXTRACT_LAST(TYPE)\
>   TYPE __attribute__ ((noinline, noclone))\
>   test_##TYPE (TYPE *x, int n, TYPE value)\
>   {   \
> TYPE last;\
> for (int j = 0; j < n; ++j)   \
>   {   \
>   last = x[j];\
>   x[j] = last * value;\
>   }   \
> return last;  \
>   }
> 
> #define TEST_ALL(T)   \
>   T (uint8_t) \
> 
> TEST_ALL (EXTRACT_LAST)
> 
> ARM SVE IR:
> 
> Preheader:
>   max_mask_34 = .WHILE_ULT (0, bnd.5_6, { 0, ... });
> 
> Loop:
>   ...
>   # loop_mask_22 = PHI 
>   ...
>   vect_last_12.8_23 = .MASK_LOAD (_7, 8B, loop_mask_22);
>   vect__4.9_27 = vect_last_12.8_23 * vect_cst__26;
>   .MASK_STORE (_7, 8B, loop_mask_22, vect__4.9_27);
>   ...
>   next_mask_35 = .WHILE_ULT (_1, bnd.5_6, { 0, ... });
>   ...
> 
> Epilogue:
>   _25 = .EXTRACT_LAST (loop_mask_22, vect_last_12.8_23);
> 
> For RVV since we prefer len in loop control, after this patch for RVV:
> 
> Loop:
>   ...
>   loop_len_22 = SELECT_VL;
>   vect_last_12.8_23 = .MASK_LOAD (_7, 8B, loop_len_22);
>   vect__4.9_27 = vect_last_12.8_23 * vect_cst__26;
>   .MASK_STORE (_7, 8B, loop_len_22, vect__4.9_27);
>   ...
> 
> Epilogue:
>   _25 = .VEC_EXTRACT (loop_len_22 + bias - 1, vect_last_12.8_23);
> 
> Details of this approach:
> 
> 1. Step 1 - Add 'vect_can_vectorize_extract_last_with_len_p'  to enable live 
> vectorization
> for LEN loop control.
>
>    This function checks whether the target supports:
> - Use LEN as the loop control.
> - Support VEC_EXTRACT optab.
> 
> 2. Step 2 - Record LEN for loop control if 
> 'vect_can_vectorize_extract_last_with_len_p' is true.
> 
> 3. Step 3 - Generate VEC_EXTRACT (v, LEN + BIAS - 1).
> 
> The only difference between mask and len is that len uses the length 
> generated by SELECT_VL and
> uses the VEC_EXTRACT pattern. The rest of the live vectorization is totally the 
> same as for ARM SVE.
> 
> gcc/ChangeLog:
> 
>   * tree-vect-loop.cc (vectorizable_live_operation): Add loop len control.
> 
> ---
>  gcc/tree-vect-loop.cc | 78 ++-
>  1 file changed, 62 insertions(+), 16 deletions(-)
> 
> diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
> index bf8d677b584..a011e2dacb2 100644
> --- a/gcc/tree-vect-loop.cc
> +++ b/gcc/tree-vect-loop.cc
> @@ -10278,17 +10278,7 @@ vectorizable_live_operation (vec_info *vinfo, 
> stmt_vec_info stmt_info,
>/* No transformation required.  */
>if (loop_vinfo && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
>   {
> -   if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
> -OPTIMIZE_FOR_SPEED))
> - {
> -   if (dump_enabled_p ())
> - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> -  "can't operate on partial vectors "
> -  "because the target doesn't support extract "
> -  "last reduction.\n");
> -   LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
> - }
> -   else if (slp_node)
> +   if (slp_node)
>   {
> if (dump_enabled_p ())
>   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
> @@ -10308,9 +10298,28 @@ vectorizable_live_operation (vec_info *vinfo, 
> stmt_vec_info stmt_info,
> else
>   {
> gcc_assert (ncopies == 1 && !slp_node);
> -   vect_record_loop_mask (loop_vinfo,
> -  &LOOP_VINFO_MASKS (loop_vinfo),
> -  1, vectype, NULL);
> +   if (direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
> +   OPTIMIZE_FOR_SPEED))
> + vect_record_loop_mask (loop_vinfo,
> +&LOOP_VINFO_MASKS (loop_vinfo),
> +1, vectype, NULL);
> +   else if (convert_optab_handler (vec_extract_optab,
> +   TYPE_MODE (vectype),
> +   TYPE_MODE (TREE_TYPE (vectype)))
> +!= CODE_FOR_nothing)
> + vect_record_loop_len (loop_vinfo,
> +   &LOOP_VINFO_LENS (loop_vinfo),
> +   1, vectype, 1);
> +   else
> + {
> +  

[PATCH] VECT: Apply MASK_LEN_{LOAD_LANES, STORE_LANES} into vectorizer

2023-08-13 Thread juzhe . zhong
From: Ju-Zhe Zhong 

Hi, Richard and Richi.

This patch is adding MASK_LEN_{LOAD_LANES,STORE_LANES} support into vectorizer.

Consider this simple case:

void __attribute__ ((noinline, noclone))
foo (int *__restrict a, int *__restrict b, int *__restrict c,
  int *__restrict d, int *__restrict e, int *__restrict f,
  int *__restrict g, int *__restrict h, int *__restrict j, int n)
{
  for (int i = 0; i < n; ++i)
{
  a[i] = j[i * 8];
  b[i] = j[i * 8 + 1];
  c[i] = j[i * 8 + 2];
  d[i] = j[i * 8 + 3];
  e[i] = j[i * 8 + 4];
  f[i] = j[i * 8 + 5];
  g[i] = j[i * 8 + 6];
  h[i] = j[i * 8 + 7];
}
}

RVV Gimple IR:

  _79 = .SELECT_VL (ivtmp_81, POLY_INT_CST [4, 4]);
  ivtmp_125 = _79 * 32;
  vect_array.8 = .MASK_LEN_LOAD_LANES (vectp_j.6_124, 32B, { -1, ... }, _79, 0);
  vect__8.9_122 = vect_array.8[0];
  vect__8.10_121 = vect_array.8[1];
  vect__8.11_120 = vect_array.8[2];
  vect__8.12_119 = vect_array.8[3];
  vect__8.13_118 = vect_array.8[4];
  vect__8.14_117 = vect_array.8[5];
  vect__8.15_116 = vect_array.8[6];
  vect__8.16_115 = vect_array.8[7];
  vect_array.8 ={v} {CLOBBER};
  ivtmp_114 = _79 * 4;
  .MASK_LEN_STORE (vectp_a.17_113, 32B, { -1, ... }, _79, 0, vect__8.9_122);
  .MASK_LEN_STORE (vectp_b.19_109, 32B, { -1, ... }, _79, 0, vect__8.10_121);
  .MASK_LEN_STORE (vectp_c.21_105, 32B, { -1, ... }, _79, 0, vect__8.11_120);
  .MASK_LEN_STORE (vectp_d.23_101, 32B, { -1, ... }, _79, 0, vect__8.12_119);
  .MASK_LEN_STORE (vectp_e.25_97, 32B, { -1, ... }, _79, 0, vect__8.13_118);
  .MASK_LEN_STORE (vectp_f.27_93, 32B, { -1, ... }, _79, 0, vect__8.14_117);
  .MASK_LEN_STORE (vectp_g.29_89, 32B, { -1, ... }, _79, 0, vect__8.15_116);
  .MASK_LEN_STORE (vectp_h.31_85, 32B, { -1, ... }, _79, 0, vect__8.16_115);

ASM:

foo:
lw  t4,8(sp)
ld  t5,0(sp)
ble t4,zero,.L5
.L3:
vsetvli t1,t4,e8,mf4,ta,ma
vlseg8e32.v v8,(t5)
slli t3,t1,2
slli t6,t1,5
vse32.v v8,0(a0)
vse32.v v9,0(a1)
vse32.v v10,0(a2)
vse32.v v11,0(a3)
vse32.v v12,0(a4)
vse32.v v13,0(a5)
vse32.v v14,0(a6)
vse32.v v15,0(a7)
sub t4,t4,t1
add t5,t5,t6
add a0,a0,t3
add a1,a1,t3
add a2,a2,t3
add a3,a3,t3
add a4,a4,t3
add a5,a5,t3
add a6,a6,t3
add a7,a7,t3
bne t4,zero,.L3
.L5:
ret

The details of the approach:

Step 1 - Modify the LANES LOAD/STORE support function 
(vect_load_lanes_supported/vect_store_lanes_supported):

+/* Return FN if vec_{masked_,mask_len,}load_lanes is available for COUNT
+   vectors of type VECTYPE.  MASKED_P says whether the masked form is needed. 
*/
 
-bool
+internal_fn
 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
   bool masked_p)
 {
-  if (masked_p)
-return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
-vec_mask_load_lanes_optab,
-vectype, count);
+  if (vect_lanes_optab_supported_p ("vec_mask_len_load_lanes",
+   vec_mask_len_load_lanes_optab,
+   vectype, count))
+return IFN_MASK_LEN_LOAD_LANES;
+  else if (masked_p)
+{
+  if (vect_lanes_optab_supported_p ("vec_mask_load_lanes",
+   vec_mask_load_lanes_optab,
+   vectype, count))
+   return IFN_MASK_LOAD_LANES;
+}
   else
-return vect_lanes_optab_supported_p ("vec_load_lanes",
-vec_load_lanes_optab,
-vectype, count);
+{
+  if (vect_lanes_optab_supported_p ("vec_load_lanes",
+   vec_load_lanes_optab,
+   vectype, count))
+   return IFN_LOAD_LANES;
+}
+  return IFN_LAST;
 }
 
Instead of returning TRUE or FALSE to say whether the target supports the LANES LOAD/STORE,
I changed it to return the internal_fn of the LANES LOAD/STORE that the target supports.
If the target doesn't support any LANES LOAD/STORE optab, it returns IFN_LAST.

Step 2 - Build MASK_LEN_{LANES_LOAD,LANES_STORE} Gimple IR:

+ if (vect_store_lanes_supported (vectype, group_size, false)
+ == IFN_MASK_LEN_STORE_LANES)
+   {
+ if (loop_lens)
+   final_len = vect_get_loop_len (loop_vinfo, gsi, loop_lens,
+  ncopies, vectype, j, 1);
+ else
+   final_len = size_int (TYPE_VECTOR_SUBPARTS (vectype));
+ signed char biasval
+   = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
+ bias = build_int_cst (intQI_type_node, biasval);
+ if (!final_mask)
+   {
+ mask_vectype = t

Re: [PATCH v1] RISC-V: Support RVV VFWNMSAC rounding mode intrinsic API

2023-08-13 Thread juzhe.zh...@rivai.ai
LGTM



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-08-14 14:07
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support RVV VFWNMSAC rounding mode intrinsic API
From: Pan Li 
 
This patch would like to support the rounding mode API for the
VFWNMSAC as the below samples.
 
* __riscv_vfwnmsac_vv_f64m2_rm
* __riscv_vfwnmsac_vv_f64m2_rm_m
* __riscv_vfwnmsac_vf_f64m2_rm
* __riscv_vfwnmsac_vf_f64m2_rm_m
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv-vector-builtins-bases.cc
(class vfwnmsac_frm): New class for frm.
(vfwnmsac_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfwnmsac_frm): New intrinsic function definition.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/float-point-wnmsac.c: New test.
---
.../riscv/riscv-vector-builtins-bases.cc  | 25 ++
.../riscv/riscv-vector-builtins-bases.h   |  1 +
.../riscv/riscv-vector-builtins-functions.def |  2 +
.../riscv/rvv/base/float-point-wnmsac.c   | 47 +++
4 files changed, 75 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmsac.c
 
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 5a5da903cb2..b458560a040 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -608,6 +608,29 @@ public:
   }
};
+/* Implements below instructions for frm
+   - vfwnmsac
+*/
+class vfwnmsac_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_widen_ternop_insn (
+ code_for_pred_widen_mul_neg_scalar (PLUS, e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_widen_ternop_insn (
+ code_for_pred_widen_mul_neg (PLUS, e.vector_mode ()));
+
+gcc_unreachable ();
+  }
+};
+
/* Implements vrsub.  */
class vrsub : public function_base
{
@@ -2390,6 +2413,7 @@ static CONSTEXPR const vfwnmacc_frm vfwnmacc_frm_obj;
static CONSTEXPR const vfwmsac vfwmsac_obj;
static CONSTEXPR const vfwmsac_frm vfwmsac_frm_obj;
static CONSTEXPR const vfwnmsac vfwnmsac_obj;
+static CONSTEXPR const vfwnmsac_frm vfwnmsac_frm_obj;
static CONSTEXPR const unop vfsqrt_obj;
static CONSTEXPR const float_misc vfrsqrt7_obj;
static CONSTEXPR const float_misc vfrec7_obj;
@@ -2636,6 +2660,7 @@ BASE (vfwnmacc_frm)
BASE (vfwmsac)
BASE (vfwmsac_frm)
BASE (vfwnmsac)
+BASE (vfwnmsac_frm)
BASE (vfsqrt)
BASE (vfrsqrt7)
BASE (vfrec7)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 09356dd7ac8..85e8b9a3769 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -182,6 +182,7 @@ extern const function_base *const vfwnmacc_frm;
extern const function_base *const vfwmsac;
extern const function_base *const vfwmsac_frm;
extern const function_base *const vfwnmsac;
+extern const function_base *const vfwnmsac_frm;
extern const function_base *const vfsqrt;
extern const function_base *const vfrsqrt7;
extern const function_base *const vfrec7;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index e2a79607d04..7e2a4ab2969 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -382,6 +382,8 @@ DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, 
f_wwvv_ops)
DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwfv_ops)
DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwvv_ops)
DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwfv_ops)
+DEF_RVV_FUNCTION (vfwnmsac_frm, alu_frm, full_preds, f_wwvv_ops)
+DEF_RVV_FUNCTION (vfwnmsac_frm, alu_frm, full_preds, f_wwfv_ops)
// 13.8. Vector Floating-Point Square-Root Instruction
DEF_RVV_FUNCTION (vfsqrt, alu, full_preds, f_v_ops)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmsac.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmsac.c
new file mode 100644
index 000..13eb306313c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmsac.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+typedef float float32_t;
+
+vfloat64m2_t
+test_vfwnmsac_vv_f32m1_rm (vfloat64m2_t vd, vfloat32m1_t op1, vfloat32m1_t op2,
+size_t vl) {
+  return __riscv_vfwnmsac_vv_f64m2_rm (vd, op1, op2, 0, vl);
+}
+
+vfloat64m2_t
+test_vfwnmsac_vv_f32m1_rm_m (vbool32_t mask, vfloat64m2_t vd, vfloat32m1_t op1,
+  vfloat32m1_t op2, size_t vl) {
+  return __riscv_vfwnmsac_vv_f64

Re: [PATCH v1] RISC-V: Support RVV VFWMSAC rounding mode intrinsic API

2023-08-13 Thread juzhe.zh...@rivai.ai

LGTM


juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-08-14 11:29
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support RVV VFWMSAC rounding mode intrinsic API
From: Pan Li 
 
This patch would like to support the rounding mode API for the
VFWMSAC as the below samples.
 
* __riscv_vfwmsac_vv_f64m2_rm
* __riscv_vfwmsac_vv_f64m2_rm_m
* __riscv_vfwmsac_vf_f64m2_rm
* __riscv_vfwmsac_vf_f64m2_rm_m
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv-vector-builtins-bases.cc
(class vfwmsac_frm): New class for frm.
(vfwmsac_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfwmsac_frm): New intrinsic function definition.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/float-point-wmsac.c: New test.
---
.../riscv/riscv-vector-builtins-bases.cc  | 25 ++
.../riscv/riscv-vector-builtins-bases.h   |  1 +
.../riscv/riscv-vector-builtins-functions.def |  2 +
.../riscv/rvv/base/float-point-wmsac.c| 47 +++
4 files changed, 75 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wmsac.c
 
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 4a7f2b8e3e9..5a5da903cb2 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -585,6 +585,29 @@ public:
   }
};
+/* Implements below instructions for frm
+   - vfwmsac
+*/
+class vfwmsac_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_widen_ternop_insn (
+ code_for_pred_widen_mul_scalar (MINUS, e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_widen_ternop_insn (
+ code_for_pred_widen_mul (MINUS, e.vector_mode ()));
+
+gcc_unreachable ();
+  }
+};
+
/* Implements vrsub.  */
class vrsub : public function_base
{
@@ -2365,6 +2388,7 @@ static CONSTEXPR const vfwmacc_frm vfwmacc_frm_obj;
static CONSTEXPR const vfwnmacc vfwnmacc_obj;
static CONSTEXPR const vfwnmacc_frm vfwnmacc_frm_obj;
static CONSTEXPR const vfwmsac vfwmsac_obj;
+static CONSTEXPR const vfwmsac_frm vfwmsac_frm_obj;
static CONSTEXPR const vfwnmsac vfwnmsac_obj;
static CONSTEXPR const unop vfsqrt_obj;
static CONSTEXPR const float_misc vfrsqrt7_obj;
@@ -2610,6 +2634,7 @@ BASE (vfwmacc_frm)
BASE (vfwnmacc)
BASE (vfwnmacc_frm)
BASE (vfwmsac)
+BASE (vfwmsac_frm)
BASE (vfwnmsac)
BASE (vfsqrt)
BASE (vfrsqrt7)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 27c7deb4ec2..09356dd7ac8 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -180,6 +180,7 @@ extern const function_base *const vfwmacc_frm;
extern const function_base *const vfwnmacc;
extern const function_base *const vfwnmacc_frm;
extern const function_base *const vfwmsac;
+extern const function_base *const vfwmsac_frm;
extern const function_base *const vfwnmsac;
extern const function_base *const vfsqrt;
extern const function_base *const vfrsqrt7;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 481c3b899f2..e2a79607d04 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -380,6 +380,8 @@ DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, 
f_wwvv_ops)
DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwfv_ops)
DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwvv_ops)
DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwfv_ops)
+DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwvv_ops)
+DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwfv_ops)
// 13.8. Vector Floating-Point Square-Root Instruction
DEF_RVV_FUNCTION (vfsqrt, alu, full_preds, f_v_ops)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wmsac.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wmsac.c
new file mode 100644
index 000..886a0b13695
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wmsac.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+typedef float float32_t;
+
+vfloat64m2_t
+test_vfwmsac_vv_f32m1_rm (vfloat64m2_t vd, vfloat32m1_t op1, vfloat32m1_t op2,
+   size_t vl) {
+  return __riscv_vfwmsac_vv_f64m2_rm (vd, op1, op2, 0, vl);
+}
+
+vfloat64m2_t
+test_vfwmsac_vv_f32m1_rm_m (vbool32_t mask, vfloat64m2_t vd, vfloat32m1_t op1,
+ vfloat32m1_t op2, size_t vl) {
+  return __riscv_vfwmsac_vv_f64m2_rm_m (mask, vd, op1, op2, 1

Re: [PATCH] rtl-optimization/110939 Really fix narrow comparison of memory and constant

2023-08-13 Thread Stefan Schulze Frielinghaus via Gcc-patches
On Sat, Aug 12, 2023 at 09:04:19AM +0800, Xi Ruoyao wrote:
> On Thu, 2023-08-10 at 15:04 +0200, Stefan Schulze Frielinghaus via Gcc-
> patches wrote:
> > In the former fix in commit 41ef5a34161356817807be3a2e51fbdbe575ae85 I
> > completely missed the fact that the normal form of a generated constant for 
> > a
> > mode with fewer bits than in HOST_WIDE_INT is a sign extended version of the
> > actual constant.  This even holds true for unsigned constants.
> > 
> > Fixed by masking out the upper bits for the incoming constant and sign
> > extending the resulting unsigned constant.
> > 
> > Bootstrapped and regtested on x64 and s390x.  Ok for mainline?
> 
> The patch fails to apply:
> 
> patching file gcc/combine.cc
> Hunk #1 FAILED at 11923.
> Hunk #2 FAILED at 11962.
> 
> It looks like some indents are tabs in the source file, but white spaces
> in the patch.

The patch itself applies cleanly.  This is due to my inlined diff in
order to raise some discussion, i.e., just remove the following from
the email and the patch applies:

> > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > index 468b7fde911..80c4ff0fbaf 100644
> > --- a/gcc/combine.cc
> > +++ b/gcc/combine.cc
> > @@ -11923,7 +11923,7 @@ simplify_compare_const (enum rtx_code code, 
> > machine_mode mode,
> >    /* (unsigned) < 0x8000 is equivalent to >= 0.  */
> >    else if (is_a  (mode, &int_mode)
> >    && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > -  && ((unsigned HOST_WIDE_INT) const_op
> > +  && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK 
> > (int_mode))
> >    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 
> > 1)))
> >     {
> >   const_op = 0;
> > @@ -11962,7 +11962,7 @@ simplify_compare_const (enum rtx_code code, 
> > machine_mode mode,
> >    /* (unsigned) >= 0x8000 is equivalent to < 0.  */
> >    else if (is_a  (mode, &int_mode)
> >    && GET_MODE_PRECISION (int_mode) - 1 < HOST_BITS_PER_WIDE_INT
> > -  && ((unsigned HOST_WIDE_INT) const_op
> > +  && (((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK 
> > (int_mode))
> >    == HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 
> > 1)))
> >     {
> >   const_op = 0;

Looks like git am/apply is confused by that.

Cheers,
Stefan

> > 
> > For example, while bootstrapping on x64 the optimization is missed since
> > a LTU comparison in QImode is done and the constant equals
> > 0xffffffffffffff80.
> > 
> > Sorry for inlining another patch, but I would really like to make sure
> > that my understanding is correct, now, before I come up with another
> > patch.  Thus it would be great if someone could shed some light on this.
> > 
> > gcc/ChangeLog:
> > 
> > * combine.cc (simplify_compare_const): Properly handle unsigned
> > constants while narrowing comparison of memory and constants.
> > ---
> >  gcc/combine.cc | 19 ++-
> >  1 file changed, 10 insertions(+), 9 deletions(-)
> > 
> > diff --git a/gcc/combine.cc b/gcc/combine.cc
> > index e46d202d0a7..468b7fde911 100644
> > --- a/gcc/combine.cc
> > +++ b/gcc/combine.cc
> > @@ -12003,14 +12003,15 @@ simplify_compare_const (enum rtx_code code, 
> > machine_mode mode,
> >    && !MEM_VOLATILE_P (op0)
> >    /* The optimization makes only sense for constants which are big 
> > enough
> >  so that we have a chance to chop off something at all.  */
> > -  && (unsigned HOST_WIDE_INT) const_op > 0xff
> > -  /* Bail out, if the constant does not fit into INT_MODE.  */
> > -  && (unsigned HOST_WIDE_INT) const_op
> > -    < ((HOST_WIDE_INT_1U << (GET_MODE_PRECISION (int_mode) - 1) << 1) 
> > - 1)
> > +  && ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode)) > 
> > 0xff
> >    /* Ensure that we do not overflow during normalization.  */
> > -  && (code != GTU || (unsigned HOST_WIDE_INT) const_op < 
> > HOST_WIDE_INT_M1U))
> > +  && (code != GTU
> > + || ((unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode))
> > +    < HOST_WIDE_INT_M1U)
> > +  && trunc_int_for_mode (const_op, int_mode) == const_op)
> >  {
> > -  unsigned HOST_WIDE_INT n = (unsigned HOST_WIDE_INT) const_op;
> > +  unsigned HOST_WIDE_INT n
> > +   = (unsigned HOST_WIDE_INT) const_op & GET_MODE_MASK (int_mode);
> >    enum rtx_code adjusted_code;
> >  
> >    /* Normalize code to either LEU or GEU.  */
> > @@ -12051,15 +12052,15 @@ simplify_compare_const (enum rtx_code code, 
> > machine_mode mode,
> > HOST_WIDE_INT_PRINT_HEX ") to (MEM %s "
> > HOST_WIDE_INT_PRINT_HEX ").\n", GET_MODE_NAME (int_mode),
> > GET_MODE_NAME (narrow_mode_iter), GET_RTX_NAME (code),
> > -   (unsigned HOST_WIDE_INT)const_op, GET_RTX_NAME 
> > (adjusted_code),
> > -   n);
> > +   (unsigned HOST_WIDE_INT) co

Re: [PATCH 0/2] RISC-V: Constant FP Optimization with 'Zfa'

2023-08-13 Thread Tsukasa OI via Gcc-patches
Oh my, I forgot to change the subject of PATCH 0/2.
That should have been "RISC-V: Constant FP Optimization with 'Zfa'", the
same subject as PATCH 2/2.

Sorry for the confusion!

On 2023/08/14 14:32, Tsukasa OI wrote:
> Hello,
> 
> and... I think this might be my first *large* patch set for GCC
> contribution and definitely the first one to touch the machine description.
> 
> So, please review it carefully.
> 
> 
> Background
> ===
> 
> This patch set adds an optimization to FP constant initialization using a
> FLI instruction, which is a part of the 'Zfa' extension which provides
> additional floating-point instructions.
> 
> FLI instructions ("fli.h" for binary16, "fli.s" for binary32, "fli.d" for
> binary64 and "fli.q" for binary128 [which can be ignored because current
> GCC for RISC-V does not natively support binary128]) provide a
> load-immediate operation for the following 32 immediates.
> 
> | Binary Encoding | Immediate (and its part of binary representation) |
> | --- | --|
> |`00000` ( 0) | -1.0  (-0b1.00 * 2^(+ 0)) |
> |`00001` ( 1) | Minimum positive normal value |
> | | sign=[0] exponent=[0..01] significand=[000..000]  |
> |`00010` ( 2) | 1.00*2^(-16)  (+0b1.00 * 2^(-16)) |
> |`00011` ( 3) | 1.00*2^(-15)  (+0b1.00 * 2^(-15)) |
> |`00100` ( 4) | 1.00*2^(- 8)  (+0b1.00 * 2^(- 8)) |
> |`00101` ( 5) | 1.00*2^(- 7)  (+0b1.00 * 2^(- 7)) |
> |`00110` ( 6) | 1.00*2^(- 4)  (+0b1.00 * 2^(- 4)) = 0.0625|
> |`00111` ( 7) | 1.00*2^(- 3)  (+0b1.00 * 2^(- 3)) = 0.125 |
> |`01000` ( 8) | 1.00*2^(- 2)  (+0b1.00 * 2^(- 2)) : 0.25  |
> |`01001` ( 9) | 1.25*2^(- 2)  (+0b1.01 * 2^(- 2)) : 0.3125|
> |`01010` (10) | 1.50*2^(- 2)  (+0b1.10 * 2^(- 2)) : 0.375 |
> |`01011` (11) | 1.75*2^(- 2)  (+0b1.11 * 2^(- 2)) : 0.4375|
> |`01100` (12) | 1.00*2^(- 1)  (+0b1.00 * 2^(- 1)) : 0.5   |
> |`01101` (13) | 1.25*2^(- 1)  (+0b1.01 * 2^(- 1)) : 0.625 |
> |`01110` (14) | 1.50*2^(- 1)  (+0b1.10 * 2^(- 1)) : 0.75  |
> |`01111` (15) | 1.75*2^(- 1)  (+0b1.11 * 2^(- 1)) : 0.875 |
> |`10000` (16) | 1.00*2^(+ 0)  (+0b1.00 * 2^(+ 0)) : 1.0   |
> |`10001` (17) | 1.25*2^(+ 0)  (+0b1.01 * 2^(+ 0)) : 1.25  |
> |`10010` (18) | 1.50*2^(+ 0)  (+0b1.10 * 2^(+ 0)) : 1.5   |
> |`10011` (19) | 1.75*2^(+ 0)  (+0b1.11 * 2^(+ 0)) : 1.75  |
> |`10100` (20) | 1.00*2^(+ 1)  (+0b1.00 * 2^(+ 1)) : 2.0   |
> |`10101` (21) | 1.25*2^(+ 1)  (+0b1.01 * 2^(+ 1)) : 2.5   |
> |`10110` (22) | 1.50*2^(+ 1)  (+0b1.10 * 2^(+ 1)) : 3.0   |
> |`10111` (23) | 1.00*2^(+ 2)  (+0b1.00 * 2^(+ 2)) = 4 |
> |`11000` (24) | 1.00*2^(+ 3)  (+0b1.00 * 2^(+ 3)) = 8 |
> |`11001` (25) | 1.00*2^(+ 4)  (+0b1.00 * 2^(+ 4)) = 16|
> |`11010` (26) | 1.00*2^(+ 7)  (+0b1.00 * 2^(+ 7)) = 128   |
> |`11011` (27) | 1.00*2^(+ 8)  (+0b1.00 * 2^(+ 8)) = 256   |
> |`11100` (28) | 1.00*2^(+15)  (+0b1.00 * 2^(+15)) = 32768 |
> |`11101` (29) | 1.00*2^(+16)  (+0b1.00 * 2^(+16)) = 65536 |
> | | On "fli.h", this is equivalent to positive inf.   |
> |`11110` (30) | Positive infinity |
> | | sign=[0] exponent=[1..11] significand=[000..000]  |
> |`11111` (31) | Canonical NaN (positive, quiet and zero payload)  |
> | | sign=[0] exponent=[1..11] significand=[100..000]  |
> 
> Currently, initializing a FP constant (except zero) involves memory and its
> use can be reduced by FLI instructions.
> 
> We may have room to generate more complex constants with multiple FLI
> instructions (e.g. like long integer constants) but for starters, we can
> begin with optimizing one FP constant initialization with one FLI
> instruction (and because FP arithmetic often requires larger latency,
> benefits of making multiple FLI sequence is not high compared to integers).
> 
> 
> FLI FP constant checking
> =
> 
> An instruction with a similar role to RISC-V's FLI instructions is the Arm/
> AArch64's vmov.f32 instruction. It provides a load-immediate operation for
> constant that can be represented in the following form:
> 
>> (-1)^s * 0b1. * 2^r   (where -3 <= r <= +4; fits in 3-bits)
> 
> This patch is largely influenced by AArch64's handling but
> compared to this, handling RISC-V's FLI FP constant can be a little tricky.
> 
> *   FLI normally generates only values with sign bit 0 except the binary
> encoding 0 (which loads -1.0 with sign bit 1).
> *   Not only finite values, FLI can generate positive infinity and
> canonical NaN.
> *   Because FLI can generate canonical NaN, handling NaN is preferred 

Re: [PATCH v2 0/3] RISC-V: Add stub support for existing extensions

2023-08-13 Thread Tsukasa OI via Gcc-patches
Hi,

I'll explain a little more background.

A major case in which this patch set will be required is when a developer
uses inline assembly for instructions that are not yet natively supported by GCC
or that will not be supported as intrinsics.

In such cases, GCC should at least accept the corresponding extension via
-march (although there's a workaround using ".option" directives in the
inline assembly but that's tedious).

Thanks,
Tsukasa


On 2023/08/14 15:09, Tsukasa OI wrote:
> PATCH v1:
> 
> 
> 
> Changes: v1 -> v2 (only in PATCH 3/3)
> ==
> 
> Removed: 'Zvkn' -> 'Zvknha' implication (not to cause test failure)
> Added:   'Zfa' -> 'F' implication (just I forgot to add in PATCH v1)
> 
> 
> Thanks,
> Tsukasa
> 
> 
> 
> 
> Tsukasa OI (3):
>   RISC-V: Add stub support for existing extensions (privileged)
>   RISC-V: Add stub support for existing extensions (vendor)
>   RISC-V: Add stub support for existing extensions (unprivileged)
> 
>  gcc/common/config/riscv/riscv-common.cc| 44 ++
>  gcc/testsuite/gcc.target/riscv/predef-29.c | 35 +
>  gcc/testsuite/gcc.target/riscv/predef-30.c | 27 +
>  gcc/testsuite/gcc.target/riscv/predef-31.c | 31 +++
>  4 files changed, 137 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/predef-29.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/predef-30.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/predef-31.c
> 
> 
> base-commit: 614052dd4ea083e086712809c754ffebd9361316


Re: [PATCH 1/2] light expander sra v0

2023-08-13 Thread Jiufu Guo via Gcc-patches


Hi,

Jiufu Guo  writes:

> Hi,
>
> There are a few PRs about the issues on the struct parameters and
> returns, like PRs 69143/65421/108073.
>
> we could consider introducing a light SRA in the expander to
> handle those parameters and returns in aggregate type, if they
> are passed through registers.  For access to the fields of
> the parameters or returns, the corresponding scalar registers
> can be used.
>
> As discussed:
> https://gcc.gnu.org/pipermail/gcc-patches/2023-May/619884.html
>
> This is an initial patch for the light-expander-sra.

In this patch, there are a few places that can be enhanced. e.g.
- support the reverse storage accessing.
- support accessing fields on the part of the registers.
- support mixed vector/TI modes.
- support accessing on-call stmt and asm stmt.
- ...
An enhancement, I'm investigating to do first: when querying the
scalarized rtx value for a tree expression, a TREE->RTX map is
used.  It may be better to bind the scalar rtx value to the
tree-type expression directly (like DECL_RTL/INCOMING_RTL).
Then 'get_scalarized_rtx' can be simpler.
But I did not figure out a suitable field of TREE for this.

Thanks for any suggestions!

BR,
Jeff (Jiufu Guo)

>
> Bootstrapped and regtested on x86_64-redhat-linux, and
> powerpc64{,le}-linux-gnu.
>
> Is it ok for trunk?
>
>
> BR,
> Jeff (Jiufu Guo)
>
>
>   PR target/65421
>   PR target/69143
>
> gcc/ChangeLog:
>
>   * cfgexpand.cc (expand_shift): Extern declare.
>   (struct access): New class.
>   (struct expand_sra): New class.
>   (expand_sra::build_access): New member function.
>   (expand_sra::visit_base): Likewise.
>   (expand_sra::analyze_default_stmt): Likewise.
>   (expand_sra::analyze_assign): Likewise.
>   (expand_sra::add_sra_candidate): Likewise.
>   (expand_sra::collect_sra_candidates): Likewise.
>   (expand_sra::valid_scalariable_accesses): Likewise.
>   (expand_sra::prepare_expander_sra): Likewise.
>   (expand_sra::expand_sra): Class constructor.
>   (expand_sra::~expand_sra): Class destructor.
>   (expand_sra::get_scalarized_rtx): New member function.
>   (extract_one_reg): New function.
>   (extract_sub_reg): New function.
>   (expand_sra::scalarize_access): New member function.
>   (expand_sra::scalarize_accesses): New member function.
>   (get_scalar_rtx_for_aggregate_expr): New function.
>   (set_scalar_rtx_for_aggregate_access): New function.
>   (set_scalar_rtx_for_returns): New function.
>   (expand_return): Call get_scalar_rtx_for_aggregate_expr.
>   (expand_debug_expr): Call get_scalar_rtx_for_aggregate_expr.
>   (pass_expand::execute): Update to use the expand_sra.
>   * expr.cc (get_scalar_rtx_for_aggregate_expr): Extern declare.
>   (expand_assignment): Call get_scalar_rtx_for_aggregate_expr.
>   (expand_expr_real): Call get_scalar_rtx_for_aggregate_expr.
>   * function.cc (set_scalar_rtx_for_aggregate_access):  Extern declare.
>   (set_scalar_rtx_for_returns): Extern declare.
>   (assign_parm_setup_block): Call set_scalar_rtx_for_aggregate_access.
>   (assign_parms): Call set_scalar_rtx_for_aggregate_access. 
>   (expand_function_start): Call set_scalar_rtx_for_returns.
>   * tree-sra.h (struct base_access): New class.
>   (struct default_analyzer): New class.
>   (scan_function): New function template.
>
> gcc/testsuite/ChangeLog:
>
>   * g++.target/powerpc/pr102024.C: Updated.
>   * gcc.target/powerpc/pr108073.c: New test.
>   * gcc.target/powerpc/pr65421-1.c: New test.
>   * gcc.target/powerpc/pr65421-2.c: New test.
>
> ---
>  gcc/cfgexpand.cc | 478 ++-
>  gcc/expr.cc  |  15 +-
>  gcc/function.cc  |  28 +-
>  gcc/tree-sra.h   |  80 +++-
>  gcc/testsuite/g++.target/powerpc/pr102024.C  |   2 +-
>  gcc/testsuite/gcc.target/powerpc/pr108073.c  |  29 ++
>  gcc/testsuite/gcc.target/powerpc/pr65421-1.c |   6 +
>  gcc/testsuite/gcc.target/powerpc/pr65421-2.c |  32 ++
>  8 files changed, 660 insertions(+), 10 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-2.c
>
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index 
> edf292cfbe95ac2711faee7769e839cb4edb0dd3..21a09ebac96bbcddc67da73c42f470c6d5f60e6c
>  100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -74,6 +74,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "output.h"
>  #include "builtins.h"
>  #include "opts.h"
> +#include "tree-sra.h"
>  
>  /* Some systems use __main in a way incompatible with its use in gcc, in 
> these
> cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN 
> to
> @@ -97,6 +98,472 @@ static bool defer_stack_allocation (tree, bo

[PATCH v2 2/3] RISC-V: Add stub support for existing extensions (vendor)

2023-08-13 Thread Tsukasa OI via Gcc-patches
From: Tsukasa OI 

After commit c283c4774d1c ("RISC-V: Throw compilation error for unknown
extensions") changed how we handle unknown extensions, we have no
guarantee that we can share the same architectural string with Binutils
(specifically, the assembler).

To avoid compilation errors on shared Assembler-C/C++ projects, GCC should
support almost all extensions that Binutils support, even if the GCC does
not touch a thing.

This commit adds stub supported vendor extensions to
riscv_ext_version_table (no riscv_implied_info entries to add; all
information is copied from Binutils' bfd/elfxx-riscv.c).

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc (riscv_ext_version_table):
Add stub support for all vendor extensions supported by Binutils.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/predef-30.c: New test for a stub
vendor extension 'XVentanaCondOps'.
---
 gcc/common/config/riscv/riscv-common.cc|  2 ++
 gcc/testsuite/gcc.target/riscv/predef-30.c | 27 ++
 2 files changed, 29 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-30.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 91b0316acfea..0c351105e015 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -298,6 +298,8 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"xtheadmempair", ISA_SPEC_CLASS_NONE, 1, 0},
   {"xtheadsync", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"xventanacondops", ISA_SPEC_CLASS_NONE, 1, 0},
+
   /* Terminate the list.  */
   {NULL, ISA_SPEC_CLASS_NONE, 0, 0}
 };
diff --git a/gcc/testsuite/gcc.target/riscv/predef-30.c 
b/gcc/testsuite/gcc.target/riscv/predef-30.c
new file mode 100644
index ..9784b9ce5033
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/predef-30.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64i_xventanacondops -mabi=lp64 -mcmodel=medlow 
-misa-spec=20191213" } */
+
+int main () {
+
+#ifndef __riscv_arch_test
+#error "__riscv_arch_test"
+#endif
+
+#if __riscv_xlen != 64
+#error "__riscv_xlen"
+#endif
+
+#if !defined(__riscv_i) || (__riscv_i != (2 * 1000 * 1000 + 1 * 1000))
+#error "__riscv_i"
+#endif
+
+#if defined(__riscv_e)
+#error "__riscv_e"
+#endif
+
+#if !defined(__riscv_xventanacondops)
+#error "__riscv_xventanacondops"
+#endif
+
+  return 0;
+}
-- 
2.41.0



[PATCH v2 1/3] RISC-V: Add stub support for existing extensions (privileged)

2023-08-13 Thread Tsukasa OI via Gcc-patches
From: Tsukasa OI 

After commit c283c4774d1c ("RISC-V: Throw compilation error for unknown
extensions") changed how we handle unknown extensions, we have no
guarantee that we can share the same architectural string with Binutils
(specifically, the assembler).

To avoid compilation errors on shared Assembler-C/C++ projects, GCC should
support almost all extensions that Binutils support, even if the GCC does
not touch a thing.

As a start, this commit adds stub support for *privileged* extensions to
riscv_ext_version_table and their implications to riscv_implied_info
(all information is copied from Binutils' bfd/elfxx-riscv.c).

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc
(riscv_implied_info): Add implications from privileged extensions.
(riscv_ext_version_table): Add stub support for all privileged
extensions supported by Binutils.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/predef-29.c: New test for a stub privileged
extension 'Smstateen' with some implications.
---
 gcc/common/config/riscv/riscv-common.cc| 18 +++
 gcc/testsuite/gcc.target/riscv/predef-29.c | 35 ++
 2 files changed, 53 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-29.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 21f83f26371f..91b0316acfea 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -130,6 +130,14 @@ static const riscv_implied_info_t riscv_implied_info[] =
   {"zhinx", "zhinxmin"},
   {"zhinxmin", "zfinx"},
 
+  {"smaia", "ssaia"},
+  {"smstateen", "ssstateen"},
+  {"smepmp", "zicsr"},
+  {"ssaia", "zicsr"},
+  {"sscofpmf", "zicsr"},
+  {"ssstateen", "zicsr"},
+  {"sstc", "zicsr"},
+
   {NULL, NULL}
 };
 
@@ -264,8 +272,18 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
 
   {"zmmul", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"smaia", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"smepmp",ISA_SPEC_CLASS_NONE, 1, 0},
+  {"smstateen", ISA_SPEC_CLASS_NONE, 1, 0},
+
+  {"ssaia", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"sscofpmf",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"ssstateen", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"sstc",  ISA_SPEC_CLASS_NONE, 1, 0},
+
   {"svinval", ISA_SPEC_CLASS_NONE, 1, 0},
   {"svnapot", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"svpbmt",  ISA_SPEC_CLASS_NONE, 1, 0},
 
   {"xtheadba", ISA_SPEC_CLASS_NONE, 1, 0},
   {"xtheadbb", ISA_SPEC_CLASS_NONE, 1, 0},
diff --git a/gcc/testsuite/gcc.target/riscv/predef-29.c 
b/gcc/testsuite/gcc.target/riscv/predef-29.c
new file mode 100644
index ..61c6429be558
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/predef-29.c
@@ -0,0 +1,35 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64i_smstateen -mabi=lp64 -mcmodel=medlow 
-misa-spec=20191213" } */
+
+int main () {
+
+#ifndef __riscv_arch_test
+#error "__riscv_arch_test"
+#endif
+
+#if __riscv_xlen != 64
+#error "__riscv_xlen"
+#endif
+
+#if !defined(__riscv_i) || (__riscv_i != (2 * 1000 * 1000 + 1 * 1000))
+#error "__riscv_i"
+#endif
+
+#if defined(__riscv_e)
+#error "__riscv_e"
+#endif
+
+#if !defined(__riscv_zicsr)
+#error "__riscv_zicsr"
+#endif
+
+#if !defined(__riscv_smstateen)
+#error "__riscv_smstateen"
+#endif
+
+#if !defined(__riscv_ssstateen)
+#error "__riscv_ssstateen"
+#endif
+
+  return 0;
+}
-- 
2.41.0



[PATCH v2 3/3] RISC-V: Add stub support for existing extensions (unprivileged)

2023-08-13 Thread Tsukasa OI via Gcc-patches
From: Tsukasa OI 

After commit c283c4774d1c ("RISC-V: Throw compilation error for unknown
extensions") changed how we handle unknown extensions, we have no
guarantee that we can share the same architectural string with Binutils
(specifically, the assembler).

To avoid compilation errors on shared Assembler-C/C++ projects, GCC should
support almost all extensions that Binutils support, even if the GCC does
not touch a thing.

This commit adds stub support for standard unprivileged extensions to
riscv_ext_version_table and their implications to riscv_implied_info
(all information is copied from Binutils' bfd/elfxx-riscv.c, except for
the not-yet-merged 'Zce', 'Zcmp' and 'Zcmt' support).

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc
(riscv_implied_info): Add implications from unprivileged extensions.
(riscv_ext_version_table): Add stub support for all unprivileged
extensions supported by Binutils as well as 'Zce', 'Zcmp', 'Zcmt'.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/predef-31.c: New test for a stub unprivileged
extension 'Zcb' with some implications.
---
 gcc/common/config/riscv/riscv-common.cc| 24 +
 gcc/testsuite/gcc.target/riscv/predef-31.c | 31 ++
 2 files changed, 55 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-31.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 0c351105e015..27e9072899bf 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -121,6 +121,20 @@ static const riscv_implied_info_t riscv_implied_info[] =
   {"zvksg", "zvks"},
   {"zvksg", "zvkg"},
 
+  {"zcb", "zca"},
+  {"zcd", "zca"},
+  {"zcd", "d"},
+  {"zcf", "zca"},
+  {"zcf", "f"},
+  {"zce", "zca"},
+  {"zce", "zcb"},
+  {"zce", "zcmp"},
+  {"zce", "zcmt"},
+  {"zcmp", "zca"},
+  {"zcmt", "zca"},
+  {"zcmt", "zicsr"},
+
+  {"zfa", "f"},
   {"zfh", "zfhmin"},
   {"zfhmin", "f"},
   {"zvfhmin", "zve32f"},
@@ -197,6 +211,14 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
 
   {"zawrs", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"zca",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zcb",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zce",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zcmp", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zcmt", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zcd",  ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zcf",  ISA_SPEC_CLASS_NONE, 1, 0},
+
   {"zba", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zbb", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zbc", ISA_SPEC_CLASS_NONE, 1, 0},
@@ -219,6 +241,7 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zkt",   ISA_SPEC_CLASS_NONE, 1, 0},
 
   {"zihintntl", ISA_SPEC_CLASS_NONE, 1, 0},
+  {"zihintpause", ISA_SPEC_CLASS_NONE, 2, 0},
 
   {"zicboz",ISA_SPEC_CLASS_NONE, 1, 0},
   {"zicbom",ISA_SPEC_CLASS_NONE, 1, 0},
@@ -265,6 +288,7 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zvl32768b", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvl65536b", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"zfa",   ISA_SPEC_CLASS_NONE, 0, 1},
   {"zfh",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"zfhmin",ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvfhmin",   ISA_SPEC_CLASS_NONE, 1, 0},
diff --git a/gcc/testsuite/gcc.target/riscv/predef-31.c 
b/gcc/testsuite/gcc.target/riscv/predef-31.c
new file mode 100644
index ..4ea11442f995
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/predef-31.c
@@ -0,0 +1,31 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64i_zcb -mabi=lp64 -mcmodel=medlow 
-misa-spec=20191213" } */
+
+int main () {
+
+#ifndef __riscv_arch_test
+#error "__riscv_arch_test"
+#endif
+
+#if __riscv_xlen != 64
+#error "__riscv_xlen"
+#endif
+
+#if !defined(__riscv_i) || (__riscv_i != (2 * 1000 * 1000 + 1 * 1000))
+#error "__riscv_i"
+#endif
+
+#if defined(__riscv_e)
+#error "__riscv_e"
+#endif
+
+#if !defined(__riscv_zca)
+#error "__riscv_zca"
+#endif
+
+#if !defined(__riscv_zcb)
+#error "__riscv_zcb"
+#endif
+
+  return 0;
+}
-- 
2.41.0



[PATCH v2 0/3] RISC-V: Add stub support for existing extensions

2023-08-13 Thread Tsukasa OI via Gcc-patches
PATCH v1:



Changes: v1 -> v2 (only in PATCH 3/3)
==

Removed: 'Zvkn' -> 'Zvknha' implication (not to cause test failure)
Added:   'Zfa' -> 'F' implication (I simply forgot to add it in PATCH v1)


Thanks,
Tsukasa




Tsukasa OI (3):
  RISC-V: Add stub support for existing extensions (privileged)
  RISC-V: Add stub support for existing extensions (vendor)
  RISC-V: Add stub support for existing extensions (unprivileged)

 gcc/common/config/riscv/riscv-common.cc| 44 ++
 gcc/testsuite/gcc.target/riscv/predef-29.c | 35 +
 gcc/testsuite/gcc.target/riscv/predef-30.c | 27 +
 gcc/testsuite/gcc.target/riscv/predef-31.c | 31 +++
 4 files changed, 137 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-29.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-30.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/predef-31.c


base-commit: 614052dd4ea083e086712809c754ffebd9361316
-- 
2.41.0



Re: [PATCH v9] RISC-V: Add the 'zfa' extension, version 0.2

2023-08-13 Thread Jin Ma via Gcc-patches
Additional links:
v10, the patch that needs to be reviewed again:
http://patchwork.ozlabs.org/project/gcc/patch/20230814055033.1995-1-ji...@linux.alibaba.com/

v9 and the previous review comments:
http://patchwork.ozlabs.org/project/gcc/patch/20230515131628.953-1-ji...@linux.alibaba.com/

Zfa patch in master branch of binutils-gdb
https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=1f3fc45bddc7147a2e59346a59290094137ef1e1

[PATCH v1] RISC-V: Support RVV VFWNMSAC rounding mode intrinsic API

2023-08-13 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch adds support for the rounding mode API for VFWNMSAC,
as shown in the samples below.

* __riscv_vfwnmsac_vv_f64m2_rm
* __riscv_vfwnmsac_vv_f64m2_rm_m
* __riscv_vfwnmsac_vf_f64m2_rm
* __riscv_vfwnmsac_vf_f64m2_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfwnmsac_frm): New class for frm.
(vfwnmsac_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfwnmsac_frm): New intrinsic function definition.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-wnmsac.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  | 25 ++
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  2 +
 .../riscv/rvv/base/float-point-wnmsac.c   | 47 +++
 4 files changed, 75 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmsac.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 5a5da903cb2..b458560a040 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -608,6 +608,29 @@ public:
   }
 };
 
+/* Implements below instructions for frm
+   - vfwnmsac
+*/
+class vfwnmsac_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_widen_ternop_insn (
+   code_for_pred_widen_mul_neg_scalar (PLUS, e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_widen_ternop_insn (
+   code_for_pred_widen_mul_neg (PLUS, e.vector_mode ()));
+
+gcc_unreachable ();
+  }
+};
+
 /* Implements vrsub.  */
 class vrsub : public function_base
 {
@@ -2390,6 +2413,7 @@ static CONSTEXPR const vfwnmacc_frm vfwnmacc_frm_obj;
 static CONSTEXPR const vfwmsac vfwmsac_obj;
 static CONSTEXPR const vfwmsac_frm vfwmsac_frm_obj;
 static CONSTEXPR const vfwnmsac vfwnmsac_obj;
+static CONSTEXPR const vfwnmsac_frm vfwnmsac_frm_obj;
 static CONSTEXPR const unop vfsqrt_obj;
 static CONSTEXPR const float_misc vfrsqrt7_obj;
 static CONSTEXPR const float_misc vfrec7_obj;
@@ -2636,6 +2660,7 @@ BASE (vfwnmacc_frm)
 BASE (vfwmsac)
 BASE (vfwmsac_frm)
 BASE (vfwnmsac)
+BASE (vfwnmsac_frm)
 BASE (vfsqrt)
 BASE (vfrsqrt7)
 BASE (vfrec7)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 09356dd7ac8..85e8b9a3769 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -182,6 +182,7 @@ extern const function_base *const vfwnmacc_frm;
 extern const function_base *const vfwmsac;
 extern const function_base *const vfwmsac_frm;
 extern const function_base *const vfwnmsac;
+extern const function_base *const vfwnmsac_frm;
 extern const function_base *const vfsqrt;
 extern const function_base *const vfrsqrt7;
 extern const function_base *const vfrec7;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index e2a79607d04..7e2a4ab2969 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -382,6 +382,8 @@ DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, 
f_wwvv_ops)
 DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwfv_ops)
 DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwvv_ops)
 DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwfv_ops)
+DEF_RVV_FUNCTION (vfwnmsac_frm, alu_frm, full_preds, f_wwvv_ops)
+DEF_RVV_FUNCTION (vfwnmsac_frm, alu_frm, full_preds, f_wwfv_ops)
 
 // 13.8. Vector Floating-Point Square-Root Instruction
 DEF_RVV_FUNCTION (vfsqrt, alu, full_preds, f_v_ops)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmsac.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmsac.c
new file mode 100644
index 000..13eb306313c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmsac.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+typedef float float32_t;
+
+vfloat64m2_t
+test_vfwnmsac_vv_f32m1_rm (vfloat64m2_t vd, vfloat32m1_t op1, vfloat32m1_t op2,
+  size_t vl) {
+  return __riscv_vfwnmsac_vv_f64m2_rm (vd, op1, op2, 0, vl);
+}
+
+vfloat64m2_t
+test_vfwnmsac_vv_f32m1_rm_m (vbool32_t mask, vfloat64m2_t vd, vfloat32m1_t op1,
+vfloat32m1_t op2, size_t vl) {
+  return __riscv_vfwnmsac_vv_f64m2_rm_m (mask, vd, op1, op2, 1, vl);
+}
+
+vfloat64m2_t
+test_vf

Re: [PATCH v1 2/6] LoongArch: improved target configuration interface

2023-08-13 Thread Xi Ruoyao via Gcc-patches
On Mon, 2023-08-14 at 13:58 +0800, Xi Ruoyao via Gcc-patches wrote:
> On Mon, 2023-08-14 at 11:57 +0800, Yang Yujie wrote:
> > * Support options for LoongArch SIMD extensions:
> >   new configure options --with-simd={none,lsx,lasx};
> >   new driver options -m[no]-l[a]sx / -msimd={none,lsx,lasx}.
> 
> I suggest to rename --with-simd= to --with-ext= and accept a comma-
> separated ISA extension list, because we have non-SIMD ISA extensions.
> For example, "--with-ext=lasx,lbt" will make -mlasx, -mlsx (implied),
> and -mlbt the default.  I prefer "-mlasx" over "-msimd=lasx" because "-
> mlasx" is shorter anyway (if there is no real reason to make -mlasx and
> -msimd=lasx two different things).

Perhaps just "--with-feature" or "--with-loongarch-feature", then we can
even fold -mstrict-align here, like "--with-feature=lbt,strict-align".


-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH v9] RISC-V: Add the 'zfa' extension, version 0.2

2023-08-13 Thread Jin Ma via Gcc-patches
> > Hi Jin Ma,
> > 
> > On 5/16/23 00:06, jinma via Gcc-patches wrote:
> > > On 5/15/23 07:16, Jin Ma wrote:
> > >>
> > >> Do we also need to check Z[FDH]INX too?
> > >>
> > >> Otherwise it looks pretty good.  We just need to wait for everything to
> > >> freeze and finalization on the assembler interface.
> > >>
> > >> jeff
> > > Yes, you are right, we also need to check Z[FDH]INX. I will send a patch
> > > again to fix it after others give some review comments.
> > 
> > Can we please revisit this and get this merged upstream.
> > Seems like gcc is supporting frozen but not ratified extensions.
> > 
> > Thx,
> > -Vineet
> 
> OK, I will check and resend a patch about this in a few days.
> 
> Thanks,
> Jin

Done, please review again. Compared with the v9 version from two months ago,
the previous review comments have been addressed. In addition, the variable
riscv_zfa_subext has been added to riscv.opt to enable the zfa extension.

Re: [PATCH v1 2/6] LoongArch: improved target configuration interface

2023-08-13 Thread Xi Ruoyao via Gcc-patches
On Mon, 2023-08-14 at 11:57 +0800, Yang Yujie wrote:
> * Support options for LoongArch SIMD extensions:
>   new configure options --with-simd={none,lsx,lasx};
>   new driver options -m[no]-l[a]sx / -msimd={none,lsx,lasx}.

I suggest renaming --with-simd= to --with-ext= and accepting a
comma-separated ISA extension list, because we have non-SIMD ISA extensions.
For example, "--with-ext=lasx,lbt" will make -mlasx, -mlsx (implied),
and -mlbt the default.  I prefer "-mlasx" over "-msimd=lasx" because
"-mlasx" is shorter anyway (if there is no real reason to make -mlasx and
-msimd=lasx two different things).

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH v10] RISC-V: Add support for the Zfa extension

2023-08-13 Thread Jin Ma via Gcc-patches
This patch adds the 'Zfa' extension for riscv, which is based on:
https://github.com/riscv/riscv-isa-manual/commits/zfb

The binutils-gdb for 'Zfa' extension:
https://sourceware.org/pipermail/binutils/2023-April/127060.html

What needs special explanation is:
1, According to riscv-spec, "The FCVTMOD.W.D instruction was added principally
  to accelerate the processing of JavaScript Numbers.", so it seems that no
  implementation is required.

2, The instructions FMINM and FMAXM correspond to the C23 library functions
  fminimum and fmaximum.  Therefore, this patch simply implements the
  fminm<mode>3 and fmaxm<mode>3 patterns to prepare for later use.

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc: Add zfa extension version, which 
depends on
the F extension.
* config/riscv/constraints.md (zfli): Constrain the floating point 
number that the
instructions FLI.H/S/D can load.
* config/riscv/iterators.md (ceil): New.
* config/riscv/riscv-opts.h (MASK_ZFA): New.
(TARGET_ZFA): New.
* config/riscv/riscv-protos.h (riscv_float_const_rtx_index_for_fli): 
New.
* config/riscv/riscv.cc (riscv_float_const_rtx_index_for_fli): New.
(riscv_cannot_force_const_mem): If instruction FLI.H/S/D can be used, 
memory is
not applicable.
(riscv_const_insns): Likewise.
(riscv_legitimize_const_move): Likewise.
(riscv_split_64bit_move_p): If instruction FLI.H/S/D can be used, no 
split is
required.
(riscv_split_doubleword_move): Likewise.
(riscv_output_move): Output the mov instructions in zfa extension.
(riscv_print_operand): Output the floating-point value of the FLI.H/S/D 
immediate
in assembly.
(riscv_secondary_memory_needed): Likewise.
* config/riscv/riscv.md (fminm3): New.
(fmaxm3): New.
(movsidf2_low_rv32): New.
(movsidf2_high_rv32): New.
(movdfsisi3_rv32): New.
(f_quiet4_zfa): New.
* config/riscv/riscv.opt: New.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zfa-fleq-fltq.c: New test.
* gcc.target/riscv/zfa-fli-zfh.c: New test.
* gcc.target/riscv/zfa-fli.c: New test.
* gcc.target/riscv/zfa-fmovh-fmovp.c: New test.
* gcc.target/riscv/zfa-fround.c: New test.
---
 gcc/common/config/riscv/riscv-common.cc   |   7 +
 gcc/config/riscv/constraints.md   |  21 +-
 gcc/config/riscv/iterators.md |   5 +
 gcc/config/riscv/riscv-opts.h |   3 +
 gcc/config/riscv/riscv-protos.h   |   1 +
 gcc/config/riscv/riscv.cc | 205 +-
 gcc/config/riscv/riscv.md | 145 +++--
 gcc/config/riscv/riscv.opt|   3 +
 .../gcc.target/riscv/zfa-fleq-fltq.c  |  20 ++
 gcc/testsuite/gcc.target/riscv/zfa-fli-zfh.c  |  42 
 gcc/testsuite/gcc.target/riscv/zfa-fli.c  |  80 +++
 .../gcc.target/riscv/zfa-fmovh-fmovp.c|  10 +
 gcc/testsuite/gcc.target/riscv/zfa-fround.c   |  43 
 13 files changed, 549 insertions(+), 36 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fleq-fltq.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-zfh.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fmovh-fmovp.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fround.c

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 21f83f26371..c16986e1762 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -123,6 +123,9 @@ static const riscv_implied_info_t riscv_implied_info[] =
 
   {"zfh", "zfhmin"},
   {"zfhmin", "f"},
+
+  {"zfa", "f"},
+
   {"zvfhmin", "zve32f"},
   {"zvfh", "zve32f"},
   {"zvfh", "zfhmin"},
@@ -262,6 +265,8 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zvfhmin",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvfh",  ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"zfa", ISA_SPEC_CLASS_NONE, 0, 1},
+
   {"zmmul", ISA_SPEC_CLASS_NONE, 1, 0},
 
   {"svinval", ISA_SPEC_CLASS_NONE, 1, 0},
@@ -1405,6 +1410,8 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
   {"zvfhmin",   &gcc_options::x_riscv_zf_subext, MASK_ZVFHMIN},
   {"zvfh",  &gcc_options::x_riscv_zf_subext, MASK_ZVFH},
 
+  {"zfa",   &gcc_options::x_riscv_zfa_subext, MASK_ZFA},
+
   {"zmmul", &gcc_options::x_riscv_zm_subext, MASK_ZMMUL},
 
   {"svinval", &gcc_options::x_riscv_sv_subext, MASK_SVINVAL},
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 44525b2da49..3f52bc76f67 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -118,6 +118,19 @@ (define_constraint "T"
   (and (match_operand 0 "move_operand")
(match_test "CONSTANT_P (op)")))
 
+;; Zfa constraints.
+
+(define_constraint "zfli"
+  "A floa

[PATCH 2/2] combine nonconstant_array walker and expander_sra walker

2023-08-13 Thread Jiufu Guo via Gcc-patches
Hi,

In the light-expander-sra, each statement in each basic-block of a function
need to be analyzed, and there is a similar behavior in checking variable
which need to be stored in the stack.

These per-stmt analyses can be combined to improve cache locality.

Bootstrapped and regtested on x86_64-redhat-linux, and
powerpc64{,le}-linux-gnu.

Is it ok for trunk?

BR,
Jeff (Jiufu Guo)

gcc/ChangeLog:

* cfgexpand.cc (discover_nonconstant_array_refs): Deleted.
(struct array_and_sra_walk): New class.
(pass_expand::execute): Call scan_function on array_and_sra_walk.

---
 gcc/cfgexpand.cc | 104 +++
 1 file changed, 52 insertions(+), 52 deletions(-)

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 
21a09ebac96bbcddc67da73c42f470c6d5f60e6c..dc3ebe45275cc4b1c0873b4c6e5f6cbe2491ab8c
 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -6843,59 +6843,59 @@ avoid_type_punning_on_regs (tree t, bitmap 
forced_stack_vars)
 bitmap_set_bit (forced_stack_vars, DECL_UID (base));
 }
 
-/* RTL expansion is not able to compile array references with variable
-   offsets for arrays stored in single register.  Discover such
-   expressions and mark variables as addressable to avoid this
-   scenario.  */
+/* Beside light-sra, walk stmts to discover expressions of array references
+   with variable offsets for arrays and mark variables as addressable to
+   avoid to be stored in single register. */
 
-static void
-discover_nonconstant_array_refs (bitmap forced_stack_vars)
+struct array_and_sra_walk : public expand_sra
 {
-  basic_block bb;
-  gimple_stmt_iterator gsi;
+  array_and_sra_walk (bitmap map) : wi{}, forced_stack_vars (map)
+  {
+wi.info = forced_stack_vars;
+  };
 
-  walk_stmt_info wi = {};
-  wi.info = forced_stack_vars;
-  FOR_EACH_BB_FN (bb, cfun)
-for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+  void pre_analyze_stmt (gimple *stmt)
+  {
+expand_sra::pre_analyze_stmt (stmt);
+if (!is_gimple_debug (stmt))
+  walk_gimple_op (stmt, discover_nonconstant_array_refs_r, &wi);
+if (gimple_vdef (stmt))
   {
-   gimple *stmt = gsi_stmt (gsi);
-   if (!is_gimple_debug (stmt))
+   tree t = gimple_get_lhs (stmt);
+   if (t && REFERENCE_CLASS_P (t))
+ avoid_type_punning_on_regs (t, forced_stack_vars);
+  }
+  }
+
+  void analyze_call (gcall *call)
+  {
+expand_sra::analyze_call (call);
+if (gimple_call_internal_p (call))
+  {
+   tree cand = NULL_TREE;
+   switch (gimple_call_internal_fn (call))
  {
-   walk_gimple_op (stmt, discover_nonconstant_array_refs_r, &wi);
-   gcall *call = dyn_cast  (stmt);
-   if (call && gimple_call_internal_p (call))
- {
-   tree cand = NULL_TREE;
-   switch (gimple_call_internal_fn (call))
- {
- case IFN_LOAD_LANES:
-   /* The source must be a MEM.  */
-   cand = gimple_call_arg (call, 0);
-   break;
- case IFN_STORE_LANES:
-   /* The destination must be a MEM.  */
-   cand = gimple_call_lhs (call);
-   break;
- default:
-   break;
- }
-   if (cand)
- cand = get_base_address (cand);
-   if (cand
-   && DECL_P (cand)
-   && use_register_for_decl (cand))
- bitmap_set_bit (forced_stack_vars, DECL_UID (cand));
- }
-   if (gimple_vdef (stmt))
- {
-   tree t = gimple_get_lhs (stmt);
-   if (t && REFERENCE_CLASS_P (t))
- avoid_type_punning_on_regs (t, forced_stack_vars);
- }
+ case IFN_LOAD_LANES:
+   /* The source must be a MEM.  */
+   cand = gimple_call_arg (call, 0);
+   break;
+ case IFN_STORE_LANES:
+   /* The destination must be a MEM.  */
+   cand = gimple_call_lhs (call);
+   break;
+ default:
+   break;
  }
+   if (cand)
+ cand = get_base_address (cand);
+   if (cand && DECL_P (cand) && use_register_for_decl (cand))
+ bitmap_set_bit (forced_stack_vars, DECL_UID (cand));
   }
-}
+  };
+
+  walk_stmt_info wi;
+  bitmap forced_stack_vars;
+};
 
 /* This function sets crtl->args.internal_arg_pointer to a virtual
register if DRAP is needed.  Local register allocator will replace
@@ -7091,12 +7091,12 @@ pass_expand::execute (function *fun)
avoid_deep_ter_for_debug (gsi_stmt (gsi), 0);
 }
 
-  /* Mark arrays indexed with non-constant indices with TREE_ADDRESSABLE.  */
+  /* Mark arrays indexed with non-constant indices with TREE_ADDRESSABLE.
+ And scan expressions for possible SRA accesses. */
   auto_bitmap forced_stack_vars;
-  discover_nonconstant_array_refs (force

[PATCH 1/2] light expander sra v0

2023-08-13 Thread Jiufu Guo via Gcc-patches
Hi,

There are a few PRs about the issues on the struct parameters and
returns, like PRs 69143/65421/108073.

We could consider introducing a light SRA in the expander to
handle parameters and returns of aggregate type, if they
are passed through registers.  For access to the fields of
the parameters or returns, the corresponding scalar registers
can be used.

As discussed:
https://gcc.gnu.org/pipermail/gcc-patches/2023-May/619884.html

This is an initial patch for the light-expander-sra.

Bootstrapped and regtested on x86_64-redhat-linux, and
powerpc64{,le}-linux-gnu.

Is it ok for trunk?


BR,
Jeff (Jiufu Guo)


PR target/65421
PR target/69143

gcc/ChangeLog:

* cfgexpand.cc (expand_shift): Extern declare.
(struct access): New class.
(struct expand_sra): New class.
(expand_sra::build_access): New member function.
(expand_sra::visit_base): Likewise.
(expand_sra::analyze_default_stmt): Likewise.
(expand_sra::analyze_assign): Likewise.
(expand_sra::add_sra_candidate): Likewise.
(expand_sra::collect_sra_candidates): Likewise.
(expand_sra::valid_scalariable_accesses): Likewise.
(expand_sra::prepare_expander_sra): Likewise.
(expand_sra::expand_sra): Class constructor.
(expand_sra::~expand_sra): Class destructor.
(expand_sra::get_scalarized_rtx): New member function.
(extract_one_reg): New function.
(extract_sub_reg): New function.
(expand_sra::scalarize_access): New member function.
(expand_sra::scalarize_accesses): New member function.
(get_scalar_rtx_for_aggregate_expr): New function.
(set_scalar_rtx_for_aggregate_access): New function.
(set_scalar_rtx_for_returns): New function.
(expand_return): Call get_scalar_rtx_for_aggregate_expr.
(expand_debug_expr): Call get_scalar_rtx_for_aggregate_expr.
(pass_expand::execute): Update to use the expand_sra.
* expr.cc (get_scalar_rtx_for_aggregate_expr): Extern declare.
(expand_assignment): Call get_scalar_rtx_for_aggregate_expr.
(expand_expr_real): Call get_scalar_rtx_for_aggregate_expr.
* function.cc (set_scalar_rtx_for_aggregate_access):  Extern declare.
(set_scalar_rtx_for_returns): Extern declare.
(assign_parm_setup_block): Call set_scalar_rtx_for_aggregate_access.
(assign_parms): Call set_scalar_rtx_for_aggregate_access. 
(expand_function_start): Call set_scalar_rtx_for_returns.
* tree-sra.h (struct base_access): New class.
(struct default_analyzer): New class.
(scan_function): New function template.

gcc/testsuite/ChangeLog:

* g++.target/powerpc/pr102024.C: Updated.
* gcc.target/powerpc/pr108073.c: New test.
* gcc.target/powerpc/pr65421-1.c: New test.
* gcc.target/powerpc/pr65421-2.c: New test.

---
 gcc/cfgexpand.cc | 478 ++-
 gcc/expr.cc  |  15 +-
 gcc/function.cc  |  28 +-
 gcc/tree-sra.h   |  80 +++-
 gcc/testsuite/g++.target/powerpc/pr102024.C  |   2 +-
 gcc/testsuite/gcc.target/powerpc/pr108073.c  |  29 ++
 gcc/testsuite/gcc.target/powerpc/pr65421-1.c |   6 +
 gcc/testsuite/gcc.target/powerpc/pr65421-2.c |  32 ++
 8 files changed, 660 insertions(+), 10 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-2.c

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 
edf292cfbe95ac2711faee7769e839cb4edb0dd3..21a09ebac96bbcddc67da73c42f470c6d5f60e6c
 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -74,6 +74,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "output.h"
 #include "builtins.h"
 #include "opts.h"
+#include "tree-sra.h"
 
 /* Some systems use __main in a way incompatible with its use in gcc, in these
cases use the macros NAME__MAIN to give a quoted symbol and SYMBOL__MAIN to
@@ -97,6 +98,472 @@ static bool defer_stack_allocation (tree, bool);
 
 static void record_alignment_for_reg_var (unsigned int);
 
+extern rtx
+expand_shift (enum tree_code, machine_mode, rtx, poly_int64, rtx, int);
+
+/* For light SRA in expander about paramaters and returns.  */
+struct access : public base_access
+{
+  /* The rtx for the access: link to incoming/returning register(s).  */
+  rtx rtx_val;
+};
+
+typedef struct access *access_p;
+
+struct expand_sra : public default_analyzer
+{
+  expand_sra ();
+  ~expand_sra ();
+
+  /* Now use default APIs, no actions for
+ pre_analyze_stmt, analyze_return.  */
+
+  /* overwrite analyze_default_stmt.  */
+  void analyze_default_stmt (gimple *);
+
+  /* overwrite analyze phi,call,asm .  */
+  void analyze_phi (gphi *stmt) { analyze_default_stmt (stmt); };
+  void analyze_call (gcall *stmt

Re: [PATCH v1 1/6] LoongArch: a symmetric multilib subdir layout

2023-08-13 Thread Xi Ruoyao via Gcc-patches
On Mon, 2023-08-14 at 13:38 +0800, Xi Ruoyao wrote:
> 
> > However, for LoongArch, we do not want such a "toplevel" library
> > installation since the default ABI may change.  We expect all
> > multilib variants of libraries to be installed to their designated
> > ABI-specific subdirs (e.g. base/lp64d) of the GCC libdir, so that
> > the default ABI can be configured arbitrarily (with --with-abi)
> > while the gcc libdir layout stays consistent.  This could be
> > helpful for the distribution packaging of GCC libraries.
> 
> Have you tested a --disable-multilib configuration?  To me with --
> disable-configuration everything should be still in the toplevel

I mean --disable-multilib configuration, not "--disable-configuration".

> directory, not any sub-directory.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH v1 1/6] LoongArch: a symmetric multilib subdir layout

2023-08-13 Thread Xi Ruoyao via Gcc-patches
On Mon, 2023-08-14 at 11:57 +0800, Yang Yujie wrote:

> However, for LoongArch, we do not want such a "toplevel" library
> installation since the default ABI may change.  We expect all
> multilib variants of libraries to be installed to their designated
> ABI-specific subdirs (e.g. base/lp64d) of the GCC libdir, so that
> the default ABI can be configured arbitrarily (with --with-abi)
> while the gcc libdir layout stays consistent.  This could be
> helpful for the distribution packaging of GCC libraries.

Have you tested a --disable-multilib configuration?  To me with --
disable-configuration everything should be still in the toplevel
directory, not any sub-directory.

/* snip */

> ChangeLog:
> 
>     * config-ml.in: add loongarch support.  Allow overriding

Use a tab, not 8 white spaces.  Likewise for all patches in the series.

>     toplevel multisubdir.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH 2/2] RISC-V: Constant FP Optimization with 'Zfa'

2023-08-13 Thread Tsukasa OI via Gcc-patches
From: Tsukasa OI 

This commit implements an optimization for assignments from a FP constant
to a FP register using a FLI instruction from the 'Zfa' extension.

For this purpose, it adds the constraint "H" and adds an "H -> f" variant
to the hardfloat move instructions.  Because the FLI instruction constraint
is a bit complex, it adds the riscv_get_float_fli_const function, which
parses a floating point constant if appropriate and reports its validity
in the return value.

It also modifies the cost model for floating point constants and implements
a simple yet bit-accurate printer for valid finite FLI constants.

This optimization is partially based on AArch64
(vmov instruction handling).

gcc/ChangeLog:

* config/riscv/constraints.md (H): New.
* config/riscv/riscv-protos.h (enum riscv_float_fli_const_type):
New to identify the FLI constant type.
(struct riscv_float_fli_const): New to represent an optional
FLI constant.
* config/riscv/riscv.cc (riscv_get_float_fli_const): New function
to parse a CONST_DOUBLE and return optionally-valid FLI constant.
(riscv_const_insns): Modify CONST_DOUBLE cost model.
(riscv_output_move): Add FLI instruction outputs.
(riscv_print_operand): Print a finite FLI constant as a hexadecimal
FP representation or a string operand "min", "inf" or "nan".
* config/riscv/riscv.md (movhf_hardfloat, movsf_hardfloat,
movdf_hardfloat_rv32, movdf_hardfloat_rv64): Add "H" variant
for 'Zfa' extension-based FP constant moves.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zfa-fli-1.c: New test.
* gcc.target/riscv/zfa-fli-2.c: Ditto.
* gcc.target/riscv/zfa-fli-3.c: Ditto.
* gcc.target/riscv/zfa-fli-4.c: Ditto.
* gcc.target/riscv/zfa-fli-5.c: Ditto.
* gcc.target/riscv/zfa-fli-6.c: Ditto.
* gcc.target/riscv/zfa-fli-7.c: Ditto.
* gcc.target/riscv/zfa-fli-8.c: Ditto.
---
 gcc/config/riscv/constraints.md|   7 +
 gcc/config/riscv/riscv-protos.h|  34 +++
 gcc/config/riscv/riscv.cc  | 250 -
 gcc/config/riscv/riscv.md  |  24 +-
 gcc/testsuite/gcc.target/riscv/zfa-fli-1.c |  24 ++
 gcc/testsuite/gcc.target/riscv/zfa-fli-2.c |  24 ++
 gcc/testsuite/gcc.target/riscv/zfa-fli-3.c |  14 ++
 gcc/testsuite/gcc.target/riscv/zfa-fli-4.c | 111 +
 gcc/testsuite/gcc.target/riscv/zfa-fli-5.c |  98 
 gcc/testsuite/gcc.target/riscv/zfa-fli-6.c |  61 +
 gcc/testsuite/gcc.target/riscv/zfa-fli-7.c |  30 +++
 gcc/testsuite/gcc.target/riscv/zfa-fli-8.c |  39 
 12 files changed, 692 insertions(+), 24 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zfa-fli-8.c

diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 44525b2da491..d57c72ef14f0 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -98,6 +98,13 @@
   (and (match_code "const_double")
(match_test "op == CONST0_RTX (mode)")))
 
+;; Floating-point constant that can be generated by a FLI instruction
+;; in the 'Zfa' standard extension.
+(define_constraint "H"
+  "@internal"
+  (and (match_code "const_double")
+   (match_test "riscv_get_float_fli_const (op).valid")))
+
 (define_memory_constraint "A"
   "An address that is held in a general-purpose register."
   (and (match_code "mem")
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 2fbed04ff84c..6effa2437251 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -80,6 +80,39 @@ struct riscv_address_info {
   enum riscv_symbol_type symbol_type;
 };
 
+/* Classifies a floating point constant possibly retrieved by
+   the FLI instructions.
+
+   RISCV_FLOAT_CONST_MIN
+   The minimum positive normal value for given mode.
+
+   RISCV_FLOAT_CONST_INF
+   Positive infinity.
+
+   RISCV_FLOAT_CONST_NAN
+   Canonical NaN (positive, quiet and zero payload NaN).
+
+   RISCV_FLOAT_CONST_FINITE
+   A finite number.  */
+enum riscv_float_fli_const_type {
+  RISCV_FLOAT_CONST_MIN,
+  RISCV_FLOAT_CONST_INF,
+  RISCV_FLOAT_CONST_NAN,
+  RISCV_FLOAT_CONST_FINITE,
+};
+
+/* Information about a floating point constant possibly retrieved by
+   the FLI instructions.  */
+struct riscv_float_fli_const {
+  bool valid: 1;
+  bool sign: 1;
+  enum riscv_float_fli_const_type type: 2;
+  /* Highest 2 bits of IEEE754 mantissa on RISCV_FLOAT_CONST_FINITE.  */
+  unsigned int mantissa_belo

[PATCH 1/2] RISC-V: Add support for the 'Zfa' extension

2023-08-13 Thread Tsukasa OI via Gcc-patches
From: Tsukasa OI 

This commit adds support for the 'Zfa' extension containing additional
floating point instructions, version 0.1 (stable and approved).

gcc/ChangeLog:

* common/config/riscv/riscv-common.cc
(riscv_implied_info): Add implication 'Zfa' -> 'F'.
(riscv_ext_version_table): Add support for the 'Zfa' extension.
(riscv_ext_flag_table): Set MASK_ZFA if 'Zfa' is available.
* config/riscv/riscv-opts.h (MASK_ZFA, TARGET_ZFA): New.
---
 gcc/common/config/riscv/riscv-common.cc | 3 +++
 gcc/config/riscv/riscv-opts.h   | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc
index 21f83f26371f..01d68856bc40 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -121,6 +121,7 @@ static const riscv_implied_info_t riscv_implied_info[] =
   {"zvksg", "zvks"},
   {"zvksg", "zvkg"},
 
+  {"zfa", "f"},
   {"zfh", "zfhmin"},
   {"zfhmin", "f"},
   {"zvfhmin", "zve32f"},
@@ -257,6 +258,7 @@ static const struct riscv_ext_version 
riscv_ext_version_table[] =
   {"zvl32768b", ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvl65536b", ISA_SPEC_CLASS_NONE, 1, 0},
 
+  {"zfa",   ISA_SPEC_CLASS_NONE, 0, 1},
   {"zfh",   ISA_SPEC_CLASS_NONE, 1, 0},
   {"zfhmin",ISA_SPEC_CLASS_NONE, 1, 0},
   {"zvfhmin",   ISA_SPEC_CLASS_NONE, 1, 0},
@@ -1400,6 +1402,7 @@ static const riscv_ext_flag_table_t 
riscv_ext_flag_table[] =
   {"zvl32768b", &gcc_options::x_riscv_zvl_flags, MASK_ZVL32768B},
   {"zvl65536b", &gcc_options::x_riscv_zvl_flags, MASK_ZVL65536B},
 
+  {"zfa",   &gcc_options::x_riscv_zf_subext, MASK_ZFA},
   {"zfhmin",&gcc_options::x_riscv_zf_subext, MASK_ZFHMIN},
   {"zfh",   &gcc_options::x_riscv_zf_subext, MASK_ZFH},
   {"zvfhmin",   &gcc_options::x_riscv_zf_subext, MASK_ZVFHMIN},
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index aeea805b3425..e31ec7c4074a 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -249,11 +249,13 @@ enum riscv_entity
 #define MASK_ZFH  (1 << 1)
 #define MASK_ZVFHMIN  (1 << 2)
 #define MASK_ZVFH (1 << 3)
+#define MASK_ZFA  (1 << 4)
 
 #define TARGET_ZFHMIN  ((riscv_zf_subext & MASK_ZFHMIN) != 0)
 #define TARGET_ZFH ((riscv_zf_subext & MASK_ZFH) != 0)
 #define TARGET_ZVFHMIN ((riscv_zf_subext & MASK_ZVFHMIN) != 0)
 #define TARGET_ZVFH((riscv_zf_subext & MASK_ZVFH) != 0)
+#define TARGET_ZFA ((riscv_zf_subext & MASK_ZFA) != 0)
 
 #define MASK_ZMMUL  (1 << 0)
 #define TARGET_ZMMUL((riscv_zm_subext & MASK_ZMMUL) != 0)
-- 
2.41.0



[PATCH 0/2] RISC-V: Make "prefetch.i" built-in usable

2023-08-13 Thread Tsukasa OI via Gcc-patches
Hello,

and... I think this might be my first *large* patch set for GCC
contribution and definitely the first one to touch the machine description.

So, please review it carefully.


Background
===

This patch set adds an optimization to FP constant initialization using a
FLI instruction, which is a part of the 'Zfa' extension which provides
additional floating-point instructions.

FLI instructions ("fli.h" for binary16, "fli.s" for binary32, "fli.d" for
binary64 and "fli.q" for binary128 [which can be ignored because current
GCC for RISC-V does not natively support binary128]) provide a
load-immediate operation for the following 32 immediates.

| Binary Encoding | Immediate (and its part of binary representation) |
| --- | --|
|`00000` ( 0) | -1.0  (-0b1.00 * 2^(+ 0)) |
|`00001` ( 1) | Minimum positive normal value |
| | sign=[0] exponent=[0..01] significand=[000..000]  |
|`00010` ( 2) | 1.00*2^(-16)  (+0b1.00 * 2^(-16)) |
|`00011` ( 3) | 1.00*2^(-15)  (+0b1.00 * 2^(-15)) |
|`00100` ( 4) | 1.00*2^(- 8)  (+0b1.00 * 2^(- 8)) |
|`00101` ( 5) | 1.00*2^(- 7)  (+0b1.00 * 2^(- 7)) |
|`00110` ( 6) | 1.00*2^(- 4)  (+0b1.00 * 2^(- 4)) = 0.0625|
|`00111` ( 7) | 1.00*2^(- 3)  (+0b1.00 * 2^(- 3)) = 0.125 |
|`01000` ( 8) | 1.00*2^(- 2)  (+0b1.00 * 2^(- 2)) : 0.25  |
|`01001` ( 9) | 1.25*2^(- 2)  (+0b1.01 * 2^(- 2)) : 0.3125|
|`01010` (10) | 1.50*2^(- 2)  (+0b1.10 * 2^(- 2)) : 0.375 |
|`01011` (11) | 1.75*2^(- 2)  (+0b1.11 * 2^(- 2)) : 0.4375|
|`01100` (12) | 1.00*2^(- 1)  (+0b1.00 * 2^(- 1)) : 0.5   |
|`01101` (13) | 1.25*2^(- 1)  (+0b1.01 * 2^(- 1)) : 0.625 |
|`01110` (14) | 1.50*2^(- 1)  (+0b1.10 * 2^(- 1)) : 0.75  |
|`01111` (15) | 1.75*2^(- 1)  (+0b1.11 * 2^(- 1)) : 0.875 |
|`10000` (16) | 1.00*2^(+ 0)  (+0b1.00 * 2^(+ 0)) : 1.0   |
|`10001` (17) | 1.25*2^(+ 0)  (+0b1.01 * 2^(+ 0)) : 1.25  |
|`10010` (18) | 1.50*2^(+ 0)  (+0b1.10 * 2^(+ 0)) : 1.5   |
|`10011` (19) | 1.75*2^(+ 0)  (+0b1.11 * 2^(+ 0)) : 1.75  |
|`10100` (20) | 1.00*2^(+ 1)  (+0b1.00 * 2^(+ 1)) : 2.0   |
|`10101` (21) | 1.25*2^(+ 1)  (+0b1.01 * 2^(+ 1)) : 2.5   |
|`10110` (22) | 1.50*2^(+ 1)  (+0b1.10 * 2^(+ 1)) : 3.0   |
|`10111` (23) | 1.00*2^(+ 2)  (+0b1.00 * 2^(+ 2)) = 4 |
|`11000` (24) | 1.00*2^(+ 3)  (+0b1.00 * 2^(+ 3)) = 8 |
|`11001` (25) | 1.00*2^(+ 4)  (+0b1.00 * 2^(+ 4)) = 16|
|`11010` (26) | 1.00*2^(+ 7)  (+0b1.00 * 2^(+ 7)) = 128   |
|`11011` (27) | 1.00*2^(+ 8)  (+0b1.00 * 2^(+ 8)) = 256   |
|`11100` (28) | 1.00*2^(+15)  (+0b1.00 * 2^(+15)) = 32768 |
|`11101` (29) | 1.00*2^(+16)  (+0b1.00 * 2^(+16)) = 65536 |
| | On "fli.h", this is equivalent to positive inf.   |
|`11110` (30) | Positive infinity |
| | sign=[0] exponent=[1..11] significand=[000..000]  |
|`11111` (31) | Canonical NaN (positive, quiet and zero payload)  |
| | sign=[0] exponent=[1..11] significand=[100..000]  |

Currently, initializing a FP constant (except zero) involves memory and its
use can be reduced by FLI instructions.

We may have room to generate more complex constants with multiple FLI
instructions (e.g. like long integer constants) but for starters, we can
begin with optimizing one FP constant initialization with one FLI
instruction (and because FP arithmetic often requires larger latency,
the benefit of generating multi-FLI sequences is not as high as for integers).


FLI FP constant checking
=

An instruction with a similar role to RISC-V's FLI instructions is the Arm/
AArch64's vmov.f32 instruction. It provides a load-immediate operation for
constant that can be represented in the following form:

> (-1)^s * 0b1.mmmm * 2^r   (where -3 <= r <= +4; fits in 3-bits)

This patch is largely influenced by AArch64's handling but
compared to this, handling RISC-V's FLI FP constant can be a little tricky.

*   FLI normally generates only values with sign bit 0 except the binary
encoding 0 (which loads -1.0 with sign bit 1).
*   Not only finite values, FLI can generate positive infinity and
canonical NaN.
*   Because FLI can generate canonical NaN, handling NaN is preferred but
FLI only generates canonical NaN.  Since we can easily create a non-
canonical NaN with __builtin_nan ("[PAYLOAD]") and that could be a
direct return value of a function, we must reject non-canonical NaNs
(otherwise it'll generate "fli.d fa0,nan" where NaN is non-canonical).
*   Exponent range and mantissa constraints are a bit tricky.
On binary encodings 8-22,

Re: [PATCH v1 2/6] LoongArch: improved target configuration interface

2023-08-13 Thread Xi Ruoyao via Gcc-patches
On Mon, 2023-08-14 at 11:57 +0800, Yang Yujie wrote:
> loongarch64)
> -   tune_pattern="loongarch64|la464"
> -   tune_default="la464"
> +   tune_pattern="native|abi-default|loongarch64|la464"

I think we can remove tune_pattern completely.  There is no reason to
limit --with-tune setting based on --with-arch setting.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH v1 3/6] LoongArch: define preprocessing macros "__loongarch_{arch,tune}"

2023-08-13 Thread Xi Ruoyao via Gcc-patches
On Mon, 2023-08-14 at 11:57 +0800, Yang Yujie wrote:
> These are exported according to the LoongArch Toolchain Conventions[1]
> as a replacement of the obsolete "_LOONGARCH_{ARCH,TUNE}" macros,
> which are expanded to strings representing the actual architecture
> and microarchitecture of the target.
> 
> [1] currently released at https://github.com/loongson/LoongArch-Documentation
>     /blob/main/docs/LoongArch-toolchain-conventions-EN.adoc
> 
> gcc/ChangeLog:
> 
>     * gcc/config/loongarch/loongarch-c.cc: Export macros
>     "__loongarch_{arch,tune}" in the preprocessor.

Ok.  I think this can be applied anyway (regardless of other patches).

> ---
>  gcc/config/loongarch/loongarch-c.cc | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/gcc/config/loongarch/loongarch-c.cc 
> b/gcc/config/loongarch/loongarch-c.cc
> index 660c68f0e06..7bee037cc4a 100644
> --- a/gcc/config/loongarch/loongarch-c.cc
> +++ b/gcc/config/loongarch/loongarch-c.cc
> @@ -64,6 +64,9 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
>    LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", la_target.cpu_arch);
>    LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", la_target.cpu_tune);
>  
> +  LARCH_CPP_SET_PROCESSOR ("__loongarch_arch", la_target.cpu_arch);
> +  LARCH_CPP_SET_PROCESSOR ("__loongarch_tune", la_target.cpu_tune);
> +
>    /* Base architecture / ABI.  */
>    if (TARGET_64BIT)
>  {

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH v1 2/6] LoongArch: improved target configuration interface

2023-08-13 Thread Xi Ruoyao via Gcc-patches
On Mon, 2023-08-14 at 11:57 +0800, Yang Yujie wrote:
> The configure script and the GCC driver are updated so that
> it is easier to customize and control GCC builds for targeting
> different LoongArch implementations.
> 
> * Support options for LoongArch SIMD extensions:
>   new configure options --with-simd={none,lsx,lasx};
>   new driver options -m[no]-l[a]sx / -msimd={none,lsx,lasx}.

What's the relationship between -mlasx and -msimd=lasx?  What will
happen if the user specifies -mlasx -msimd=none or -mlasx -msimd=lsx?

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [PATCH v1 4/6] LoongArch: use -mstrict-align by default when building libraries

2023-08-13 Thread Xi Ruoyao via Gcc-patches
On Mon, 2023-08-14 at 11:57 +0800, Yang Yujie wrote:
> LoongArch processors may not support memory accesses without natural
> alignments.  Building libraries with -mstrict-align may help with
> toolchain binary compatibility and performance on these implementations
> (e.g. Loongson 2K1000LA).

I don't think it's a good idea.  You should provide a configuration-time
option (maybe named --with-strict-align) to make -mstrict-align the
default instead, thus both the libraries and the compiled user code will
be suitable for 2K1000.

> With this patch, no significant performance degradation is observed on
> current mainstream LoongArch processors.
> 
> gcc/ChangeLog:
> 
>     * gcc/config/t-linux: add -mstrict-align via self_specs
>     when building GCC libraries.
> ---
>  gcc/config/loongarch/t-linux | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
> index 75bb430c555..2a170d600a9 100644
> --- a/gcc/config/loongarch/t-linux
> +++ b/gcc/config/loongarch/t-linux
> @@ -35,6 +35,9 @@ gen_mlib_spec = $(if $(word 2,$1),\
>  # clean up the result of DRIVER_SELF_SPEC to avoid conflict
>  lib_build_self_spec  = %  
> +# build libraries with -mstrict-align by default
> +lib_build_self_spec += -mstrict-align
> +
>  # append user-specified build options from --with-multilib-list
>  lib_build_self_spec += $(foreach mlib,$(subst $(comma), 
> ,$(TM_MULTILIB_CONFIG)),\
> $(call gen_mlib_spec,$(subst /, ,$(mlib

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH] x86: Update model values for Raptorlake.

2023-08-13 Thread Cui, Lili via Gcc-patches
Committed as obvious, and backported to GCC13.

Lili.


Update model values for Raptorlake according to SDM.

gcc/ChangeLog

* common/config/i386/cpuinfo.h (get_intel_cpu): Add model value 0xba
to Raptorlake.
---
 gcc/common/config/i386/cpuinfo.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h
index ae48bc17771..dd7f0f6abfd 100644
--- a/gcc/common/config/i386/cpuinfo.h
+++ b/gcc/common/config/i386/cpuinfo.h
@@ -537,6 +537,7 @@ get_intel_cpu (struct __processor_model *cpu_model,
 case 0x9a:
   /* Alder Lake.  */
 case 0xb7:
+case 0xba:
 case 0xbf:
   /* Raptor Lake.  */
 case 0xaa:
-- 
2.25.1



[PATCH v1 1/6] LoongArch: a symmetric multilib subdir layout

2023-08-13 Thread Yang Yujie
Multilib in gcc is asymmetric, i.e. a "default" / "toplevel"
library variant is always built and installed, and:

 * it is installed directly under the gcc libdir (with no suffix);
 * it is selected / searched / linked against when the gcc driver
   does not receive any option from MULTILIB_OPTIONS.

Meanwhile, multilib options from MULTILIB_OPTIONS may add suffixes
(--print-multi-lib) to this base directory and find compatible
library variants there.

However, for LoongArch, we do not want such a "toplevel" library
installation since the default ABI may change.  We expect all
multilib variants of libraries to be installed to their designated
ABI-specific subdirs (e.g. base/lp64d) of the GCC libdir, so that
the default ABI can be configured arbitrarily (with --with-abi)
while the gcc libdir layout stays consistent.  This could be
helpful for the distribution packaging of GCC libraries.

This patch achieves this by overriding ${with_multisubdir} of
the "toplevel" library and disabling the duplicate "multilib"
variant (which exists because LA's driver always generates a
normalized "-mabi=" option from self_spec even if it is not
given on the command line, causing the semantics of the "toplevel"
library to duplicate those of a non-toplevel one).

Other architectures stay unaffected as long as they do not override
${with_multisubdir} in config-ml.in.

ChangeLog:

* config-ml.in: add loongarch support.  Allow overriding
toplevel multisubdir.

libgcc/ChangeLog:

* config/loongarch/t-loongarch: make symlinks of toplevel
libgcc.a under gcc/ for passing regression tests.
---
 config-ml.in| 39 -
 libgcc/config/loongarch/t-loongarch | 16 
 2 files changed, 54 insertions(+), 1 deletion(-)

diff --git a/config-ml.in b/config-ml.in
index 68854a4f16c..914cafb4b50 100644
--- a/config-ml.in
+++ b/config-ml.in
@@ -383,6 +383,18 @@ mips*-*-*)
  done
fi
;;
+loongarch*-*-*)
+   old_multidirs="${multidirs}"
+   multidirs=""
+   for x in ${old_multidirs}; do
+ case "$x" in
+   `${CC-gcc} --print-multi-directory`) : ;;
+   *) multidirs="${multidirs} ${x}" ;;
+ esac
+   done
+
+   with_multisubdir=`${CC-gcc} --print-multi-directory`
+   ;;
 msp430-*-*)
if [ x$enable_no_exceptions = xno ]
then
@@ -509,7 +521,7 @@ multi-do:
  compiler="$(CC)"; \
  for i in `$${compiler} --print-multi-lib 2>/dev/null`; do \
dir=`echo $$i | sed -e 's/;.*$$//'`; \
-   if [ "$${dir}" = "." ]; then \
+   if [ "$${dir}" = "." ] || [ "/$${dir}" = "$(MULTISUBDIR)" ]; then \
  true; \
else \
  if [ -d ../$${dir}/$${lib} ]; then \
@@ -595,6 +607,12 @@ if [ -z "${with_multisubdir}" ]; then
   ml_subdir=
   ml_builddotdot=
   : # ml_srcdotdot= # already set
+elif [ "${ml_toplevel_p}" = yes ]; then
+  : # When ml_* is set by ${host}.
+  ml_subdir="/${with_multisubdir}"
+  ml_builddotdot=
+  ml_builddotdot_link=`echo ${with_multisubdir} | sed -e 's:[^/][^/]*:..:g'`/
+  : # ml_srcdotdot= # already set
 else
   ml_subdir="/${with_multisubdir}"
   # The '[^/][^/]*' appears that way to work around a SunOS sed bug.
@@ -654,6 +672,25 @@ mv Makefile.tem ${Makefile}
 
 if [ "${ml_toplevel_p}" = yes ]; then
 
+# If multisubdir is set on the top level, create a symbolic link
+# to cope with in-tree regression tests (see dejagnu: libgloss.exp).
+
+if [ -n "${with_multisubdir}" ]; then
+  if [ "${ml_verbose}" = --verbose ]; then
+echo "Creating multilib link (${with_multisubdir}) for the default 
library."
+echo "pwd: `${PWDCMD-pwd}`"
+  fi
+
+  ml_origdir=`${PWDCMD-pwd}`
+  ml_libdir=`echo "$ml_origdir" | sed -e 's,^.*/,,'`
+  # cd to top-level-build-dir/${with_target_subdir}
+  cd ..
+
+  mkdir -p "${with_multisubdir}"
+  ln -sf "${ml_builddotdot_link}${ml_libdir}" "${with_multisubdir}/"
+  cd "${ml_origdir}"
+fi
+
 # We must freshly configure each subdirectory.  This bit of code is
 # actually partially stolen from the main configure script.  FIXME.
 
diff --git a/libgcc/config/loongarch/t-loongarch 
b/libgcc/config/loongarch/t-loongarch
index 2a7dbf6ca83..791a8c52f24 100644
--- a/libgcc/config/loongarch/t-loongarch
+++ b/libgcc/config/loongarch/t-loongarch
@@ -5,3 +5,19 @@ softfp_int_modes := si di
 softfp_extensions :=
 softfp_truncations :=
 softfp_exclude_libgcc2 := n
+
+# Since we employ a symmetric multilib layout, i.e. the default lib
+# always gets installed to its ${multisubdir} (see config-ml.in),
+# we need to copy it (again) into the GCC directory
+# (without the multilib suffix) in order to keep some regression
+# tests working (libstdc++), because "too many things knows
+# about the layout of the build tree" for now (libgcc/Makefile.in).
+
+# If we are on the top level (default library),
+# copy libgcc into gcc build directory.
+ifneq ($(MULTIDO),true)
+all: install-default-lib
+.PHONY: i

[PATCH v1 6/6] LoongArch: support loongarch*-elf target

2023-08-13 Thread Yang Yujie
gcc/ChangeLog:

* config.gcc: add loongarch*-elf target.
* config/loongarch/elf.h: New file.
Link against newlib by default.

libgcc/ChangeLog:

* config.host: add loongarch*-elf target.
---
 gcc/config.gcc | 21 ---
 gcc/config/loongarch/elf.h | 52 ++
 libgcc/config.host |  9 +--
 3 files changed, 76 insertions(+), 6 deletions(-)
 create mode 100644 gcc/config/loongarch/elf.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 9412f73fe35..0661f13ebc8 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -2491,6 +2491,18 @@ loongarch*-*-linux*)
gcc_cv_initfini_array=yes
;;
 
+loongarch*-*-elf*)
+   tm_file="elfos.h newlib-stdint.h ${tm_file}"
+   tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
+   tmake_file="${tmake_file} loongarch/t-linux"
+   gnu_ld=yes
+   gas=yes
+
+   # For .init_array support.  The configure script cannot always
+   # automatically detect that GAS supports it, yet we require it.
+   gcc_cv_initfini_array=yes
+   ;;
+
 mips*-*-netbsd*)   # NetBSD/mips, either endian.
target_cpu_default="MASK_ABICALLS"
tm_file="elfos.h ${tm_file} mips/elf.h ${nbsd_tm_file} mips/netbsd.h"
@@ -4904,16 +4916,16 @@ case "${target}" in
 
# Infer ABI from the triplet.
case ${target} in
-   loongarch64-*-*-*f64)
+   loongarch64-*f64)
abi_pattern="lp64d"
;;
-   loongarch64-*-*-*f32)
+   loongarch64-*f32)
abi_pattern="lp64f"
;;
-   loongarch64-*-*-*sf)
+   loongarch64-*sf)
abi_pattern="lp64s"
;;
-   loongarch64-*-*-*)
+   loongarch64-*)
abi_pattern="lp64[dfs]"
abi_default="lp64d"
;;
@@ -4930,6 +4942,7 @@ case "${target}" in
case ${target} in
  *-linux-gnu*)  triplet_os="linux-gnu";;
  *-linux-musl*) triplet_os="linux-musl";;
+ *-elf*)triplet_os="elf";;
  *)
  echo "Unsupported target ${target}." 1>&2
  exit 1
diff --git a/gcc/config/loongarch/elf.h b/gcc/config/loongarch/elf.h
new file mode 100644
index 000..39e2cbc7b5e
--- /dev/null
+++ b/gcc/config/loongarch/elf.h
@@ -0,0 +1,52 @@
+/* Definitions for LoongArch ELF-based systems.
+   Copyright (C) 1998-2023 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* Define the size of the wide character type.  */
+#undef WCHAR_TYPE
+#define WCHAR_TYPE "int"
+
+#undef WCHAR_TYPE_SIZE
+#define WCHAR_TYPE_SIZE 32
+
+
+/* GNU-specific SPEC definitions.  */
+#define GNU_USER_LINK_EMULATION "elf" ABI_GRLEN_SPEC "loongarch"
+
+#undef GNU_USER_TARGET_LINK_SPEC
+#define GNU_USER_TARGET_LINK_SPEC \
+  "%{shared} -m " GNU_USER_LINK_EMULATION
+
+
+/* Link against Newlib libraries, because the ELF backend assumes Newlib.
+   Handle the circular dependence between libc and libgloss. */
+#undef  LIB_SPEC
+#define LIB_SPEC "--start-group -lc %{!specs=nosys.specs:-lgloss} --end-group"
+
+#undef LINK_SPEC
+#define LINK_SPEC GNU_USER_TARGET_LINK_SPEC
+
+#undef  STARTFILE_SPEC
+#define STARTFILE_SPEC "crt0%O%s crtbegin%O%s"
+
+#undef  ENDFILE_SPEC
+#define ENDFILE_SPEC "crtend%O%s"
+
+#undef SUBTARGET_CC1_SPEC
+#define SUBTARGET_CC1_SPEC "%{profile:-p}"
+
diff --git a/libgcc/config.host b/libgcc/config.host
index c94d69d84b7..6a112a07b14 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -136,7 +136,7 @@ hppa*-*-*)
 lm32*-*-*)
cpu_type=lm32
;;
-loongarch*-*-*)
+loongarch*-*)
cpu_type=loongarch
tmake_file="loongarch/t-loongarch"
if test "${libgcc_cv_loongarch_hard_float}" = yes; then
@@ -944,7 +944,7 @@ lm32-*-uclinux*)
 extra_parts="$extra_parts crtbegin.o crtendS.o crtbeginT.o"
 tmake_file="lm32/t-lm32 lm32/t-uclinux t-libgcc-pic t-softfp-sfdf 
t-softfp"
;;
-loongarch*-*-linux*)
+loongarch*-linux*)
extra_parts="$extra_parts crtfastmath.o"
tmake_file="${tmake_file} t-crtfm loongarch/t-crtstuff"
case ${host}

[PATCH v1 4/6] LoongArch: use -mstrict-align by default when building libraries

2023-08-13 Thread Yang Yujie
LoongArch processors may not support memory accesses without natural
alignments.  Building libraries with -mstrict-align may help with
toolchain binary compatibility and performance on these implementations
(e.g. Loongson 2K1000LA).

With this patch, no significant performance degradation is observed on
current mainstream LoongArch processors.

gcc/ChangeLog:

* gcc/config/t-linux: add -mstrict-align via self_specs
when building GCC libraries.
---
 gcc/config/loongarch/t-linux | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
index 75bb430c555..2a170d600a9 100644
--- a/gcc/config/loongarch/t-linux
+++ b/gcc/config/loongarch/t-linux
@@ -35,6 +35,9 @@ gen_mlib_spec = $(if $(word 2,$1),\
 # clean up the result of DRIVER_SELF_SPEC to avoid conflict
 lib_build_self_spec  = %

[PATCH v1 2/6] LoongArch: improved target configuration interface

2023-08-13 Thread Yang Yujie
The configure script and the GCC driver are updated so that
it is easier to customize and control GCC builds for targeting
different LoongArch implementations.

* Support options for LoongArch SIMD extensions:
  new configure options --with-simd={none,lsx,lasx};
  new driver options -m[no]-l[a]sx / -msimd={none,lsx,lasx}.

* Mark some LoongArch-specific "flags" with overlapping state-changing
  semantics as "driver deferred" so that they could be processed in the
  order they appear in the GCC driver. In this way, the final result can
  be canonicalized into "parameters" (options with "=") for reliable use in
  specs rules or by the compiler proper.

* Enforce the priority of configuration paths (for ={fpu,tune,simd}):
  -m > -march-implied > --with- > --with-arch-implied.

* Allow the user to control the compiler options used when building
  GCC libraries for each multilib variant via --with-multilib-list
  and --with-multilib-default.  This could become more useful when
  we have 32-bit support later.

  Example 1: the following configure option
--with-multilib-list=lp64d/la464/mno-strict-align/msimd=lsx,lp64s/mfpu=32
  | || |
-mabi=ABI  -march=ARCH  a list of other options
  (mandatory)  (optional) (optional)

   builds two sets of libraries:
  lp64d ABI (the default ABI if no --with-abi is given,
 built with "-march=la464 -mno-strict-align")
  lp64s ABI (built with "-march=abi-default -mfpu=32")

  Example 2: the following 3 configure options

--with-arch=loongarch64
--with-multilib-list=lp64d,lp64f,lp64s/la464
--with-multilib-default=fixed/mno-strict-align/mfpu=64
 ||   |
-march=ARCH   a list of other options
 (optional)(optional)

is equivalent to (in terms of building libraries):

--with-multilib-list=\
lp64d/loongarch64/mno-strict-align/mfpu=64,\
lp64f/loongarch64/mno-strict-align/mfpu=64,\
lp64s/la464

  Note:
1. the GCC driver and compiler proper do not support
   "-march=fixed". "fixed" that appear here acts as a placeholder for
   "use whatever ARCH in --with-arch=ARCH" (or the default value
   of --with-arch=ARCH if --with-arch is not explicitly configured).

2. if the ARCH part is omitted, "-march=abi-default"
   is used for building all library variants, which
   practically means enabling the minimal ISA features
   that can support the given ABI.

gcc/ChangeLog:

* gcc/config.gcc: Add new configuration options --with-arch
and --with-multilib-default; slightly adjust the handling
of configure-time defaults.
* gcc/config/loongarch/genopts/loongarch-strings: Add keyword
"abi-default" as in "-march=abi-default".
* gcc/config/loongarch/loongarch-str.h: Likewise
* gcc/config/loongarch/genopts/loongarch.opt.in: Mark flags
as "driver deferred" so that a state machine could be applied.
* gcc/config/loongarch/loongarch.opt: Likewise.
* gcc/config/loongarch/loongarch-cpu.cc: refactor code for
the new internal representation of -march=native.
* gcc/config/loongarch/loongarch-cpu.h: Likewise.
* gcc/config/loongarch/loongarch-c.cc: Likewise.
* gcc/config/loongarch/loongarch-def.c: add SIMD attributes
for struct loongarch_target.
* gcc/config/loongarch/loongarch-def.h: Likewise.
* gcc/config/loongarch/loongarch-driver.cc: Implement a state
machine to canonicalize flags into parameters.
* gcc/config/loongarch/loongarch-driver.h: Likewise.
* gcc/config/loongarch/loongarch-opts.cc: Use the new internal
target representation as input as well as the output.
* gcc/config/loongarch/loongarch-opts.h: Likewise.
* gcc/config/loongarch/loongarch.cc: Likewise
* gcc/config/loongarch/t-linux: Support building GCC libraries
with customized compiler options using specs.
* gcc/doc/invoke.texi: document -m[no-]l[a]sx and -msimd=.
* gcc/doc/install.texi: document --with-multilib-default
and --with-multilib list.
---
 gcc/config.gcc| 253 +
 .../loongarch/genopts/loongarch-strings   |   8 +-
 gcc/config/loongarch/genopts/loongarch.opt.in |  62 +--
 gcc/config/loongarch/la464.md |  32 +-
 gcc/config/loongarch/loongarch-c.cc   |   4 +-
 gcc/config/loongarch/loongarch-cpu.cc | 260 -
 gcc/config/loongarch/loongarch-cpu.h  |   3 +-
 gcc/config/loongarch/loongarch-def.c  |  55 ++-
 gcc/config/loongarch/loongarch-def.h  |  57 +--
 gcc/config/loongarch/loongarch-driver.cc  | 205 +-
 gcc/config/loongarch/loongarch-driver.h   |  40 +-
 gcc/config/loongarch/loongarch-opts.cc| 352 ++

[PATCH v1 3/6] LoongArch: define preprocessing macros "__loongarch_{arch, tune}"

2023-08-13 Thread Yang Yujie
These are exported according to the LoongArch Toolchain Conventions[1]
as a replacement of the obsolete "_LOONGARCH_{ARCH,TUNE}" macros,
which are expanded to strings representing the actual architecture
and microarchitecture of the target.

[1] currently released at https://github.com/loongson/LoongArch-Documentation
/blob/main/docs/LoongArch-toolchain-conventions-EN.adoc

gcc/ChangeLog:

* gcc/config/loongarch/loongarch-c.cc: Export macros
"__loongarch_{arch,tune}" in the preprocessor.
---
 gcc/config/loongarch/loongarch-c.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/loongarch/loongarch-c.cc 
b/gcc/config/loongarch/loongarch-c.cc
index 660c68f0e06..7bee037cc4a 100644
--- a/gcc/config/loongarch/loongarch-c.cc
+++ b/gcc/config/loongarch/loongarch-c.cc
@@ -64,6 +64,9 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
   LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", la_target.cpu_arch);
   LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", la_target.cpu_tune);
 
+  LARCH_CPP_SET_PROCESSOR ("__loongarch_arch", la_target.cpu_arch);
+  LARCH_CPP_SET_PROCESSOR ("__loongarch_tune", la_target.cpu_tune);
+
   /* Base architecture / ABI.  */
   if (TARGET_64BIT)
 {
-- 
2.41.0



[PATCH v1 5/6] LoongArch: export headers for building GCC plugins

2023-08-13 Thread Yang Yujie
gcc/ChangeLog:
* gcc/config/loongarch/t-loongarch: include loongarch-def.h,
loongarch-tune.h and loongarch-driver.h in OPTIONS_H_EXTRA.

Co-authored-by: Lulu Cheng 
---
 gcc/config/loongarch/t-loongarch | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
index 4ca08035bc8..ea633c69f7b 100644
--- a/gcc/config/loongarch/t-loongarch
+++ b/gcc/config/loongarch/t-loongarch
@@ -20,6 +20,9 @@
 LA_MULTIARCH_TRIPLET = $(patsubst LA_MULTIARCH_TRIPLET=%,%,$\
 $(filter LA_MULTIARCH_TRIPLET=%,$(tm_defines)))
 
+OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
+  $(srcdir)/config/loongarch/loongarch-tune.h \
+  $(srcdir)/config/loongarch/loongarch-driver.h
 # String definition header
 LA_STR_H = $(srcdir)/config/loongarch/loongarch-str.h
 
@@ -62,7 +65,7 @@ loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c 
$(LA_STR_H)
 $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true
 s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \
$(srcdir)/config/loongarch/genopts/loongarch.opt.in \
-   $(srcdir)/config/loongarch/genopts/loongarch-strings $(LA_STR_H)
+   $(srcdir)/config/loongarch/genopts/loongarch-strings
$(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh opt \
 $(srcdir)/config/loongarch/genopts/loongarch.opt.in \
 > tmp-loongarch.opt
-- 
2.41.0



[PATCH v1] RISC-V: Support RVV VFWMSAC rounding mode intrinsic API

2023-08-13 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch would like to support the rounding mode API for the
VFWMSAC as the below samples.

* __riscv_vfwmsac_vv_f64m2_rm
* __riscv_vfwmsac_vv_f64m2_rm_m
* __riscv_vfwmsac_vf_f64m2_rm
* __riscv_vfwmsac_vf_f64m2_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfwmsac_frm): New class for frm.
(vfwmsac_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfwmsac_frm): New intrinsic function definition.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-wmsac.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  | 25 ++
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  2 +
 .../riscv/rvv/base/float-point-wmsac.c| 47 +++
 4 files changed, 75 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wmsac.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 4a7f2b8e3e9..5a5da903cb2 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -585,6 +585,29 @@ public:
   }
 };
 
+/* Implements below instructions for frm
+   - vfwmsac
+*/
+class vfwmsac_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_widen_ternop_insn (
+   code_for_pred_widen_mul_scalar (MINUS, e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_widen_ternop_insn (
+   code_for_pred_widen_mul (MINUS, e.vector_mode ()));
+
+gcc_unreachable ();
+  }
+};
+
 /* Implements vrsub.  */
 class vrsub : public function_base
 {
@@ -2365,6 +2388,7 @@ static CONSTEXPR const vfwmacc_frm vfwmacc_frm_obj;
 static CONSTEXPR const vfwnmacc vfwnmacc_obj;
 static CONSTEXPR const vfwnmacc_frm vfwnmacc_frm_obj;
 static CONSTEXPR const vfwmsac vfwmsac_obj;
+static CONSTEXPR const vfwmsac_frm vfwmsac_frm_obj;
 static CONSTEXPR const vfwnmsac vfwnmsac_obj;
 static CONSTEXPR const unop vfsqrt_obj;
 static CONSTEXPR const float_misc vfrsqrt7_obj;
@@ -2610,6 +2634,7 @@ BASE (vfwmacc_frm)
 BASE (vfwnmacc)
 BASE (vfwnmacc_frm)
 BASE (vfwmsac)
+BASE (vfwmsac_frm)
 BASE (vfwnmsac)
 BASE (vfsqrt)
 BASE (vfrsqrt7)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 27c7deb4ec2..09356dd7ac8 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -180,6 +180,7 @@ extern const function_base *const vfwmacc_frm;
 extern const function_base *const vfwnmacc;
 extern const function_base *const vfwnmacc_frm;
 extern const function_base *const vfwmsac;
+extern const function_base *const vfwmsac_frm;
 extern const function_base *const vfwnmsac;
 extern const function_base *const vfsqrt;
 extern const function_base *const vfrsqrt7;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 481c3b899f2..e2a79607d04 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -380,6 +380,8 @@ DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, 
f_wwvv_ops)
 DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwfv_ops)
 DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwvv_ops)
 DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwfv_ops)
+DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwvv_ops)
+DEF_RVV_FUNCTION (vfwmsac_frm, alu_frm, full_preds, f_wwfv_ops)
 
 // 13.8. Vector Floating-Point Square-Root Instruction
 DEF_RVV_FUNCTION (vfsqrt, alu, full_preds, f_v_ops)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wmsac.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wmsac.c
new file mode 100644
index 000..886a0b13695
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wmsac.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+typedef float float32_t;
+
+vfloat64m2_t
+test_vfwmsac_vv_f32m1_rm (vfloat64m2_t vd, vfloat32m1_t op1, vfloat32m1_t op2,
+ size_t vl) {
+  return __riscv_vfwmsac_vv_f64m2_rm (vd, op1, op2, 0, vl);
+}
+
+vfloat64m2_t
+test_vfwmsac_vv_f32m1_rm_m (vbool32_t mask, vfloat64m2_t vd, vfloat32m1_t op1,
+   vfloat32m1_t op2, size_t vl) {
+  return __riscv_vfwmsac_vv_f64m2_rm_m (mask, vd, op1, op2, 1, vl);
+}
+
+vfloat64m2_t
+test_vfwmsac_vf_f32m1_rm (vfloat64m2

[PATCH] RISC-V: Deduplicate #error messages in testsuite

2023-08-13 Thread Tsukasa OI via Gcc-patches
From: Tsukasa OI 

"#error Feature macro not defined" is required to test the existence of an
extension through the preprocessor.  However, multiple occurrences of the
exact same error message will confuse the developer once an error is
encountered.

This commit replaces such error messages with
"#error Feature macro for `EXT' not defined" to make it clear which
macro is missing.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/zvkn.c: Deduplicate #error messages.
* gcc.target/riscv/zvkn-1.c: Ditto.
* gcc.target/riscv/zvknc.c: Ditto.
* gcc.target/riscv/zvknc-1.c: Ditto.
* gcc.target/riscv/zvknc-2.c: Ditto.
* gcc.target/riscv/zvkng.c: Ditto.
* gcc.target/riscv/zvkng-1.c: Ditto.
* gcc.target/riscv/zvkng-2.c: Ditto.
* gcc.target/riscv/zvks.c: Ditto.
* gcc.target/riscv/zvks-1.c: Ditto.
* gcc.target/riscv/zvksc.c: Ditto.
* gcc.target/riscv/zvksc-1.c: Ditto.
* gcc.target/riscv/zvksc-2.c: Ditto.
* gcc.target/riscv/zvksg.c: Ditto.
* gcc.target/riscv/zvksg-1.c: Ditto.
* gcc.target/riscv/zvksg-2.c: Ditto.
---
 gcc/testsuite/gcc.target/riscv/zvkn-1.c  | 10 +-
 gcc/testsuite/gcc.target/riscv/zvkn.c| 10 +-
 gcc/testsuite/gcc.target/riscv/zvknc-1.c | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvknc-2.c | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvknc.c   | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvkng-1.c | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvkng-2.c | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvkng.c   | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvks-1.c  | 10 +-
 gcc/testsuite/gcc.target/riscv/zvks.c| 10 +-
 gcc/testsuite/gcc.target/riscv/zvksc-1.c | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvksc-2.c | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvksc.c   | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvksg-1.c | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvksg-2.c | 14 +++---
 gcc/testsuite/gcc.target/riscv/zvksg.c   | 14 +++---
 16 files changed, 104 insertions(+), 104 deletions(-)

diff --git a/gcc/testsuite/gcc.target/riscv/zvkn-1.c 
b/gcc/testsuite/gcc.target/riscv/zvkn-1.c
index 83935b068320..84a3b577a496 100644
--- a/gcc/testsuite/gcc.target/riscv/zvkn-1.c
+++ b/gcc/testsuite/gcc.target/riscv/zvkn-1.c
@@ -3,23 +3,23 @@
 /* { dg-options "-march=rv32gc_zvkned_zvknhb_zvbb_zvkt" { target { rv32 } } } 
*/
 
 #ifndef __riscv_zvkn
-#error Feature macro not defined
+#error Feature macro for `Zvkn' not defined
 #endif
 
 #ifndef __riscv_zvkned
-#error Feature macro not defined
+#error Feature macro for `Zvkned' not defined
 #endif
 
 #ifndef __riscv_zvknhb
-#error Feature macro not defined
+#error Feature macro for `Zvknhb' not defined
 #endif
 
 #ifndef __riscv_zvbb
-#error Feature macro not defined
+#error Feature macro for `Zvbb' not defined
 #endif
 
 #ifndef __riscv_zvkt
-#error Feature macro not defined
+#error Feature macro for `Zvkt' not defined
 #endif
 
 int
diff --git a/gcc/testsuite/gcc.target/riscv/zvkn.c 
b/gcc/testsuite/gcc.target/riscv/zvkn.c
index af3db40c8f68..b42c1881b628 100644
--- a/gcc/testsuite/gcc.target/riscv/zvkn.c
+++ b/gcc/testsuite/gcc.target/riscv/zvkn.c
@@ -3,23 +3,23 @@
 /* { dg-options "-march=rv32gc_zvkn" { target { rv32 } } } */
 
 #ifndef __riscv_zvkn
-#error Feature macro not defined
+#error Feature macro for `Zvkn' not defined
 #endif
 
 #ifndef __riscv_zvkned
-#error Feature macro not defined
+#error Feature macro for `Zvkned' not defined
 #endif
 
 #ifndef __riscv_zvknhb
-#error Feature macro not defined
+#error Feature macro for `Zvknhb' not defined
 #endif
 
 #ifndef __riscv_zvbb
-#error Feature macro not defined
+#error Feature macro for `Zvbb' not defined
 #endif
 
 #ifndef __riscv_zvkt
-#error Feature macro not defined
+#error Feature macro for `Zvkt' not defined
 #endif
 
 int
diff --git a/gcc/testsuite/gcc.target/riscv/zvknc-1.c 
b/gcc/testsuite/gcc.target/riscv/zvknc-1.c
index eca276708954..5c456a6add78 100644
--- a/gcc/testsuite/gcc.target/riscv/zvknc-1.c
+++ b/gcc/testsuite/gcc.target/riscv/zvknc-1.c
@@ -3,31 +3,31 @@
 /* { dg-options "-march=rv32gc_zvkned_zvknhb_zvbb_zvkt_zvbc" { target { rv32 } 
} } */
 
 #ifndef __riscv_zvknc
-#error Feature macro not defined
+#error Feature macro for `Zvknc' not defined
 #endif
 
 #ifndef __riscv_zvkn
-#error Feature macro not defined
+#error Feature macro for `Zvkn' not defined
 #endif
 
 #ifndef __riscv_zvkned
-#error Feature macro not defined
+#error Feature macro for `Zvkned' not defined
 #endif
 
 #ifndef __riscv_zvknhb
-#error Feature macro not defined
+#error Feature macro for `Zvknhb' not defined
 #endif
 
 #ifndef __riscv_zvbb
-#error Feature macro not defined
+#error Feature macro for `Zvbb' not defined
 #endif
 
 #ifndef __riscv_zvkt
-#error Feature macro not defined
+#error Feature macro for `Zvkt' not defined
 #endif
 
 #ifndef __riscv_zvbc
-#error Feature macro not defin

Re: [PATCH] Generate vmovapd instead of vmovsd for moving DFmode between SSE_REGS.

2023-08-13 Thread Hongtao Liu via Gcc-patches
cc

On Mon, Aug 14, 2023 at 10:46 AM liuhongt  wrote:
>
> vmovapd can enable register renaming and have same code size as
> vmovsd. Similar for vmovsh vs vmovaps, vmovaps is 1 byte less than
> vmovsh.
>
> When TARGET_AVX512VL is not available, still generate
> vmovsd/vmovss/vmovsh to avoid vmovapd/vmovaps zmm16-31.
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk?
>
> gcc/ChangeLog:
>
> * config/i386/i386.md (movdf_internal): Generate vmovapd instead of
> vmovsd when moving DFmode between SSE_REGS.
> (movhi_internal): Generate vmovdqa instead of vmovsh when
> moving HImode between SSE_REGS.
> (mov_internal): Use vmovaps instead of vmovsh when
> moving HF/BFmode between SSE_REGS.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/pr89229-4a.c: Adjust testcase.
> ---
>  gcc/config/i386/i386.md| 20 +---
>  gcc/testsuite/gcc.target/i386/pr89229-4a.c |  4 +---
>  2 files changed, 18 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index c906d75b13e..77182e34fe1 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -2961,8 +2961,12 @@ (define_insn "*movhi_internal"
> ]
> (const_string "TI"))
> (eq_attr "alternative" "12")
> - (cond [(match_test "TARGET_AVX512FP16")
> + (cond [(match_test "TARGET_AVX512VL")
> +  (const_string "TI")
> +(match_test "TARGET_AVX512FP16")
>(const_string "HF")
> +(match_test "TARGET_AVX512F")
> +  (const_string "SF")
>  (match_test "TARGET_AVX")
>(const_string "TI")
>  (ior (not (match_test "TARGET_SSE2"))
> @@ -4099,8 +4103,12 @@ (define_insn "*movdf_internal"
>
>/* movaps is one byte shorter for non-AVX targets.  */
>(eq_attr "alternative" "13,17")
> -(cond [(match_test "TARGET_AVX")
> +(cond [(match_test "TARGET_AVX512VL")
> + (const_string "V2DF")
> +   (match_test "TARGET_AVX512F")
>   (const_string "DF")
> +   (match_test "TARGET_AVX")
> + (const_string "V2DF")
> (ior (not (match_test "TARGET_SSE2"))
>  (match_test "optimize_function_for_size_p 
> (cfun)"))
>   (const_string "V4SF")
> @@ -4380,8 +4388,14 @@ (define_insn "*mov_internal"
>(const_string "HI")
>(const_string "TI"))
>(eq_attr "alternative" "5")
> -(cond [(match_test "TARGET_AVX512FP16")
> +(cond [(match_test "TARGET_AVX512VL")
> +   (const_string "V4SF")
> +   (match_test "TARGET_AVX512FP16")
>   (const_string "HF")
> +   (match_test "TARGET_AVX512F")
> + (const_string "SF")
> +   (match_test "TARGET_AVX")
> + (const_string "V4SF")
> (ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
>  (match_test "TARGET_SSE_SPLIT_REGS"))
>   (const_string "V4SF")
> diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4a.c 
> b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
> index 5bc10d25619..8869650b0ad 100644
> --- a/gcc/testsuite/gcc.target/i386/pr89229-4a.c
> +++ b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
> @@ -1,4 +1,4 @@
> -/* { dg-do compile { target { ! ia32 } } } */
> +/* { dg-do assemble { target { ! ia32 } } } */
>  /* { dg-options "-O2 -march=skylake-avx512" } */
>
>  extern double d;
> @@ -12,5 +12,3 @@ foo1 (double x)
>asm volatile ("" : "+v" (xmm17));
>d = xmm17;
>  }
> -
> -/* { dg-final { scan-assembler-not "vmovapd" } } */
> --
> 2.31.1
>


-- 
BR,
Hongtao


[PATCH] Generate vmovapd instead of vmovsd for moving DFmode between SSE_REGS.

2023-08-13 Thread liuhongt via Gcc-patches
vmovapd can enable register renaming and has the same code size as
vmovsd. Similar for vmovsh vs vmovaps, vmovaps is 1 byte less than
vmovsh.

When TARGET_AVX512VL is not available, still generate
vmovsd/vmovss/vmovsh to avoid vmovapd/vmovaps zmm16-31.

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

* config/i386/i386.md (movdf_internal): Generate vmovapd instead of
vmovsd when moving DFmode between SSE_REGS.
(movhi_internal): Generate vmovdqa instead of vmovsh when
moving HImode between SSE_REGS.
(mov_internal): Use vmovaps instead of vmovsh when
moving HF/BFmode between SSE_REGS.

gcc/testsuite/ChangeLog:

* gcc.target/i386/pr89229-4a.c: Adjust testcase.
---
 gcc/config/i386/i386.md| 20 +---
 gcc/testsuite/gcc.target/i386/pr89229-4a.c |  4 +---
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index c906d75b13e..77182e34fe1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -2961,8 +2961,12 @@ (define_insn "*movhi_internal"
]
(const_string "TI"))
(eq_attr "alternative" "12")
- (cond [(match_test "TARGET_AVX512FP16")
+ (cond [(match_test "TARGET_AVX512VL")
+  (const_string "TI")
+(match_test "TARGET_AVX512FP16")
   (const_string "HF")
+(match_test "TARGET_AVX512F")
+  (const_string "SF")
 (match_test "TARGET_AVX")
   (const_string "TI")
 (ior (not (match_test "TARGET_SSE2"))
@@ -4099,8 +4103,12 @@ (define_insn "*movdf_internal"
 
   /* movaps is one byte shorter for non-AVX targets.  */
   (eq_attr "alternative" "13,17")
-(cond [(match_test "TARGET_AVX")
+(cond [(match_test "TARGET_AVX512VL")
+ (const_string "V2DF")
+   (match_test "TARGET_AVX512F")
  (const_string "DF")
+   (match_test "TARGET_AVX")
+ (const_string "V2DF")
(ior (not (match_test "TARGET_SSE2"))
 (match_test "optimize_function_for_size_p (cfun)"))
  (const_string "V4SF")
@@ -4380,8 +4388,14 @@ (define_insn "*mov_internal"
   (const_string "HI")
   (const_string "TI"))
   (eq_attr "alternative" "5")
-(cond [(match_test "TARGET_AVX512FP16")
+(cond [(match_test "TARGET_AVX512VL")
+   (const_string "V4SF")
+   (match_test "TARGET_AVX512FP16")
  (const_string "HF")
+   (match_test "TARGET_AVX512F")
+ (const_string "SF")
+   (match_test "TARGET_AVX")
+ (const_string "V4SF")
(ior (match_test "TARGET_SSE_PARTIAL_REG_DEPENDENCY")
 (match_test "TARGET_SSE_SPLIT_REGS"))
  (const_string "V4SF")
diff --git a/gcc/testsuite/gcc.target/i386/pr89229-4a.c 
b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
index 5bc10d25619..8869650b0ad 100644
--- a/gcc/testsuite/gcc.target/i386/pr89229-4a.c
+++ b/gcc/testsuite/gcc.target/i386/pr89229-4a.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-do assemble { target { ! ia32 } } } */
 /* { dg-options "-O2 -march=skylake-avx512" } */
 
 extern double d;
@@ -12,5 +12,3 @@ foo1 (double x)
   asm volatile ("" : "+v" (xmm17));
   d = xmm17;
 }
-
-/* { dg-final { scan-assembler-not "vmovapd" } } */
-- 
2.31.1



Re: [PATCH V2] Support -m[no-]gather -m[no-]scatter to enable/disable vectorization for all gather/scatter instructions

2023-08-13 Thread Hongtao Liu via Gcc-patches
On Fri, Aug 11, 2023 at 2:02 PM liuhongt via Gcc-patches
 wrote:
>
> Rename original use_gather to use_gather_8parts, Support
> -mtune-ctrl={,^}use_gather to set/clear tune features
> use_gather_{2parts, 4parts, 8parts}. Support the new option -mgather
> as alias of -mtune-ctrl=, use_gather, ^use_gather.
>
> Similar for use_scatter.
>
> How about this version?
I'll commit the patch if there's no objections in the next 24 hours.
>
> gcc/ChangeLog:
>
> * config/i386/i386-builtins.cc
> (ix86_vectorize_builtin_gather): Adjust for use_gather_8parts.
> * config/i386/i386-options.cc (parse_mtune_ctrl_str):
> Set/Clear tune features use_{gather,scatter}_{2parts, 4parts,
> 8parts} for -mtune-ctrl={,^}{use_gather,use_scatter}.
> * config/i386/i386.cc (ix86_vectorize_builtin_scatter): Adjust
> for use_scatter_8parts
> * config/i386/i386.h (TARGET_USE_GATHER): Rename to ..
> (TARGET_USE_GATHER_8PARTS): .. this.
> (TARGET_USE_SCATTER): Rename to ..
> (TARGET_USE_SCATTER_8PARTS): .. this.
> * config/i386/x86-tune.def (X86_TUNE_USE_GATHER): Rename to
> (X86_TUNE_USE_GATHER_8PARTS): .. this.
> (X86_TUNE_USE_SCATTER): Rename to
> (X86_TUNE_USE_SCATTER_8PARTS): .. this.
> * config/i386/i386.opt: Add new options mgather, mscatter.
> ---
>  gcc/config/i386/i386-builtins.cc |  2 +-
>  gcc/config/i386/i386-options.cc  | 54 +++-
>  gcc/config/i386/i386.cc  |  2 +-
>  gcc/config/i386/i386.h   |  8 ++---
>  gcc/config/i386/i386.opt |  8 +
>  gcc/config/i386/x86-tune.def |  4 +--
>  6 files changed, 56 insertions(+), 22 deletions(-)
>
> diff --git a/gcc/config/i386/i386-builtins.cc 
> b/gcc/config/i386/i386-builtins.cc
> index 356b6dfd5fb..8a0b8dfe073 100644
> --- a/gcc/config/i386/i386-builtins.cc
> +++ b/gcc/config/i386/i386-builtins.cc
> @@ -1657,7 +1657,7 @@ ix86_vectorize_builtin_gather (const_tree mem_vectype,
>   ? !TARGET_USE_GATHER_2PARTS
>   : (known_eq (TYPE_VECTOR_SUBPARTS (mem_vectype), 4u)
>  ? !TARGET_USE_GATHER_4PARTS
> -: !TARGET_USE_GATHER)))
> +: !TARGET_USE_GATHER_8PARTS)))
>  return NULL_TREE;
>
>if ((TREE_CODE (index_type) != INTEGER_TYPE
> diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
> index 127ee24203c..b8d038af69d 100644
> --- a/gcc/config/i386/i386-options.cc
> +++ b/gcc/config/i386/i386-options.cc
> @@ -1731,20 +1731,46 @@ parse_mtune_ctrl_str (struct gcc_options *opts, bool 
> dump)
>curr_feature_string++;
>clear = true;
>  }
> -  for (i = 0; i < X86_TUNE_LAST; i++)
> -{
> -  if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
> -{
> -  ix86_tune_features[i] = !clear;
> -  if (dump)
> -fprintf (stderr, "Explicitly %s feature %s\n",
> - clear ? "clear" : "set", 
> ix86_tune_feature_names[i]);
> -  break;
> -}
> -}
> -  if (i == X86_TUNE_LAST)
> -   error ("unknown parameter to option %<-mtune-ctrl%>: %s",
> -  clear ? curr_feature_string - 1 : curr_feature_string);
> +
> +  if (!strcmp (curr_feature_string, "use_gather"))
> +   {
> + ix86_tune_features[X86_TUNE_USE_GATHER_2PARTS] = !clear;
> + ix86_tune_features[X86_TUNE_USE_GATHER_4PARTS] = !clear;
> + ix86_tune_features[X86_TUNE_USE_GATHER_8PARTS] = !clear;
> + if (dump)
> +   fprintf (stderr, "Explicitly %s features use_gather_2parts,"
> +" use_gather_4parts, use_gather_8parts\n",
> +clear ? "clear" : "set");
> +
> +   }
> +  else if (!strcmp (curr_feature_string, "use_scatter"))
> +   {
> + ix86_tune_features[X86_TUNE_USE_SCATTER_2PARTS] = !clear;
> + ix86_tune_features[X86_TUNE_USE_SCATTER_4PARTS] = !clear;
> + ix86_tune_features[X86_TUNE_USE_SCATTER_8PARTS] = !clear;
> + if (dump)
> +   fprintf (stderr, "Explicitly %s features use_scatter_2parts,"
> +" use_scatter_4parts, use_scatter_8parts\n",
> +clear ? "clear" : "set");
> +   }
> +  else
> +   {
> + for (i = 0; i < X86_TUNE_LAST; i++)
> +   {
> + if (!strcmp (curr_feature_string, ix86_tune_feature_names[i]))
> +   {
> + ix86_tune_features[i] = !clear;
> + if (dump)
> +   fprintf (stderr, "Explicitly %s feature %s\n",
> +clear ? "clear" : "set", 
> ix86_tune_feature_names[i]);
> + break;
> +   }
> +   }
> +
> + if (i == X86_TUNE_LAST)
> +   error ("unknown parameter to option %<-mtune-ctrl%>: %s",
> +  clear ? curr_feature_string - 1 : curr_feature_string);
> +

[PATCH v1] RISC-V: Support RVV VFWNMACC rounding mode intrinsic API

2023-08-13 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch would like to support the rounding mode API for the
VFWNMACC as the below samples.

* __riscv_vfwnmacc_vv_f64m2_rm
* __riscv_vfwnmacc_vv_f64m2_rm_m
* __riscv_vfwnmacc_vf_f64m2_rm
* __riscv_vfwnmacc_vf_f64m2_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfwnmacc_frm): New class for frm.
(vfwnmacc_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfwnmacc_frm): New intrinsic function definition.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-wnmacc.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  | 25 ++
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  2 +
 .../riscv/rvv/base/float-point-wnmacc.c   | 47 +++
 4 files changed, 75 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmacc.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index e84d6d1d047..4a7f2b8e3e9 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -562,6 +562,29 @@ public:
   }
 };
 
+/* Implements below instructions for frm
+   - vfwnmacc
+*/
+class vfwnmacc_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_widen_ternop_insn (
+   code_for_pred_widen_mul_neg_scalar (MINUS, e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_widen_ternop_insn (
+   code_for_pred_widen_mul_neg (MINUS, e.vector_mode ()));
+
+gcc_unreachable ();
+  }
+};
+
 /* Implements vrsub.  */
 class vrsub : public function_base
 {
@@ -2340,6 +2363,7 @@ static CONSTEXPR const vfmsub_frm vfmsub_frm_obj;
 static CONSTEXPR const vfwmacc vfwmacc_obj;
 static CONSTEXPR const vfwmacc_frm vfwmacc_frm_obj;
 static CONSTEXPR const vfwnmacc vfwnmacc_obj;
+static CONSTEXPR const vfwnmacc_frm vfwnmacc_frm_obj;
 static CONSTEXPR const vfwmsac vfwmsac_obj;
 static CONSTEXPR const vfwnmsac vfwnmsac_obj;
 static CONSTEXPR const unop vfsqrt_obj;
@@ -2584,6 +2608,7 @@ BASE (vfmsub_frm)
 BASE (vfwmacc)
 BASE (vfwmacc_frm)
 BASE (vfwnmacc)
+BASE (vfwnmacc_frm)
 BASE (vfwmsac)
 BASE (vfwnmsac)
 BASE (vfsqrt)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index acbc7d42fbe..27c7deb4ec2 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -178,6 +178,7 @@ extern const function_base *const vfmsub_frm;
 extern const function_base *const vfwmacc;
 extern const function_base *const vfwmacc_frm;
 extern const function_base *const vfwnmacc;
+extern const function_base *const vfwnmacc_frm;
 extern const function_base *const vfwmsac;
 extern const function_base *const vfwnmsac;
 extern const function_base *const vfsqrt;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 0b73a5bcbc5..481c3b899f2 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -378,6 +378,8 @@ DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwfv_ops)
 
 DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwvv_ops)
 DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwfv_ops)
+DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwvv_ops)
+DEF_RVV_FUNCTION (vfwnmacc_frm, alu_frm, full_preds, f_wwfv_ops)
 
 // 13.8. Vector Floating-Point Square-Root Instruction
 DEF_RVV_FUNCTION (vfsqrt, alu, full_preds, f_v_ops)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmacc.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmacc.c
new file mode 100644
index 000..2602289ec88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-wnmacc.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+typedef float float32_t;
+
+vfloat64m2_t
+test_vfwnmacc_vv_f32m1_rm (vfloat64m2_t vd, vfloat32m1_t op1, vfloat32m1_t op2,
+  size_t vl) {
+  return __riscv_vfwnmacc_vv_f64m2_rm (vd, op1, op2, 0, vl);
+}
+
+vfloat64m2_t
+test_vfwnmacc_vv_f32m1_rm_m (vbool32_t mask, vfloat64m2_t vd, vfloat32m1_t op1,
+vfloat32m1_t op2, size_t vl) {
+  return __riscv_vfwnmacc_vv_f64m2_rm_m (mask, vd, op1, op2, 1, vl);
+}
+
+vfloat64m2_t
+test_vfwnmacc_vf_f32m1_rm (vfloat64m2_t vd, float32_t op1, vfloat32m1_t op2,
+  

[committed] MMIX: Switch to lra_in_progress

2023-08-13 Thread Hans-Peter Nilsson
This is just a mechanical update.
It fixes no observed problems for LRA.

* config/mmix/predicates.md (mmix_address_operand): Use
lra_in_progress, not reload_in_progress.
---
 gcc/config/mmix/predicates.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/mmix/predicates.md b/gcc/config/mmix/predicates.md
index 64e77fa92d00..3c98f2686aa8 100644
--- a/gcc/config/mmix/predicates.md
+++ b/gcc/config/mmix/predicates.md
@@ -158,7 +158,7 @@ (define_predicate "mmix_reg_or_8bit_operand"
 ;; See also comment above the "*call_real" pattern.
 
 (define_predicate "mmix_address_operand"
-  (if_then_else (match_test "reload_in_progress || reload_completed")
+  (if_then_else (match_test "lra_in_progress || reload_completed")
 (match_test "strict_memory_address_p (Pmode, op)")
 (match_test "memory_address_p (Pmode, op)")))
 
-- 
2.30.2



[committed] MMIX: Re-enable LRA

2023-08-13 Thread Hans-Peter Nilsson
After fixing the one problem for MMIX, there's just one 
test-case regressing between reload and LRA.
-- 8< --

* config/mmix/mmix.cc: Re-enable LRA.
---
 gcc/config/mmix/mmix.cc | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/gcc/config/mmix/mmix.cc b/gcc/config/mmix/mmix.cc
index 5160794d9d03..347430927499 100644
--- a/gcc/config/mmix/mmix.cc
+++ b/gcc/config/mmix/mmix.cc
@@ -274,9 +274,6 @@ static HOST_WIDE_INT mmix_starting_frame_offset (void);
 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS mmix_preferred_output_reload_class
 
-#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_false
-
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_Pmmix_legitimate_address_p
 #undef TARGET_LEGITIMATE_CONSTANT_P
-- 
2.30.2



[PATCHv4, rs6000] Generate mfvsrwz for all subtargets and remove redundant zero extend [PR106769]

2023-08-13 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch modifies vsx extract expand and generates mfvsrwz/stxsiwx
for all sub targets when the mode is V4SI and the extracted element is word
1 from BE order. Also this patch adds an insn pattern for mfvsrwz which
helps eliminate redundant zero extend.

  Compared to the last version, the main change is to put the word index
checking in the split condition of "*vsx_extract_v4si_w023". Also modified
some comments.
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625380.html

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.

Thanks
Gui Haochen

ChangeLog
rs6000: Generate mfvsrwz for all platforms and remove redundant zero extend

mfvsrwz has lower latency than xxextractuw or vextuw[lr]x.  So it should be
generated even with p9 vector enabled.  Also the instruction is already
zero extended.  A combine pattern is needed to eliminate redundant zero
extend instructions.

gcc/
PR target/106769
* config/rs6000/vsx.md (expand vsx_extract_): Set it only
for V8HI and V16QI.
(vsx_extract_v4si): New expand for V4SI extraction.
(vsx_extract_v4si_w1): New insn pattern for V4SI extraction on
word 1 from BE order.   
(*mfvsrwz): New insn pattern for mfvsrwz.
(*vsx_extract__di_p9): Assert that it won't be generated on
word 1 from BE order.
(*vsx_extract_si): Remove.
(*vsx_extract_v4si_w023): New insn and split pattern on word 0, 2,
3 from BE order.

gcc/testsuite/
PR target/106769
* gcc.target/powerpc/pr106769.h: New.
* gcc.target/powerpc/pr106769-p8.c: New.
* gcc.target/powerpc/pr106769-p9.c: New.

patch.diff
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0a34ceebeb5..1cbdc2f1c01 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -3722,9 +3722,9 @@ (define_insn "vsx_xxpermdi2__1"
 (define_expand  "vsx_extract_"
   [(parallel [(set (match_operand: 0 "gpc_reg_operand")
   (vec_select:
-   (match_operand:VSX_EXTRACT_I 1 "gpc_reg_operand")
+   (match_operand:VSX_EXTRACT_I2 1 "gpc_reg_operand")
(parallel [(match_operand:QI 2 "const_int_operand")])))
- (clobber (match_scratch:VSX_EXTRACT_I 3))])]
+ (clobber (match_scratch:VSX_EXTRACT_I2 3))])]
   "VECTOR_MEM_VSX_P (mode) && TARGET_DIRECT_MOVE_64BIT"
 {
   /* If we have ISA 3.0, we can do a xxextractuw/vextractu{b,h}.  */
@@ -3736,6 +3736,63 @@ (define_expand  "vsx_extract_"
 }
 })

+(define_expand  "vsx_extract_v4si"
+  [(parallel [(set (match_operand:SI 0 "gpc_reg_operand")
+  (vec_select:SI
+   (match_operand:V4SI 1 "gpc_reg_operand")
+   (parallel [(match_operand:QI 2 "const_0_to_3_operand")])))
+ (clobber (match_scratch:V4SI 3))])]
+  "TARGET_DIRECT_MOVE_64BIT"
+{
+  /* The word 1 (BE order) can be extracted by mfvsrwz/stxsiwx.  So just
+ fall through to vsx_extract_v4si_w1.  */
+  if (TARGET_P9_VECTOR
+  && INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2))
+{
+  emit_insn (gen_vsx_extract_v4si_p9 (operands[0], operands[1],
+ operands[2]));
+  DONE;
+}
+})
+
+/* Extract from word 1 (BE order);  */
+(define_insn "vsx_extract_v4si_w1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,wa,Z,wa")
+   (vec_select:SI
+(match_operand:V4SI 1 "gpc_reg_operand" "v,v,v,0")
+(parallel [(match_operand:QI 2 "const_0_to_3_operand" "n,n,n,n")])))
+   (clobber (match_scratch:V4SI 3 "=v,v,v,v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+{
+   if (which_alternative == 0)
+ return "mfvsrwz %0,%x1";
+
+   if (which_alternative == 1)
+ return "xxlor %x0,%x1,%x1";
+
+   if (which_alternative == 2)
+ return "stxsiwx %x1,%y0";
+
+   return ASM_COMMENT_START " vec_extract to same register";
+}
+  [(set_attr "type" "mfvsr,veclogical,fpstore,*")
+   (set_attr "length" "4,4,4,0")
+   (set_attr "isa" "p8v,*,p8v,*")])
+
+(define_insn "*mfvsrwz"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (zero_extend:DI
+ (vec_select:SI
+   (match_operand:V4SI 1 "vsx_register_operand" "wa")
+   (parallel [(match_operand:QI 2 "const_int_operand" "n")]
+   (clobber (match_scratch:V4SI 3 "=v"))]
+  "TARGET_DIRECT_MOVE_64BIT
+   && INTVAL (operands[2]) == (BYTES_BIG_ENDIAN ? 1 : 2)"
+  "mfvsrwz %0,%x1"
+  [(set_attr "type" "mfvsr")
+   (set_attr "isa" "p8v")])
+
 (define_insn "vsx_extract__p9"
   [(set (match_operand: 0 "gpc_reg_operand" "=r,")
(vec_select:
@@ -3807,6 +3864,9 @@ (define_insn_and_split "*vsx_extract__di_p9"
(parallel [(match_dup 2)])))
  (clobber (match_dup 3))])]
 {
+  gcc_assert (mode != V4SImode
+ || INTVAL (operands[2]) != (BYTES_BIG_ENDIAN ? 1 : 2));
+
   operands[4] = gen_rtx_REG (mode, REGNO (operands[0]

[committed] MMIX: Handle LRA FP-to-SP-elimination oddity

2023-08-13 Thread Hans-Peter Nilsson
When LRA is in progress, it can try and validate insns
half-way through frame-pointer (FP) to stack-pointer (SP)
elimination.  Operands have then been substituted where the
offset is from the SP elimination but the register is the
(hard) frame-pointer:

lra-eliminations.cc:lra_eliminate_regs_1:370:
 rtx to = subst_p ? ep->to_rtx : ep->from_rtx;

In this regard reload played nicely.  Unfortunately, the
frame_pointer_operand predicate in mmix/predicates.md barfs
on such an address.  This broke the use of the MMIX
frame_pointer_operand predicate (and the Yf constraint),
used only in the nonlocal_goto_receiver expansion (which is
used in e.g. code generated for C++ "catch").

Force MMIX frame_pointer_operand to accept an FP+offset for
the duration of lra_in_progress.

* config/mmix/predicates.md (frame_pointer_operand): Handle FP+offset
when lra_in_progress.
---
 gcc/config/mmix/predicates.md | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/gcc/config/mmix/predicates.md b/gcc/config/mmix/predicates.md
index 4a9b0177a786..64e77fa92d00 100644
--- a/gcc/config/mmix/predicates.md
+++ b/gcc/config/mmix/predicates.md
@@ -171,4 +171,14 @@ (define_predicate "frame_pointer_operand"
 (match_code "plus")
 (match_code "reg" "0")
 (match_code "const_int" "1")
-(match_test "XEXP (op, 0) == stack_pointer_rtx"
+(ior
+ (match_test "XEXP (op, 0) == stack_pointer_rtx")
+ ;; We can temporarily have a FP+offset here, where we (for FP)
+ ;; accept only FP and the equivalent elimination of SP+offset.
+ ;; See lra_eliminate_regs_1 in lra-eliminations.cc c:a line 370:
+ ;;  "rtx to = subst_p ? ep->to_rtx : ep->from_rtx;"
+ (and
+  (match_test "lra_in_progress")
+  (ior
+   (match_test "XEXP (op, 0) == hard_frame_pointer_rtx")
+   (match_test "XEXP (op, 0) == frame_pointer_rtx")))
-- 
2.30.2



[committed] Disable LRA for MMIX.

2023-08-13 Thread Hans-Peter Nilsson
Since the change r14-383-gfaf8bea79b6256 "Enable LRA on
several ports", mmix has been broken building libstdc++-v3:

libtool: compile: /obj/./gcc/xgcc -shared-libgcc -B/obj/./gcc
-nostdinc++ -L/obj/mmix/libstdc++-v3/src
-L/obj/mmix/libstdc++-v3/src/.libs
-L/obj/mmix/libstdc++-v3/libsupc++/.libs -nostdinc -B/obj/mmix/newlib/
-isystem /obj/mmix/newlib/targ-include -isystem
/gcctop/newlib/libc/include -B/obj/mmix/libgloss/mmix
-L/obj/mmix/libgloss/libnosys -L/gcctop/libgloss/mmix
-B/home/hp/tmp/mmix230811-00/pre/mmix/bin/
-B/home/hp/tmp/mmix230811-00/pre/mmix/lib/ -isystem
/home/hp/tmp/mmix230811-00/pre/mmix/include -isystem
/home/hp/tmp/mmix230811-00/pre/mmix/sys-include
-I/gcctop/libstdc++-v3/../libgcc -I/obj/mmix/libstdc++-v3/include/mmix
-I/obj/mmix/libstdc++-v3/include -I/gcctop/libstdc++-v3/libsupc++
-fno-implicit-templates -Wall -Wextra -Wwrite-strings -Wcast-qual
-Wabi=2 -fdiagnostics-show-location=once -ffunction-sections
-fdata-sections -frandom-seed=eh_type.lo -g -O2 -c
/gcctop/libstdc++-v3/libsupc++/eh_type.cc -o eh_type.o
/gcctop/libstdc++-v3/libsupc++/eh_terminate.cc: In function 'void
__cxxabiv1::__terminate(std::terminate_handler)':
/gcctop/libstdc++-v3/libsupc++/eh_terminate.cc:53:1: error: unable to
generate reloads for:

   53 | }
  | ^
(insn 31 36 44 4 (parallel [
(unspec_volatile [
(plus:DI (reg/f:DI 253 $253)
(const_int 24 [0x18]))
] 1)
(clobber (reg:DI 275))
(clobber (reg:DI 259 rJ))
]) "/gcctop/libstdc++-v3/libsupc++/eh_terminate.cc":51:3
 discrim 1 63 {*nonlocal_goto_receiver_expanded}
 (expr_list:REG_UNUSED (reg:DI 275)
(expr_list:REG_UNUSED (reg:DI 259 rJ)
(nil
during RTL pass: reload
/gcctop/libstdc++-v3/libsupc++/eh_terminate.cc:53:1:
internal compiler error: in curr_insn_transform, at lra-constraints.cc:4281

This commit temporarily reverts the MMIX part of
r14-383-gfaf8bea79b6256 back to reload.

* config/mmix/mmix.cc: Disable LRA for MMIX.
---
 gcc/config/mmix/mmix.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/config/mmix/mmix.cc b/gcc/config/mmix/mmix.cc
index 347430927499..5160794d9d03 100644
--- a/gcc/config/mmix/mmix.cc
+++ b/gcc/config/mmix/mmix.cc
@@ -274,6 +274,9 @@ static HOST_WIDE_INT mmix_starting_frame_offset (void);
 #undef TARGET_PREFERRED_OUTPUT_RELOAD_CLASS
 #define TARGET_PREFERRED_OUTPUT_RELOAD_CLASS mmix_preferred_output_reload_class
 
+#undef TARGET_LRA_P
+#define TARGET_LRA_P hook_bool_void_false
+
 #undef TARGET_LEGITIMATE_ADDRESS_P
 #define TARGET_LEGITIMATE_ADDRESS_Pmmix_legitimate_address_p
 #undef TARGET_LEGITIMATE_CONSTANT_P
-- 
2.30.2



Re: [committed] Enable LRA on several ports

2023-08-13 Thread Hans-Peter Nilsson
On Mon, 1 May 2023, Jeff Law wrote:

> 
> Spurred by Segher's RFC, I went ahead and tested several ports with LRA
> enabled.  Not surprisingly, many failed, but a few built their full set of
> libraries successfully and of those a few even ran their testsuites with no
> regressions.  In fact, enabling LRA fixes a small number of failures on the
> iq2000 port.
> 
> This patch converts the ports which built their libraries and have test
> results that are as good as or better than without LRA.  There may be minor
> code quality regressions or there may be minor code quality improvements --
> I'm leaving that for the port maintainers to own going forward.

How do you configure your builds?  Perhaps your cross-builds 
exclude C++?  I found that this (r14-383) broke MMIX building 
libstdc++-v3 from that commit up to and including r14-3180.
See commit r14-3187.

Thankfully there was just one single gotcha.  I temporarily 
reverted the LRA change for MMIX so that I can get honest 
repeatable baseline results.  There seems to have been one 
test-case regressing from the LRA switch (PR53948), thus I 
re-enabled LRA for MMIX again.  Sorry for the late reaction.

brgds, H-P


RE: [PATCH v1] RISC-V: Support RVV VFWMACC rounding mode intrinsic API

2023-08-13 Thread Li, Pan2 via Gcc-patches
Committed, thanks Juzhe.

Pan

From: juzhe.zh...@rivai.ai 
Sent: Monday, August 14, 2023 9:04 AM
To: Li, Pan2 ; gcc-patches 
Cc: Li, Pan2 ; Wang, Yanzhang ; 
kito.cheng 
Subject: Re: [PATCH v1] RISC-V: Support RVV VFWMACC rounding mode intrinsic API

LGTM


juzhe.zh...@rivai.ai

From: pan2.li
Date: 2023-08-13 16:02
To: gcc-patches
CC: juzhe.zhong; 
pan2.li; 
yanzhang.wang; 
kito.cheng
Subject: [PATCH v1] RISC-V: Support RVV VFWMACC rounding mode intrinsic API
From: Pan Li mailto:pan2...@intel.com>>

This patch would like to support the rounding mode API for the
VFWMACC as the below samples.

* __riscv_vfwmacc_vv_f64m2_rm
* __riscv_vfwmacc_vv_f64m2_rm_m
* __riscv_vfwmacc_vf_f64m2_rm
* __riscv_vfwmacc_vf_f64m2_rm_m

Signed-off-by: Pan Li mailto:pan2...@intel.com>>

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfwmacc_frm): New class for vfwmacc frm.
(vfwmacc_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfwmacc_frm): Function definition for vfwmacc.
* config/riscv/riscv-vector-builtins.cc
(function_expander::use_widen_ternop_insn): Add frm support.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-fwmacc.c: New test.
---
.../riscv/riscv-vector-builtins-bases.cc  | 25 ++
.../riscv/riscv-vector-builtins-bases.h   |  1 +
.../riscv/riscv-vector-builtins-functions.def |  3 ++
gcc/config/riscv/riscv-vector-builtins.cc | 22 +++--
.../riscv/rvv/base/float-point-fwmacc.c   | 47 +++
5 files changed, 93 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-fwmacc.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index e14e9aa7809..e84d6d1d047 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -539,6 +539,29 @@ public:
   }
};
+/* Implements below instructions for frm
+   - vfwmacc
+*/
+class vfwmacc_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_widen_ternop_insn (
+ code_for_pred_widen_mul_scalar (PLUS, e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_widen_ternop_insn (
+ code_for_pred_widen_mul (PLUS, e.vector_mode ()));
+
+gcc_unreachable ();
+  }
+};
+
/* Implements vrsub.  */
class vrsub : public function_base
{
@@ -2315,6 +2338,7 @@ static CONSTEXPR const vfnmadd_frm vfnmadd_frm_obj;
static CONSTEXPR const vfmsub vfmsub_obj;
static CONSTEXPR const vfmsub_frm vfmsub_frm_obj;
static CONSTEXPR const vfwmacc vfwmacc_obj;
+static CONSTEXPR const vfwmacc_frm vfwmacc_frm_obj;
static CONSTEXPR const vfwnmacc vfwnmacc_obj;
static CONSTEXPR const vfwmsac vfwmsac_obj;
static CONSTEXPR const vfwnmsac vfwnmsac_obj;
@@ -2558,6 +2582,7 @@ BASE (vfnmadd_frm)
BASE (vfmsub)
BASE (vfmsub_frm)
BASE (vfwmacc)
+BASE (vfwmacc_frm)
BASE (vfwnmacc)
BASE (vfwmsac)
BASE (vfwnmsac)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index e60cebab4ae..acbc7d42fbe 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -176,6 +176,7 @@ extern const function_base *const vfnmadd_frm;
extern const function_base *const vfmsub;
extern const function_base *const vfmsub_frm;
extern const function_base *const vfwmacc;
+extern const function_base *const vfwmacc_frm;
extern const function_base *const vfwnmacc;
extern const function_base *const vfwmsac;
extern const function_base *const vfwnmsac;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index d75b281eebe..0b73a5bcbc5 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -376,6 +376,9 @@ DEF_RVV_FUNCTION (vfwmsac, alu, full_preds, f_wwfv_ops)
DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwvv_ops)
DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwfv_ops)
+DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwvv_ops)
+DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwfv_ops)
+
// 13.8. Vector Floating-Point Square-Root Instruction
DEF_RVV_FUNCTION (vfsqrt, alu, full_preds, f_v_ops)
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index abab06c00ed..ad4a9098620 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+

RE: [PATCH v1] RISC-V: Support RVV VFNMSUB rounding mode intrinsic API

2023-08-13 Thread Li, Pan2 via Gcc-patches
Committed, thanks Juzhe.

Pan

From: juzhe.zh...@rivai.ai 
Sent: Monday, August 14, 2023 9:04 AM
To: Li, Pan2 ; gcc-patches 
Cc: jeffreyalaw ; Li, Pan2 ; Wang, 
Yanzhang ; kito.cheng 
Subject: Re: [PATCH v1] RISC-V: Support RVV VFNMSUB rounding mode intrinsic API

LGTM


juzhe.zh...@rivai.ai

From: pan2.li
Date: 2023-08-12 12:48
To: gcc-patches
CC: juzhe.zhong; 
jeffreyalaw; pan2.li; 
yanzhang.wang; 
kito.cheng
Subject: [PATCH v1] RISC-V: Support RVV VFNMSUB rounding mode intrinsic API
From: Pan Li mailto:pan2...@intel.com>>

This patch would like to support the rounding mode API for the
VFNMSUB as the below samples.

* __riscv_vfnmsub_vv_f32m1_rm
* __riscv_vfnmsub_vv_f32m1_rm_m
* __riscv_vfnmsub_vf_f32m1_rm
* __riscv_vfnmsub_vf_f32m1_rm_m

Signed-off-by: Pan Li mailto:pan2...@intel.com>>

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfnmsub_frm): New class for vfnmsub frm.
(vfnmsub_frm): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfnmsub_frm): New function declaration.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-nmsub.c: New test.
---
.../riscv/riscv-vector-builtins-bases.cc  | 25 ++
.../riscv/riscv-vector-builtins-bases.h   |  1 +
.../riscv/riscv-vector-builtins-functions.def |  2 +
.../riscv/rvv/base/float-point-nmsub.c| 47 +++
4 files changed, 75 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-nmsub.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 381bc72c784..e14e9aa7809 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -516,6 +516,29 @@ public:
   }
};
+/* Implements below instructions for frm
+   - vfnmsub
+*/
+class vfnmsub_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_ternop_insn (
+ false, code_for_pred_mul_neg_scalar (PLUS, e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_ternop_insn (
+ false, code_for_pred_mul_neg (PLUS, e.vector_mode ()));
+
+gcc_unreachable ();
+  }
+};
+
/* Implements vrsub.  */
class vrsub : public function_base
{
@@ -2282,6 +2305,7 @@ static CONSTEXPR const vfnmsac_frm vfnmsac_frm_obj;
static CONSTEXPR const vfmadd vfmadd_obj;
static CONSTEXPR const vfmadd_frm vfmadd_frm_obj;
static CONSTEXPR const vfnmsub vfnmsub_obj;
+static CONSTEXPR const vfnmsub_frm vfnmsub_frm_obj;
static CONSTEXPR const vfnmacc vfnmacc_obj;
static CONSTEXPR const vfnmacc_frm vfnmacc_frm_obj;
static CONSTEXPR const vfmsac vfmsac_obj;
@@ -2524,6 +2548,7 @@ BASE (vfnmsac_frm)
BASE (vfmadd)
BASE (vfmadd_frm)
BASE (vfnmsub)
+BASE (vfnmsub_frm)
BASE (vfnmacc)
BASE (vfnmacc_frm)
BASE (vfmsac)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 99cfbfd78c8..e60cebab4ae 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -166,6 +166,7 @@ extern const function_base *const vfnmsac_frm;
extern const function_base *const vfmadd;
extern const function_base *const vfmadd_frm;
extern const function_base *const vfnmsub;
+extern const function_base *const vfnmsub_frm;
extern const function_base *const vfnmacc;
extern const function_base *const vfnmacc_frm;
extern const function_base *const vfmsac;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 75235ec01d3..d75b281eebe 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -363,6 +363,8 @@ DEF_RVV_FUNCTION (vfnmadd_frm, alu_frm, full_preds, 
f__ops)
DEF_RVV_FUNCTION (vfnmadd_frm, alu_frm, full_preds, f_vvfv_ops)
DEF_RVV_FUNCTION (vfmsub_frm, alu_frm, full_preds, f__ops)
DEF_RVV_FUNCTION (vfmsub_frm, alu_frm, full_preds, f_vvfv_ops)
+DEF_RVV_FUNCTION (vfnmsub_frm, alu_frm, full_preds, f__ops)
+DEF_RVV_FUNCTION (vfnmsub_frm, alu_frm, full_preds, f_vvfv_ops)
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
DEF_RVV_FUNCTION (vfwmacc, alu, full_preds, f_wwvv_ops)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-nmsub.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-nmsub.c
new file mode 100644
index 000..1b3e939b1e1
--- /dev/null
+++ b/gcc/test

[pushed] [LRA]: Fix asserts for output stack pointer reloads

2023-08-13 Thread Vladimir Makarov via Gcc-patches
The following patch fixes useless asserts in my latest patch 
implementing output stack pointer reloads.
commit 18b417fe1a46d37738243267c1f559cd0acc4886
Author: Vladimir N. Makarov 
Date:   Sun Aug 13 20:54:58 2023 -0400

[LRA]: Fix asserts for output stack pointer reloads

The patch implementing output stack pointer reloads contained superfluous
asserts.  The patch makes them useful.

gcc/ChangeLog:

* lra-constraints.cc (curr_insn_transform): Set done_p up and
check it on true after processing output stack pointer reload.

diff --git a/gcc/lra-constraints.cc b/gcc/lra-constraints.cc
index 26239908747..8d9443adeb6 100644
--- a/gcc/lra-constraints.cc
+++ b/gcc/lra-constraints.cc
@@ -4852,6 +4852,7 @@ curr_insn_transform (bool check_only_p)
&& SET_DEST (set) == stack_pointer_rtx)
  {
lra_assert (!done_p);
+   done_p = true;
curr_id->sp_offset = 0;
lra_insn_recog_data_t id = lra_get_insn_recog_data (insn);
id->sp_offset = sp_offset;
@@ -4860,7 +4861,7 @@ curr_insn_transform (bool check_only_p)
   "Moving sp offset from insn %u to %u\n",
   INSN_UID (curr_insn), INSN_UID (insn));
  }
-  lra_assert (!done_p);
+  lra_assert (done_p);
 }
   return change_p;
 }


Re: [PATCH v1] RISC-V: Support RVV VFWMACC rounding mode intrinsic API

2023-08-13 Thread juzhe.zh...@rivai.ai
LGTM



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-08-13 16:02
To: gcc-patches
CC: juzhe.zhong; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support RVV VFWMACC rounding mode intrinsic API
From: Pan Li 
 
This patch would like to support the rounding mode API for the
VFWMACC as the below samples.
 
* __riscv_vfwmacc_vv_f64m2_rm
* __riscv_vfwmacc_vv_f64m2_rm_m
* __riscv_vfwmacc_vf_f64m2_rm
* __riscv_vfwmacc_vf_f64m2_rm_m
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv-vector-builtins-bases.cc
(class vfwmacc_frm): New class for vfwmacc frm.
(vfwmacc_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfwmacc_frm): Function definition for vfwmacc.
* config/riscv/riscv-vector-builtins.cc
(function_expander::use_widen_ternop_insn): Add frm support.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/float-point-fwmacc.c: New test.
---
.../riscv/riscv-vector-builtins-bases.cc  | 25 ++
.../riscv/riscv-vector-builtins-bases.h   |  1 +
.../riscv/riscv-vector-builtins-functions.def |  3 ++
gcc/config/riscv/riscv-vector-builtins.cc | 22 +++--
.../riscv/rvv/base/float-point-fwmacc.c   | 47 +++
5 files changed, 93 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-fwmacc.c
 
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index e14e9aa7809..e84d6d1d047 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -539,6 +539,29 @@ public:
   }
};
+/* Implements below instructions for frm
+   - vfwmacc
+*/
+class vfwmacc_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_widen_ternop_insn (
+ code_for_pred_widen_mul_scalar (PLUS, e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_widen_ternop_insn (
+ code_for_pred_widen_mul (PLUS, e.vector_mode ()));
+
+gcc_unreachable ();
+  }
+};
+
/* Implements vrsub.  */
class vrsub : public function_base
{
@@ -2315,6 +2338,7 @@ static CONSTEXPR const vfnmadd_frm vfnmadd_frm_obj;
static CONSTEXPR const vfmsub vfmsub_obj;
static CONSTEXPR const vfmsub_frm vfmsub_frm_obj;
static CONSTEXPR const vfwmacc vfwmacc_obj;
+static CONSTEXPR const vfwmacc_frm vfwmacc_frm_obj;
static CONSTEXPR const vfwnmacc vfwnmacc_obj;
static CONSTEXPR const vfwmsac vfwmsac_obj;
static CONSTEXPR const vfwnmsac vfwnmsac_obj;
@@ -2558,6 +2582,7 @@ BASE (vfnmadd_frm)
BASE (vfmsub)
BASE (vfmsub_frm)
BASE (vfwmacc)
+BASE (vfwmacc_frm)
BASE (vfwnmacc)
BASE (vfwmsac)
BASE (vfwnmsac)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index e60cebab4ae..acbc7d42fbe 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -176,6 +176,7 @@ extern const function_base *const vfnmadd_frm;
extern const function_base *const vfmsub;
extern const function_base *const vfmsub_frm;
extern const function_base *const vfwmacc;
+extern const function_base *const vfwmacc_frm;
extern const function_base *const vfwnmacc;
extern const function_base *const vfwmsac;
extern const function_base *const vfwnmsac;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index d75b281eebe..0b73a5bcbc5 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -376,6 +376,9 @@ DEF_RVV_FUNCTION (vfwmsac, alu, full_preds, f_wwfv_ops)
DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwvv_ops)
DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwfv_ops)
+DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwvv_ops)
+DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwfv_ops)
+
// 13.8. Vector Floating-Point Square-Root Instruction
DEF_RVV_FUNCTION (vfsqrt, alu, full_preds, f_v_ops)
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index abab06c00ed..ad4a9098620 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -3771,17 +3771,29 @@ function_expander::use_widen_ternop_insn (insn_code 
icode)
 add_all_one_mask_operand (mask_mode ());
   for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
-add_input_operand (argno);
+{
+  if (base->has_rounding_mode_operand_p ()
+   && argno == call_expr_nargs (exp) - 2)
+ {
+   /* Since the rounding mode argument position is not consistent with
+  the instruction pattern, we need to skip rounding mode argument
+

Re: [PATCH v1] RISC-V: Support RVV VFNMSUB rounding mode intrinsic API

2023-08-13 Thread juzhe.zh...@rivai.ai
LGTM



juzhe.zh...@rivai.ai
 
From: pan2.li
Date: 2023-08-12 12:48
To: gcc-patches
CC: juzhe.zhong; jeffreyalaw; pan2.li; yanzhang.wang; kito.cheng
Subject: [PATCH v1] RISC-V: Support RVV VFNMSUB rounding mode intrinsic API
From: Pan Li 
 
This patch would like to support the rounding mode API for the
VFNMSUB as the below samples.
 
* __riscv_vfnmsub_vv_f32m1_rm
* __riscv_vfnmsub_vv_f32m1_rm_m
* __riscv_vfnmsub_vf_f32m1_rm
* __riscv_vfnmsub_vf_f32m1_rm_m
 
Signed-off-by: Pan Li 
 
gcc/ChangeLog:
 
* config/riscv/riscv-vector-builtins-bases.cc
(class vfnmsub_frm): New class for vfnmsub frm.
(vfnmsub_frm): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfnmsub_frm): New function declaration.
 
gcc/testsuite/ChangeLog:
 
* gcc.target/riscv/rvv/base/float-point-nmsub.c: New test.
---
.../riscv/riscv-vector-builtins-bases.cc  | 25 ++
.../riscv/riscv-vector-builtins-bases.h   |  1 +
.../riscv/riscv-vector-builtins-functions.def |  2 +
.../riscv/rvv/base/float-point-nmsub.c| 47 +++
4 files changed, 75 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-nmsub.c
 
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 381bc72c784..e14e9aa7809 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -516,6 +516,29 @@ public:
   }
};
+/* Implements below instructions for frm
+   - vfnmsub
+*/
+class vfnmsub_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_ternop_insn (
+ false, code_for_pred_mul_neg_scalar (PLUS, e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_ternop_insn (
+ false, code_for_pred_mul_neg (PLUS, e.vector_mode ()));
+
+gcc_unreachable ();
+  }
+};
+
/* Implements vrsub.  */
class vrsub : public function_base
{
@@ -2282,6 +2305,7 @@ static CONSTEXPR const vfnmsac_frm vfnmsac_frm_obj;
static CONSTEXPR const vfmadd vfmadd_obj;
static CONSTEXPR const vfmadd_frm vfmadd_frm_obj;
static CONSTEXPR const vfnmsub vfnmsub_obj;
+static CONSTEXPR const vfnmsub_frm vfnmsub_frm_obj;
static CONSTEXPR const vfnmacc vfnmacc_obj;
static CONSTEXPR const vfnmacc_frm vfnmacc_frm_obj;
static CONSTEXPR const vfmsac vfmsac_obj;
@@ -2524,6 +2548,7 @@ BASE (vfnmsac_frm)
BASE (vfmadd)
BASE (vfmadd_frm)
BASE (vfnmsub)
+BASE (vfnmsub_frm)
BASE (vfnmacc)
BASE (vfnmacc_frm)
BASE (vfmsac)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index 99cfbfd78c8..e60cebab4ae 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -166,6 +166,7 @@ extern const function_base *const vfnmsac_frm;
extern const function_base *const vfmadd;
extern const function_base *const vfmadd_frm;
extern const function_base *const vfnmsub;
+extern const function_base *const vfnmsub_frm;
extern const function_base *const vfnmacc;
extern const function_base *const vfnmacc_frm;
extern const function_base *const vfmsac;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index 75235ec01d3..d75b281eebe 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -363,6 +363,8 @@ DEF_RVV_FUNCTION (vfnmadd_frm, alu_frm, full_preds, 
f__ops)
DEF_RVV_FUNCTION (vfnmadd_frm, alu_frm, full_preds, f_vvfv_ops)
DEF_RVV_FUNCTION (vfmsub_frm, alu_frm, full_preds, f__ops)
DEF_RVV_FUNCTION (vfmsub_frm, alu_frm, full_preds, f_vvfv_ops)
+DEF_RVV_FUNCTION (vfnmsub_frm, alu_frm, full_preds, f__ops)
+DEF_RVV_FUNCTION (vfnmsub_frm, alu_frm, full_preds, f_vvfv_ops)
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
DEF_RVV_FUNCTION (vfwmacc, alu, full_preds, f_wwvv_ops)
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-nmsub.c 
b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-nmsub.c
new file mode 100644
index 000..1b3e939b1e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/float-point-nmsub.c
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+typedef float float32_t;
+
+vfloat32m1_t
+test_riscv_vfnmsub_vv_f32m1_rm (vfloat32m1_t vd, vfloat32m1_t op1,
+ vfloat32m1_t op2, size_t vl) {
+  return __riscv_vfnmsub_vv_f32m1_rm (vd, op1, op2, 0, vl);
+}
+
+vfloat32m1_t
+test_vfnmsub_vv_f32m1_rm_m (vbool32_t mask, vfloat32m1_t vd, vfloat32m1_t op1,
+ vfloat32m1_t op2, size_t vl) {
+  return __riscv_vfnmsub_vv_f32m1_rm

Re: [PATCH 0/3] fortran: fix length one character dummy args [PR110419]

2023-08-13 Thread Harald Anlauf via Gcc-patches

Hi Mikael,

Am 09.08.23 um 22:21 schrieb Mikael Morin via Gcc-patches:

Hello,

I propose with this patchset a fix for the test value_9.f90 which has been
failing on 32 bits powerpc since it was added a few weeks back (see PR110360
and PR110419).

The problem is an argument type mismatch between a procedure declaration,
and the argument value for a call of that same procedure, in the specific
case of length one character dummy arguments with the value attribute.
Admittedly, our argument passing conventions [1] for those are currently
unspecified.

Before PR110360, character dummy arguments with value attribute were
arrays passed by value, but the actual argument was still passed as
reference.  PR110360 changed that to pass length one dummies as bare
character (i.e. scalar integer), like in the bind(c) case (but with length
argument still present).  However, the argument type in the function declaration
wasn't changed at the same time, so the test was failing on big-endian 32 bits
targets.  Surprisingly, on most targets the middle-end, back-end and runtime
are happy to get a scalar value passed where a length one array is expected.

This can be fixed, either by reverting back to arguments represented as
arrays passed by value with calls fixed, or by keeping the new
representation with single characters for arguments and fixing the procedure
types accordingly.

I haven't really tried the first way, this is using the second one.
The first patch is a preliminary refactoring.  The main change is the
second patch.  It modifies the types of length one character dummy arguments
with value attribute in the procedure declarations, so that they are scalar
integer types, consistently with how arguments are passed for calls.
The third patch is a change of error codes in the testcase.

I have regression tested this on x86_64-unknown-linux-gnu, and
powerpc64-unknown-linux-gnu (both -m32 and -m64).
OK for master?


this looks good to me.

There was only one thing I was uncertain what the right way is:
you chose to use mpz_cmp_ui in the length check in the new helper
function gfc_length_one_character_type_p, while in many other places
the length check uses mpz_cmp_si.

Admittedly, a negative (effective/declared) character length can never
occur, except maybe at intermediate times during resolution before this
is fixed up in accordance with the standard.  So this is probably more
a cosmetic decision, and you can decide to use either variant.

Thanks for the patch!

Harald



[1] https://gcc.gnu.org/onlinedocs/gfortran/Argument-passing-conventions.html


Mikael Morin (3):
   fortran: New predicate gfc_length_one_character_type_p
   fortran: Fix length one character dummy arg type [PR110419]
   testsuite: Use distinct explicit error codes in value_9.f90

  gcc/fortran/check.cc  |   7 +-
  gcc/fortran/decl.cc   |   4 +-
  gcc/fortran/gfortran.h|  15 +++
  gcc/fortran/trans-expr.cc |  39 ---
  gcc/fortran/trans-types.cc|   5 +-
  gcc/testsuite/gfortran.dg/bind_c_usage_13.f03 |   8 +-
  gcc/testsuite/gfortran.dg/value_9.f90 | 108 +-
  7 files changed, 103 insertions(+), 83 deletions(-)





Re: [PATCH 2/2] ipa-cp: Feed results of IPA-CP into value numbering

2023-08-13 Thread Martin Jambor
Hello Richi,

it took me quite some time to get back to this but it might have actually
helped because it forced me to re-read the code around and in turn
simplify the patch.

On Mon, Jun 12 2023, Richard Biener wrote:
> On Fri, 9 Jun 2023, Martin Jambor wrote:
>

[...]

>> @@ -2327,7 +2330,7 @@ vn_walk_cb_data::push_partial_def (pd_data pd,
>> with the current VUSE and performs the expression lookup.  */
>>  
>>  static void *
>> -vn_reference_lookup_2 (ao_ref *op ATTRIBUTE_UNUSED, tree vuse, void *data_)
>> +vn_reference_lookup_2 (ao_ref *op, tree vuse, void *data_)
>>  {
>>vn_walk_cb_data *data = (vn_walk_cb_data *)data_;
>>vn_reference_t vr = data->vr;
>> @@ -2361,6 +2364,38 @@ vn_reference_lookup_2 (ao_ref *op ATTRIBUTE_UNUSED, 
>> tree vuse, void *data_)
>>return *slot;
>>  }
>>  
>> +  if (SSA_NAME_IS_DEFAULT_DEF (vuse)
>  && data->partial_defs.is_empty ())
>
> ^^ do this check early

The check is actually done right at the beginning of the function
already so I simply removed it.

>
>> +{
>> +  HOST_WIDE_INT offset, size;
>> +  tree v = NULL_TREE;
>  tree base = ao_ref_base (op);
>  if ((TREE_CODE (base) == PARM_DECL
>   || TREE_CODE (base) == MEM_REF)
>
> handle both kind of bases with ...
>
>> +  && op->offset.is_constant (&offset)
>> +  && op->size.is_constant (&size)
>> +  && op->max_size_known_p ()
>> +  && known_eq (op->size, op->max_size))
>
> ^^^ these preconditions (would have been missing in the MEM_REF branch
> before)

I missed that call to ao_ref_base fills in these fields - and in the
pointer case that they are not filled in without it.  I hope the patch
below is the simplified version you wanted.

The patch passed bootstrap and testing and also LTO bootstrap on
x86_64-linux.

Thanks,

Martin



PRs 68930 and 92497 show that when IPA-CP figures out constants in
aggregate parameters or when passed by reference but the loads happen
in an inlined function the information is lost.  This happens even
when the inlined function itself was known to have - or even cloned to
have - such constants in incoming parameters because the transform
phase of IPA passes is not run on them.  See discussion in the bugs
for reasons why.

Honza suggested that we can plug the results of IPA-CP analysis into
value numbering, so that FRE can figure out that some loads fetch
known constants.  This is what this patch attempts to do.  The patch
does not attempt to populate partial_defs with information from
IPA-CP, this can be hopefully added as a follow-up.

gcc/ChangeLog:

2023-08-11  Martin Jambor  

PR ipa/68930
PR ipa/92497
* ipa-prop.h (ipcp_get_aggregate_const): Declare.
* ipa-prop.cc (ipcp_get_aggregate_const): New function.
(ipcp_transform_function): Do not deallocate transformation info.
* tree-ssa-sccvn.cc: Include alloc-pool.h, symbol-summary.h and
ipa-prop.h.
(vn_reference_lookup_2): When hitting default-def vuse, query
IPA-CP transformation info for any known constants.

gcc/testsuite/ChangeLog:

2023-06-07  Martin Jambor  

PR ipa/68930
PR ipa/92497
* gcc.dg/ipa/pr92497-1.c: New test.
* gcc.dg/ipa/pr92497-2.c: Likewise.
---
 gcc/ipa-prop.cc  | 33 +++
 gcc/ipa-prop.h   |  3 +++
 gcc/testsuite/gcc.dg/ipa/pr92497-1.c | 26 +
 gcc/testsuite/gcc.dg/ipa/pr92497-2.c | 26 +
 gcc/tree-ssa-sccvn.cc| 34 +++-
 5 files changed, 116 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr92497-1.c
 create mode 100644 gcc/testsuite/gcc.dg/ipa/pr92497-2.c

diff --git a/gcc/ipa-prop.cc b/gcc/ipa-prop.cc
index 4f6ed7b89bd..9efaa5cb848 100644
--- a/gcc/ipa-prop.cc
+++ b/gcc/ipa-prop.cc
@@ -5760,6 +5760,34 @@ ipcp_modif_dom_walker::before_dom_children (basic_block 
bb)
   return NULL;
 }
 
+/* If IPA-CP discovered a constant in parameter PARM at OFFSET of a given SIZE
+   - whether passed by reference or not is given by BY_REF - return that
+   constant.  Otherwise return NULL_TREE.  */
+
+tree
+ipcp_get_aggregate_const (struct function *func, tree parm, bool by_ref,
+ HOST_WIDE_INT bit_offset, HOST_WIDE_INT bit_size)
+{
+  cgraph_node *node = cgraph_node::get (func->decl);
+  ipcp_transformation *ts = ipcp_get_transformation_summary (node);
+
+  if (!ts || !ts->m_agg_values)
+return NULL_TREE;
+
+  int index = ts->get_param_index (func->decl, parm);
+  if (index < 0)
+return NULL_TREE;
+
+  ipa_argagg_value_list avl (ts);
+  unsigned unit_offset = bit_offset / BITS_PER_UNIT;
+  tree v = avl.get_value (index, unit_offset, by_ref);
+  if (!v
+  || maybe_ne (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (v))), bit_size))
+return NULL_TREE;
+
+  return v;
+}
+
 /* Return true if we have recorded VALUE and MASK about PARM.
Set VALUE and MASk

Re: [PATCH v4 1/8] libcpp: Add LC_GEN linemaps to support in-memory buffers

2023-08-13 Thread Lewis Hyatt via Gcc-patches
On Fri, Aug 11, 2023 at 06:45:31PM -0400, David Malcolm wrote:
> On Wed, 2023-08-09 at 18:14 -0400, Lewis Hyatt wrote:
> 
> Hi Lewis, thanks for the patch...
> 
> > Add a new linemap reason LC_GEN which enables encoding the location of data
> > that was generated during compilation and does not appear in any source 
> > file.
> > There could be many use cases, such as, for instance, referring to the 
> > content
> > of builtin macros (not yet implemented, but an easy lift after this one.) 
> > The
> > first intended application is to create a place to store the input to a
> > _Pragma directive, so that proper locations can be assigned to those
> > tokens. This will be done in a subsequent commit.
> > 
> > The TO_FILE member of struct line_map_ordinary has been changed to a union
> > named SRC which can be either a file name, or a pointer to a line_map_data
> > struct describing the data. There is no space overhead added to the line
> > maps data structures.
> > 
> > Outside libcpp, this patch includes only the minimal changes implied by the
> > adjustment from TO_FILE to SRC in struct line_map_ordinary. Subsequent
> > patches will implement the new functionality.
> > 
> > libcpp/ChangeLog:
> > 
> > * include/line-map.h (enum lc_reason): Add LC_GEN.
> > (struct line_map_data): New struct.
> > (struct line_map_ordinary): Change TO_FILE from a char* to a union,
> > and rename to SRC.
> > (class source_id): New class.
> > (ORDINARY_MAP_GENERATED_DATA_P): New function.
> > (ORDINARY_MAP_GENERATED_DATA): New function.
> > (ORDINARY_MAP_GENERATED_DATA_LEN): New function.
> > (ORDINARY_MAP_SOURCE_ID): New function.
> > (ORDINARY_MAPS_SAME_FILE_P): New function.
> > (ORDINARY_MAP_CONTAINING_FILE_NAME): Declare.
> > (LINEMAP_FILE): Adapt to struct line_map_ordinary change.
> > (linemap_get_file_highest_location): Likewise.
> > * line-map.cc (source_id::operator==): New function.
> > (ORDINARY_MAP_CONTAINING_FILE_NAME): New function.
> > (linemap_add): Support creating LC_GEN maps.
> > (linemap_line_start): Support LC_GEN maps.
> > (linemap_check_files_exited): Likewise.
> > (linemap_position_for_loc_and_offset): Likewise.
> > (linemap_get_expansion_filename): Likewise.
> > (linemap_dump): Likewise.
> > (linemap_dump_location): Likewise.
> > (linemap_get_file_highest_location): Likewise.
> > * directives.cc (_cpp_do_file_change): Likewise.
> > 
> > gcc/c-family/ChangeLog:
> > 
> > * c-common.cc (try_to_locate_new_include_insertion_point): Recognize
> > and ignore LC_GEN maps.
> > 
> > gcc/cp/ChangeLog:
> > 
> > * module.cc (module_state::write_ordinary_maps): Recognize and
> > ignore LC_GEN maps, and adapt to interface change in struct
> > line_map_ordinary.
> > (module_state::read_ordinary_maps): Likewise.
> > 
> > gcc/ChangeLog:
> > 
> > * diagnostic-show-locus.cc (compatible_locations_p): Adapt to
> > interface change in struct line_map_ordinary.
> > * input.cc (special_fname_generated): New function.
> > (dump_location_info): Support LC_GEN maps.
> > (get_substring_ranges_for_loc): Adapt to interface change in struct
> > line_map_ordinary.
> > * input.h (special_fname_generated): Declare.
> > 
> > gcc/go/ChangeLog:
> > 
> > * go-linemap.cc (Gcc_linemap::to_string): Recognize and ignore
> > LC_GEN maps.
> > ---
> >  gcc/c-family/c-common.cc |  11 ++-
> >  gcc/cp/module.cc |   8 +-
> >  gcc/diagnostic-show-locus.cc |   2 +-
> >  gcc/go/go-linemap.cc |   3 +-
> >  gcc/input.cc |  27 +-
> >  gcc/input.h  |   1 +
> >  libcpp/directives.cc |   4 +-
> >  libcpp/include/line-map.h    | 144 
> >  libcpp/line-map.cc   | 181 +--
> >  9 files changed, 299 insertions(+), 82 deletions(-)
> 
> [...snip...]
> 
> > 
> > diff --git a/gcc/diagnostic-show-locus.cc b/gcc/diagnostic-show-locus.cc
> > index 0514815b51f..a2aa6b4e0b5 100644
> > --- a/gcc/diagnostic-show-locus.cc
> > +++ b/gcc/diagnostic-show-locus.cc
> > @@ -998,7 +998,7 @@ compatible_locations_p (location_t loc_a, location_t 
> > loc_b)
> >  are in the same file.  */
> >    const line_map_ordinary *ord_map_a = linemap_check_ordinary (map_a);
> >    const line_map_ordinary *ord_map_b = linemap_check_ordinary (map_b);
> > -  return ord_map_a->to_file == ord_map_b->to_file;
> > +  return ORDINARY_MAPS_SAME_FILE_P (ord_map_a, ord_map_b);
> 
> My first thought here was: are buffers supported here, or does it have
> to be a file?
> 
> It turns out that ORDINARY_MAPS_SAME_FILE_P works on both files and
> buffers.
> 
> This suggests that it would be better named as
> ORDINARY_MAPS_SAME_SOURCE_ID_P, but note the com

Re: [RFC PATCH 0/2] RISC-V: __builtin_riscv_pause for all environment

2023-08-13 Thread Andrew Waterman via Gcc-patches
On Sun, Aug 13, 2023 at 12:53 PM Philipp Tomsich
 wrote:
>
> On Sat, 12 Aug 2023 at 01:31, Jeff Law via Gcc-patches
>  wrote:
> >
> >
> >
> > On 8/9/23 16:39, Tsukasa OI wrote:
> > > On 2023/08/10 5:05, Jeff Law wrote:
> >
> > >> I'd tend to think we do not want to expose the intrinsic unless the
> > >> right extensions are enabled -- even though the encoding is a no-op and
> > >> we could emit it as a .insn.
> > >
> > > I think that makes sense.  The only reason I implemented the
> > > no-'Zihintpause' version is because GCC 13 implemented the built-in
> > > unconditionally.  If the compatibility breakage is considered minimum (I
> > > don't know, though), I'm ready to submit 'Zihintpause'-only version of
> > > this patch set.
> > While it's a compatibility break I don't think we have a need to
> > preserve this kind of compatibility.  I suspect anyone using
> > __builtin_riscv_pause was probably already turning on Zihintpause and if
> > they weren't they should have been :-0
> >
> >
> > I'm sure we'll kick this around in the Tuesday meeting and hopefully
> > make a decision about the desired direction.  You're obviously welcome
> > to join if you're inclined.  Let me know if you need an invite.
>
> The original discussion (and I believe that Andrew was the decisive
> voice in the end) came to the conclusion that—given that pause is a
> true hint—it could always be enabled.

I continue to think that, since it's semantically valid to execute a
HINT on any implementation, there's little utility in ever rejecting
the HINT builtins, or in rejecting explicit HINTs in asm, irrespective
of -march.  But ultimately it isn't a big deal either way.

> We had originally expected to enable it only if Zihintpause was part
> of the target architecture, but viewing it as "just a name for an
> already existing pure hint" also made sense.
> Note that on systems that don't implement Zihintpause, the hint is
> guaranteed to not have an architectural effect.
>
> That said, I don't really have a strong leaning one way or another.
> Philipp.


[PATCH v2 1/2] libstdc++: Implement more maintainable <version> header

2023-08-13 Thread Arsen Arsenović via Gcc-patches
This commit replaces the ad-hoc logic in <version> with an AutoGen
database that (mostly) declaratively generates a version.h bit which
combines all of the FTM logic across all headers together.

This generated header defines macros of the form __glibcxx_foo,
equivalent to their __cpp_lib_foo variants, according to rules specified
in version.def and, optionally, if __glibcxx_want_foo or
__glibcxx_want_all are defined, also defines __cpp_lib_foo forms with
the same definition.

libstdc++-v3/ChangeLog:

* include/Makefile.am (bits_freestanding): Add version.h.
(allcreated): Add version.h.
(${bits_srcdir}/version.h): New rule.  Regenerates
version.h out of version.{def,tpl}.
* include/Makefile.in: Regenerate.
* include/bits/version.def: New file.  Declares a list of
all feature test macros, their values and their preconditions.
* include/bits/version.tpl: New file.  Turns version.def
into a sequence of #if blocks.
* include/bits/version.h: New file.  Generated from
version.def.
* include/std/version: Replace with a __glibcxx_want_all define
and bits/version.h include.
---
This patchset is a rebase of
https://inbox.sourceware.org/libstdc++/20230429101640.1697750-1-ar...@aarsen.me/

... passing the same two checks (difall / vercmp) I wrote for the first
pass.  Testsuite runs are still pending.

Changes in this revision:
- Replace the ${bits_srcdir}/version.h rule with a update-version phony,
- Add the new __cpp_lib_chrono value,
- Add __cpp_lib_{ranges_{contains,find_last,fold,iota}},
- Add comments to various replaced conditions which summarize their
  condition,
- Correct a few minor errors spotted in review

OK for trunk (if those testsuite runs end up clean)?

 libstdc++-v3/include/Makefile.am  |   10 +-
 libstdc++-v3/include/Makefile.in  |   10 +-
 libstdc++-v3/include/bits/version.def | 1597 
 libstdc++-v3/include/bits/version.h   | 1942 +
 libstdc++-v3/include/bits/version.tpl |  210 +++
 libstdc++-v3/include/std/version  |  350 +
 6 files changed, 3770 insertions(+), 349 deletions(-)
 create mode 100644 libstdc++-v3/include/bits/version.def
 create mode 100644 libstdc++-v3/include/bits/version.h
 create mode 100644 libstdc++-v3/include/bits/version.tpl

diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index a880e8ee227..9c71c75393a 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -154,6 +154,7 @@ bits_freestanding = \
${bits_srcdir}/stl_raw_storage_iter.h \
${bits_srcdir}/stl_relops.h \
${bits_srcdir}/stl_uninitialized.h \
+   ${bits_srcdir}/version.h \
${bits_srcdir}/string_view.tcc \
${bits_srcdir}/uniform_int_dist.h \
${bits_srcdir}/unique_ptr.h \
@@ -1113,7 +1114,8 @@ allcreated = \
${host_builddir}/c++config.h \
${host_builddir}/largefile-config.h \
${thread_host_headers} \
-   ${pch_build}
+   ${pch_build} \
+   ${bits_srcdir}/version.h
 
 # Here are the rules for building the headers
 all-local: ${allstamped} ${allcreated}
@@ -1463,6 +1465,12 @@ ${pch3_output}: ${pch3_source} ${pch2_output}
-mkdir -p ${pch3_output_builddir}
$(CXX) $(PCHFLAGS) $(AM_CPPFLAGS) -O2 -g ${pch3_source} -o $@
 
+# AutoGen .
+.PHONY: update-version
+update-version:
+   cd ${bits_srcdir} && \
+   autogen version.def
+
 # The real deal.
 install-data-local: install-headers
 install-headers:
diff --git a/libstdc++-v3/include/Makefile.in b/libstdc++-v3/include/Makefile.in
index 0ff875b280b..f5b04d3fe8a 100644
--- a/libstdc++-v3/include/Makefile.in
+++ b/libstdc++-v3/include/Makefile.in
@@ -509,6 +509,7 @@ bits_freestanding = \
${bits_srcdir}/stl_raw_storage_iter.h \
${bits_srcdir}/stl_relops.h \
${bits_srcdir}/stl_uninitialized.h \
+   ${bits_srcdir}/version.h \
${bits_srcdir}/string_view.tcc \
${bits_srcdir}/uniform_int_dist.h \
${bits_srcdir}/unique_ptr.h \
@@ -1441,7 +1442,8 @@ allcreated = \
${host_builddir}/c++config.h \
${host_builddir}/largefile-config.h \
${thread_host_headers} \
-   ${pch_build}
+   ${pch_build} \
+   ${bits_srcdir}/version.h
 
 
 # Host includes for threads
@@ -1937,6 +1939,12 @@ ${pch3_output}: ${pch3_source} ${pch2_output}
-mkdir -p ${pch3_output_builddir}
$(CXX) $(PCHFLAGS) $(AM_CPPFLAGS) -O2 -g ${pch3_source} -o $@
 
+# AutoGen .
+${bits_srcdir}/version.h: ${bits_srcdir}/version.def \
+   ${bits_srcdir}/version.tpl
+   cd $(@D) && \
+   autogen version.def
+
 # The real deal.
 install-data-local: install-headers
 install-headers:
diff --git a/libstdc++-v3/include/bits/version.def 
b/libstdc++-v3/include/bits/version.def
new file mode 100644
index 000..e63715e17e7
--- /dev/null
+++ b/libstdc++-v3/include/bits/version.de

Re: [RFC PATCH 0/2] RISC-V: __builtin_riscv_pause for all environment

2023-08-13 Thread Philipp Tomsich
On Sat, 12 Aug 2023 at 01:31, Jeff Law via Gcc-patches
 wrote:
>
>
>
> On 8/9/23 16:39, Tsukasa OI wrote:
> > On 2023/08/10 5:05, Jeff Law wrote:
>
> >> I'd tend to think we do not want to expose the intrinsic unless the
> >> right extensions are enabled -- even though the encoding is a no-op and
> >> we could emit it as a .insn.
> >
> > I think that makes sense.  The only reason I implemented the
> > no-'Zihintpause' version is because GCC 13 implemented the built-in
> > unconditionally.  If the compatibility breakage is considered minimum (I
> > don't know, though), I'm ready to submit 'Zihintpause'-only version of
> > this patch set.
> While it's a compatibility break I don't think we have a need to
> preserve this kind of compatibility.  I suspect anyone using
> __builtin_riscv_pause was probably already turning on Zihintpause and if
> they weren't they should have been :-0
>
>
> I'm sure we'll kick this around in the Tuesday meeting and hopefully
> make a decision about the desired direction.  You're obviously welcome
> to join if you're inclined.  Let me know if you need an invite.

The original discussion (and I believe that Andrew was the decisive
voice in the end) came to the conclusion that—given that pause is a
true hint—it could always be enabled.
We had originally expected to enable it only if Zihintpause was part
of the target architecture, but viewing it as "just a name for an
already existing pure hint" also made sense.
Note that on systems that don't implement Zihintpause, the hint is
guaranteed to not have an architectural effect.

That said, I don't really have a strong leaning one way or another.
Philipp.


Re: [PATCH] sso-string@gnu-versioned-namespace [PR83077]

2023-08-13 Thread François Dumont via Gcc-patches
Here is another version with enhanced sizeof/alignof static_assert in 
string-inst.cc for the std::__cow_string definition from <stdexcept>. 
The assertions in cow-stdexcept.cc are now checking the definition which 
is in the same file.


On 13/08/2023 15:27, François Dumont wrote:


Here is the fixed patch tested in all 3 modes:

- _GLIBCXX_USE_DUAL_ABI

- !_GLIBCXX_USE_DUAL_ABI && !_GLIBCXX_USE_CXX11_ABI

- !_GLIBCXX_USE_DUAL_ABI && _GLIBCXX_USE_CXX11_ABI

I don't know what you have in mind for the change below but I wanted 
to let you know that I tried to put COW std::basic_string into a 
nested __cow namespace when _GLIBCXX_USE_CXX11_ABI. But it had more 
impact on string-inst.cc so I preferred the macro substitution approach.


There are some tests failing when !_GLIBCXX_USE_CXX11_ABI that are 
unrelated with my changes. I'll propose fixes in coming days.


    libstdc++: [_GLIBCXX_INLINE_VERSION] Use cxx11 abi [PR83077]

    Use cxx11 abi when activating versioned namespace mode. To do support
    a new configuration mode where !_GLIBCXX_USE_DUAL_ABI and 
_GLIBCXX_USE_CXX11_ABI.


    The main change is that std::__cow_string is now defined whenever 
_GLIBCXX_USE_DUAL_ABI
    or _GLIBCXX_USE_CXX11_ABI is true. Implementation is using 
available std::string in

    case of dual abi and a subset of it when it's not.

    On the other side std::__sso_string is defined only when 
_GLIBCXX_USE_DUAL_ABI is true
    and _GLIBCXX_USE_CXX11_ABI is false. Meaning that 
std::__sso_string is a typedef for the
    cow std::string implementation when dual abi is disabled and cow 
string is being used.


    libstdcxx-v3/ChangeLog:

    PR libstdc++/83077
    * acinclude.m4 [GLIBCXX_ENABLE_LIBSTDCXX_DUAL_ABI]: 
Default to "new" libstdcxx abi.
    * config/locale/dragonfly/monetary_members.cc 
[!_GLIBCXX_USE_DUAL_ABI]: Define money_base

    members.
    * config/locale/generic/monetary_members.cc 
[!_GLIBCXX_USE_DUAL_ABI]: Likewise.
    * config/locale/gnu/monetary_members.cc 
[!_GLIBCXX_USE_DUAL_ABI]: Likewise.

    * config/locale/gnu/numeric_members.cc
    [!_GLIBCXX_USE_DUAL_ABI](__narrow_multibyte_chars): Define.
    * configure: Regenerate.
    * include/bits/c++config
    [_GLIBCXX_INLINE_VERSION](_GLIBCXX_NAMESPACE_CXX11, 
_GLIBCXX_BEGIN_NAMESPACE_CXX11):

    Define empty.
[_GLIBCXX_INLINE_VERSION](_GLIBCXX_END_NAMESPACE_CXX11, 
_GLIBCXX_DEFAULT_ABI_TAG):

    Likewise.
    * include/bits/cow_string.h [!_GLIBCXX_USE_CXX11_ABI]: 
Define a light version of COW

    basic_string as __std_cow_string for use in stdexcept.
    * include/std/stdexcept [_GLIBCXX_USE_CXX11_ABI]: Define 
__cow_string.

    (__cow_string(const char*)): New.
    (__cow_string::c_str()): New.
    * python/libstdcxx/v6/printers.py 
(StdStringPrinter::__init__): Set self.new_string to True

    when std::__8::basic_string type is found.
    * src/Makefile.am 
[ENABLE_SYMVERS_GNU_NAMESPACE](ldbl_alt128_compat_sources): Define empty.

    * src/Makefile.in: Regenerate.
    * src/c++11/Makefile.am (cxx11_abi_sources): Rename into...
    (dual_abi_sources): ...this. Also move cow-local_init.cc, 
cxx11-hash_tr1.cc,

    cxx11-ios_failure.cc entries to...
    (sources): ...this.
    (extra_string_inst_sources): Move cow-fstream-inst.cc, 
cow-sstream-inst.cc, cow-string-inst.cc,
    cow-string-io-inst.cc, cow-wtring-inst.cc, 
cow-wstring-io-inst.cc, cxx11-locale-inst.cc,

    cxx11-wlocale-inst.cc entries to...
    (inst_sources): ...this.
    * src/c++11/Makefile.in: Regenerate.
    * src/c++11/cow-fstream-inst.cc [_GLIBCXX_USE_CXX11_ABI]: 
Skip definitions.
    * src/c++11/cow-locale_init.cc [_GLIBCXX_USE_CXX11_ABI]: 
Skip definitions.
    * src/c++11/cow-sstream-inst.cc [_GLIBCXX_USE_CXX11_ABI]: 
Skip definitions.
    * src/c++11/cow-stdexcept.cc [_GLIBCXX_USE_CXX11_ABI]: 
Include .
    [_GLIBCXX_USE_DUAL_ABI || 
_GLIBCXX_USE_CXX11_ABI](__cow_string): Redefine before
    including . Define 
_GLIBCXX_DEFINE_STDEXCEPT_INSTANTIATIONS so that

    __cow_string definition in  is skipped.
    [_GLIBCXX_USE_CXX11_ABI]: Skip Transaction Memory TS 
definitions.

    Move static_assert to check std::_cow_string abi layout to...
    * src/c++11/string-inst.cc: ...here.
    (_GLIBCXX_DEFINING_CXX11_ABI_INSTANTIATIONS): Define 
following _GLIBCXX_USE_CXX11_ABI

    value.
    [_GLIBCXX_USE_CXX11_ABI && 
!_GLIBCXX_DEFINING_CXX11_ABI_INSTANTIATIONS]:
    Define _GLIBCXX_DEFINING_COW_STRING_INSTANTIATIONS. 
Include .
    Define basic_string as __std_cow_string for the current 
translation unit.
    * src/c++11/cow-string-inst.cc [_GLIBCXX_USE_CXX11_ABI]: 
Skip definitions.
    * src/c++11/cow

Re: [PATCH] gcc/reload.h: Change type of x_spill_indirect_levels

2023-08-13 Thread Eddy Young Tie Yang
On Sun, Aug 13, 2023 at 12:24:28PM -0700, Andrew Pinski wrote:
> Date: Sun, 13 Aug 2023 12:24:28 -0700
> From: Andrew Pinski 
> To: Eddy Young 
> Cc: gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH] gcc/reload.h: Change type of x_spill_indirect_levels
> 
> On Sun, Aug 13, 2023 at 12:20 PM Eddy Young  wrote:
> >
> > This patch changes the type of `x_spill_indirect_levels` member of
> > `struct target_reload` from `bool` to `unsigned char`.
> >
> > Without this change, the build of esp-open-sdk fails with GCC 11 and
> > above.
> 
> This was done back in d57c99458933 for GCC 6.
> https://gcc.gnu.org/r6-535-gd57c99458933a2 .
> Why are you posting a patch against a branch which has not been
> supported for years now?
> 
> Thanks,
> Andrew Pinski

Hi, Andrew,

Project esp-open-sdk (https://github.com/pfalcon/esp-open-sdk) builds
GCC 4.8.5 from source to install its toolchain. With GCC 11 and newer,
the build fails without this patch.

esp-open-sdk is used for development on the ESP8266 MCU and boards based
on it.

I hope this helps.

Cheers,
Eddy


Re: [PATCH] gcc/reload.h: Change type of x_spill_indirect_levels

2023-08-13 Thread Andrew Pinski via Gcc-patches
On Sun, Aug 13, 2023 at 12:20 PM Eddy Young  wrote:
>
> This patch changes the type of `x_spill_indirect_levels` member of
> `struct target_reload` from `bool` to `unsigned char`.
>
> Without this change, the build of esp-open-sdk fails with GCC 11 and
> above.

This was done back in d57c99458933 for GCC 6.
https://gcc.gnu.org/r6-535-gd57c99458933a2 .
Why are you posting a patch against a branch which has not been
supported for years now?

Thanks,
Andrew Pinski


>
> (Please bear with me, this is my first patch submission.)
>
> Cheers,
> Eddy
>
> ---
>  ChangeLog| 5 +
>  gcc/reload.h | 2 +-
>  2 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/ChangeLog b/ChangeLog
> index 3dd1ce544af..442aa9192a9 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,8 @@
> +2015-08-13 Eddy Young 
> +
> +   * gcc/reload.h: Change type of x_spill_indirect_levels of struct
> +   target_reload to support C++17 build.
> +
>  2015-06-23  Release Manager
>
> * GCC 4.8.5 released.
> diff --git a/gcc/reload.h b/gcc/reload.h
> index 7a13ad30e82..1e94d8ea93b 100644
> --- a/gcc/reload.h
> +++ b/gcc/reload.h
> @@ -166,7 +166,7 @@ struct target_reload {
>   value indicates the level of indirect addressing supported, e.g., two
>   means that (MEM (MEM (REG n))) is also valid if (REG n) does not get
>   a hard register.  */
> -  bool x_spill_indirect_levels;
> +  unsigned char x_spill_indirect_levels;
>
>/* True if caller-save has been reinitialized.  */
>bool x_caller_save_initialized_p;
> --
> 2.39.2
>


[PATCH] gcc/reload.h: Change type of x_spill_indirect_levels

2023-08-13 Thread Eddy Young
This patch changes the type of `x_spill_indirect_levels` member of
`struct target_reload` from `bool` to `unsigned char`.

Without this change, the build of esp-open-sdk fails with GCC 11 and
above.

(Please bear with me, this is my first patch submission.)

Cheers,
Eddy

---
 ChangeLog| 5 +
 gcc/reload.h | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index 3dd1ce544af..442aa9192a9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-08-13 Eddy Young 
+
+   * gcc/reload.h: Change type of x_spill_indirect_levels of struct
+   target_reload to support C++17 build.
+
 2015-06-23  Release Manager
 
* GCC 4.8.5 released.
diff --git a/gcc/reload.h b/gcc/reload.h
index 7a13ad30e82..1e94d8ea93b 100644
--- a/gcc/reload.h
+++ b/gcc/reload.h
@@ -166,7 +166,7 @@ struct target_reload {
  value indicates the level of indirect addressing supported, e.g., two
  means that (MEM (MEM (REG n))) is also valid if (REG n) does not get
  a hard register.  */
-  bool x_spill_indirect_levels;
+  unsigned char x_spill_indirect_levels;
 
   /* True if caller-save has been reinitialized.  */
   bool x_caller_save_initialized_p;
-- 
2.39.2



Re:

2023-08-13 Thread Andrew Pinski via Gcc-patches
On Sun, Aug 13, 2023 at 12:05 PM Eddy Young Tie Yang
 wrote:
>
> From d57ac4f9a095a2f616863efd524ac2d87276becb Mon Sep 17 00:00:00 2001
> From: Eddy Young 
> Date: Sun, 13 Aug 2023 19:59:12 +0100
> Subject: [PATCH] gcc/reload.h: Change type of x_spill_indirect_levels
>
> ---
>  ChangeLog| 5 +
>  gcc/reload.h | 2 +-
>  2 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/ChangeLog b/ChangeLog
> index 3dd1ce544af..442aa9192a9 100644
> --- a/ChangeLog
> +++ b/ChangeLog
> @@ -1,3 +1,8 @@
> +2015-08-13 Eddy Young 
> +
> +   * gcc/reload.h: Change type of x_spill_indirect_levels of struct
> +   target_reload to support C++17 build.

This was done back in d57c99458933 for GCC 6.
https://gcc.gnu.org/r6-535-gd57c99458933a2 .
Why are you posting a patch against a branch which has not been
supported for years now?

Thanks,
Andrew Pinski

> +
>  2015-06-23  Release Manager
>
> * GCC 4.8.5 released.
> diff --git a/gcc/reload.h b/gcc/reload.h
> index 7a13ad30e82..1e94d8ea93b 100644
> --- a/gcc/reload.h
> +++ b/gcc/reload.h
> @@ -166,7 +166,7 @@ struct target_reload {
>   value indicates the level of indirect addressing supported, e.g., two
>   means that (MEM (MEM (REG n))) is also valid if (REG n) does not get
>   a hard register.  */
> -  bool x_spill_indirect_levels;
> +  unsigned char x_spill_indirect_levels;
>
>/* True if caller-save has been reinitialized.  */
>bool x_caller_save_initialized_p;
> --
> 2.39.2
>


[no subject]

2023-08-13 Thread Eddy Young Tie Yang
>From d57ac4f9a095a2f616863efd524ac2d87276becb Mon Sep 17 00:00:00 2001
From: Eddy Young 
Date: Sun, 13 Aug 2023 19:59:12 +0100
Subject: [PATCH] gcc/reload.h: Change type of x_spill_indirect_levels

---
 ChangeLog| 5 +
 gcc/reload.h | 2 +-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/ChangeLog b/ChangeLog
index 3dd1ce544af..442aa9192a9 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2015-08-13 Eddy Young 
+
+   * gcc/reload.h: Change type of x_spill_indirect_levels of struct
+   target_reload to support C++17 build.
+
 2015-06-23  Release Manager
 
* GCC 4.8.5 released.
diff --git a/gcc/reload.h b/gcc/reload.h
index 7a13ad30e82..1e94d8ea93b 100644
--- a/gcc/reload.h
+++ b/gcc/reload.h
@@ -166,7 +166,7 @@ struct target_reload {
  value indicates the level of indirect addressing supported, e.g., two
  means that (MEM (MEM (REG n))) is also valid if (REG n) does not get
  a hard register.  */
-  bool x_spill_indirect_levels;
+  unsigned char x_spill_indirect_levels;
 
   /* True if caller-save has been reinitialized.  */
   bool x_caller_save_initialized_p;
-- 
2.39.2



[pushed] modula-2, plugin: Fix Darwin bootstrap issues.

2023-08-13 Thread Iain Sandoe via Gcc-patches
Tested on x86_64 Darwin and x86_64 Linux, pushed to master, will
also backport to 13 if it is needed there, thanks
Iain

--- 8< ---

This corrects some typos in the suffix of the m2rte plugin that
led to a bootstrap failure on Darwin, where the suffix is not '.so'.

On some versions of Darwin, the linker complains if libSystem is not
linked, so we disable all the default libs, but add libc back.

Signed-off-by: Iain Sandoe 

gcc/m2/ChangeLog:

* Make-lang.in: Update suffix spellings to use 'soext'.
Add libc to the plugin link.
---
 gcc/m2/Make-lang.in | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/m2/Make-lang.in b/gcc/m2/Make-lang.in
index 8c6bac229e7..ca1581fe643 100644
--- a/gcc/m2/Make-lang.in
+++ b/gcc/m2/Make-lang.in
@@ -39,6 +39,7 @@ else
   PLUGINLDFLAGS = -Wl,-undefined,dynamic_lookup
   PLUGINLDFLAGS += -Wl,-install_name,m2rte$(soext)
   PLUGINLDFLAGS += -nodefaultlibs
+  PLUGINLDFLAGS += -lc
 endif
 
 TEXISRC = $(srcdir)/doc/gm2.texi \
@@ -91,7 +92,7 @@ PGE=m2/pge$(exeext)
 SRC_PREFIX=G
 
 ifeq ($(enable_plugin),yes)
-M2RTE_PLUGIN_SO=plugin/m2rte$(exeext).so
+M2RTE_PLUGIN_SO=plugin/m2rte$(soext)
 endif
 
 m2/gm2spec.o: $(srcdir)/m2/gm2spec.cc $(SYSTEM_H) $(GCC_H) $(CONFIG_H) \
@@ -420,7 +421,7 @@ plugin/m2rte$(soext): $(srcdir)/m2/plugin/m2rte.cc 
$(GCC_HEADER_DEPENDENCIES_FOR
 else
 m2.install-plugin:
 
-plugin/m2rte$(exeext).so:
+plugin/m2rte$(soext):
 
 endif
 
-- 
2.39.2 (Apple Git-143)



Re: [PATCH] sso-string@gnu-versioned-namespace [PR83077]

2023-08-13 Thread François Dumont via Gcc-patches

Here is the fixed patch tested in all 3 modes:

- _GLIBCXX_USE_DUAL_ABI

- !_GLIBCXX_USE_DUAL_ABI && !_GLIBCXX_USE_CXX11_ABI

- !_GLIBCXX_USE_DUAL_ABI && _GLIBCXX_USE_CXX11_ABI

I don't know what you have in mind for the change below but I wanted to 
let you know that I tried to put COW std::basic_string into a nested 
__cow namespace when _GLIBCXX_USE_CXX11_ABI. But it had more impact on 
string-inst.cc so I preferred the macro substitution approach.


There are some tests failing when !_GLIBCXX_USE_CXX11_ABI that are 
unrelated to my changes. I'll propose fixes in the coming days.


    libstdc++: [_GLIBCXX_INLINE_VERSION] Use cxx11 abi [PR83077]

    Use cxx11 abi when activating versioned namespace mode. To do so, support
    a new configuration mode where !_GLIBCXX_USE_DUAL_ABI and 
_GLIBCXX_USE_CXX11_ABI.


    The main change is that std::__cow_string is now defined whenever 
_GLIBCXX_USE_DUAL_ABI
    or _GLIBCXX_USE_CXX11_ABI is true. Implementation is using 
available std::string in

    case of dual abi and a subset of it when it's not.

    On the other side std::__sso_string is defined only when 
_GLIBCXX_USE_DUAL_ABI is true
    and _GLIBCXX_USE_CXX11_ABI is false. Meaning that std::__sso_string 
is a typedef for the
    cow std::string implementation when dual abi is disabled and cow 
string is being used.


    libstdcxx-v3/ChangeLog:

    PR libstdc++/83077
    * acinclude.m4 [GLIBCXX_ENABLE_LIBSTDCXX_DUAL_ABI]: Default 
to "new" libstdcxx abi.
    * config/locale/dragonfly/monetary_members.cc 
[!_GLIBCXX_USE_DUAL_ABI]: Define money_base

    members.
    * config/locale/generic/monetary_members.cc 
[!_GLIBCXX_USE_DUAL_ABI]: Likewise.
    * config/locale/gnu/monetary_members.cc 
[!_GLIBCXX_USE_DUAL_ABI]: Likewise.

    * config/locale/gnu/numeric_members.cc
    [!_GLIBCXX_USE_DUAL_ABI](__narrow_multibyte_chars): Define.
    * configure: Regenerate.
    * include/bits/c++config
    [_GLIBCXX_INLINE_VERSION](_GLIBCXX_NAMESPACE_CXX11, 
_GLIBCXX_BEGIN_NAMESPACE_CXX11):

    Define empty.
[_GLIBCXX_INLINE_VERSION](_GLIBCXX_END_NAMESPACE_CXX11, 
_GLIBCXX_DEFAULT_ABI_TAG):

    Likewise.
    * include/bits/cow_string.h [!_GLIBCXX_USE_CXX11_ABI]: 
Define a light version of COW

    basic_string as __std_cow_string for use in stdexcept.
    * include/std/stdexcept [_GLIBCXX_USE_CXX11_ABI]: Define 
__cow_string.

    (__cow_string(const char*)): New.
    (__cow_string::c_str()): New.
    * python/libstdcxx/v6/printers.py 
(StdStringPrinter::__init__): Set self.new_string to True

    when std::__8::basic_string type is found.
    * src/Makefile.am 
[ENABLE_SYMVERS_GNU_NAMESPACE](ldbl_alt128_compat_sources): Define empty.

    * src/Makefile.in: Regenerate.
    * src/c++11/Makefile.am (cxx11_abi_sources): Rename into...
    (dual_abi_sources): ...this. Also move cow-local_init.cc, 
cxx11-hash_tr1.cc,

    cxx11-ios_failure.cc entries to...
    (sources): ...this.
    (extra_string_inst_sources): Move cow-fstream-inst.cc, 
cow-sstream-inst.cc, cow-string-inst.cc,
    cow-string-io-inst.cc, cow-wstring-inst.cc, 
cow-wstring-io-inst.cc, cxx11-locale-inst.cc,

    cxx11-wlocale-inst.cc entries to...
    (inst_sources): ...this.
    * src/c++11/Makefile.in: Regenerate.
    * src/c++11/cow-fstream-inst.cc [_GLIBCXX_USE_CXX11_ABI]: 
Skip definitions.
    * src/c++11/cow-locale_init.cc [_GLIBCXX_USE_CXX11_ABI]: 
Skip definitions.
    * src/c++11/cow-sstream-inst.cc [_GLIBCXX_USE_CXX11_ABI]: 
Skip definitions.
    * src/c++11/cow-stdexcept.cc [_GLIBCXX_USE_CXX11_ABI]: 
Include .
    [_GLIBCXX_USE_DUAL_ABI || 
_GLIBCXX_USE_CXX11_ABI](__cow_string): Redefine before
    including . Define 
_GLIBCXX_DEFINE_STDEXCEPT_INSTANTIATIONS so that

    __cow_string definition in  is skipped.
    [_GLIBCXX_USE_CXX11_ABI]: Skip Transaction Memory TS 
definitions.

    Move static_assert to check std::__cow_string abi layout to...
    * src/c++11/string-inst.cc: ...here.
    (_GLIBCXX_DEFINING_CXX11_ABI_INSTANTIATIONS): Define 
following _GLIBCXX_USE_CXX11_ABI

    value.
    [_GLIBCXX_USE_CXX11_ABI && 
!_GLIBCXX_DEFINING_CXX11_ABI_INSTANTIATIONS]:
    Define _GLIBCXX_DEFINING_COW_STRING_INSTANTIATIONS. Include 
.
    Define basic_string as __std_cow_string for the current 
translation unit.
    * src/c++11/cow-string-inst.cc [_GLIBCXX_USE_CXX11_ABI]: 
Skip definitions.
    * src/c++11/cow-string-io-inst.cc [_GLIBCXX_USE_CXX11_ABI]: 
Skip definitions.
    * src/c++11/cow-wstring-inst.cc [_GLIBCXX_USE_CXX11_ABI]: 
Skip definitions.
    * src/c++11/cow-wstring-io-inst.cc 
[_GLIBCXX_USE_CXX11_ABI]: Skip definitions.
    * src/c++11/cxx1

Re: [RFC] [v2] Extend fold_vec_perm to handle VLA vectors

2023-08-13 Thread Prathamesh Kulkarni via Gcc-patches
On Thu, 10 Aug 2023 at 21:27, Richard Sandiford
 wrote:
>
> Prathamesh Kulkarni  writes:
> >> static bool
> >> is_simple_vla_size (poly_uint64 size)
> >> {
> >>   if (size.is_constant ())
> >> return false;
> >>   for (int i = 1; i < ARRAY_SIZE (size.coeffs); ++i)
> >> if (size[i] != (i <= 1 ? size[0] : 0))
> > Just wondering if this should be (i == 1 ? size[0] : 0) since i is
> > initialized to 1 ?
>
> Both work.  I prefer <= 1 because it doesn't depend on the micro
> optimisation to start at coefficient 1.  In a theoretical 3-indeterminate
> poly_int, we want the first 2 coefficients to be nonzero and the rest to
> be zero.
>
> > IIUC, is_simple_vla_size should return true for polynomials of first
> > degree and having same coeff like 4 + 4x ?
>
> FWIW, poly_int only supports first-degree polynomials at the moment.
> coeffs>2 means there is more than one indeterminate, rather than a
> higher power.
Oh OK, thanks for the clarification.
>
> >>   return false;
> >>   return true;
> >> }
> >>
> >>
> >>   FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
> >> {
> >>   auto nunits = GET_MODE_NUNITS (mode);
> >>   if (!is_simple_vla_size (nunits))
> >> continue;
> >>   if (nunits[0] ...)
> >> test_... (mode);
> >>   ...
> >>
> >> }
> >>
> >> test_vnx4si_v4si and test_v4si_vnx4si look good.  But with the
> >> loop structure above, I think we can apply the test_vnx4si and
> >> test_vnx16qi to more cases.  So the classification isn't the
> >> exact number of elements, but instead a limit.
> >>
> >> I think the nunits[0] conditions for test_vnx4si are as follows
> >> (inspection only, so could be wrong):
> >>
> >> > +/* Test cases where result and input vectors are VNx4SI  */
> >> > +
> >> > +static void
> >> > +test_vnx4si (machine_mode vmode)
> >> > +{
> >> > +  /* Case 1: mask = {0, ...} */
> >> > +  {
> >> > +tree arg0 = build_vec_cst_rand (vmode, 2, 3, 1);
> >> > +tree arg1 = build_vec_cst_rand (vmode, 2, 3, 1);
> >> > +poly_uint64 len = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
> >> > +
> >> > +vec_perm_builder builder (len, 1, 1);
> >> > +builder.quick_push (0);
> >> > +vec_perm_indices sel (builder, 2, len);
> >> > +tree res = fold_vec_perm_cst (TREE_TYPE (arg0), arg0, arg1, sel);
> >> > +
> >> > +tree expected_res[] = { vector_cst_elt (res, 0) };
> > This should be { vector_cst_elt (arg0, 0) }; will fix in next patch.
> >> > +validate_res (1, 1, res, expected_res);
> >> > +  }
> >>
> >> nunits[0] >= 2 (could be all nunits if the inputs had nelts_per_pattern==1,
> >> which I think would be better)
> > IIUC, the vectors that can be used for a particular test should have
> > nunits[0] >= res_npatterns,
> > where res_npatterns is as computed in fold_vec_perm_cst without the
> > canonicalization ?
> > For above test -- res_npatterns = max(2, max (2, 1)) == 2, so we
> > require nunits[0] >= 2 ?
> > Which implies we can use above test for vectors with length 2 + 2x, 4 + 4x, 
> > etc.
>
> Right, that's what I meant.  With the inputs as they stand it has to be
> nunits[0] >= 2.  We need that to form the inputs correctly.  But if the
> inputs instead had nelts_per_pattern == 1, the test would work for all
> nunits.
In the attached patch, I have reordered the tests based on min or max limit.
For tests where sel_npatterns < 3 (ie dup sequence), I have kept input
npatterns = 1,
so we can test more vector modes, and also input npatterns matter only
for stepped sequence in sel
(Since for a dup pattern we don't enforce the constraint of selecting
elements from same input pattern).
Does it look OK ?

For the following tests with input vectors having shape (1, 3)
sel = {0, 1, 2, ...}  // (1, 3)
res = { arg0[0], arg0[1], arg0[2], ... } // (1, 3)

and sel = {len, len + 1, len + 2, ... }  // (1, 3)
res = { arg1[0], arg1[1], arg1[2], ... } // (1, 3)

Although res_npatterns = 1, I suppose these will need to be tested with
vectors with length >= 4 + 4x,
since index 2 can be ambiguous for length 2 + 2x  ?
(In the patch, these are cases 2 and 3 in test_nunits_min_4)

Patch is bootstrapped+tested on aarch64-linux-gnu with and without SVE
and on x86_64-linux-gnu
(although I suppose bootstrapping won't be necessary for changes to unit-tests?)
>
> > Sorry if this sounds like a silly question -- Won't nunits[0] >= 2
> > cover all nunits,
> > since a vector, at a minimum, will contain 2 elements ?
>
> Not necessarily.  VNx1TI makes conceptual sense.  We just don't use it
> currently (although that'll change with SME).  And we do have single-element
> VLS vectors like V1DI and V1DF.
Thanks for the explanation, I wasn't aware of that.

Thanks,
Prathamesh
>
> Thanks,
> Richard
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 7e5494dfd39..5eacb1d147e 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
gimple code, we need to handle GIMPLE tuples as well as their
corresponding tree 

[PATCH] Add clang's invalid-noreturn warning flag

2023-08-13 Thread Julian Waters via Gcc-patches
Please review a patch to add clang's invalid-noreturn flag to toggle
noreturn warnings. This patch keeps the old behaviour of always warning on
every noreturn violation, but unlike clang also adds an extra layer of fine
tuning by turning invalid-noreturn into a warning with levels, where level
1 warns about noreturn functions that do return, level 2 warns about
noreturn functions that explicitly have return statements, and level 3,
which is the default to match old behaviour, warns for both instances.

gcc/doc/ChangeLog:

* invoke.texi (-Wno-invalid-noreturn, -Winvalid-noreturn=): Document new
options.

gcc/ChangeLog:

* tree-cfg.cc (pass_warn_function_return::execute): Use new warning option.

gcc/c-family/ChangeLog:

* c.opt (Winvalid-noreturn, Winvalid-noreturn=): New options.

gcc/c/ChangeLog:

* c-typeck.cc (c_finish_return): Use new warning option.
* gimple-parser.cc (c_finish_gimple_return): Likewise.

gcc/cp/ChangeLog:

* coroutines.cc (finish_co_return_stmt): Use new warning option.
* typeck.cc (check_return_expr): Likewise.

 gcc/c-family/c.opt |  8 
 gcc/c/c-typeck.cc  |  9 ++---
 gcc/c/gimple-parser.cc |  9 ++---
 gcc/cp/coroutines.cc   | 11 +++
 gcc/cp/typeck.cc   |  7 +--
 gcc/doc/invoke.texi| 26 ++
 gcc/tree-cfg.cc|  5 -
 7 files changed, 62 insertions(+), 13 deletions(-)


0001-Add-the-invalid-noreturn-warning-to-match-clang.patch
Description: Binary data


[PATCH v1] RISC-V: Support RVV VFWMACC rounding mode intrinsic API

2023-08-13 Thread Pan Li via Gcc-patches
From: Pan Li 

This patch adds support for the rounding mode API for
VFWMACC, as in the samples below.

* __riscv_vfwmacc_vv_f64m2_rm
* __riscv_vfwmacc_vv_f64m2_rm_m
* __riscv_vfwmacc_vf_f64m2_rm
* __riscv_vfwmacc_vf_f64m2_rm_m

Signed-off-by: Pan Li 

gcc/ChangeLog:

* config/riscv/riscv-vector-builtins-bases.cc
(class vfwmacc_frm): New class for vfwmacc frm.
(vfwmacc_frm_obj): New declaration.
(BASE): Ditto.
* config/riscv/riscv-vector-builtins-bases.h: Ditto.
* config/riscv/riscv-vector-builtins-functions.def
(vfwmacc_frm): Function definition for vfwmacc.
* config/riscv/riscv-vector-builtins.cc
(function_expander::use_widen_ternop_insn): Add frm support.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/rvv/base/float-point-fwmacc.c: New test.
---
 .../riscv/riscv-vector-builtins-bases.cc  | 25 ++
 .../riscv/riscv-vector-builtins-bases.h   |  1 +
 .../riscv/riscv-vector-builtins-functions.def |  3 ++
 gcc/config/riscv/riscv-vector-builtins.cc | 22 +++--
 .../riscv/rvv/base/float-point-fwmacc.c   | 47 +++
 5 files changed, 93 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/float-point-fwmacc.c

diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc 
b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index e14e9aa7809..e84d6d1d047 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -539,6 +539,29 @@ public:
   }
 };
 
+/* Implements below instructions for frm
+   - vfwmacc
+*/
+class vfwmacc_frm : public function_base
+{
+public:
+  bool has_rounding_mode_operand_p () const override { return true; }
+
+  bool has_merge_operand_p () const override { return false; }
+
+  rtx expand (function_expander &e) const override
+  {
+if (e.op_info->op == OP_TYPE_vf)
+  return e.use_widen_ternop_insn (
+   code_for_pred_widen_mul_scalar (PLUS, e.vector_mode ()));
+if (e.op_info->op == OP_TYPE_vv)
+  return e.use_widen_ternop_insn (
+   code_for_pred_widen_mul (PLUS, e.vector_mode ()));
+
+gcc_unreachable ();
+  }
+};
+
 /* Implements vrsub.  */
 class vrsub : public function_base
 {
@@ -2315,6 +2338,7 @@ static CONSTEXPR const vfnmadd_frm vfnmadd_frm_obj;
 static CONSTEXPR const vfmsub vfmsub_obj;
 static CONSTEXPR const vfmsub_frm vfmsub_frm_obj;
 static CONSTEXPR const vfwmacc vfwmacc_obj;
+static CONSTEXPR const vfwmacc_frm vfwmacc_frm_obj;
 static CONSTEXPR const vfwnmacc vfwnmacc_obj;
 static CONSTEXPR const vfwmsac vfwmsac_obj;
 static CONSTEXPR const vfwnmsac vfwnmsac_obj;
@@ -2558,6 +2582,7 @@ BASE (vfnmadd_frm)
 BASE (vfmsub)
 BASE (vfmsub_frm)
 BASE (vfwmacc)
+BASE (vfwmacc_frm)
 BASE (vfwnmacc)
 BASE (vfwmsac)
 BASE (vfwnmsac)
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h 
b/gcc/config/riscv/riscv-vector-builtins-bases.h
index e60cebab4ae..acbc7d42fbe 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.h
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.h
@@ -176,6 +176,7 @@ extern const function_base *const vfnmadd_frm;
 extern const function_base *const vfmsub;
 extern const function_base *const vfmsub_frm;
 extern const function_base *const vfwmacc;
+extern const function_base *const vfwmacc_frm;
 extern const function_base *const vfwnmacc;
 extern const function_base *const vfwmsac;
 extern const function_base *const vfwnmsac;
diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def 
b/gcc/config/riscv/riscv-vector-builtins-functions.def
index d75b281eebe..0b73a5bcbc5 100644
--- a/gcc/config/riscv/riscv-vector-builtins-functions.def
+++ b/gcc/config/riscv/riscv-vector-builtins-functions.def
@@ -376,6 +376,9 @@ DEF_RVV_FUNCTION (vfwmsac, alu, full_preds, f_wwfv_ops)
 DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwvv_ops)
 DEF_RVV_FUNCTION (vfwnmsac, alu, full_preds, f_wwfv_ops)
 
+DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwvv_ops)
+DEF_RVV_FUNCTION (vfwmacc_frm, alu_frm, full_preds, f_wwfv_ops)
+
 // 13.8. Vector Floating-Point Square-Root Instruction
 DEF_RVV_FUNCTION (vfsqrt, alu, full_preds, f_v_ops)
 
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc
index abab06c00ed..ad4a9098620 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -3771,17 +3771,29 @@ function_expander::use_widen_ternop_insn (insn_code 
icode)
 add_all_one_mask_operand (mask_mode ());
 
   for (int argno = arg_offset; argno < call_expr_nargs (exp); argno++)
-add_input_operand (argno);
+{
+  if (base->has_rounding_mode_operand_p ()
+ && argno == call_expr_nargs (exp) - 2)
+   {
+ /* Since the rounding mode argument position is not consistent with
+the instruction pattern, we need to skip rounding mode argument
+here.  */
+ continue;
+   }
+  add_inpu