Re: [PATCH] target/108738 - limit STV chain discovery

2023-03-02 Thread Uros Bizjak via Gcc-patches
On Thu, Mar 2, 2023 at 2:28 PM Richard Biener  wrote:
>
> The following puts a hard limit on the inherently quadratic STV chain
> discovery.  Without a limit for the compiler.i testcase in PR26854
> we see at -O2
>
>  machine dep reorg  : 574.45 ( 53%)
>
> with release checking while with the proposed limit it's
>
>  machine dep reorg  :   2.86 (  1%)
>
> Bootstrapped and tested on x86_64-unknown-linux-gnu.
>
> OK?
>
> Thanks,
> Richard.
>
> PR target/108738
> * config/i386/i386.opt (--param x86-stv-max-visits): New param.
> * doc/invoke.texi (--param x86-stv-max-visits): Document it.
> * config/i386/i386-features.h (scalar_chain::max_visits): New.
> (scalar_chain::build): Add bitmap parameter, return boolean.
> (scalar_chain::add_insn): Likewise.
> (scalar_chain::analyze_register_chain): Likewise.
> * config/i386/i386-features.cc (scalar_chain::scalar_chain):
> Initialize max_visits.
> (scalar_chain::analyze_register_chain): When we exhaust
> max_visits, abort.  Also abort when running into any
> disallowed insn.
> (scalar_chain::add_insn): Propagate abort.
> (scalar_chain::build): Likewise.  When aborting amend
> the set of disallowed insn with the insns set.
> (convert_scalars_to_vector): Adjust.  Do not convert aborted
> chains.

LGTM.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-features.cc | 77 +++-
>  gcc/config/i386/i386-features.h  | 10 +++--
>  gcc/config/i386/i386.opt |  4 ++
>  gcc/doc/invoke.texi  |  4 ++
>  4 files changed, 70 insertions(+), 25 deletions(-)
>
> diff --git a/gcc/config/i386/i386-features.cc 
> b/gcc/config/i386/i386-features.cc
> index eff91301009..c09abf8fc20 100644
> --- a/gcc/config/i386/i386-features.cc
> +++ b/gcc/config/i386/i386-features.cc
> @@ -296,6 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, 
> enum machine_mode vmode_)
>
>n_sse_to_integer = 0;
>n_integer_to_sse = 0;
> +
> +  max_visits = x86_stv_max_visits;
>  }
>
>  /* Free chain's data.  */
> @@ -354,10 +356,12 @@ scalar_chain::mark_dual_mode_def (df_ref def)
>  }
>
>  /* Check REF's chain to add new insns into a queue
> -   and find registers requiring conversion.  */
> +   and find registers requiring conversion.  Return true if OK, false
> +   if the analysis was aborted.  */
>
> -void
> -scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
> +bool
> +scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
> + bitmap disallowed)
>  {
>df_link *chain;
>bool mark_def = false;
> @@ -371,6 +375,9 @@ scalar_chain::analyze_register_chain (bitmap candidates, 
> df_ref ref)
>if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
> continue;
>
> +  if (--max_visits == 0)
> +   return false;
> +
>if (!DF_REF_REG_MEM_P (chain->ref))
> {
>   if (bitmap_bit_p (insns, uid))
> @@ -381,6 +388,10 @@ scalar_chain::analyze_register_chain (bitmap candidates, 
> df_ref ref)
>   add_to_queue (uid);
>   continue;
> }
> +
> + /* If we run into parts of an aborted chain discovery abort.  */
> + if (bitmap_bit_p (disallowed, uid))
> +   return false;
> }
>
>if (DF_REF_REG_DEF_P (chain->ref))
> @@ -401,15 +412,19 @@ scalar_chain::analyze_register_chain (bitmap 
> candidates, df_ref ref)
>
>if (mark_def)
>  mark_dual_mode_def (ref);
> +
> +  return true;
>  }
>
> -/* Add instruction into a chain.  */
> +/* Add instruction into a chain.  Return true if OK, false if the search
> +   was aborted.  */
>
> -void
> -scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
> +bool
> +scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
> +   bitmap disallowed)
>  {
>if (!bitmap_set_bit (insns, insn_uid))
> -return;
> +return true;
>
>if (dump_file)
>  fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, 
> chain_id);
> @@ -426,22 +441,27 @@ scalar_chain::add_insn (bitmap candidates, unsigned int 
> insn_uid)
>df_ref ref;
>for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
>  if (!HARD_REGISTER_P (DF_REF_REG (ref)))
> -  analyze_register_chain (candidates, ref);
> +  if (!analyze_register_chain (candidates, ref, disallowed))
> +   return false;
>
>/* The operand(s) of VEC_SELECT don't need to be converted/convertible.  */
>if (def_set && GET_CODE (SET_SRC (def_set)) == VEC_SELECT)
> -return;
> +return true;
>
>for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
>  if (!DF_REF_REG_MEM_P (ref))
> -  analyze_register_chain (candidates, ref);
> +  if (!analyze_register_chain (candidates, ref, disallowed))
> +   return false;
> +
> +  retu

Re: [PATCH 01/07] RISC-V: Add auto-vectorization support

2023-03-02 Thread Xi Ruoyao via Gcc-patches
Please don't use the same title for all the patches.  This will puzzle
people running "git log" once they are committed.

And when you send 01-07, use "reply" instead of "new" so they will show
up in the correct location in a mail client.  Or use git send-email
which is much easier to use.

On Thu, 2023-03-02 at 23:52 -0500, Michael Collison wrote:
> This patch adds foundational support in the form of:
> 
> 1. New predicates
> 
> 2. New function prototypes
> 
> 3. Exporting emit_vlmax_vsetvl to global scope
> 
> 4. Add a new command line option -mriscv_vector_lmul
> 
> gcc/ChangeLog:
> 
>  * config/riscv/riscv-protos.h (riscv_classify_vlmul_field):

The change log entries should be indented with one tab, not with
spaces.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


[PATCH 07/07] RISC-V: Add auto-vectorization support

2023-03-02 Thread Michael Collison

This patch adds tests for autovectorization of integer add and subtract.

gcc/testsuite/ChangeLog:

    * gcc.target/riscv/rvv/autovec: New directory
    for autovectorization tests.
    * gcc.target/riscv/rvv/autovec/loop-add-rv32.c: New
    test to verify code generation of vector add on rv32.
    * gcc.target/riscv/rvv/autovec/loop-add.c: New
    test to verify code generation of vector add on rv64.
    * gcc.target/riscv/rvv/autovec/loop-sub-rv32.c: New
    test to verify code generation of vector subtract on rv32.
    * gcc.target/riscv/rvv/autovec/loop-sub.c: New
    test to verify code generation of vector subtract on rv64.

---
 .../riscv/rvv/autovec/loop-add-rv32.c | 24 +++
 .../gcc.target/riscv/rvv/autovec/loop-add.c   | 24 +++
 .../riscv/rvv/autovec/loop-sub-rv32.c | 24 +++
 .../gcc.target/riscv/rvv/autovec/loop-sub.c   | 24 +++
 4 files changed, 96 insertions(+)
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c

 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub-rv32.c

 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub.c

diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c

new file mode 100644
index 000..bdc3b6892e9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -march=rv32gcv 
-mabi=ilp32d" } */

+
+#include 
+
+#define TEST_TYPE(TYPE)                 \
+  void vadd_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n)    \
+  {                            \
+    for (int i = 0; i < n; i++)                \
+  dst[i] = a[i] + b[i];                \
+  }
+
+/* *int8_t not autovec currently. */
+#define TEST_ALL()    \
+ TEST_TYPE(int16_t)    \
+ TEST_TYPE(uint16_t)    \
+ TEST_TYPE(int32_t)    \
+ TEST_TYPE(uint32_t)    \
+ TEST_TYPE(int64_t)    \
+ TEST_TYPE(uint64_t)
+
+TEST_ALL()
+
+/* { dg-final { scan-assembler-times {\tvadd\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c

new file mode 100644
index 000..d7f992c7d27
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -march=rv64gcv 
-mabi=lp64d" } */

+
+#include 
+
+#define TEST_TYPE(TYPE)                 \
+  void vadd_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n)    \
+  {                            \
+    for (int i = 0; i < n; i++)                \
+  dst[i] = a[i] + b[i];                \
+  }
+
+/* *int8_t not autovec currently. */
+#define TEST_ALL()    \
+ TEST_TYPE(int16_t)    \
+ TEST_TYPE(uint16_t)    \
+ TEST_TYPE(int32_t)    \
+ TEST_TYPE(uint32_t)    \
+ TEST_TYPE(int64_t)    \
+ TEST_TYPE(uint64_t)
+
+TEST_ALL()
+
+/* { dg-final { scan-assembler-times {\tvadd\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub-rv32.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub-rv32.c

new file mode 100644
index 000..7d0a40ec539
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub-rv32.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -march=rv32gcv 
-mabi=ilp32d" } */

+
+#include 
+
+#define TEST_TYPE(TYPE)                 \
+  void vadd_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n)    \
+  {                            \
+    for (int i = 0; i < n; i++)                \
+  dst[i] = a[i] - b[i];                \
+  }
+
+/* *int8_t not autovec currently. */
+#define TEST_ALL()    \
+ TEST_TYPE(int16_t)    \
+ TEST_TYPE(uint16_t)    \
+ TEST_TYPE(int32_t)    \
+ TEST_TYPE(uint32_t)    \
+ TEST_TYPE(int64_t)    \
+ TEST_TYPE(uint64_t)
+
+TEST_ALL()
+
+/* { dg-final { scan-assembler-times {\tvsub\.vv} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub.c 
b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub.c

new file mode 100644
index 000..c8900884f83
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -ftree-vectorize -march=rv64gcv 
-mabi=lp64d" } */

+
+#include 
+
+#define TEST_TYPE(TYPE)                 \
+  void vadd_##TYPE (TYPE *dst, TYPE *a, TYPE *b, int n)    \
+  {                            \
+    for (int i = 0; i < n; i++)                \
+  dst[i] = a[i] - b[i];                \
+  }
+
+/* *int8_t not autovec currently. */
+#define TEST_ALL()    \
+ TEST_TYPE(int16_t)    \
+ TEST_TYPE(uint16_t)    \
+ TEST_TYPE(int32_t)    \
+ TEST_TYPE(uint32_t)    \
+ TEST_TYPE(int64_t)    \
+ TEST_TYPE(uint64_t)
+
+TEST_ALL()
+
+/* { dg-final { scan-assembler-time

[PATCH 06/07] RISC-V: Add auto-vectorization support

2023-03-02 Thread Michael Collison
This patch adds patterns that provide basic autovectorization support 
for integer adds and subtracts.


gcc/ChangeLog:

    * config/riscv/riscv.md (riscv_classify_vlmul_field):
    New external declaration.
    (riscv_vector_preferred_simd_mode): Include
    vector-iterators.md.
    * config/riscv/vector-auto.md: New file containing
    autovectorization patterns.
    * config/riscv/vector-iterators.md (UNSPEC_VADD/UNSPEC_VSUB):
    New unspecs for autovectorization patterns.
    * config/riscv/vector.md: Remove include of vector-iterators.md
    and include vector-auto.md.

---
 gcc/config/riscv/riscv.md    |   1 +
 gcc/config/riscv/vector-auto.md  | 172 +++
 gcc/config/riscv/vector-iterators.md |   2 +
 gcc/config/riscv/vector.md   |   4 +-
 4 files changed, 177 insertions(+), 2 deletions(-)
 create mode 100644 gcc/config/riscv/vector-auto.md

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 05924e9bbf1..c34124095f7 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -131,6 +131,7 @@
 (include "predicates.md")
 (include "constraints.md")
 (include "iterators.md")
+(include "vector-iterators.md")

 ;; 
 ;;
diff --git a/gcc/config/riscv/vector-auto.md 
b/gcc/config/riscv/vector-auto.md

new file mode 100644
index 000..e5a19663d18
--- /dev/null
+++ b/gcc/config/riscv/vector-auto.md
@@ -0,0 +1,172 @@
+;; Machine description for RISC-V 'V' Extension for GNU compiler.
+;; Copyright (C) 2022-2023 Free Software Foundation, Inc.
+;; Contributed by Juzhe Zhong (juzhe.zh...@rivai.ai), RiVAI 
Technologies Ltd.

+;; Contributed by Michael Collison (colli...@rivosinc.com), Rivos Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; .
+
+
+;; 
-

+;;  [INT] Addition
+;; 
-

+;; Includes:
+;; - vadd.vv
+;; - vadd.vx
+;; - vadd.vi
+;; 
-

+
+(define_expand "add3"
+  [(match_operand:VI 0 "register_operand")
+   (match_operand:VI 1 "register_operand")
+   (match_operand:VI 2 "vector_arith_operand")]
+  "TARGET_VECTOR"
+{
+  using namespace riscv_vector;
+
+  rtx merge = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), 
UNSPEC_VUNDEF);

+  rtx vl = emit_vlmax_vsetvl (mode);
+  rtx mask_policy = get_mask_policy_no_pred();
+  rtx tail_policy = get_tail_policy_no_pred();
+  rtx mask = CONSTM1_RTX(mode);
+  rtx vlmax_avl_p = get_avl_type_rtx(NONVLMAX);
+
+  emit_insn(gen_pred_add(operands[0], mask, merge, operands[1], 
operands[2],

+                vl, tail_policy, mask_policy, vlmax_avl_p));
+
+  DONE;
+})
+
+(define_expand "cond_add"
+  [(match_operand:VI 0 "register_operand")
+   (match_operand: 1 "register_operand")
+   (match_operand:VI 2 "register_operand")
+   (match_operand:VI 3 "vector_reg_or_const_dup_operand")
+   (match_operand:VI 4 "register_operand")]
+  "TARGET_VECTOR"
+{
+  using namespace riscv_vector;
+
+  rtx merge = operands[4];
+  rtx vl = emit_vlmax_vsetvl (mode);
+  rtx mask_policy = get_mask_policy_no_pred();
+  rtx tail_policy = get_tail_policy_no_pred();
+  rtx mask = operands[1];
+  rtx vlmax_avl_p = get_avl_type_rtx(NONVLMAX);
+
+  emit_insn(gen_pred_add(operands[0], mask, merge, operands[2], 
operands[3],

+                vl, tail_policy, mask_policy, vlmax_avl_p));
+  DONE;
+})
+
+(define_expand "len_add"
+  [(match_operand:VI 0 "register_operand")
+   (match_operand:VI 1 "register_operand")
+   (match_operand:VI 2 "vector_reg_or_const_dup_operand")
+   (match_operand 3 "p_reg_or_const_csr_operand")]
+  "TARGET_VECTOR"
+{
+  using namespace riscv_vector;
+
+  rtx merge = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), 
UNSPEC_VUNDEF);

+  rtx vl = operands[3];
+  rtx mask_policy = get_mask_policy_no_pred();
+  rtx tail_policy = get_tail_policy_no_pred();
+  rtx mask = CONSTM1_RTX(mode);
+  rtx vlmax_avl_p = get_avl_type_rtx(NONVLMAX);
+
+  emit_insn(gen_pred_add(operands[0], mask, merge, operands[1], 
operands[2],

+                vl, tail_policy, mask_policy, vlmax_avl_p));
+  DONE;
+})
+
+
+;; 
-

+;;  [INT] Subtraction
+;; 

[PATCH 05/07] RISC-V: Add auto-vectorization support

2023-03-02 Thread Michael Collison
This patch adds support for registering target hooks for basic 
autovectorization support as well as basic tuning information for the 
vector extension.


gcc/ChangeLog:

    * config/riscv/riscv-cores.def (RISCV_TUNE):
    Add VECTOR_TUNE_INFO parameter.
    * common/config/riscv/riscv-common.cc (RISCV_TUNE):
    Add VECTOR_TUNE_INFO parameter.
    * config/riscv/riscv.cc (riscv_vector_tune_param):
    New struct for vector tuning information.
    (riscv_tune_info): add vector_tune_param.
    (vector_tune_param): New static variable.
    (riscv_vectorization_factor): New variable.
    (generic_rvv_insn_scale_table): New struct.
    (generic_rvv_stmt_scale_table): New struct.
    (generic_rvv_insn_cost_table): New vector insn cost table.
    (generic_rvv_stmt_cost_table): New vector statement cost table.
    (generic_rvv_tune_info): New rvv tuning table.
    (RISCV_TUNE): Add VECTOR_TUNE_INFO parameter.
    (riscv_rtx_costs): Return vector estimate if vector mode.
    (riscv_option_override): Set vector_tune_param.
    (riscv_option_override): Set riscv_vectorization_factor.
    (riscv_estimated_poly_value): Implement
    TARGET_ESTIMATED_POLY_VALUE.
    (riscv_preferred_simd_mode): Implement
    TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
    (riscv_autovectorize_vector_modes): Implement
    TARGET_AUTOVECTORIZE_VECTOR_MODES.
    (riscv_get_mask_mode): Implement TARGET_VECTORIZE_GET_MASK_MODE.
    (riscv_empty_mask_is_expensive): Implement
    TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE.
    (riscv_builtin_vectorization_cost): Implement
    TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST.
    (riscv_vectorize_create_costs): Implement
    TARGET_VECTORIZE_CREATE_COSTS.
    (TARGET_ESTIMATED_POLY_VALUE): Register target macro.
    (TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST): Ditto.
    (TARGET_VECTORIZE_PREFERRED_SIMD_MODE): Ditto.
    (TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES): Ditto.
    (TARGET_VECTORIZE_GET_MASK_MODE): Ditto.
    (TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE): Ditto.
    (TARGET_VECTORIZE_LOOP_LEN_OVERRIDE_MASK): Ditto.
    (TARGET_VECTORIZE_CREATE_COSTS): Ditto

---
 gcc/common/config/riscv/riscv-common.cc |   2 +-
 gcc/config/riscv/riscv-cores.def    |  14 +-
 gcc/config/riscv/riscv.cc   | 321 +++-
 3 files changed, 325 insertions(+), 12 deletions(-)

diff --git a/gcc/common/config/riscv/riscv-common.cc 
b/gcc/common/config/riscv/riscv-common.cc

index ebc1ed7d7e4..6b8d92af986 100644
--- a/gcc/common/config/riscv/riscv-common.cc
+++ b/gcc/common/config/riscv/riscv-common.cc
@@ -246,7 +246,7 @@ static const riscv_cpu_info riscv_cpu_tables[] =

 static const char *riscv_tunes[] =
 {
-#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO) \
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO, 
VECTOR_TUNE_INFO)    \

 TUNE_NAME,
 #include "../../../config/riscv/riscv-cores.def"
 NULL
diff --git a/gcc/config/riscv/riscv-cores.def 
b/gcc/config/riscv/riscv-cores.def

index 2a834cae21d..4feb0366222 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -30,15 +30,15 @@
    identifier, reference to riscv.cc.  */

 #ifndef RISCV_TUNE
-#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO)
+#define RISCV_TUNE(TUNE_NAME, PIPELINE_MODEL, TUNE_INFO, VECTOR_TUNE_INFO)
 #endif

-RISCV_TUNE("rocket", generic, rocket_tune_info)
-RISCV_TUNE("sifive-3-series", generic, rocket_tune_info)
-RISCV_TUNE("sifive-5-series", generic, rocket_tune_info)
-RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info)
-RISCV_TUNE("thead-c906", generic, thead_c906_tune_info)
-RISCV_TUNE("size", generic, optimize_size_tune_info)
+RISCV_TUNE("rocket", generic, rocket_tune_info, generic_rvv_tune_info)
+RISCV_TUNE("sifive-3-series", generic, rocket_tune_info, 
generic_rvv_tune_info)
+RISCV_TUNE("sifive-5-series", generic, rocket_tune_info, 
generic_rvv_tune_info)
+RISCV_TUNE("sifive-7-series", sifive_7, sifive_7_tune_info, 
generic_rvv_tune_info)
+RISCV_TUNE("thead-c906", generic, thead_c906_tune_info, 
generic_rvv_tune_info)

+RISCV_TUNE("size", generic, optimize_size_tune_info, generic_rvv_tune_info)

 #undef RISCV_TUNE

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f11b7949a49..16b38ba4d76 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -60,6 +60,16 @@ along with GCC; see the file COPYING3.  If not see
 #include "opts.h"
 #include "tm-constrs.h"
 #include "rtl-iter.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "cfgloop.h"
+#include "cfgrtl.h"
+#include "sel-sched.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "gimple-expr.h"
+#include "tree-vectorizer.h"
+#include "riscv-vector-cost.h"

 /* This file should be included last.  */
 #include "target-def.h"
@@ -238,6 +248,12 @@ struct riscv_tune_param
   bool slow_unaligned_access;
 };

+/* Cost for vector insn classes.  */
+struct riscv_vector_tune_param {
+    const vector_insn_cost_table* rvv_insn_costs_table;

[PATCH 04/07] RISC-V: Add auto-vectorization support

2023-03-02 Thread Michael Collison
This patch adds support for functions used in implementing various 
portions of autovectorization support.


gcc/ChangeLog:

    * config/riscv/riscv-v.cc (riscv_classify_vlmul_field):
    New function.
    (riscv_vector_preferred_simd_mode): Ditto.
    (get_mask_policy_no_pred): Ditto.
    (get_tail_policy_no_pred): Ditto.
    (riscv_tuple_mode_p): Ditto.
    (riscv_classify_nf): Ditto.
    (riscv_vlmul_regsize): Ditto.
    (riscv_vector_mask_mode_p): Ditto.
    (riscv_vector_get_mask_mode): Ditto.

---
 gcc/config/riscv/riscv-v.cc | 176 
 1 file changed, 176 insertions(+)

diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 58007cc16eb..58f69e259c0 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -39,9 +39,11 @@
 #include "emit-rtl.h"
 #include "tm_p.h"
 #include "target.h"
+#include "targhooks.h"
 #include "expr.h"
 #include "optabs.h"
 #include "tm-constrs.h"
+#include "riscv-vector-builtins.h"

 using namespace riscv_vector;

@@ -108,6 +110,41 @@ const_vec_all_same_in_range_p (rtx x, HOST_WIDE_INT 
minval,

   && IN_RANGE (INTVAL (elt), minval, maxval));
 }

+/* Return the vlmul field for a specific machine mode. */
+unsigned int
+riscv_classify_vlmul_field (enum machine_mode mode)
+{
+  /* Make the decision based on the mode's enum value rather than its
+ properties, so that we keep the correct classification regardless
+ of -mriscv-vector-bits.  */
+  switch (mode)
+    {
+    case E_VNx8BImode:
+  return VLMUL_FIELD_111;
+
+    case E_VNx4BImode:
+  return VLMUL_FIELD_110;
+
+    case E_VNx2BImode:
+  return VLMUL_FIELD_101;
+
+    case E_VNx16BImode:
+  return VLMUL_FIELD_000;
+
+    case E_VNx32BImode:
+  return VLMUL_FIELD_001;
+
+    case E_VNx64BImode:
+  return VLMUL_FIELD_010;
+
+    default:
+  break;
+    }
+
+  /* we don't care about VLMUL for Mask */
+  return VLMUL_FIELD_000;
+}
+
 rtx
 emit_vlmax_vsetvl (machine_mode vmode)
 {
@@ -162,6 +199,64 @@ calculate_ratio (unsigned int sew, enum vlmul_type 
vlmul)

   return ratio;
 }

+/* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE for RVV.  */
+
+machine_mode
+riscv_vector_preferred_simd_mode (scalar_mode mode, unsigned vf)
+{
+  if (!TARGET_VECTOR)
+    return word_mode;
+
+  switch (mode)
+    {
+    case E_QImode:
+  return vf == 1   ? VNx8QImode
+     : vf == 2 ? VNx16QImode
+     : vf == 4 ? VNx32QImode
+           : VNx64QImode;
+  break;
+    case E_HImode:
+  return vf == 1   ? VNx4HImode
+     : vf == 2 ? VNx8HImode
+     : vf == 4 ? VNx16HImode
+           : VNx32HImode;
+  break;
+    case E_SImode:
+  return vf == 1   ? VNx2SImode
+     : vf == 2 ? VNx4SImode
+     : vf == 4 ? VNx8SImode
+           : VNx16SImode;
+  break;
+    case E_DImode:
+  if (riscv_vector_elen_flags != MASK_VECTOR_ELEN_32
+      && riscv_vector_elen_flags != MASK_VECTOR_ELEN_FP_32)
+    return vf == 1     ? VNx1DImode
+       : vf == 2 ? VNx2DImode
+       : vf == 4 ? VNx4DImode
+             : VNx8DImode;
+  break;
+    case E_SFmode:
+  if (TARGET_HARD_FLOAT && riscv_vector_elen_flags != 
MASK_VECTOR_ELEN_32

+      && riscv_vector_elen_flags != MASK_VECTOR_ELEN_64)
+    return vf == 1     ? VNx2SFmode
+       : vf == 2 ? VNx4SFmode
+       : vf == 4 ? VNx8SFmode
+             : VNx16SFmode;
+  break;
+    case E_DFmode:
+  if (TARGET_DOUBLE_FLOAT && TARGET_VECTOR_ELEN_FP_64)
+    return vf == 1     ? VNx1DFmode
+       : vf == 2 ? VNx2DFmode
+       : vf == 4 ? VNx4DFmode
+             : VNx8DFmode;
+  break;
+    default:
+  break;
+    }
+
+  return word_mode;
+}
+
 /* Emit an RVV unmask && vl mov from SRC to DEST.  */
 static void
 emit_pred_op (unsigned icode, rtx mask, rtx dest, rtx src, rtx len,
@@ -374,6 +469,87 @@ get_avl_type_rtx (enum avl_type type)
   return gen_int_mode (type, Pmode);
 }

+rtx
+get_mask_policy_no_pred ()
+{
+  return get_mask_policy_for_pred(PRED_TYPE_none);
+}
+
+rtx
+get_tail_policy_no_pred ()
+{
+  return get_mask_policy_for_pred(PRED_TYPE_none);
+}
+
+/* Return true if it is a RVV tuple mode. */
+bool
+riscv_tuple_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
+{
+  return false;
+}
+
+/* Return nf for a machine mode. */
+int
+riscv_classify_nf (machine_mode mode)
+{
+  switch (mode)
+    {
+
+    default:
+  break;
+    }
+
+  return 1;
+}
+
+/* Return vlmul register size for a machine mode. */
+int
+riscv_vlmul_regsize (machine_mode mode)
+{
+  if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+    return 1;
+  switch (riscv_classify_vlmul_field (mode))
+    {
+    case VLMUL_FIELD_001:
+  return 2;
+    case VLMUL_FIELD_010:
+  return 4;
+    case VLMUL_FIELD_011:
+  return 8;
+    case VLMUL_FIELD_100:
+  gcc_unreachable ();
+    default:
+  return 1;
+    }
+}
+
+/* Return true if it is a RVV mask mode. */
+bool
+riscv_vector_mask_mode_p (machine_mode mode)

[PATCH 03/07] RISC-V: Add auto-vectorization support

2023-03-02 Thread Michael Collison
This patch adds two new files to support the vector cost model and 
modifies the Makefile fragment to build the cost model C++ file. Due to 
the large size this patch is provided as an attachment.


gcc/ChangeLog:

    * gcc/config.gcc (riscv-vector-cost.o): New object file to build.
    * config/riscv/riscv-vector-cost.cc: New file for riscv vector cost
    model
    * config/riscv/riscv-vector-cost.h: New header file for riscv vector
    cost model.
    * config/riscv/t-riscv: Add make rule for riscv-vector-cost.o.


From eb995818cd5f77f85e8df93b690b00ce1fd1aa35 Mon Sep 17 00:00:00 2001
From: Michael Collison 
Date: Thu, 2 Mar 2023 12:27:36 -0500
Subject: [PATCH] Autovectorization patch set 2

---
 gcc/config.gcc|   2 +-
 gcc/config/riscv/riscv-vector-cost.cc | 620 ++
 gcc/config/riscv/riscv-vector-cost.h  | 400 +
 gcc/config/riscv/t-riscv  |   5 +
 4 files changed, 1026 insertions(+), 1 deletion(-)
 create mode 100644 gcc/config/riscv/riscv-vector-cost.cc
 create mode 100644 gcc/config/riscv/riscv-vector-cost.h

diff --git a/gcc/config.gcc b/gcc/config.gcc
index c070e6ecd2e..a401187 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -530,7 +530,7 @@ pru-*-*)
 riscv*)
 	cpu_type=riscv
 	extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o riscv-vsetvl.o"
-	extra_objs="${extra_objs} riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
+	extra_objs="${extra_objs} riscv-vector-cost.o riscv-vector-builtins.o riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
 	d_target_objs="riscv-d.o"
 	extra_headers="riscv_vector.h"
 	target_gtfiles="$target_gtfiles \$(srcdir)/config/riscv/riscv-vector-builtins.cc"
diff --git a/gcc/config/riscv/riscv-vector-cost.cc b/gcc/config/riscv/riscv-vector-cost.cc
new file mode 100644
index 000..5a33b20843a
--- /dev/null
+++ b/gcc/config/riscv/riscv-vector-cost.cc
@@ -0,0 +1,620 @@
+/* Cost model implementation for RISC-V 'V' Extension for GNU compiler.
+   Copyright (C) 2022-2023 Free Software Foundation, Inc.
+   Contributed by Juzhe Zhong (juzhe.zh...@rivai.ai), RiVAI Technologies Ltd.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   .  */
+
+#define INCLUDE_STRING
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "backend.h"
+#include "rtl.h"
+#include "regs.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "rtlanal.h"
+#include "output.h"
+#include "alias.h"
+#include "tree.h"
+#include "stringpool.h"
+#include "attribs.h"
+#include "varasm.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "function.h"
+#include "explow.h"
+#include "memmodel.h"
+#include "emit-rtl.h"
+#include "reload.h"
+#include "tm_p.h"
+#include "target.h"
+#include "basic-block.h"
+#include "expr.h"
+#include "optabs.h"
+#include "bitmap.h"
+#include "df.h"
+#include "diagnostic.h"
+#include "builtins.h"
+#include "predict.h"
+#include "tree-pass.h"
+#include "opts.h"
+#include "langhooks.h"
+#include "rtl-iter.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "cfgloop.h"
+#include "fold-const.h"
+#include "gimple-iterator.h"
+#include "tree-vectorizer.h"
+#include "tree-ssa-loop-niter.h"
+#include "riscv-vector-builtins.h"
+
+/* This file should be included last.  */
+#include "riscv-vector-cost.h"
+#include "target-def.h"
+
+bool vector_insn_cost_table::get_cost(rtx x, machine_mode mode, int *cost,
+  bool speed) const {
+  rtx op0, op1, op2;
+  enum rtx_code code = GET_CODE(x);
+  scalar_int_mode int_mode;
+
+  /* By default, assume that everything has equivalent cost to the
+ cheapest instruction.  Any additional costs are applied as a delta
+ above this default.  */
+  *cost = COSTS_N_INSNS(1);
+
+  switch (code) {
+  case SET:
+/* The cost depends entirely on the operands to SET.  */
+*cost = 0;
+op0 = SET_DEST(x);
+op1 = SET_SRC(x);
+
+switch (GET_CODE(op0)) {
+case MEM:
+  if (speed) {
+*cost += store->cost(x, mode);
+  }
+
+  //*cost += rtx_cost(op1, mode, SET, 1, speed);
+  return true;
+
+case SUBREG:
+  if (!REG_P(SUBREG_REG(op0)))
+*cost += rtx_cost(SUBREG_REG(op0), VOIDmode, SET, 0, speed);
+
+  /* Fal

[PATCH 02/07] RISC-V: Add auto-vectorization support

2023-03-02 Thread Michael Collison
This patch adds foundational support by making two functions that handle 
predication policies globally visible.


gcc/ChangeLog:

    * config/riscv/riscv-vector-builtins.cc (get_tail_policy_for_pred):
    Remove static declaration to make them externally visible.
    (get_mask_policy_for_pred): Ditto.
    * config/riscv/riscv-vector-builtins.h (get_tail_policy_for_pred):
    New external declaration.
    (get_mask_policy_for_pred): Ditto.

---
 gcc/config/riscv/riscv-vector-builtins.cc | 4 ++--
 gcc/config/riscv/riscv-vector-builtins.h  | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/riscv-vector-builtins.cc 
b/gcc/config/riscv/riscv-vector-builtins.cc

index 2e92ece3b64..90fc73a5bcf 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -1850,7 +1850,7 @@ use_real_merge_p (enum predication_type_index pred)

 /* Get TAIL policy for predication. If predication indicates TU, 
return the TU.

    Otherwise, return the prefer default configuration.  */
-static rtx
+rtx
 get_tail_policy_for_pred (enum predication_type_index pred)
 {
   if (pred == PRED_TYPE_tu || pred == PRED_TYPE_tum || pred == 
PRED_TYPE_tumu)
@@ -1860,7 +1860,7 @@ get_tail_policy_for_pred (enum 
predication_type_index pred)


 /* Get MASK policy for predication. If predication indicates MU, 
return the MU.

    Otherwise, return the prefer default configuration.  */
-static rtx
+rtx
 get_mask_policy_for_pred (enum predication_type_index pred)
 {
   if (pred == PRED_TYPE_tumu || pred == PRED_TYPE_mu)
diff --git a/gcc/config/riscv/riscv-vector-builtins.h 
b/gcc/config/riscv/riscv-vector-builtins.h

index ede08c6a480..135e2463b1e 100644
--- a/gcc/config/riscv/riscv-vector-builtins.h
+++ b/gcc/config/riscv/riscv-vector-builtins.h
@@ -433,6 +433,8 @@ extern const char *const operand_suffixes[NUM_OP_TYPES];
 extern const rvv_builtin_suffixes type_suffixes[NUM_VECTOR_TYPES + 1];
 extern const char *const predication_suffixes[NUM_PRED_TYPES];
 extern rvv_builtin_types_t builtin_types[NUM_VECTOR_TYPES + 1];
+extern rtx get_tail_policy_for_pred (enum predication_type_index pred);
+extern rtx get_mask_policy_for_pred (enum predication_type_index pred);

 inline bool
 function_instance::operator!= (const function_instance &other) const
--
2.34.1



[PATCH 01/07] RISC-V: Add auto-vectorization support

2023-03-02 Thread Michael Collison

This patch adds foundational support in the form of:

1. New predicates

2. New function prototypes

3. Exporting emit_vlmax_vsetvl to global scope

4. Add a new command line option -mriscv_vector_lmul

gcc/ChangeLog:

    * config/riscv/riscv-protos.h (riscv_classify_vlmul_field):
    New external declaration.
    (riscv_vector_preferred_simd_mode): Ditto.
    (riscv_tuple_mode_p): Ditto.
    (riscv_vector_mask_mode_p): Ditto.
    (riscv_classify_nf): Ditto.
    (riscv_vlmul_regsize): Ditto.
    (riscv_vector_preferred_simd_mode): Ditto.
    (riscv_vector_get_mask_mode): Ditto.
    (emit_vlmax_vsetvl): Ditto.
    (get_mask_policy_no_pred): Ditto.
    (get_tail_policy_no_pred): Ditto.
    * config/riscv/riscv-opts.h (riscv_vector_bits_enum): New enum.
    (riscv_vector_lmul_enum): Ditto.
    (vlmul_field_enum): Ditto.
    * config/riscv/riscv-v.cc (emit_vlmax_vsetvl):
    Remove static scope.
    * config/riscv/riscv.opt (riscv_vector_lmul):
    New option -mriscv_vector_lmul.
    * config/riscv/predicates.md (p_reg_or_const_csr_operand):
    New predicate.
    (vector_reg_or_const_dup_operand): Ditto.

---
 gcc/config/riscv/predicates.md  | 13 +++
 gcc/config/riscv/riscv-opts.h   | 40 +
 gcc/config/riscv/riscv-protos.h | 16 +
 gcc/config/riscv/riscv-v.cc |  2 +-
 gcc/config/riscv/riscv.opt  | 20 +
 5 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 7bc7c0b4f4d..31517ae4606 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -264,6 +264,14 @@
 })

 ;; Predicates for the V extension.
+(define_special_predicate "p_reg_or_const_csr_operand"
+  (match_code "reg, subreg, const_int")
+{
+  if (CONST_INT_P (op))
+    return satisfies_constraint_K (op);
+  return GET_MODE (op) == Pmode;
+})
+
 (define_special_predicate "vector_length_operand"
   (ior (match_operand 0 "pmode_register_operand")
    (match_operand 0 "const_csr_operand")))
@@ -287,6 +295,11 @@
   (ior (match_operand 0 "register_operand")
    (match_test "op == CONSTM1_RTX (GET_MODE (op))")))

+(define_predicate "vector_reg_or_const_dup_operand"
+  (ior (match_operand 0 "register_operand")
+   (match_test "const_vec_duplicate_p (op)
+  && !CONST_POLY_INT_P (CONST_VECTOR_ELT (op, 0))")))
+
 (define_predicate "vector_mask_operand"
   (ior (match_operand 0 "register_operand")
    (match_operand 0 "vector_all_trues_mask_operand")))
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index ff398c0a2ae..2057a14e153 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -67,6 +67,46 @@ enum stack_protector_guard {
   SSP_GLOBAL            /* global canary */
 };

+/* RVV vector register sizes.  */
+enum riscv_vector_bits_enum
+{
+  RVV_SCALABLE,
+  RVV_NOT_IMPLEMENTED = RVV_SCALABLE,
+  RVV_64 = 64,
+  RVV_128 = 128,
+  RVV_256 = 256,
+  RVV_512 = 512,
+  RVV_1024 = 1024,
+  RVV_2048 = 2048,
+  RVV_4096 = 4096,
+  RVV_8192 = 8192,
+  RVV_16384 = 16384,
+  RVV_32768 = 32768,
+  RVV_65536 = 65536
+};
+
+/* vectorization factor.  */
+enum riscv_vector_lmul_enum
+{
+  RVV_LMUL1 = 1,
+  RVV_LMUL2 = 2,
+  RVV_LMUL4 = 4,
+  RVV_LMUL8 = 8
+};
+
+enum vlmul_field_enum
+{
+  VLMUL_FIELD_000, /* LMUL = 1 */
+  VLMUL_FIELD_001, /* LMUL = 2 */
+  VLMUL_FIELD_010, /* LMUL = 4 */
+  VLMUL_FIELD_011, /* LMUL = 8 */
+  VLMUL_FIELD_100, /* RESERVED */
+  VLMUL_FIELD_101, /* LMUL = 1/8 */
+  VLMUL_FIELD_110, /* LMUL = 1/4 */
+  VLMUL_FIELD_111, /* LMUL = 1/2 */
+  MAX_VLMUL_FIELD
+};
+
 #define MASK_ZICSR    (1 << 0)
 #define MASK_ZIFENCEI (1 << 1)

diff --git a/gcc/config/riscv/riscv-protos.h 
b/gcc/config/riscv/riscv-protos.h

index 37c634eca1d..70c8dc4ce69 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -200,4 +200,19 @@ const unsigned int RISCV_BUILTIN_SHIFT = 1;
 /* Mask that selects the riscv_builtin_class part of a function code.  */
 const unsigned int RISCV_BUILTIN_CLASS = (1 << RISCV_BUILTIN_SHIFT) - 1;

+/* Routines implemented in riscv-v.cc*/
+
+namespace riscv_vector {
+extern unsigned int riscv_classify_vlmul_field (enum machine_mode m);
+extern machine_mode riscv_vector_preferred_simd_mode (scalar_mode mode, 
unsigned vf);

+extern bool riscv_tuple_mode_p (machine_mode);
+extern bool riscv_vector_mask_mode_p (machine_mode);
+extern int riscv_classify_nf (machine_mode);
+extern int riscv_vlmul_regsize(machine_mode);
+extern machine_mode riscv_vector_preferred_simd_mode (scalar_mode mode, 
unsigned vf);

+extern opt_machine_mode riscv_vector_get_mask_mode (machine_mode mode);
+extern rtx emit_vlmax_vsetvl (machine_mode vmode);
+extern rtx get_mask_policy_no_pred ();
+extern rtx get_tail_policy_no_pred ();
+}
 #endif /* ! GCC_RISCV_PROTOS_H */
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 59c25c65cd5..58007cc16eb 100644
--- a/gcc/config/r

[PATCH 00/07] RISC-V: Add auto-vectorization support

2023-03-02 Thread Michael Collison
This series of patches adds foundational support for RISC-V
autovectorization. These patches are based on the current upstream rvv
vector intrinsic support and are not a new implementation. Most of the
implementation consists of adding the new vector cost model, the
autovectorization patterns themselves and target hooks.


This implementation only provides support for integer addition and 
subtraction as a proof of concept.


As discussed on this list, if these patches are approved they will be
merged into an "auto-vectorization" branch once gcc-13 branches for release.


There are two known issues related to crashes (assert failures) 
associated with tree vectorization; one of which I have sent a patch for 
and have received feedback. I will be sending a patch for the second 
issue tomorrow.



 gcc/common/config/riscv/riscv-common.cc   |   2 +-
 gcc/config.gcc    |   2 +-
 gcc/config/riscv/predicates.md    |  13 +
 gcc/config/riscv/riscv-cores.def  |  14 +-
 gcc/config/riscv/riscv-opts.h |  40 ++
 gcc/config/riscv/riscv-protos.h   |  15 +
 gcc/config/riscv/riscv-v.cc   | 178 -
 gcc/config/riscv/riscv-vector-builtins.cc |   4 +-
 gcc/config/riscv/riscv-vector-builtins.h  |   2 +
 gcc/config/riscv/riscv-vector-cost.cc | 620 ++
 gcc/config/riscv/riscv-vector-cost.h  | 400 +++
 gcc/config/riscv/riscv.cc | 321 -
 gcc/config/riscv/riscv.md |   1 +
 gcc/config/riscv/riscv.opt    |  20 +
 gcc/config/riscv/t-riscv  |   5 +
 gcc/config/riscv/vector-auto.md   | 172 +
 gcc/config/riscv/vector-iterators.md  |   2 +
 gcc/config/riscv/vector.md    |   4 +-
 .../riscv/rvv/autovec/loop-add-rv32.c |  24 +
 .../gcc.target/riscv/rvv/autovec/loop-add.c   |  24 +
 .../riscv/rvv/autovec/loop-sub-rv32.c |  24 +
 .../gcc.target/riscv/rvv/autovec/loop-sub.c   |  24 +
 22 files changed, 1893 insertions(+), 18 deletions(-)
 create mode 100644 gcc/config/riscv/riscv-vector-cost.cc
 create mode 100644 gcc/config/riscv/riscv-vector-cost.h
 create mode 100644 gcc/config/riscv/vector-auto.md
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add-rv32.c

 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-add.c
 create mode 100644 
gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub-rv32.c

 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/loop-sub.c



Re: [PATCH] LoongArch: Stop -mfpu from silently breaking ABI

2023-03-02 Thread Xi Ruoyao via Gcc-patches
On Fri, 2023-03-03 at 10:12 +0800, Yujie Yang wrote:
> However, "loongarch64" is defined to include the "fpu64" ISA module[1]
> (i.e. enable "-mfpu=64" when -mfpu is not explicitly used). So I believe
> the above behavior you observed is expected.

Ah, this makes things much simpler and aligns with my gut feeling :).  I
can drop the change in loongarch-opts.cc now.  And the smaller changeset
also makes me more confident about a backport to gcc-12.

V2 patch is being tested and I'll send it after the testing.  Meanwhile
I created PR109000 to track the issue.

-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


RE: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision adjustment

2023-03-02 Thread Li, Pan2 via Gcc-patches
Got it. Thank you, I really appreciate your help and patience. I have updated
the PATCH; see the link below.

https://gcc.gnu.org/pipermail/gcc-patches/2023-March/613257.html

Pan

-Original Message-
From: Richard Sandiford  
Sent: Friday, March 3, 2023 1:55 AM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; 
rguent...@suse.de
Subject: Re: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision adjustment

"Li, Pan2"  writes:
> Oops, looks I missed that part for assertion. Thank you for coaching.
> Added and tested the below changes at the end of emit_mode_adjustments 
> already but looks we may have other problems about the size, the precision 
> and the C types.
>
> Looks like I need to hold this PATCH for a while until we have a conclusion. 
> Feel free to let me know if there is mistake or misleading.
>
> + 
> +  for_all_modes (c, m)
> +printf ("  gcc_checking_assert (!mode_size[E_%smode].is_constant()"
> +   " || mode_size[E_%smode].coeffs[0] != -1);\n", m->name, 
> + m->name);
> +

Using:

  gcc_assert (maybe_ne (mode_size[E_%smode], -1));

would be simpler.  We might as well make it a full assert (rather than a 
checking assert) because this code isn't executed very often.

Thanks,
Richard

>
> Thank you and have a nice day!
>
> Pan
>
>
> -Original Message-
> From: Richard Sandiford 
> Sent: Thursday, March 2, 2023 5:44 PM
> To: Li, Pan2 
> Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; 
> kito.ch...@sifive.com; rguent...@suse.de
> Subject: Re: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision 
> adjustment
>
> pan2...@intel.com writes:
>> From: Pan Li 
>>
>>  Fix the bug of the rvv bool mode precision with the adjustment.
>>  The bits size of vbool*_t will be adjusted to
>>  [1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
>>  adjusted mode precision of vbool*_t will help the underlying passes to
>>  make the right decision for both the correctness and optimization.
>>
>>  Given below sample code:
>>  void test_1(int8_t * restrict in, int8_t * restrict out)
>>  {
>>vbool8_t v2 = *(vbool8_t*)in;
>>vbool16_t v5 = *(vbool16_t*)in;
>>*(vbool16_t*)(out + 200) = v5;
>>*(vbool8_t*)(out + 100) = v2;
>>  }
>>
>>  Before the precision adjustment:
>>  addi    a4,a1,100
>>  vsetvli a5,zero,e8,m1,ta,ma
>>  addi    a1,a1,200
>>  vlm.v   v24,0(a0)
>>  vsm.v   v24,0(a4)
>>  // Need one vsetvli and vlm.v for correctness here.
>>  vsm.v   v24,0(a1)
>>
>>  After the precision adjustment:
>>  csrr    t0,vlenb
>>  slli    t1,t0,1
>>  csrr    a3,vlenb
>>  sub     sp,sp,t1
>>  slli    a4,a3,1
>>  add     a4,a4,sp
>>  sub     a3,a4,a3
>>  vsetvli a5,zero,e8,m1,ta,ma
>>  addi    a2,a1,200
>>  vlm.v   v24,0(a0)
>>  vsm.v   v24,0(a3)
>>  addi    a1,a1,100
>>  vsetvli a4,zero,e8,mf2,ta,ma
>>  csrr    t0,vlenb
>>  vlm.v   v25,0(a3)
>>  vsm.v   v25,0(a2)
>>  slli    t1,t0,1
>>  vsetvli a5,zero,e8,m1,ta,ma
>>  vsm.v   v24,0(a1)
>>  add     sp,sp,t1
>>  jr      ra
>>
>>  However, there may be some optimization opportunities after
>>  the mode precision adjustment. They can be taken care of in
>>  the RISC-V backend in subsequent separate PR(s).
>>
>>  PR 108185
>>  PR 108654
>>
>> gcc/ChangeLog:
>>
>>  * config/riscv/riscv-modes.def (ADJUST_PRECISION):
>>  * config/riscv/riscv.cc (riscv_v_adjust_precision):
>>  * config/riscv/riscv.h (riscv_v_adjust_precision):
>>  * genmodes.cc (ADJUST_PRECISION):
>>  (emit_mode_adjustments):
>>
>> gcc/testsuite/ChangeLog:
>>
>>  * gcc.target/riscv/pr108185-1.c: New test.
>>  * gcc.target/riscv/pr108185-2.c: New test.
>>  * gcc.target/riscv/pr108185-3.c: New test.
>>  * gcc.target/riscv/pr108185-4.c: New test.
>>  * gcc.target/riscv/pr108185-5.c: New test.
>>  * gcc.target/riscv/pr108185-6.c: New test.
>>  * gcc.target/riscv/pr108185-7.c: New test.
>>  * gcc.target/riscv/pr108185-8.c: New test.
>>
>> Signed-off-by: Pan Li 
>> ---
>>  gcc/config/riscv/riscv-modes.def|  8 +++
>>  gcc/config/riscv/riscv.cc   | 12 
>>  gcc/config/riscv/riscv.h|  1 +
>>  gcc/genmodes.cc | 20 +-
>>  gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++ 
>> gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++ 
>> gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++ 
>> gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++ 
>> gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++ 
>> gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++ 
>> gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++ 
>> gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 
>> +
>>  12 files changed, 592 insertions(+), 2

[PATCH v3] RISC-V: Bugfix for rvv bool mode precision adjustment

2023-03-02 Thread pan2.li--- via Gcc-patches
From: Pan Li 

Fix the bug of the rvv bool mode precision with the adjustment.
The bits size of vbool*_t will be adjusted to
[1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
adjusted mode precision of vbool*_t will help the underlying passes to
make the right decision for both the correctness and optimization.

Given below sample code:
void test_1(int8_t * restrict in, int8_t * restrict out)
{
  vbool8_t v2 = *(vbool8_t*)in;
  vbool16_t v5 = *(vbool16_t*)in;
  *(vbool16_t*)(out + 200) = v5;
  *(vbool8_t*)(out + 100) = v2;
}

Before the precision adjustment:
addi    a4,a1,100
vsetvli a5,zero,e8,m1,ta,ma
addi    a1,a1,200
vlm.v   v24,0(a0)
vsm.v   v24,0(a4)
// Need one vsetvli and vlm.v for correctness here.
vsm.v   v24,0(a1)

After the precision adjustment:
csrr    t0,vlenb
slli    t1,t0,1
csrr    a3,vlenb
sub     sp,sp,t1
slli    a4,a3,1
add     a4,a4,sp
sub     a3,a4,a3
vsetvli a5,zero,e8,m1,ta,ma
addi    a2,a1,200
vlm.v   v24,0(a0)
vsm.v   v24,0(a3)
addi    a1,a1,100
vsetvli a4,zero,e8,mf2,ta,ma
csrr    t0,vlenb
vlm.v   v25,0(a3)
vsm.v   v25,0(a2)
slli    t1,t0,1
vsetvli a5,zero,e8,m1,ta,ma
vsm.v   v24,0(a1)
add     sp,sp,t1
jr      ra

However, there may be some optimization opportunities after
the mode precision adjustment. They can be taken care of in
the RISC-V backend in subsequent separate PR(s).

PR 108185
PR 108654

gcc/ChangeLog:

* config/riscv/riscv-modes.def (ADJUST_PRECISION):
* config/riscv/riscv.cc (riscv_v_adjust_precision):
* config/riscv/riscv.h (riscv_v_adjust_precision):
* genmodes.cc (ADJUST_PRECISION):
(emit_mode_adjustments):

gcc/testsuite/ChangeLog:

* gcc.target/riscv/pr108185-1.c: New test.
* gcc.target/riscv/pr108185-2.c: New test.
* gcc.target/riscv/pr108185-3.c: New test.
* gcc.target/riscv/pr108185-4.c: New test.
* gcc.target/riscv/pr108185-5.c: New test.
* gcc.target/riscv/pr108185-6.c: New test.
* gcc.target/riscv/pr108185-7.c: New test.
* gcc.target/riscv/pr108185-8.c: New test.

Signed-off-by: Pan Li 
---
 gcc/config/riscv/riscv-modes.def|  8 +++
 gcc/config/riscv/riscv.cc   | 12 
 gcc/config/riscv/riscv.h|  1 +
 gcc/genmodes.cc | 26 ++-
 gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++
 gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +
 12 files changed, 598 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c

diff --git a/gcc/config/riscv/riscv-modes.def b/gcc/config/riscv/riscv-modes.def
index d5305efa8a6..110bddce851 100644
--- a/gcc/config/riscv/riscv-modes.def
+++ b/gcc/config/riscv/riscv-modes.def
@@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * 
riscv_bytes_per_vector_chunk);
 ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * riscv_bytes_per_vector_chunk);
 ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
 
+ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1));
+ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2));
+ADJUST_PRECISION (VNx4BI, riscv_v_adjust_precision (VNx4BImode, 4));
+ADJUST_PRECISION (VNx8BI, riscv_v_adjust_precision (VNx8BImode, 8));
+ADJUST_PRECISION (VNx16BI, riscv_v_adjust_precision (VNx16BImode, 16));
+ADJUST_PRECISION (VNx32BI, riscv_v_adjust_precision (VNx32BImode, 32));
+ADJUST_PRECISION (VNx64BI, riscv_v_adjust_precision (VNx64BImode, 64));
+
 /*
| Mode| MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
| | LMUL| SEW/LMUL| LMUL| SEW/LMUL|
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index f11b7949a4

Re: [PATCH] LoongArch: Stop -mfpu from silently breaking ABI

2023-03-02 Thread Yujie Yang
On Fri, Mar 03, 2023 at 12:01:22AM +0800, Xi Ruoyao via Gcc-patches wrote:
> But then it causes "-mabi=lp64s -march=loongarch64" to generate code like:
> 
>   movgr2fr.d $fa0, $a0
>   frecip.d   $fa0, $fa0
>   movfr2gr.d $a0, $fa0
> 
> The problem here is "loongarch64" is never strictly defined.  So we
> consider "loongarch64" a "64-bit LoongArch CPU with the simplest FPU
> needed by the ABI", and if -march=loongarch64 but -mfpu is not
> explicitly used, we set -mfpu such a simplest one.

Thanks for the fix on TARGET_*_FLOAT_ABI usage! Certainly more testing
needs to be done on the soft-float side.

However, "loongarch64" is defined to include the "fpu64" ISA module[1]
(i.e. enable "-mfpu=64" when -mfpu is not explicitly used). So I believe
the above behavior you observed is expected.

[1] Table 5. Target CPU Models,
https://loongson.github.io/LoongArch-Documentation/LoongArch-toolchain-conventions-EN.html



[committed] d: vector float comparison doesn't result in 0 or -1 [PR108945]

2023-03-02 Thread Iain Buclaw via Gcc-patches
Hi,

When comparing two vectors, the type of vector was used as the result of
the condition result.  This meant that for floating point comparisons,
each value would either be `0.0' or `-1.0' reinterpreted as an integer,
not the expected integral bitmask values `0' and `-1'.

Instead, use the comparison type determined by truth_type_for as the
result of the comparison.  If a reinterpret is later required by the
final conversion for generating CmpExp, it is still only going to
reinterpret one integer kind as another.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32, and
committed to mainline.

Regards,
Iain.

---
PR d/108945

gcc/d/ChangeLog:

* d-codegen.cc (build_boolop): Evaluate vector comparison as
the truth_type_for vector type.

gcc/testsuite/ChangeLog:

* gdc.dg/pr108945.d: New test.
---
 gcc/d/d-codegen.cc  |  9 -
 gcc/testsuite/gdc.dg/pr108945.d | 12 
 2 files changed, 16 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gdc.dg/pr108945.d

diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc
index 0e8e07366ee..5a041927ec9 100644
--- a/gcc/d/d-codegen.cc
+++ b/gcc/d/d-codegen.cc
@@ -1453,13 +1453,12 @@ build_boolop (tree_code code, tree arg0, tree arg1)
 {
   /* Build a vector comparison.
 VEC_COND_EXPR ; */
-  tree type = TREE_TYPE (arg0);
-  tree cmptype = truth_type_for (type);
+  tree cmptype = truth_type_for (TREE_TYPE (arg0));
   tree cmp = fold_build2_loc (input_location, code, cmptype, arg0, arg1);
 
-  return fold_build3_loc (input_location, VEC_COND_EXPR, type, cmp,
- build_minus_one_cst (type),
- build_zero_cst (type));
+  return fold_build3_loc (input_location, VEC_COND_EXPR, cmptype, cmp,
+ build_minus_one_cst (cmptype),
+ build_zero_cst (cmptype));
 }
 
   if (code == EQ_EXPR || code == NE_EXPR)
diff --git a/gcc/testsuite/gdc.dg/pr108945.d b/gcc/testsuite/gdc.dg/pr108945.d
new file mode 100644
index 000..03b9de8e758
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/pr108945.d
@@ -0,0 +1,12 @@
+// { dg-options "-fdump-tree-gimple" }
+// { dg-additional-options "-mavx" { target avx_runtime } }
+// { dg-do compile { target { avx_runtime || vect_sizes_16B_8B } } }
+
+alias f4 = __vector(float[4]);
+
+auto pr108945(f4 a, f4 b)
+{
+return a < b;
+}
+
+// { dg-final { scan-tree-dump-not "VEC_COND_EXPR" "gimple" } }
-- 
2.37.2



[committed] d: Fix ICE on explicit immutable struct import [PR108877]

2023-03-02 Thread Iain Buclaw via Gcc-patches
Hi,

This patch fixes an ICE in the D front-end when importing an immutable
struct.  Const and immutable types are built as variants of the type
they are derived from, and TYPE_STUB_DECL is not set for these variants.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32, committed
to mainline, and backported to the release branches for gcc-10, gcc-11,
and gcc-12.

Regards,
Iain.

---
PR d/108877

gcc/d/ChangeLog:

* imports.cc (ImportVisitor::visit (EnumDeclaration *)): Call
make_import on TYPE_MAIN_VARIANT.
(ImportVisitor::visit (AggregateDeclaration *)): Likewise.
(ImportVisitor::visit (ClassDeclaration *)): Likewise.

gcc/testsuite/ChangeLog:

* gdc.dg/imports/pr108877a.d: New test.
* gdc.dg/pr108877.d: New test.
---
 gcc/d/imports.cc | 7 ++-
 gcc/testsuite/gdc.dg/imports/pr108877a.d | 6 ++
 gcc/testsuite/gdc.dg/pr108877.d  | 9 +
 3 files changed, 21 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gdc.dg/imports/pr108877a.d
 create mode 100644 gcc/testsuite/gdc.dg/pr108877.d

diff --git a/gcc/d/imports.cc b/gcc/d/imports.cc
index 3b46d1b7560..2efef4ed54f 100644
--- a/gcc/d/imports.cc
+++ b/gcc/d/imports.cc
@@ -106,12 +106,16 @@ public:
 tree type = build_ctype (d->type);
 /* Not all kinds of D enums create a TYPE_DECL.  */
 if (TREE_CODE (type) == ENUMERAL_TYPE)
-  this->result_ = this->make_import (TYPE_STUB_DECL (type));
+  {
+   type = TYPE_MAIN_VARIANT (type);
+   this->result_ = this->make_import (TYPE_STUB_DECL (type));
+  }
   }
 
   void visit (AggregateDeclaration *d) final override
   {
 tree type = build_ctype (d->type);
+type = TYPE_MAIN_VARIANT (type);
 this->result_ = this->make_import (TYPE_STUB_DECL (type));
   }
 
@@ -119,6 +123,7 @@ public:
   {
 /* Want the RECORD_TYPE, not POINTER_TYPE.  */
 tree type = TREE_TYPE (build_ctype (d->type));
+type = TYPE_MAIN_VARIANT (type);
 this->result_ = this->make_import (TYPE_STUB_DECL (type));
   }
 
diff --git a/gcc/testsuite/gdc.dg/imports/pr108877a.d 
b/gcc/testsuite/gdc.dg/imports/pr108877a.d
new file mode 100644
index 000..a23c78ddf84
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/imports/pr108877a.d
@@ -0,0 +1,6 @@
+immutable struct ImmutableS { }
+const struct ConstS { }
+immutable class ImmutableC { }
+const class ConstC { }
+immutable enum ImmutableE { _ }
+const enum ConstE { _ }
diff --git a/gcc/testsuite/gdc.dg/pr108877.d b/gcc/testsuite/gdc.dg/pr108877.d
new file mode 100644
index 000..710551f3f9a
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/pr108877.d
@@ -0,0 +1,9 @@
+// { dg-options "-I $srcdir/gdc.dg" }
+// { dg-do compile }
+import imports.pr108877a :
+ImmutableS,
+ConstS,
+ImmutableC,
+ConstC,
+ImmutableE,
+ConstE;
-- 
2.37.2



[committed] d: Allow vectors to be compared for identity (PR108946)

2023-03-02 Thread Iain Buclaw via Gcc-patches
Hi,

Vector equality and comparisons are now accepted by the language
implementation, but identity wasn't.  This patch implements it as an
extra integer comparison of the bit-casted bitmask.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32, and
committed to mainline.

Regards,
Iain.

---
PR d/108946

gcc/d/ChangeLog:

* d-target.cc (Target::isVectorOpSupported): Allow identity ops.
* expr.cc (ExprVisitor::visit (IdentityExp *)): Handle vector identity
comparisons.

gcc/testsuite/ChangeLog:

* gdc.dg/simd2a.d: Update test.
* gdc.dg/simd2b.d: Likewise.
* gdc.dg/simd2c.d: Likewise.
* gdc.dg/simd2d.d: Likewise.
* gdc.dg/simd2e.d: Likewise.
* gdc.dg/simd2f.d: Likewise.
* gdc.dg/simd2g.d: Likewise.
* gdc.dg/simd2h.d: Likewise.
* gdc.dg/simd2i.d: Likewise.
* gdc.dg/simd2j.d: Likewise.
---
 gcc/d/d-target.cc |  5 -
 gcc/d/expr.cc | 25 +
 gcc/testsuite/gdc.dg/simd2a.d |  5 +++--
 gcc/testsuite/gdc.dg/simd2b.d |  5 +++--
 gcc/testsuite/gdc.dg/simd2c.d |  5 +++--
 gcc/testsuite/gdc.dg/simd2d.d |  5 +++--
 gcc/testsuite/gdc.dg/simd2e.d |  5 +++--
 gcc/testsuite/gdc.dg/simd2f.d |  5 +++--
 gcc/testsuite/gdc.dg/simd2g.d |  5 +++--
 gcc/testsuite/gdc.dg/simd2h.d |  5 +++--
 gcc/testsuite/gdc.dg/simd2i.d |  5 +++--
 gcc/testsuite/gdc.dg/simd2j.d |  5 +++--
 12 files changed, 55 insertions(+), 25 deletions(-)

diff --git a/gcc/d/d-target.cc b/gcc/d/d-target.cc
index 5eab5706ead..4c7a212703e 100644
--- a/gcc/d/d-target.cc
+++ b/gcc/d/d-target.cc
@@ -323,11 +323,6 @@ Target::isVectorOpSupported (Type *type, EXP op, Type *)
   /* Logical operators must have a result type of bool.  */
   return false;
 
-case EXP::identity:
-case EXP::notIdentity:
-  /* Comparison operators must have a result type of bool.  */
-  return false;
-
 default:
   break;
 }
diff --git a/gcc/d/expr.cc b/gcc/d/expr.cc
index c8ec37d7103..4311edcc2d6 100644
--- a/gcc/d/expr.cc
+++ b/gcc/d/expr.cc
@@ -313,6 +313,31 @@ public:
 
this->result_ = build_struct_comparison (code, ts->sym, t1, t2);
   }
+else if (tb1->ty == TY::Tvector && tb2->ty == TY::Tvector)
+  {
+   /* For vectors, identity is defined as all values being equal.  */
+   tree t1 = build_expr (e->e1);
+   tree t2 = build_expr (e->e2);
+   tree mask = build_boolop (code, t1, t2);
+
+   /* To reinterpret the vector comparison as a boolean expression, bitcast
+  the bitmask result and generate an additional integer comparison.  */
+   opt_scalar_int_mode mode =
+ int_mode_for_mode (TYPE_MODE (TREE_TYPE (mask)));
+   gcc_assert (mode.exists ());
+
+   tree type = lang_hooks.types.type_for_mode (mode.require (), 1);
+   if (type == NULL_TREE)
+ type = make_unsigned_type (GET_MODE_BITSIZE (mode.require ()));
+
+   /* In `t1 is t2', all mask bits must be set for vectors to be equal.
+  Otherwise any bit set is enough for vectors to be not-equal.  */
+   tree mask_eq = (code == EQ_EXPR)
+ ? build_all_ones_cst (type) : build_zero_cst (type);
+
+   this->result_ = build_boolop (code, mask_eq,
+ build_vconvert (type, mask));
+  }
 else
   {
/* For operands of other types, identity is defined as being the
diff --git a/gcc/testsuite/gdc.dg/simd2a.d b/gcc/testsuite/gdc.dg/simd2a.d
index 373d5d1e229..d47175fd38b 100644
--- a/gcc/testsuite/gdc.dg/simd2a.d
+++ b/gcc/testsuite/gdc.dg/simd2a.d
@@ -5,6 +5,7 @@ import core.simd;
 void test2a()
 {
 byte16 v1, v2 = 1, v3 = 1;
+bool b1;
 v1 = v2;
 v1 = v2 + v3;
 v1 = v2 - v3;
@@ -16,8 +17,8 @@ void test2a()
 v1 = v2 ^ v3;
 static assert(!__traits(compiles, v1 ~ v2));
 static assert(!__traits(compiles, v1 ^^ v2));
-static assert(!__traits(compiles, v1 is v2));
-static assert(!__traits(compiles, v1 !is v2));
+b1 = v1 is v2;
+b1 = v1 !is v2;
 static assert( __traits(compiles, v1 == v2));
 static assert( __traits(compiles, v1 != v2));
 static assert( __traits(compiles, v1 < v2));
diff --git a/gcc/testsuite/gdc.dg/simd2b.d b/gcc/testsuite/gdc.dg/simd2b.d
index e72da0d9b77..a1b2a10caaf 100644
--- a/gcc/testsuite/gdc.dg/simd2b.d
+++ b/gcc/testsuite/gdc.dg/simd2b.d
@@ -5,6 +5,7 @@ import core.simd;
 void test2b()
 {
 ubyte16 v1, v2 = 1, v3 = 1;
+bool b1;
 v1 = v2;
 v1 = v2 + v3;
 v1 = v2 - v3;
@@ -16,8 +17,8 @@ void test2b()
 v1 = v2 ^ v3;
 static assert(!__traits(compiles, v1 ~ v2));
 static assert(!__traits(compiles, v1 ^^ v2));
-static assert(!__traits(compiles, v1 is v2));
-static assert(!__traits(compiles, v1 !is v2));
+b1 = v1 is v2;
+b1 = v1 !is v2;
 static assert( __traits(compiles, v1 == v2));
 static assert( __traits(compiles, v1 != v2));
 static assert( __traits(compiles, 

[committed] d: Add test for PR d/108167 to the testsuite [PR108167]

2023-03-02 Thread Iain Buclaw via Gcc-patches
Hi,

This patch adds the test for checking PR108167.  The D front-end
implementation got fixed in upstream, add test to the gdc testsuite to
check we don't regress on it.

Regression tested on x86_64-linux-gnu/-m32, and committed to mainline.

Regards,
Iain.

---
PR d/108167

gcc/testsuite/ChangeLog:

* gdc.dg/pr108167.d: New test.
---
 gcc/testsuite/gdc.dg/pr108167.d | 5 +
 1 file changed, 5 insertions(+)
 create mode 100644 gcc/testsuite/gdc.dg/pr108167.d

diff --git a/gcc/testsuite/gdc.dg/pr108167.d b/gcc/testsuite/gdc.dg/pr108167.d
new file mode 100644
index 000..1337a494171
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/pr108167.d
@@ -0,0 +1,5 @@
+// { dg-do compile }
+auto pr108167(const(ubyte[32])[] a)
+{
+return cast(const(ubyte)*)&a[1][0];
+}
-- 
2.37.2



Re: [V4][PATCH 2/2] Update documentation to clarify a GCC extension

2023-03-02 Thread Qing Zhao via Gcc-patches
Ping.

Qing

> On Feb 24, 2023, at 1:35 PM, Qing Zhao  wrote:
> 
> on a structure with a C99 flexible array member being nested in
> another structure.
> 
> "GCC extension accepts a structure containing an ISO C99 "flexible array
> member", or a union containing such a structure (possibly recursively)
> to be a member of a structure.
> 
> There are two situations:
> 
>   * The structure with a C99 flexible array member is the last field of
> another structure, for example:
> 
>  struct flex  { int length; char data[]; };
>  union union_flex { int others; struct flex f; };
> 
>  struct out_flex_struct { int m; struct flex flex_data; };
>  struct out_flex_union { int n; union union_flex flex_data; };
> 
> In the above, both 'out_flex_struct.flex_data.data[]' and
> 'out_flex_union.flex_data.f.data[]' are considered as flexible
> arrays too.
> 
>   * The structure with a C99 flexible array member is the middle field
> of another structure, for example:
> 
>  struct flex  { int length; char data[]; };
> 
>  struct mid_flex { int m; struct flex flex_data; int n; };
> 
> In the above, 'mid_flex.flex_data.data[]' is allowed to be extended
> flexibly to the padding.  E.g, up to 4 elements.
> 
> However, relying on space in struct padding is a bad programming
> practice, compilers do not handle such extension consistently.  Any
> code relying on this behavior should be modified to ensure that
> flexible array members only end up at the ends of structures.
> 
> Please use warning option '-Wgnu-variable-sized-type-not-at-end' to
> identify all such cases in the source code and modify them.  This
> extension will be deprecated from gcc in the next release.
> "
> 
> gcc/c-family/ChangeLog:
> 
>   * c.opt: New option -Wgnu-variable-sized-type-not-at-end.
> 
> gcc/c/ChangeLog:
> 
>   * c-decl.cc (finish_struct): Issue warnings for new option.
> 
> gcc/ChangeLog:
> 
>   * doc/extend.texi: Document GCC extension on a structure containing
>   a flexible array member to be a member of another structure.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/variable-sized-type-flex-array.c: New test.
> ---
> gcc/c-family/c.opt|  5 ++
> gcc/c/c-decl.cc   |  7 +++
> gcc/doc/extend.texi   | 48 ++-
> .../gcc.dg/variable-sized-type-flex-array.c   | 31 
> 4 files changed, 90 insertions(+), 1 deletion(-)
> create mode 100644 gcc/testsuite/gcc.dg/variable-sized-type-flex-array.c
> 
> diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
> index cddeece..660ac07f3d4 100644
> --- a/gcc/c-family/c.opt
> +++ b/gcc/c-family/c.opt
> @@ -737,6 +737,11 @@ Wformat-truncation=
> C ObjC C++ LTO ObjC++ Joined RejectNegative UInteger Var(warn_format_trunc) 
> Warning LangEnabledBy(C ObjC C++ LTO ObjC++,Wformat=, warn_format >= 1, 0) 
> IntegerRange(0, 2)
> Warn about calls to snprintf and similar functions that truncate output.
> 
> +Wgnu-variable-sized-type-not-at-end
> +C C++ Var(warn_variable_sized_type_not_at_end) Warning
> +Warn about structures or unions with C99 flexible array members are not
> +at the end of a structure.
> +
> Wif-not-aligned
> C ObjC C++ ObjC++ Var(warn_if_not_aligned) Init(1) Warning
> Warn when the field in a struct is not aligned.
> diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
> index f589a2f5192..c5b54f07965 100644
> --- a/gcc/c/c-decl.cc
> +++ b/gcc/c/c-decl.cc
> @@ -9296,6 +9296,13 @@ finish_struct (location_t loc, tree t, tree fieldlist, 
> tree attributes,
>  && is_last_field)
>   TYPE_INCLUDE_FLEXARRAY (t) = true;
> 
> +  if (warn_variable_sized_type_not_at_end
> +   && !is_last_field
> +   && TYPE_INCLUDE_FLEXARRAY (TREE_TYPE (x)))
> + warning_at (DECL_SOURCE_LOCATION (x),
> + OPT_Wgnu_variable_sized_type_not_at_end,
> + "variable sized type not at the end of a struct");
> +
>   if (DECL_NAME (x)
> || RECORD_OR_UNION_TYPE_P (TREE_TYPE (x)))
>   saw_named_field = true;
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index c1122916255..e278148c332 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -1748,7 +1748,53 @@ Flexible array members may only appear as the last 
> member of a
> A structure containing a flexible array member, or a union containing
> such a structure (possibly recursively), may not be a member of a
> structure or an element of an array.  (However, these uses are
> -permitted by GCC as extensions.)
> +permitted by GCC as extensions, see details below.)
> +@end itemize
> +
> +GCC extension accepts a structure containing an ISO C99 @dfn{flexible array
> +member}, or a union containing such a structure (possibly recursively)
> +to be a member of a structure.
> +
> +There are two situations:
> +
> +@itemize @bullet
> +@item
> +The structure with a C99 flexible array me

Re: [v4][PATCH 1/2] Handle component_ref to a structre/union field including C99 FAM [PR101832]

2023-03-02 Thread Qing Zhao via Gcc-patches
Ping.

Qing

> On Feb 24, 2023, at 1:35 PM, Qing Zhao  wrote:
> 
> GCC extension accepts the case when a struct with a C99 flexible array member
> is embedded into another struct or union (possibly recursively).
> __builtin_object_size should treat such struct as flexible size.
> 
> gcc/c/ChangeLog:
> 
>   PR tree-optimization/101832
>   * c-decl.cc (finish_struct): Set TYPE_INCLUDE_FLEXARRAY for
>   struct/union type.
> 
> gcc/cp/ChangeLog:
> 
>   PR tree-optimization/101832
>   * module.cc (trees_out::core_bools): Stream out new bit
>   type_include_flexarray.
>   (trees_in::core_bools): Stream in new bit type_include_flexarray.
> 
> gcc/ChangeLog:
> 
>   PR tree-optimization/101832
>   * print-tree.cc (print_node): Print new bit type_include_flexarray.
>   * tree-core.h (struct tree_type_common): New bit
>   type_include_flexarray.
>   * tree-object-size.cc (addr_object_size): Handle structure/union type
>   when it has flexible size.
>   * tree-streamer-in.cc (unpack_ts_type_common_value_fields): Stream
>   in new bit type_include_flexarray.
>   * tree-streamer-out.cc (pack_ts_type_common_value_fields): Stream
>   out new bit type_include_flexarray.
>   * tree.h (TYPE_INCLUDE_FLEXARRAY): New macro
>   TYPE_INCLUDE_FLEXARRAY.
> 
> gcc/testsuite/ChangeLog:
> 
>   PR tree-optimization/101832
>   * gcc.dg/builtin-object-size-pr101832.c: New test.
> ---
> gcc/c/c-decl.cc   |  12 ++
> gcc/cp/module.cc  |   2 +
> gcc/print-tree.cc |   5 +
> .../gcc.dg/builtin-object-size-pr101832.c | 134 ++
> gcc/tree-core.h   |   4 +-
> gcc/tree-object-size.cc   |  79 +++
> gcc/tree-streamer-in.cc   |   1 +
> gcc/tree-streamer-out.cc  |   1 +
> gcc/tree.h|   6 +
> 9 files changed, 215 insertions(+), 29 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
> 
> diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc
> index 08078eadeb8..f589a2f5192 100644
> --- a/gcc/c/c-decl.cc
> +++ b/gcc/c/c-decl.cc
> @@ -9284,6 +9284,18 @@ finish_struct (location_t loc, tree t, tree fieldlist, 
> tree attributes,
>   /* Set DECL_NOT_FLEXARRAY flag for FIELD_DECL x.  */
>   DECL_NOT_FLEXARRAY (x) = !is_flexible_array_member_p (is_last_field, x);
> 
> +  /* Set TYPE_INCLUDE_FLEXARRAY for the context of x, t
> +   * when x is an array.  */
> +  if (TREE_CODE (TREE_TYPE (x)) == ARRAY_TYPE)
> + TYPE_INCLUDE_FLEXARRAY (t) = flexible_array_member_type_p (TREE_TYPE 
> (x)) ;
> +  /* Recursively set TYPE_INCLUDE_FLEXARRAY for the context of x, t
> +  when x is the last field.  */
> +  else if ((TREE_CODE (TREE_TYPE (x)) == RECORD_TYPE
> + || TREE_CODE (TREE_TYPE (x)) == UNION_TYPE)
> +&& TYPE_INCLUDE_FLEXARRAY (TREE_TYPE (x))
> +&& is_last_field)
> + TYPE_INCLUDE_FLEXARRAY (t) = true;
> +
>   if (DECL_NAME (x)
> || RECORD_OR_UNION_TYPE_P (TREE_TYPE (x)))
>   saw_named_field = true;
> diff --git a/gcc/cp/module.cc b/gcc/cp/module.cc
> index ac2fe66b080..c750361b704 100644
> --- a/gcc/cp/module.cc
> +++ b/gcc/cp/module.cc
> @@ -5371,6 +5371,7 @@ trees_out::core_bools (tree t)
>   WB (t->type_common.lang_flag_5);
>   WB (t->type_common.lang_flag_6);
>   WB (t->type_common.typeless_storage);
> +  WB (t->type_common.type_include_flexarray);
> }
> 
>   if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON))
> @@ -5551,6 +5552,7 @@ trees_in::core_bools (tree t)
>   RB (t->type_common.lang_flag_5);
>   RB (t->type_common.lang_flag_6);
>   RB (t->type_common.typeless_storage);
> +  RB (t->type_common.type_include_flexarray);
> }
> 
>   if (CODE_CONTAINS_STRUCT (code, TS_DECL_COMMON))
> diff --git a/gcc/print-tree.cc b/gcc/print-tree.cc
> index 1f3afcbbc86..efacdb7686f 100644
> --- a/gcc/print-tree.cc
> +++ b/gcc/print-tree.cc
> @@ -631,6 +631,11 @@ print_node (FILE *file, const char *prefix, tree node, 
> int indent,
> && TYPE_CXX_ODR_P (node))
>   fputs (" cxx-odr-p", file);
> 
> +  if ((code == RECORD_TYPE
> +|| code == UNION_TYPE)
> +   && TYPE_INCLUDE_FLEXARRAY (node))
> + fputs (" include-flexarray", file);
> +
>   /* The transparent-union flag is used for different things in
>different nodes.  */
>   if ((code == UNION_TYPE || code == RECORD_TYPE)
> diff --git a/gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c 
> b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
> new file mode 100644
> index 000..60078e11634
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
> @@ -0,0 +1,134 @@
> +/* PR 101832: 
> +   GCC extension accepts the case when a struct with a C99 flexible array
> +   member is embedded into ano

Re: [V4][PATCH 0/2] Handle component_ref to a structure/union field including FAM for builtin_object_size

2023-03-02 Thread Qing Zhao via Gcc-patches
Ping

Qing

> On Feb 24, 2023, at 1:35 PM, Qing Zhao  wrote:
> 
> Hi, Joseph and Richard,
> 
> Could you please review this patch and let me know whether it’s ready
> for committing into GCC13?
> 
> The fix to Bug PR101832 is an important patch for kernel security
> purpose. it's better to be put into GCC13.
> 
> =
> 
> These are the 4th version of the patches for PR101832, to fix
> builtin_object_size to correctly handle component_ref to a
> structure/union field that includes a flexible array member.
> 
> also includes a documentation update for the GCC extension on embedding
> a structure/union with flexible array member into another structure.
> which includes a fix to PR77650.
> 
> compared to the 3rd version of the patch, the major changes are:
> 
> 1. update the documentation part per Joseph's comments.
> 
> compared to the 2nd version of the patch, the major changes are:
> 
> 1. only include C99 flexible array member to this extension, trailing [0], [1]
>  and [4] are all excluded.
> 2. for the new bit type_include_flexarray in tree_type_common, print it
>  and also stream in/out it. 
> 3. update testing cases.
> 4. more clarification on the documentation. warnings for deprecating the 
>  case when the structure with C99 FAM is embedded in the middle of
>  another structure. 
> 5. add a new warning option -Wgnu-variable-sized-type-not-at-end for
>  identifying all such cases.
> 
> bootstrapped and regression tested on aarch64 and x86.
> 
> Okay for commit?
> 
> thanks.
> 
> Qing
> 
> Qing Zhao (2):
>  Handle component_ref to a structre/union field including C99 FAM
>[PR101832]
>  Update documentation to clarify a GCC extension
> 
> gcc/c-family/c.opt|   5 +
> gcc/c/c-decl.cc   |  19 +++
> gcc/cp/module.cc  |   2 +
> gcc/doc/extend.texi   |  48 ++-
> gcc/print-tree.cc |   5 +
> .../gcc.dg/builtin-object-size-pr101832.c | 134 ++
> .../gcc.dg/variable-sized-type-flex-array.c   |  31 
> gcc/tree-core.h   |   4 +-
> gcc/tree-object-size.cc   |  79 +++
> gcc/tree-streamer-in.cc   |   1 +
> gcc/tree-streamer-out.cc  |   1 +
> gcc/tree.h|   6 +
> 12 files changed, 305 insertions(+), 30 deletions(-)
> create mode 100644 gcc/testsuite/gcc.dg/builtin-object-size-pr101832.c
> create mode 100644 gcc/testsuite/gcc.dg/variable-sized-type-flex-array.c
> 
> -- 
> 2.31.1
> 



[committed] testsuite: Fix up memchr-3.c test [PR108991]

2023-03-02 Thread Jakub Jelinek via Gcc-patches
On Thu, Mar 02, 2023 at 01:43:30PM +, Jonathan Yong via Gcc-patches wrote:
> On 3/2/23 10:46, Richard Sandiford wrote:
> > > diff --git a/gcc/testsuite/gcc.dg/memchr-3.c 
> > > b/gcc/testsuite/gcc.dg/memchr-3.c
> > > index c38d9cf3349..af1b26ef3ae 100644
> > > --- a/gcc/testsuite/gcc.dg/memchr-3.c
> > > +++ b/gcc/testsuite/gcc.dg/memchr-3.c
> > > @@ -6,7 +6,7 @@
> > >   typedef __INT8_TYPE__  int8_t;
> > >   typedef __INT32_TYPE__ int32_t;
> > > -extern void* memchr (const void*, int, long);
> > > +extern void* memchr (const void*, int, long); /* { dg-warning 
> > > "-Wbuiltin-declaration-mismatch" { target llp64 } } */
> > >   struct SX
> > >   {
> > 
> > OK, thanks.
> 
> Thanks, pushed to master branch.

The newly added dg-warning directive was missing the comment argument,
so the target selector was treated as comment and the warning was expected
on all targets when it should be expected only on llp64 targets.

Tested on x86_64-linux -m32/-m64, committed to trunk as obvious:

2023-03-03  Jakub Jelinek  

PR testsuite/108991
* gcc.dg/memchr-3.c: Add missing comment argument to dg-warning
before target selector.

--- gcc/testsuite/gcc.dg/memchr-3.c.jj  2023-03-02 19:09:45.466594110 +0100
+++ gcc/testsuite/gcc.dg/memchr-3.c 2023-03-03 00:48:04.368348282 +0100
@@ -6,7 +6,7 @@
 typedef __INT8_TYPE__  int8_t;
 typedef __INT32_TYPE__ int32_t;
 
-extern void* memchr (const void*, int, long); /* { dg-warning 
"-Wbuiltin-declaration-mismatch" { target llp64 } } */
+extern void* memchr (const void*, int, long); /* { dg-warning 
"-Wbuiltin-declaration-mismatch" "" { target llp64 } } */
 
 struct SX
 {


Jakub



[PATCH v2] libcpp: Handle extended characters in user-defined literal suffix [PR103902]

2023-03-02 Thread Lewis Hyatt via Gcc-patches
The PR complains that we do not handle UTF-8 in the suffix for a user-defined
literal, such as:

bool operator ""_π (unsigned long long);

In fact we don't handle any extended identifier characters there, whether
UTF-8, UCNs, or the $ sign. We do handle it fine if the optional space after
the "" tokens is included, since then the identifier is lexed in the "normal"
way as its own token. But when it is lexed as part of the string token, this
is handled in lex_string() with a one-off loop that is not aware of extended
characters.

This patch fixes it by adding a new function scan_cur_identifier() that can be
used to lex an identifier while in the middle of lexing another token.

BTW, the other place that has been mis-lexing identifiers is
lex_identifier_intern(), which is used to implement #pragma push_macro
and #pragma pop_macro. This does not support extended characters either.
I will add that in a subsequent patch, because it can't directly reuse the
new function, but rather needs to lex from a string instead of a cpp_buffer.

With scan_cur_identifier(), we do also correctly warn about bidi and
normalization issues in the extended identifiers comprising the suffix.

libcpp/ChangeLog:

PR preprocessor/103902
* lex.cc (identifier_diagnostics_on_lex): New function refactoring
some common code.
(lex_identifier_intern): Use the new function.
(lex_identifier): Don't run identifier diagnostics here, rather let
the call site do it when needed.
(_cpp_lex_direct): Adjust the call sites of lex_identifier ()
accordingly.
(struct scan_id_result): New struct.
(scan_cur_identifier): New function.
(create_literal2): New function.
(lit_accum::create_literal2): New function.
(is_macro): Folded into new function...
(maybe_ignore_udl_macro_suffix): ...here.
(is_macro_not_literal_suffix): Folded likewise.
(lex_raw_string): Handle UTF-8 in UDL suffix via scan_cur_identifier ().
(lex_string): Likewise.

gcc/testsuite/ChangeLog:

PR preprocessor/103902
* g++.dg/cpp0x/udlit-extended-id-1.C: New test.
* g++.dg/cpp0x/udlit-extended-id-2.C: New test.
* g++.dg/cpp0x/udlit-extended-id-3.C: New test.
* g++.dg/cpp0x/udlit-extended-id-4.C: New test.
---

Notes:
Hello-

This is the updated version of the patch, incorporating feedback from Jakub
and Jason, most recently discussed here:

https://gcc.gnu.org/pipermail/gcc-patches/2023-February/612073.html

Please let me know how it looks? It is simpler than before with the new
approach. Thanks!

One thing to note. As Jason clarified for me, a usage like this:

 #pragma GCC poison _x
const char * operator "" _x (const char *, unsigned long);

The space between the "" and the _x is currently allowed but will be
deprecated in C++23. GCC currently will complain about the poisoned use of
_x in this case, and this patch, which is just focused on handling UTF-8
properly, does not change this. But it seems that it would be correct
not to apply poison in this case. I can try to follow up with a patch to do
so, if it seems worthwhile? Given the syntax is deprecated, maybe it's not
worth it...

For the time being, this patch does add a testcase for the above and xfails
it. For the case where no space is present, which is the part touched by the
present patch, existing behavior is preserved correctly and no diagnostics
such as poison are issued for the UDL suffix. (Contrary to v1 of this
patch.)

Thanks! bootstrap + regtested all languages on x86-64 Linux with
no regressions.

-Lewis

 .../g++.dg/cpp0x/udlit-extended-id-1.C|  68 
 .../g++.dg/cpp0x/udlit-extended-id-2.C|   6 +
 .../g++.dg/cpp0x/udlit-extended-id-3.C|  15 +
 .../g++.dg/cpp0x/udlit-extended-id-4.C|  14 +
 libcpp/lex.cc | 382 ++
 5 files changed, 317 insertions(+), 168 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-extended-id-1.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-extended-id-2.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-extended-id-3.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/udlit-extended-id-4.C

diff --git a/gcc/testsuite/g++.dg/cpp0x/udlit-extended-id-1.C 
b/gcc/testsuite/g++.dg/cpp0x/udlit-extended-id-1.C
new file mode 100644
index 000..411d4fdd0ba
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/udlit-extended-id-1.C
@@ -0,0 +1,68 @@
+// { dg-do run { target c++11 } }
+// { dg-additional-options "-Wno-error=normalized" }
+#include 
+using namespace std;
+
+constexpr unsigned long long operator "" _π (unsigned long long x)
+{
+  return 3 * x;
+}
+
+/* Historically we didn't parse properly as part of the "" token, so check that
+   as well.  */
+constexpr unsigned long long operator ""_Π2 (

Re: [wwwdocs] gcc-13/porting_to.html: Document C++ -fexcess-precision=standard

2023-03-02 Thread Jakub Jelinek via Gcc-patches
On Fri, Mar 03, 2023 at 12:05:09AM +0100, Gerald Pfeifer wrote:
> On Thu, 2 Mar 2023, Jakub Jelinek wrote:
> > +
> > +#include <stdlib.h>
> 
> Oops, in HTML we need to spell "<" as "&lt;" and ">" as "&gt;" - otherwise
> the above would be seen as a tag by the name of stdlib.h. ;-)
> 
> I pushed the follow-up patch below.

Oops, missed that.  Thanks.

Jakub



Re: [wwwdocs] gcc-13/porting_to.html: Document C++ -fexcess-precision=standard

2023-03-02 Thread Gerald Pfeifer
On Thu, 2 Mar 2023, Jakub Jelinek wrote:
> +
> +#include <stdlib.h>

Oops, in HTML we need to spell "<" as "&lt;" and ">" as "&gt;" - otherwise
the above would be seen as a tag by the name of stdlib.h. ;-)

I pushed the follow-up patch below.

Gerald


commit 935fcdebfb2fb4dcd89edb51ebed5f1be0fb41e5
Author: Gerald Pfeifer 
Date:   Thu Mar 2 23:58:29 2023 +0100

gcc-13: Use &lt; and &gt; where necessary

diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
index f0ccef69..953e1453 100644
--- a/htdocs/gcc-13/porting_to.html
+++ b/htdocs/gcc-13/porting_to.html
@@ -129,7 +129,7 @@ constants and expressions.  E.g. for 
FLT_EVAL_METHOD equal
 to 2 on ia32:
 
 
-#include <stdlib.h>
+#include &lt;stdlib.h&gt;
 void foo (void) { if (1.1f + 3.3f != 1.1L + 3.3L) abort (); }
 void bar (void) { double d = 4.2; if (d == 4.2) abort (); }
 


[r13-6414 Regression] FAIL: gcc.dg/memchr-3.c target llp64 (test for warnings, line 9) on Linux/x86_64

2023-03-02 Thread haochen.jiang via Gcc-patches
On Linux/x86_64,

62a8d31ecc07041af4a81353c2d57d9845c4b771 is the first bad commit
commit 62a8d31ecc07041af4a81353c2d57d9845c4b771
Author: Jonathan Yong <10wa...@gmail.com>
Date:   Mon Feb 27 10:02:32 2023 +

gcc.dg/memchr-3.c: Account for LLP64 warnings

caused

FAIL: gcc.dg/memchr-3.c  target llp64  (test for warnings, line 9)

with GCC configured with

../../gcc/configure 
--prefix=/export/users/haochenj/src/gcc-bisect/master/master/r13-6414/usr 
--enable-clocale=gnu --with-system-zlib --with-demangler-in-ld 
--with-fpmath=sse --enable-languages=c,c++,fortran --enable-cet --without-isl 
--enable-libmpx x86_64-linux --disable-bootstrap

To reproduce:

$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/memchr-3.c 
--target_board='unix{-m32}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/memchr-3.c 
--target_board='unix{-m32\ -march=cascadelake}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/memchr-3.c 
--target_board='unix{-m64}'"
$ cd {build_dir}/gcc && make check RUNTESTFLAGS="dg.exp=gcc.dg/memchr-3.c 
--target_board='unix{-m64\ -march=cascadelake}'"

(Please do not reply to this email, for question about this report, contact me 
at haochen dot jiang at intel.com)


Re: [PATCH] libgccjit: Fix a failing test

2023-03-02 Thread David Malcolm via Gcc-patches
On Thu, 2023-03-02 at 23:35 +0100, Guillaume Gomez wrote:
> I don't have push rights so if you could push it, it'd be super
> appreciated!

Done, as r13-6425-g6b432c0f777ab9; I took the liberty of slightly
tweaking the subject line to add a "jit, testsuite: " prefix.

Thanks again for the patch
Dave



Ping: [PATCH 2/2] Rework 128-bit complex multiply and divide.

2023-03-02 Thread Michael Meissner via Gcc-patches
This patch is second in importance after the first patch in the series.  It is
needed to allow complex IBM 128-bit multiply/divide when long double is IEEE
128-bit.

| Date: Fri, 3 Feb 2023 00:53:05 -0500
| From: Michael Meissner 
| Subject: [PATCH 2/2] Rework 128-bit complex multiply and divide.
| Message-ID: 

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Ping: [PATCH 1/2] PR target/107299: Fix build issue when long double is IEEE 128-bit

2023-03-02 Thread Michael Meissner via Gcc-patches
This is the most important patch.  It is needed to allow the bootstrap to work
again when long double is IEEE 128-bit.

| Date: Fri, 3 Feb 2023 00:49:12 -0500
| From: Michael Meissner 
| Subject: [PATCH 1/2] PR target/107299: Fix build issue when long double is 
IEEE 128-bit
| Message-ID: 

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH] libgccjit: Fix a failing test

2023-03-02 Thread Guillaume Gomez via Gcc-patches
I don't have push rights so if you could push it, it'd be super appreciated!

Le jeu. 2 mars 2023 à 23:33, David Malcolm  a écrit :

> On Thu, 2023-03-02 at 23:29 +0100, Guillaume Gomez wrote:
> > Just realized I used whitespace and not a tab. Sorry about that.
> > Here's the
> > fixed version...
>
> Looks great.  Do you have push rights, or do you want me to push this?
>
> Thanks
> Dave
>
> >
> > Le jeu. 2 mars 2023 à 23:19, Guillaume Gomez
> >  a
> > écrit :
> >
> > > No problem, thanks for the explanations.
> > >
> > > I joined the patch with the fixed commit message.
> > >
> > > Le jeu. 2 mars 2023 à 22:58, David Malcolm  a
> > > écrit :
> > >
> > > > On Thu, 2022-12-15 at 08:34 +0100, Guillaume Gomez via Jit wrote:
> > > > > Forgot it indeed, thanks for notifying me!
> > > > >
> > > > > I modified the commit message to add it and added it into this
> > > > > email.
> > > >
> > > > Sorry about the delay in reviewing this; for some reason I didn't
> > > > see
> > > > the mail.
> > > >
> > > > The patch looks good for trunk, but please add a reference to
> > > >   PR jit/107999
> > > > to the subject line and ChangeLog message.
> > > >
> > > > Dave
> > > >
> > > > >
> > > > > Le mer. 14 déc. 2022 à 16:12, Antoni Boucher 
> > > > > a
> > > > > écrit :
> > > > >
> > > > > > Thanks!
> > > > > >
> > > > > > In your patch, you're missing this line at the end of the
> > > > > > commit
> > > > > > message:
> > > > > >
> > > > > >Signed-off-by: Guillaume Gomez
> > > > > > 
> > > > > >
> > > > > > On Wed, 2022-12-14 at 14:39 +0100, Guillaume Gomez via Jit
> > > > > > wrote:
> > > > > > > Hi,
> > > > > > >
> > > > > > > This fixes bug 107999.
> > > > > > >
> > > > > > > Thanks in advance for the review.
> > > > > >
> > > > > >
> > > >
> > > >
>
>


Re: [PATCH] libgccjit: Fix a failing test

2023-03-02 Thread David Malcolm via Gcc-patches
On Thu, 2023-03-02 at 23:29 +0100, Guillaume Gomez wrote:
> Just realized I used whitespace and not a tab. Sorry about that.
> Here's the
> fixed version...

Looks great.  Do you have push rights, or do you want me to push this?

Thanks
Dave

> 
> Le jeu. 2 mars 2023 à 23:19, Guillaume Gomez
>  a
> écrit :
> 
> > No problem, thanks for the explanations.
> > 
> > I joined the patch with the fixed commit message.
> > 
> > Le jeu. 2 mars 2023 à 22:58, David Malcolm  a
> > écrit :
> > 
> > > On Thu, 2022-12-15 at 08:34 +0100, Guillaume Gomez via Jit wrote:
> > > > Forgot it indeed, thanks for notifying me!
> > > > 
> > > > I modified the commit message to add it and added it into this
> > > > email.
> > > 
> > > Sorry about the delay in reviewing this; for some reason I didn't
> > > see
> > > the mail.
> > > 
> > > The patch looks good for trunk, but please add a reference to
> > >   PR jit/107999
> > > to the subject line and ChangeLog message.
> > > 
> > > Dave
> > > 
> > > > 
> > > > Le mer. 14 déc. 2022 à 16:12, Antoni Boucher 
> > > > a
> > > > écrit :
> > > > 
> > > > > Thanks!
> > > > > 
> > > > > In your patch, you're missing this line at the end of the
> > > > > commit
> > > > > message:
> > > > > 
> > > > >    Signed-off-by: Guillaume Gomez
> > > > > 
> > > > > 
> > > > > On Wed, 2022-12-14 at 14:39 +0100, Guillaume Gomez via Jit
> > > > > wrote:
> > > > > > Hi,
> > > > > > 
> > > > > > This fixes bug 107999.
> > > > > > 
> > > > > > Thanks in advance for the review.
> > > > > 
> > > > > 
> > > 
> > > 



Re: [PATCH] libgccjit: Fix a failing test

2023-03-02 Thread Guillaume Gomez via Gcc-patches
Just realized I used whitespace and not a tab. Sorry about that. Here's the
fixed version...

Le jeu. 2 mars 2023 à 23:19, Guillaume Gomez  a
écrit :

> No problem, thanks for the explanations.
>
> I joined the patch with the fixed commit message.
>
> Le jeu. 2 mars 2023 à 22:58, David Malcolm  a écrit :
>
>> On Thu, 2022-12-15 at 08:34 +0100, Guillaume Gomez via Jit wrote:
>> > Forgot it indeed, thanks for notifying me!
>> >
>> > I modified the commit message to add it and added it into this email.
>>
>> Sorry about the delay in reviewing this; for some reason I didn't see
>> the mail.
>>
>> The patch looks good for trunk, but please add a reference to
>>   PR jit/107999
>> to the subject line and ChangeLog message.
>>
>> Dave
>>
>> >
>> > Le mer. 14 déc. 2022 à 16:12, Antoni Boucher  a
>> > écrit :
>> >
>> > > Thanks!
>> > >
>> > > In your patch, you're missing this line at the end of the commit
>> > > message:
>> > >
>> > >Signed-off-by: Guillaume Gomez 
>> > >
>> > > On Wed, 2022-12-14 at 14:39 +0100, Guillaume Gomez via Jit wrote:
>> > > > Hi,
>> > > >
>> > > > This fixes bug 107999.
>> > > >
>> > > > Thanks in advance for the review.
>> > >
>> > >
>>
>>
From 0835c7ba8bdf4090c7fb102206e70c1ed235808e Mon Sep 17 00:00:00 2001
From: Guillaume Gomez 
Date: Wed, 14 Dec 2022 14:28:22 +0100
Subject: [PATCH] [PATCH] Fix a failing test by updating its error string
 [PR107999]

gcc/testsuite/ChangeLog:
	PR jit/107999
	* jit.dg/test-error-array-bounds.c: Update test.

Signed-off-by: Guillaume Gomez 
---
 gcc/testsuite/jit.dg/test-error-array-bounds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/jit.dg/test-error-array-bounds.c b/gcc/testsuite/jit.dg/test-error-array-bounds.c
index b6c0ee526d4..a0dead13cb7 100644
--- a/gcc/testsuite/jit.dg/test-error-array-bounds.c
+++ b/gcc/testsuite/jit.dg/test-error-array-bounds.c
@@ -70,5 +70,5 @@ verify_code (gcc_jit_context *ctxt, gcc_jit_result *result)
   /* ...and that the message was captured by the API.  */
   CHECK_STRING_VALUE (gcc_jit_context_get_first_error (ctxt),
 		  "array subscript 10 is above array bounds of"
-		  " 'char[10]' [-Warray-bounds]");
+		  " 'char[10]' [-Warray-bounds=]");
 }
-- 
2.34.1



[pushed][PR90706] IRA: Use minimal cost for hard register movement

2023-03-02 Thread Vladimir Makarov via Gcc-patches

The following patch is for

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90706

The patch was successfully bootstrapped and tested on i686, x86-64, 
aarch64, ppc64le.


commit 23661e39df76e07fb4ce1ea015379c7601d947ef
Author: Vladimir N. Makarov 
Date:   Thu Mar 2 16:29:05 2023 -0500

IRA: Use minimal cost for hard register movement

This is the 2nd attempt to fix PR90706.  IRA calculates wrong AVR
costs for moving general hard regs of SFmode.  This was the reason for
spilling a pseudo in the PR.  In this patch we use smaller move cost
of hard reg in its natural and operand modes.

PR rtl-optimization/90706

gcc/ChangeLog:

* ira-costs.cc: Include print-rtl.h.
(record_reg_classes, scan_one_insn): Add code to print debug info.
(record_operand_costs): Find and use smaller cost for hard reg
move.

gcc/testsuite/ChangeLog:

* gcc.target/avr/pr90706.c: New.

diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
index 4c28171f27d..c0fdef807dd 100644
--- a/gcc/ira-costs.cc
+++ b/gcc/ira-costs.cc
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "ira-int.h"
 #include "addresses.h"
 #include "reload.h"
+#include "print-rtl.h"
 
 /* The flags is set up every time when we calculate pseudo register
classes through function ira_set_pseudo_classes.  */
@@ -503,6 +504,18 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
   int insn_allows_mem[MAX_RECOG_OPERANDS];
   move_table *move_in_cost, *move_out_cost;
   short (*mem_cost)[2];
+  const char *p;
+
+  if (ira_dump_file != NULL && internal_flag_ira_verbose > 5)
+{
+  fprintf (ira_dump_file, "Processing insn %u", INSN_UID (insn));
+  if (INSN_CODE (insn) >= 0
+	  && (p = get_insn_name (INSN_CODE (insn))) != NULL)
+	fprintf (ira_dump_file, " {%s}", p);
+  fprintf (ira_dump_file, " (freq=%d)\n",
+	   REG_FREQ_FROM_BB (BLOCK_FOR_INSN (insn)));
+  dump_insn_slim (ira_dump_file, insn);
+  }
 
   for (i = 0; i < n_ops; i++)
 insn_allows_mem[i] = 0;
@@ -526,6 +539,21 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
 	  continue;
 	}
 
+  if (ira_dump_file != NULL && internal_flag_ira_verbose > 5)
+	{
+	  fprintf (ira_dump_file, "  Alt %d:", alt);
+	  for (i = 0; i < n_ops; i++)
+	{
+	  p = constraints[i];
+	  if (*p == '\0')
+		continue;
+	  fprintf (ira_dump_file, "  (%d) ", i);
+	  for (; *p != '\0' && *p != ',' && *p != '#'; p++)
+		fputc (*p, ira_dump_file);
+	}
+	  fprintf (ira_dump_file, "\n");
+	}
+
   for (i = 0; i < n_ops; i++)
 	{
 	  unsigned char c;
@@ -593,12 +621,16 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
 		 register, this alternative can't be used.  */
 
 		  if (classes[j] == NO_REGS)
-		alt_fail = 1;
-		  /* Otherwise, add to the cost of this alternative
-		 the cost to copy the other operand to the hard
-		 register used for this operand.  */
+		{
+		  alt_fail = 1;
+		}
 		  else
-		alt_cost += copy_cost (ops[j], mode, classes[j], 1, NULL);
+		/* Otherwise, add to the cost of this alternative the cost
+		   to copy the other operand to the hard register used for
+		   this operand.  */
+		{
+		  alt_cost += copy_cost (ops[j], mode, classes[j], 1, NULL);
+		}
 		}
 	  else
 		{
@@ -1021,18 +1053,45 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
   for (i = 0; i < n_ops; i++)
 	if (REG_P (ops[i]) && REGNO (ops[i]) >= FIRST_PSEUDO_REGISTER)
 	  {
+	int old_cost;
+	bool cost_change_p = false;
 	struct costs *pp = op_costs[i], *qq = this_op_costs[i];
 	int *pp_costs = pp->cost, *qq_costs = qq->cost;
 	int scale = 1 + (recog_data.operand_type[i] == OP_INOUT);
 	cost_classes_t cost_classes_ptr
 	  = regno_cost_classes[REGNO (ops[i])];
 
-	pp->mem_cost = MIN (pp->mem_cost,
+	old_cost = pp->mem_cost;
+	pp->mem_cost = MIN (old_cost,
 (qq->mem_cost + op_cost_add) * scale);
 
+	if (ira_dump_file != NULL && internal_flag_ira_verbose > 5
+		&& pp->mem_cost < old_cost)
+	  {
+		cost_change_p = true;
+		fprintf (ira_dump_file, "op %d(r=%u) new costs MEM:%d",
+			 i, REGNO(ops[i]), pp->mem_cost);
+	  }
 	for (k = cost_classes_ptr->num - 1; k >= 0; k--)
-	  pp_costs[k]
-		= MIN (pp_costs[k], (qq_costs[k] + op_cost_add) * scale);
+	  {
+		old_cost = pp_costs[k];
+		pp_costs[k]
+		  = MIN (old_cost, (qq_costs[k] + op_cost_add) * scale);
+		if (ira_dump_file != NULL && internal_flag_ira_verbose > 5
+		&& pp_costs[k] < old_cost)
+		  {
+		if (!cost_change_p)
+		  fprintf (ira_dump_file, "op %d(r=%u) new costs",
+			   i, REGNO(ops[i]));
+		cost_change_p = true;
+		fprintf (ira_dump_file, " %s:%d",
+			 reg_class_names[cost_classes_ptr->classes[k]],
+			 pp_costs[k]);
+		  }
+	  }
+	if (ira_dump_file != NULL && internal_flag_ira_verbose > 5
+

Re: [PATCH] libgccjit: Fix a failing test

2023-03-02 Thread Guillaume Gomez via Gcc-patches
No problem, thanks for the explanations.

I joined the patch with the fixed commit message.

Le jeu. 2 mars 2023 à 22:58, David Malcolm  a écrit :

> On Thu, 2022-12-15 at 08:34 +0100, Guillaume Gomez via Jit wrote:
> > Forgot it indeed, thanks for notifying me!
> >
> > I modified the commit message to add it and added it into this email.
>
> Sorry about the delay in reviewing this; for some reason I didn't see
> the mail.
>
> The patch looks good for trunk, but please add a reference to
>   PR jit/107999
> to the subject line and ChangeLog message.
>
> Dave
>
> >
> > Le mer. 14 déc. 2022 à 16:12, Antoni Boucher  a
> > écrit :
> >
> > > Thanks!
> > >
> > > In your patch, you're missing this line at the end of the commit
> > > message:
> > >
> > >Signed-off-by: Guillaume Gomez 
> > >
> > > On Wed, 2022-12-14 at 14:39 +0100, Guillaume Gomez via Jit wrote:
> > > > Hi,
> > > >
> > > > This fixes bug 107999.
> > > >
> > > > Thanks in advance for the review.
> > >
> > >
>
>
From 985228a76feecf16658b95a012e0b531e7e5c750 Mon Sep 17 00:00:00 2001
From: Guillaume Gomez 
Date: Wed, 14 Dec 2022 14:28:22 +0100
Subject: [PATCH] [PATCH] Fix a failing test by updating its error string
 [PR107999]

gcc/testsuite/ChangeLog:
PR jit/107999
	* jit.dg/test-error-array-bounds.c: Update test.

Signed-off-by: Guillaume Gomez 
---
 gcc/testsuite/jit.dg/test-error-array-bounds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/jit.dg/test-error-array-bounds.c b/gcc/testsuite/jit.dg/test-error-array-bounds.c
index b6c0ee526d4..a0dead13cb7 100644
--- a/gcc/testsuite/jit.dg/test-error-array-bounds.c
+++ b/gcc/testsuite/jit.dg/test-error-array-bounds.c
@@ -70,5 +70,5 @@ verify_code (gcc_jit_context *ctxt, gcc_jit_result *result)
   /* ...and that the message was captured by the API.  */
   CHECK_STRING_VALUE (gcc_jit_context_get_first_error (ctxt),
 		  "array subscript 10 is above array bounds of"
-		  " 'char[10]' [-Warray-bounds]");
+		  " 'char[10]' [-Warray-bounds=]");
 }
-- 
2.34.1



[PATCH] Fortran: fix CLASS attribute handling [PR106856]

2023-03-02 Thread Harald Anlauf via Gcc-patches
Dear all,

the attached patch fixes a long-standing issue with CLASS attributes
when a declaration is scattered over multiple statements.

The major part ("draft") of the patch is by Tobias, which I took up
before it started to bit-rot too much, see PR.  It is mainly about
a proper updating and book-keeping of symbol attributes.

While debugging the draft patch, I fixed a few disturbing memleaks
in class.cc that showed up when looking at intermediate fallout.

This patch also addresses issues reported in a few other PRs:
pr53951, pr101101, pr104229, pr107380.  These are mostly
duplicates at some level.

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald

From 4600577e3ecceb2525618685f47c8a979cf9d244 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Thu, 2 Mar 2023 22:37:14 +0100
Subject: [PATCH] Fortran: fix CLASS attribute handling [PR106856]

gcc/fortran/ChangeLog:

	PR fortran/106856
	* class.cc (gfc_build_class_symbol): Handle update of attributes of
	existing class container.
	(gfc_find_derived_vtab): Fix several memory leaks.
	* decl.cc (attr_decl1): Manage update of symbol attributes from
	CLASS attributes.
	* primary.cc (gfc_variable_attr): OPTIONAL shall not be taken or
	updated from the class container.

gcc/testsuite/ChangeLog:

	PR fortran/106856
	* gfortran.dg/interface_41.f90: Remove dg-pattern from valid testcase.
	* gfortran.dg/class_74.f90: New test.
	* gfortran.dg/class_75.f90: New test.

Co-authored-by: Tobias Burnus  
---
 gcc/fortran/class.cc   | 23 +++--
 gcc/fortran/decl.cc| 59 +++---
 gcc/fortran/primary.cc |  1 -
 gcc/testsuite/gfortran.dg/class_74.f90 | 41 +++
 gcc/testsuite/gfortran.dg/class_75.f90 | 24 +
 gcc/testsuite/gfortran.dg/interface_41.f90 |  2 +-
 6 files changed, 115 insertions(+), 35 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/class_74.f90
 create mode 100644 gcc/testsuite/gfortran.dg/class_75.f90

diff --git a/gcc/fortran/class.cc b/gcc/fortran/class.cc
index ae653e74437..2eebdd4a3bb 100644
--- a/gcc/fortran/class.cc
+++ b/gcc/fortran/class.cc
@@ -638,6 +638,7 @@ gfc_build_class_symbol (gfc_typespec *ts, symbol_attribute *attr,
 {
   char tname[GFC_MAX_SYMBOL_LEN+1];
   char *name;
+  gfc_typespec *orig_ts = ts;
   gfc_symbol *fclass;
   gfc_symbol *vtab;
   gfc_component *c;
@@ -646,9 +647,21 @@ gfc_build_class_symbol (gfc_typespec *ts, symbol_attribute *attr,

   gcc_assert (as);

-  if (attr->class_ok)
-/* Class container has already been built.  */
+  /* Class container has already been built with same name.  */
+  if (attr->class_ok
+  && ts->u.derived->components->attr.dimension >= attr->dimension
+  && ts->u.derived->components->attr.codimension >= attr->codimension
+  && ts->u.derived->components->attr.class_pointer >= attr->pointer
+  && ts->u.derived->components->attr.allocatable >= attr->allocatable)
 return true;
+  if (attr->class_ok)
+{
+  attr->dimension |= ts->u.derived->components->attr.dimension;
+  attr->codimension |= ts->u.derived->components->attr.codimension;
+  attr->pointer |= ts->u.derived->components->attr.class_pointer;
+  attr->allocatable |= ts->u.derived->components->attr.allocatable;
+  ts = &ts->u.derived->components->ts;
+}

   attr->class_ok = attr->dummy || attr->pointer || attr->allocatable
 		   || attr->select_type_temporary || attr->associate_var;
@@ -790,7 +803,7 @@ gfc_build_class_symbol (gfc_typespec *ts, symbol_attribute *attr,
 }

   fclass->attr.is_class = 1;
-  ts->u.derived = fclass;
+  orig_ts->u.derived = fclass;
   attr->allocatable = attr->pointer = attr->dimension = attr->codimension = 0;
   (*as) = NULL;
   free (name);
@@ -2344,6 +2357,7 @@ gfc_find_derived_vtab (gfc_symbol *derived)
 	  vtab->attr.vtab = 1;
 	  vtab->attr.access = ACCESS_PUBLIC;
 	  gfc_set_sym_referenced (vtab);
+	  free (name);
 	  name = xasprintf ("__vtype_%s", tname);

 	  gfc_find_symbol (name, ns, 0, &vtype);
@@ -2447,6 +2461,7 @@ gfc_find_derived_vtab (gfc_symbol *derived)
 	  else
 		{
 		  /* Construct default initialization variable.  */
+		  free (name);
 		  name = xasprintf ("__def_init_%s", tname);
 		  gfc_get_symbol (name, ns, &def_init);
 		  def_init->attr.target = 1;
@@ -2480,6 +2495,7 @@ gfc_find_derived_vtab (gfc_symbol *derived)
 		  ns->contained = sub_ns;
 		  sub_ns->resolved = 1;
 		  /* Set up procedure symbol.  */
+		  free (name);
 		  name = xasprintf ("__copy_%s", tname);
 		  gfc_get_symbol (name, sub_ns, &copy);
 		  sub_ns->proc_name = copy;
@@ -2558,6 +2574,7 @@ gfc_find_derived_vtab (gfc_symbol *derived)
 		  ns->contained = sub_ns;
 		  sub_ns->resolved = 1;
 		  /* Set up procedure symbol.  */
+		  free (name);
 		  name = xasprintf ("__deallocate_%s", tname);
 		  gfc_get_symbol (name, sub_ns, &dealloc);
 		  sub_ns->proc_name = dealloc;
diff --git a/gcc/fortran/decl.cc b/gcc/fortran/decl.cc
index eec0314cf4

Re: [PATCH] libgccjit: Fix a failing test

2023-03-02 Thread David Malcolm via Gcc-patches
On Thu, 2022-12-15 at 08:34 +0100, Guillaume Gomez via Jit wrote:
> Forgot it indeed, thanks for notifying me!
> 
> I modified the commit message to add it and added it into this email.

Sorry about the delay in reviewing this; for some reason I didn't see
the mail.

The patch looks good for trunk, but please add a reference to
  PR jit/107999
to the subject line and ChangeLog message.

Dave

> 
> Le mer. 14 déc. 2022 à 16:12, Antoni Boucher  a
> écrit :
> 
> > Thanks!
> > 
> > In your patch, you're missing this line at the end of the commit
> > message:
> > 
> >    Signed-off-by: Guillaume Gomez 
> > 
> > On Wed, 2022-12-14 at 14:39 +0100, Guillaume Gomez via Jit wrote:
> > > Hi,
> > > 
> > > This fixes bug 107999.
> > > 
> > > Thanks in advance for the review.
> > 
> > 



Re: [PATCH v4] c++: -Wdangling-reference with reference wrapper [PR107532]

2023-03-02 Thread Marek Polacek via Gcc-patches
On Wed, Mar 01, 2023 at 04:53:23PM -0500, Jason Merrill wrote:
> > @@ -13791,12 +13830,39 @@ std_pair_ref_ref_p (tree t)
> >const int& y = (f(1), 42); // NULL_TREE
> >const int& z = f(f(1)); // f(f(1))
> > -   EXPR is the initializer.  */
> > +   EXPR is the initializer.  If ARG_P is true, we're processing an argument
> > +   to a function; the point is to distinguish between, for example,
> > +
> > + Ref::inner (&TARGET_EXPR )
> > +
> > +   where we shouldn't warn, and
> > +
> > + Ref::inner (&TARGET_EXPR )>)
> > +
> > +   where we should warn (Ref is a reference_like_class_p so we see through
> > +   it.  */
> >   static tree
> > -do_warn_dangling_reference (tree expr)
> > +do_warn_dangling_reference (tree expr, bool arg_p)
> >   {
> > STRIP_NOPS (expr);
> > +  if (TREE_CODE (expr) == ADDR_EXPR)
> > +expr = TREE_OPERAND (expr, 0);
> 
> I think if we move this here, we also need to check that expr before
> STRIP_NOPS had REFERENCE_TYPE.  OK with that change.

Sorry but I don't think I can do that.  There can be CONVERT_EXPRs
that need to be stripped, whether arg_p or !arg_p.  For example, we can get
(const int *) f ((const int &) &TARGET_EXPR >)
for
const int& r5 = (42, f(10));

Is the patch OK as-is then?

Marek



[wwwdocs] Document allocator_traits::rebind_alloc assertion with GCC 13

2023-03-02 Thread Jonathan Wakely via Gcc-patches
Pushed to wwwdocs.

---
 htdocs/gcc-13/porting_to.html | 60 +++
 1 file changed, 60 insertions(+)

diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
index 5cbeefb6..f0ccef69 100644
--- a/htdocs/gcc-13/porting_to.html
+++ b/htdocs/gcc-13/porting_to.html
@@ -144,5 +144,65 @@ done in the i387 floating point stack or are spilled from 
it.
 The -fexcess-precision=fast option can be used to request the
 previous behavior.
 
+allocator_traits<A>::rebind_alloc<A::value_type> 
must be A
+
+
+GCC 13 now checks that allocators used with the standard library
+can be "rebound" to allocate memory for a different type,
+as required by the allocator requirements in the C++ standard.
+If an allocator type Alloc<T>
+cannot be correctly rebound to another type Alloc<U>,
+you will get an error like this:
+
+
+
+.../bits/alloc_traits.h:70:31: error: static assertion failed: 
allocator_traits<A>::rebind_alloc<A::value_type> must be A
+
+
+
+The assertion checks that rebinding an allocator to its own value type is a
+no-op, which will be true if its rebind member is defined correctly.
+If rebinding it to its own value type produces a different type,
+then the allocator cannot be used with the standard library.
+
+
+
+The most common cause of this error is an allocator type 
Alloc<T>
+that derives from std::allocator<T> but does not provide its own
+rebind member. When the standard library attempts to rebind the
+allocator using Alloc<T>::rebind<U> it finds the
+std::allocator<T>::rebind<U> member from the base class,
+and the result is std::allocator<U> instead of
+Alloc<U>.
+
+
+
+The solution is to provide a correct rebind member as shown below.
+A converting constructor must also be provided, so that an
+Alloc<T> can be constructed from an Alloc<U>,
+and vice versa:
+
+
+template<class T>
+class Alloc
+{
+  Alloc();
+  
+  template<class U> Alloc(const Alloc<U>);
+
+  template<class U> struct rebind { using other = Alloc<U>; };
+  
+  // ...
+};
+
+
+
+Since C++20, there is no rebind member in std::allocator,
+so deriving your own allocator types from std::allocator is simpler
+and doesn't require the derived allocator to provide its own rebind.
+For compatibility with previous C++ standards, the member should still be
+provided. The converting constructor is still required even in C++20.
+
+
 
 
-- 
2.39.2



Re: [PATCH, V3] PR 107299, GCC does not build on PowerPC when long double is IEEE 128-bit

Hi!

On Wed, Dec 14, 2022 at 03:29:02PM -0500, Michael Meissner wrote:
> These 3 patches fix the problems with building GCC on PowerPC systems when 
> long
> double is configured to use the IEEE 128-bit format.

If you are strictly trying to fix a bootstrap problem, you should say
so: it should be very prominent in the proposed commit message, and
that message should be not much more than that.  And the patch should do
nothing else.  *Every* patch should do just one thing, but for fixes it
is even more important (how will we ever get into any stable state if we
always do random stuff?)

> The basic issue is internally within GCC there are several types for 128-bit
> floating point.  The types are:

There also are three *modes*, but there should be only IFmode and
KFmode, and TFmode should be just a #define that resolves to either, not
a separate mode.  This simplifies things a lot.  It should have always
been that way, as we talked about way back when already.  But you didn't
want to implement things that way.  This is on my plate now (for GCC 14).

> 3)  The type for _Float128.  This type is always IEEE 128-bit if it 
> exists.

And if there is no IEEE QP type?  What should _Float128 be then?

Largely academic, because there always *should* be a QP type (and mode).
This is a long-standing shortcoming as well.  I will finally fix that
myself for GCC 14 as well, simplifying many things.

> Like __ibm128, it uses the long double type if
> long double is IEEE 128-bit,

Which is completely upside down, of course.  The basic types should be
basic and always exist.  Things like long double can use some
indirection and/or copy stuff over.

Anything else is just a maze of twisty little passages.  A fun game if
you like that sort of thing, maybe, but instead of solving problems it
causes more :-(

> After these patches, there are 3 specific tests and 1 set of tests that fail
> when using IEEE 128-bit long double:
> 
> 1)  fp128_conversions.c: I haven't looked at it yet;

That needs to be looked at before these patches can be approved.

> 2)  pr105334.c: This is a bug that __ibm128 doesn't work if the default
> long double is IEEE 128-bit and you use the options: -mlong-double-128
> -msoft-float (i.e. no -mabi=ibmlongdouble).  I believe I have patches
> for this floating around.

Ditto.

> 3)  The g++.dg/cpp23/ext-floating1.C test is failing.  I believe we need 
> to
> dig in to fix PowerPC specific ISO C/C++ 2x _Float128 support.  I haven't
> looked at it yet.

Ditto.

> 4)  All/some of the G++ modules tests fail.  This is PR 98645, and it is
> assigned to Nathan Sidwell.

And this one too.

Any new failures need analysis.  Always.  This is why we have regression
tests at all!


Segher


Re: [PATCH] libgccjit: Fix a failing test

Ping David.

Le lun. 16 janv. 2023 à 15:08, Guillaume Gomez 
a écrit :

> Ping David.
>
> Le jeu. 5 janv. 2023 à 23:37, Guillaume Gomez 
> a écrit :
>
>> Ping David.
>>
>> Le sam. 24 déc. 2022 à 21:01, Guillaume Gomez 
>> a écrit :
>>
>>> Ping David
>>>
>>> Le jeu. 15 déc. 2022 à 11:34, Guillaume Gomez <
>>> guillaume1.go...@gmail.com> a écrit :
>>>
 Forgot it indeed, thanks for notifying me!

 I modified the commit message to add it and added it into this email.

 Le mer. 14 déc. 2022 à 16:12, Antoni Boucher  a
 écrit :

> Thanks!
>
> In your patch, you're missing this line at the end of the commit
> message:
>
>Signed-off-by: Guillaume Gomez 
>
> On Wed, 2022-12-14 at 14:39 +0100, Guillaume Gomez via Jit wrote:
> > Hi,
> >
> > This fixes bug 107999.
> >
> > Thanks in advance for the review.
>
>


[PATCH] driver: Treat include path args the same way between cpp_unique_options and asm_options. [PR71850]

This is a proposal to fix PR71850 by applying the existing logic for
passing include paths to cc1 to as.

Thanks,
Costas
From 393aff0d006ee9372cc8b9321c612c2dfb4b0a31 Mon Sep 17 00:00:00 2001
From: Costas Argyris 
Date: Thu, 2 Mar 2023 18:27:22 +
Subject: [PATCH] driver: Treat include path args the same way between
 cpp_unique_options and asm_options. [PR71850]

On Windows, when a @file with many include paths is passed to gcc, it forwards those include paths to cc1 through a temporary @file as well, so they don't end up in the command line.  This is because cpp_unique_options has %@{I* which passes -I args in a temporary file, if a temporary file was passed to the driver in the first place.

The same logic is not applied in asm_options, and this leads to the include paths being passed as command line arguments to the assembler, which causes the failure on Windows seen in PR71850.

Treating the -I args to the assembler the same way as to the compiler (that is, through a @tempfile if @file was passed to gcc) solves the issue, allowing a large number of include paths to be passed to gcc on Windows through a @file.
---
 gcc/gcc.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index becc56051a8..b1fa80cde4f 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -1278,7 +1278,7 @@ static const char *asm_options =
 #if HAVE_GNU_AS
 /* If GNU AS is used, then convert -w (no warnings), -I, and -v
to the assembler equivalents.  */
-"%{v} %{w:-W} %{I*} "
+"%{v} %{w:-W} %@{I*} "
 #endif
 "%(asm_debug_option)"
 ASM_COMPRESS_DEBUG_SPEC
-- 
2.30.2



[pushed] analyzer: fix uninit false +ves reading from DECL_HARD_REGISTER [PR108968]

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to trunk as r13-6420-g20bd258d0fa098.

gcc/analyzer/ChangeLog:
PR analyzer/108968
* region-model.cc (region_model::get_rvalue_1): Handle VAR_DECLs
with a DECL_HARD_REGISTER by returning UNKNOWN.

gcc/testsuite/ChangeLog:
PR analyzer/108968
* gcc.dg/analyzer/uninit-pr108968-register.c: New test.

Signed-off-by: David Malcolm 
---
 gcc/analyzer/region-model.cc | 9 -
 gcc/testsuite/gcc.dg/analyzer/uninit-pr108968-register.c | 9 +
 2 files changed, 17 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/uninit-pr108968-register.c

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 2187aecbe91..bf07cec2884 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -2203,9 +2203,16 @@ region_model::get_rvalue_1 (path_var pv, 
region_model_context *ctxt) const
return get_rvalue_for_bits (TREE_TYPE (expr), reg, bits, ctxt);
   }
 
-case SSA_NAME:
 case VAR_DECL:
+  if (DECL_HARD_REGISTER (pv.m_tree))
+   {
+ /* If it has a hard register, it doesn't have a memory region
+and can't be referred to as an lvalue.  */
+ return m_mgr->get_or_create_unknown_svalue (TREE_TYPE (pv.m_tree));
+   }
+  /* Fall through. */
 case PARM_DECL:
+case SSA_NAME:
 case RESULT_DECL:
 case ARRAY_REF:
   {
diff --git a/gcc/testsuite/gcc.dg/analyzer/uninit-pr108968-register.c 
b/gcc/testsuite/gcc.dg/analyzer/uninit-pr108968-register.c
new file mode 100644
index 000..a76c09e7b14
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/analyzer/uninit-pr108968-register.c
@@ -0,0 +1,9 @@
+/* { dg-do compile { target x86_64-*-* } } */
+
+#define STACK_SIZE 4096
+struct cpu_info {};
+struct cpu_info *get_cpu_info(void)
+{
+  register unsigned long sp asm("rsp");
+  return (struct cpu_info *)((sp | (STACK_SIZE - 1)) + 1) - 1; /* { dg-bogus 
"use of uninitialized value 'sp'" } */
+}
-- 
2.26.3



[PATCH] testsuite: Do not expect partial vectorization for s390.

Hi,

this patch changes SLP test expectations.  As we only vectorize when no
more than one rgroup is present, no vectorization is performed.

I was also considering using a separate target selector (something like
vect_partial_vectors_bias_m1) but as the number of testcases is limited
that would probably not simplify things much for now.

Is this OK?

Regards
 Robin

--

gcc/testsuite/ChangeLog:

* gcc.dg/vect/slp-3.c: Adapt test expectation.
* gcc.dg/vect/slp-multitypes-11.c: Likewise.
* gcc.dg/vect/slp-perm-8.c: Likewise.
---
 gcc/testsuite/gcc.dg/vect/slp-3.c | 8 
 gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c | 4 ++--
 gcc/testsuite/gcc.dg/vect/slp-perm-8.c| 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/vect/slp-3.c 
b/gcc/testsuite/gcc.dg/vect/slp-3.c
index 80ded1840ad2..4b9a58662305 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-3.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-3.c
@@ -141,8 +141,8 @@ int main (void)
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { 
! { vect_partial_vectors || vect32 } } } } } */
-/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { 
vect_partial_vectors || vect32 } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { 
target { ! { vect_partial_vectors || vect32 } } } } }*/
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { 
target { vect_partial_vectors || vect32 } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { target { 
! { vect_partial_vectors || vect32 } || s390_vx } } } } */
+/* { dg-final { scan-tree-dump-times "vectorized 4 loops" 1 "vect" { target { 
vect_partial_vectors || vect32 } && { ! s390_vx } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 3 "vect" { 
target { ! { vect_partial_vectors || vect32 } || s390_vx } } } }*/
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "vect" { 
target { vect_partial_vectors || vect32 } && { ! s390_vx } } } } */
   
diff --git a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c 
b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c
index 96218861cd61..dbf93ee6bb37 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-multitypes-11.c
@@ -50,6 +50,6 @@ int main (void)
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  { target 
vect_unpack } } } */
 /* The epilogues are vectorized using partial vectors.  */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { 
target { vect_unpack && {! vect_partial_vectors_usage_1 } } xfail { 
vect_variable_length && vect_load_lanes } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect"  { 
target { vect_unpack && vect_partial_vectors_usage_1 } xfail { 
vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect"  { 
target { vect_unpack && { {! vect_partial_vectors_usage_1 } || s390_vx } } 
xfail { vect_variable_length && vect_load_lanes } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect"  { 
target { vect_unpack && vect_partial_vectors_usage_1 && { ! s390_vx } } xfail { 
vect_variable_length && vect_load_lanes } } } } */
   
diff --git a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c 
b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
index 9e59832fb8b7..4023670d4459 100644
--- a/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
+++ b/gcc/testsuite/gcc.dg/vect/slp-perm-8.c
@@ -60,9 +60,9 @@ int main (int argc, const char* argv[])
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
vect_perm_byte } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { 
target { vect_perm3_byte && { { ! vect_load_lanes } && {! 
vect_partial_vectors_usage_1 } } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { 
target { vect_perm3_byte && { { ! vect_load_lanes } && { {! 
vect_partial_vectors_usage_1 } || s390_vx } } } } } } */
 /* The epilogues are vectorized using partial vectors.  */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { 
target { vect_perm3_byte && { { ! vect_load_lanes } && 
vect_partial_vectors_usage_1 } } } } } */
+/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 2 "vect" { 
target { vect_perm3_byte && { { ! vect_load_lanes } && 
vect_partial_vectors_usage_1 && { ! s390_vx } } } } } } */
 /* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { 
target vect_load_lanes } } } */
 /* { dg-final { scan-tree-dump "Built SLP cancelled: can use load/store-lanes" 
"vect" { target { vect_perm3_byte && vect_load_lanes } } } } */
 /* { dg-final { scan-tree-dump "LOAD_LANES" "vect" { target vect_load_lanes } 
} } */
-- 
2.39.1


[PATCH] s390: Use arch14 instead of z16 for -march=native.

Hi,

When compiling on a system where binutils do not yet support the 'z16'
name assembling fails with -march=native which we currently interpret
as -march=z16 (on a z16 machine).  This patch uses -march=arch14
instead.

Is it OK?

Regards
 Robin

--

gcc/ChangeLog:

* config/s390/driver-native.cc (s390_host_detect_local_cpu): Use
arch14 instead of z16.
---
 gcc/config/s390/driver-native.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/config/s390/driver-native.cc b/gcc/config/s390/driver-native.cc
index 563da45c7f6e..3b9c1e1ca5df 100644
--- a/gcc/config/s390/driver-native.cc
+++ b/gcc/config/s390/driver-native.cc
@@ -125,10 +125,10 @@ s390_host_detect_local_cpu (int argc, const char **argv)
  break;
case 0x3931:
case 0x3932:
- cpu = "z16";
+ cpu = "arch14";
  break;
default:
- cpu = "z16";
+ cpu = "arch14";
  break;
}
}
-- 
2.39.1



[PATCH] s390: Fix ifcvt test cases

Hi,

we seem to flip flop between the "high" and "not low" variants of load on
condition.  Accept both in the affected test cases.

Going to commit this as obvious.

Regards
 Robin

--

gcc/testsuite/ChangeLog:

* gcc.target/s390/ifcvt-two-insns-bool.c: Allow "high" and
"not low or equal" load on condition variant.
* gcc.target/s390/ifcvt-two-insns-int.c: Ditto.
* gcc.target/s390/ifcvt-two-insns-long.c: Ditto.
---
 gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c | 4 ++--
 gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c  | 4 ++--
 gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c 
b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c
index 1027ddceb935..a56bc4676143 100644
--- a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c
+++ b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-bool.c
@@ -3,8 +3,8 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -march=z13 -mzarch --save-temps" } */
 
-/* { dg-final { scan-assembler "lochih\t%r.?,1" } } */
-/* { dg-final { scan-assembler "locrh\t.*" } } */
+/* { dg-final { scan-assembler "lochi(?:h|nle)\t%r.?,1" } } */
+/* { dg-final { scan-assembler "locr(?:h|nle)\t.*" } } */
 #include 
 #include 
 #include 
diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c 
b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c
index fc6946f2466d..64b8a732290e 100644
--- a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c
+++ b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-int.c
@@ -3,8 +3,8 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -march=z13 -mzarch --save-temps" } */
 
-/* { dg-final { scan-assembler "lochih\t%r.?,1" } } */
-/* { dg-final { scan-assembler "locrh\t.*" } } */
+/* { dg-final { scan-assembler "lochi(h|nle)\t%r.?,1" } } */
+/* { dg-final { scan-assembler "locr(?:h|nle)\t.*" } } */
 #include 
 #include 
 #include 
diff --git a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c 
b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c
index 51af4985247a..f2d784e762a8 100644
--- a/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c
+++ b/gcc/testsuite/gcc.target/s390/ifcvt-two-insns-long.c
@@ -3,8 +3,8 @@
 /* { dg-do run } */
 /* { dg-options "-O2 -march=z13 -mzarch --save-temps" } */
 
-/* { dg-final { scan-assembler "locghih\t%r.?,1" } } */
-/* { dg-final { scan-assembler "locgrh\t.*" } } */
+/* { dg-final { scan-assembler "locghi(?:h|nle)\t%r.?,1" } } */
+/* { dg-final { scan-assembler "locgr(?:h|nle)\t.*" } } */
 
 #include 
 #include 
-- 
2.39.1


Re: Ping: [PATCH] testsuite: Tweak gcc.dg/attr-aligned.c for CRIS

On Feb 27, 2023, at 5:54 PM, Hans-Peter Nilsson via Gcc-patches 
 wrote:
> 
> Ping...

Ok.

> 
>> From: Hans-Peter Nilsson 
>> Date: Thu, 16 Feb 2023 21:05:29 +0100
> 
>> Asking for the lines outside the "#if __CRIS__" part.
>> Ok to commit?
>> 
>> -- >8 --
>> tm.texi says for BIGGEST_ALIGNMENT (from which
>> __BIGGEST_ALIGNMENT__ is derived): "Biggest alignment that
>> any data type can require on this machine, in bits."
>> 
>> That is, using that value might be too strict for alignment
>> of *functions* and CRIS requires at least 16-bit alignment
>> for functions.  But, one purpose of the test is to test that
>> alignment can be set to a large but valid value, so pick
>> 512, which has some use as a historically required alignment
>> for certain I/O descriptors.
>> 
>>  * gcc.dg/attr-aligned.c: Adjust comment for ALIGN_MAX_STATIC.
>>  (ALIGN_MAX_STATIC): Set to 512 for CRIS.
>> ---
>> gcc/testsuite/gcc.dg/attr-aligned.c | 8 +++-
>> 1 file changed, 7 insertions(+), 1 deletion(-)
>> 
>> diff --git a/gcc/testsuite/gcc.dg/attr-aligned.c 
>> b/gcc/testsuite/gcc.dg/attr-aligned.c
>> index 887bdd0f3799..4f0c885dc812 100644
>> --- a/gcc/testsuite/gcc.dg/attr-aligned.c
>> +++ b/gcc/testsuite/gcc.dg/attr-aligned.c
>> @@ -18,6 +18,10 @@
>> # else
>> #   define ALIGN_MAX_STATIC  ALIGN_MAX_HARD
>> # endif
>> +#elif __CRIS__
>> +/* __BIGGEST_ALIGNMENT__ doesn't cover functions (16 bits for CRIS). */
>> +#  define ALIGN_MAX_STATIC  512
>> +#  define ALIGN_TOO_BIG_OFILE   (ALIGN_MAX_HARD << 1)
>> #elif pdp11
>> #  define ALIGN_MAX_STATIC  2
>> /* Work around a pdp11 ICE (see PR target/87821).  */
>> @@ -29,7 +33,9 @@
>> /* Is this processor- or operating-system specific?  */
>> #  define ALIGN_MAX_STATIC  ALIGN_MAX_HARD
>> #else
>> -   /* Guaranteed to be accepted regardless of the target.  */
>> +   /* Guaranteed to be accepted regardless of the target for objects.
>> +  This might not be true for alignment of functions though, so
>> +  may need to be set to a target-specific value above.  */
>> #  define ALIGN_MAX_STATIC  __BIGGEST_ALIGNMENT__
>>/* Guaranteed to be rejected regardless of the target.  */
>> #  define ALIGN_TOO_BIG_OFILE   (ALIGN_MAX_HARD << 1)
>> -- 
>> 2.30.2
>> 



Re: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision adjustment

"Li, Pan2"  writes:
> Oops, looks like I missed that part for the assertion. Thank you for the coaching.
> I have already added and tested the changes below at the end of
> emit_mode_adjustments, but it looks like we may have other problems with the
> size, the precision and the C types.
>
> Looks like I need to hold this PATCH for a while until we have a conclusion. 
> Feel free to let me know if anything here is mistaken or misleading.
>
> + 
> +  for_all_modes (c, m)
> +printf ("  gcc_checking_assert (!mode_size[E_%smode].is_constant()"
> +   " || mode_size[E_%smode].coeffs[0] != -1);\n", m->name, m->name);
> +

Using:

  gcc_assert (maybe_ne (mode_size[E_%smode], -1));

would be simpler.  We might as well make it a full assert (rather than a
checking assert) because this code isn't executed very often.

Thanks,
Richard

>
> Thank you and have a nice day!
>
> Pan
>
>
> -Original Message-
> From: Richard Sandiford  
> Sent: Thursday, March 2, 2023 5:44 PM
> To: Li, Pan2 
> Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; 
> rguent...@suse.de
> Subject: Re: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision adjustment
>
> pan2...@intel.com writes:
>> From: Pan Li 
>>
>>  Fix the bug of the rvv bool mode precision with the adjustment.
>>  The bits size of vbool*_t will be adjusted to
>>  [1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
>>  adjusted mode precision of vbool*_t will help underlying pass to
>>  make the right decision for both the correctness and optimization.
>>
>>  Given below sample code:
>>  void test_1(int8_t * restrict in, int8_t * restrict out)
>>  {
>>vbool8_t v2 = *(vbool8_t*)in;
>>vbool16_t v5 = *(vbool16_t*)in;
>>*(vbool16_t*)(out + 200) = v5;
>>*(vbool8_t*)(out + 100) = v2;
>>  }
>>
>>  Before the precision adjustment:
>>  addi    a4,a1,100
>>  vsetvli a5,zero,e8,m1,ta,ma
>>  addi    a1,a1,200
>>  vlm.v   v24,0(a0)
>>  vsm.v   v24,0(a4)
>>  // Need one vsetvli and vlm.v for correctness here.
>>  vsm.v   v24,0(a1)
>>
>>  After the precision adjustment:
>>  csrr    t0,vlenb
>>  slli    t1,t0,1
>>  csrr    a3,vlenb
>>  sub     sp,sp,t1
>>  slli    a4,a3,1
>>  add     a4,a4,sp
>>  sub     a3,a4,a3
>>  vsetvli a5,zero,e8,m1,ta,ma
>>  addi    a2,a1,200
>>  vlm.v   v24,0(a0)
>>  vsm.v   v24,0(a3)
>>  addi    a1,a1,100
>>  vsetvli a4,zero,e8,mf2,ta,ma
>>  csrr    t0,vlenb
>>  vlm.v   v25,0(a3)
>>  vsm.v   v25,0(a2)
>>  slli    t1,t0,1
>>  vsetvli a5,zero,e8,m1,ta,ma
>>  vsm.v   v24,0(a1)
>>  add     sp,sp,t1
>>  jr      ra
>>
>>  However, there may be some optimization opportunities after
>>  the mode precision adjustment. It can be taken care of in
>>  the RISC-V backend in the underlying separate PR(s).
>>
>>  PR 108185
>>  PR 108654
>>
>> gcc/ChangeLog:
>>
>>  * config/riscv/riscv-modes.def (ADJUST_PRECISION):
>>  * config/riscv/riscv.cc (riscv_v_adjust_precision):
>>  * config/riscv/riscv.h (riscv_v_adjust_precision):
>>  * genmodes.cc (ADJUST_PRECISION):
>>  (emit_mode_adjustments):
>>
>> gcc/testsuite/ChangeLog:
>>
>>  * gcc.target/riscv/pr108185-1.c: New test.
>>  * gcc.target/riscv/pr108185-2.c: New test.
>>  * gcc.target/riscv/pr108185-3.c: New test.
>>  * gcc.target/riscv/pr108185-4.c: New test.
>>  * gcc.target/riscv/pr108185-5.c: New test.
>>  * gcc.target/riscv/pr108185-6.c: New test.
>>  * gcc.target/riscv/pr108185-7.c: New test.
>>  * gcc.target/riscv/pr108185-8.c: New test.
>>
>> Signed-off-by: Pan Li 
>> ---
>>  gcc/config/riscv/riscv-modes.def|  8 +++
>>  gcc/config/riscv/riscv.cc   | 12 
>>  gcc/config/riscv/riscv.h|  1 +
>>  gcc/genmodes.cc | 20 +-
>>  gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++  
>> gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++  
>> gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++  
>> gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++  
>> gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++  
>> gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++  
>> gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++  
>> gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +
>>  12 files changed, 592 insertions(+), 2 deletions(-)  create mode 
>> 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
>>  create mode 100644 gcc/testsui

Re: [PATCH] amdgcn: Enable SIMD vectorization of math functions


On 02/03/2023 15:07, Kwok Cheung Yeung wrote:

Hello

I've made the suggested changes. Should I hold off on committing this 
until GCC 13 has been branched off?


No need, amdgcn is not a primary target and this stuff won't affect 
anyone else. Please go ahead and commit.


Andrew


Re: [PATCH] amdgcn: Add instruction patterns for conditional min/max operations


On 01/03/2023 16:56, Paul-Antoine Arras wrote:

This patch introduces instruction patterns for conditional min and max
operations (cond_{f|s|u}{max|min}) in the GCN machine description. It 
also allows the exec register to be saved in SGPRs to avoid spilling to 
memory.

Tested on GCN3 Fiji gfx803.

OK for trunk?


Not quite yet, but it's only a few cosmetic issues, I think.


+(define_insn_and_split "3"
+  [(set (match_operand:V_DI 0 "register_operand"  "=  v")
+   (minmaxop:V_DI
+ (match_operand:V_DI 1 "gcn_alu_operand" "%  v")
+  (match_operand:V_DI 2 "gcn_alu_operand" "   v")))
+(clobber (reg:DI VCC_REG))]


No need to make it commutative when the two operands have the same 
constraints. There's a few more instances of this later.



+if ( == smin ||  == smax)
+  emit_insn (gen_vec_cmpdi (vcc, minp ? gen_rtx_LT (VOIDmode, 0, 0) :
+gen_rtx_GT (VOIDmode, 0, 0), operands[1], 
operands[2]));
+else
+  emit_insn (gen_vec_cmpdi (vcc, minp ? gen_rtx_LTU (VOIDmode, 0, 0) 
:
+gen_rtx_GTU (VOIDmode, 0, 0), operands[1], 
operands[2]));
+


Long lines need to be wrapped, here and elsewhere.

Andrew


Re: [PATCH] c++, debug: Fix up locus of DW_TAG_imported_module [PR108716]


On 2/9/23 03:41, Jakub Jelinek wrote:

Hi!

Before IMPORTED_DECL has been introduced in PR37410, we used to emit correct
DW_AT_decl_line on DW_TAG_imported_module on the testcase below, after that
change we haven't emitted it at all for a while and after some time
started emitting incorrect locus, in particular the location of } closing
the function.

The problem is that while we have correct EXPR_LOCATION on the USING_STMT,
when genericizing that USING_STMT into IMPORTED_DECL we don't copy the
location to DECL_SOURCE_LOCATION, so it gets whatever input_location happens
to be when it is created.

The following patch fixes that, bootstrapped/regtested on x86_64-linux and
i686-linux, ok for trunk?


OK.


2023-02-09  Jakub Jelinek  

PR debug/108716
* cp-gimplify.cc (cp_genericize_r) <case USING_STMT>: Set
DECL_SOURCE_LOCATION on IMPORTED_DECL to expression location
of USING_STMT or input_location.

* g++.dg/debug/dwarf2/pr108716.C: New test.

--- gcc/cp/cp-gimplify.cc.jj2023-02-01 10:19:43.038140336 +0100
+++ gcc/cp/cp-gimplify.cc   2023-02-08 10:36:00.301501540 +0100
@@ -1514,6 +1514,8 @@ cp_genericize_r (tree *stmt_p, int *walk
tree using_directive = make_node (IMPORTED_DECL);
TREE_TYPE (using_directive) = void_type_node;
DECL_CONTEXT (using_directive) = current_function_decl;
+   DECL_SOURCE_LOCATION (using_directive)
+ = cp_expr_loc_or_input_loc (stmt);
  
  		IMPORTED_DECL_ASSOCIATED_DECL (using_directive) = decl;

DECL_CHAIN (using_directive) = BLOCK_VARS (block);
--- gcc/testsuite/g++.dg/debug/dwarf2/pr108716.C.jj 2023-02-08 
11:48:39.667385750 +0100
+++ gcc/testsuite/g++.dg/debug/dwarf2/pr108716.C2023-02-08 
11:48:57.998115610 +0100
@@ -0,0 +1,14 @@
+// PR debug/108716
+// { dg-options "-O0 -gdwarf-5 -dA -fno-merge-debug-strings" }
+// { dg-final { scan-assembler "DIE \\(\[^\n\r\]*\\) 
DW_TAG_imported_module\[^\n\r\]*\[\n\r]*\[^\n\r\]* 
DW_AT_decl_file\[^\n\r\]*\[\n\r]*\[^\n\r\]*0xc\[^\n\r\]* 
DW_AT_decl_line\[^\n\r\]*\[\n\r]*(\[^\n\r\]*0x13\[^\n\r\]* 
DW_AT_decl_column\[^\n\r\]*\[\n\r]*)?" } }
+
+namespace M {
+  int x = 1;
+}
+
+int
+main ()
+{
+  using namespace M;
+  return 0;
+}

Jakub





Re: [PATCH] c++: Don't defer local statics initialized with constant expressions [PR108702]


On 2/9/23 11:14, Jakub Jelinek wrote:

Hi!

The stmtexpr19.C testcase used to be rejected as it has a static
variable in statement expression in constexpr context, but as that
static variable is initialized by constant expression, when P2647R1
was implemented we agreed to make it valid.

Now, as reported, the testcase compiles fine, but doesn't actually link
because the static variable isn't defined anywhere, and with -flto ICEs
because of this problem.  This is because we never
varpool_node::finalize_decl those vars, the constant expression in which
the DECL_EXPR is present for the static VAR_DECL is folded (constant
evaluated) into just the address of the VAR_DECL.


Would it make sense to define it when we see the DECL_EXPR in constant 
evaluation?



Now, similar testcase included below (do we want to include it in the
testsuite too?) works fine, because in
cp_finish_decl -> make_rtl_for_nonlocal_decl
we have since PR70353 fix:
   /* We defer emission of local statics until the corresponding
  DECL_EXPR is expanded.  But with constexpr its function might never
  be expanded, so go ahead and tell cgraph about the variable now.  */
   defer_p = ((DECL_FUNCTION_SCOPE_P (decl)
   && !var_in_maybe_constexpr_fn (decl))
  || DECL_VIRTUAL_P (decl));
and so don't defer them in constexpr/consteval functions.  The following
patch extends that and doesn't defer vars initialized by constant
expressions either, because otherwise there is nothing to finalize those.
It is true that e.g. with -O0
int foo (int x) {
   if (x) { static int y = 1; ++y; }
   if (0) { static int z = 1; ++z; }
   return sizeof (({ static int w = 1; w; }));
}
we used to emit just y and z and with the patch emit also w, but with
optimizations that is optimized away properly.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

The testcase I was talking about above that works because of the
&& !var_in_maybe_constexpr_fn (decl) case is:

extern "C" void abort ();

constexpr const int *
foo ()
{
   static constexpr int a = 1;
   return &a;
}

consteval const int *
bar ()
{
   static constexpr int a = 1;
   return &a;
}

[[gnu::noipa]] void
baz (const int *x)
{
   if (*x != 1)
 abort ();
}

int
main ()
{
   constexpr const int *p = foo ();
   constexpr const int *q = bar ();
   baz (p);
   baz (q);
   if (p == q)
 abort ();
}

2023-02-09  Jakub Jelinek  

PR c++/108702
* decl.cc (make_rtl_for_nonlocal_decl): Don't defer local statics
initialized by constant expressions.

* g++.dg/ext/stmtexpr19.C: Use dg-do link rather than dg-do compile.

--- gcc/cp/decl.cc.jj   2023-01-24 11:10:13.151076134 +0100
+++ gcc/cp/decl.cc  2023-02-09 13:29:50.527083618 +0100
@@ -7731,9 +7731,12 @@ make_rtl_for_nonlocal_decl (tree decl, t
  
/* We defer emission of local statics until the corresponding

   DECL_EXPR is expanded.  But with constexpr its function might never
- be expanded, so go ahead and tell cgraph about the variable now.  */
+ be expanded, so go ahead and tell cgraph about the variable now.
+ Also don't defer local statics initialized by constant expressions,
+ see PR108702.  */
defer_p = ((DECL_FUNCTION_SCOPE_P (decl)
- && !var_in_maybe_constexpr_fn (decl))
+ && !var_in_maybe_constexpr_fn (decl)
+ && !DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (decl))
 || DECL_VIRTUAL_P (decl));
  
/* Defer template instantiations.  */

--- gcc/testsuite/g++.dg/ext/stmtexpr19.C.jj2022-11-19 09:26:30.168061316 
+0100
+++ gcc/testsuite/g++.dg/ext/stmtexpr19.C   2023-02-09 13:32:48.887453520 
+0100
@@ -1,6 +1,6 @@
  // PR c++/81073
  // { dg-options "" }
-// { dg-do compile { target c++11 } }
+// { dg-do link { target c++11 } }
  
  struct test { const int *addr; };
  


Jakub





Re: [PATCH] c++: more mce_false folding from cp_fully_fold_init [PR108243]


On 2/21/23 14:10, Patrick Palka wrote:

We should also fold the overall initializer passed to cp_fully_fold_init
with mce_false, which enables folding of the copy-initialization of
'a1' in the below testcase (the initializer here is an AGGR_INIT_EXPR).

Unfortunately this doesn't help with direct- or default-initialization
because we don't call cp_fully_fold_init in that case, and even if we
did the initializer in that case is expressed as a bare CALL_EXPR
instead of an AGGR_INIT_EXPR, which cp_fully_fold_init can't really
fold.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?


OK.


PR c++/108243

gcc/cp/ChangeLog:

* cp-gimplify.cc (cp_fully_fold): Add an internal overload that
additionally takes and propagates an mce_value parameter, and
define the existing public overload in terms of it.
(cp_fully_fold_init): Pass mce_false to cp_fully_fold.

gcc/testsuite/ChangeLog:

* g++.dg/opt/is_constant_evaluated3.C: New test.
---
  gcc/cp/cp-gimplify.cc | 14 +++
  .../g++.dg/opt/is_constant_evaluated3.C   | 23 +++
  2 files changed, 33 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/opt/is_constant_evaluated3.C

diff --git a/gcc/cp/cp-gimplify.cc b/gcc/cp/cp-gimplify.cc
index 32fe53521cc..5d5c6efb856 100644
--- a/gcc/cp/cp-gimplify.cc
+++ b/gcc/cp/cp-gimplify.cc
@@ -2447,8 +2447,8 @@ cp_fold_rvalue (tree x)
  
  /* Perform folding on expression X.  */
  
-tree

-cp_fully_fold (tree x)
+static tree
+cp_fully_fold (tree x, mce_value manifestly_const_eval)
  {
if (processing_template_decl)
  return x;
@@ -2456,7 +2456,7 @@ cp_fully_fold (tree x)
   have to call both.  */
if (cxx_dialect >= cxx11)
  {
-  x = maybe_constant_value (x);
+  x = maybe_constant_value (x, /*decl=*/NULL_TREE, manifestly_const_eval);
/* Sometimes we are given a CONSTRUCTOR but the call above wraps it into
 a TARGET_EXPR; undo that here.  */
if (TREE_CODE (x) == TARGET_EXPR)
@@ -2469,6 +2469,12 @@ cp_fully_fold (tree x)
return cp_fold_rvalue (x);
  }
  
+tree

+cp_fully_fold (tree x)
+{
+  return cp_fully_fold (x, mce_unknown);
+}
+
  /* Likewise, but also fold recursively, which cp_fully_fold doesn't perform
 in some cases.  */
  
@@ -2477,7 +2483,7 @@ cp_fully_fold_init (tree x)

  {
if (processing_template_decl)
  return x;
-  x = cp_fully_fold (x);
+  x = cp_fully_fold (x, mce_false);
cp_fold_data data (ff_mce_false);
cp_walk_tree (&x, cp_fold_r, &data, NULL);
return x;
diff --git a/gcc/testsuite/g++.dg/opt/is_constant_evaluated3.C 
b/gcc/testsuite/g++.dg/opt/is_constant_evaluated3.C
new file mode 100644
index 000..0a1e46e5638
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/is_constant_evaluated3.C
@@ -0,0 +1,23 @@
+// PR c++/108243
+// { dg-do compile { target c++11 } }
+// { dg-additional-options "-O -fdump-tree-original" }
+
+struct A {
+  constexpr A(int n) : n(n), m(__builtin_is_constant_evaluated()) { }
+  constexpr A() : A(42) { }
+  int n, m;
+};
+
+int main() {
+  A a1 = {42};
+  A a2{42};
+  A a3(42);
+  A a4;
+  A a5{};
+}
+
+// { dg-final { scan-tree-dump "a1 = {\\.n=42, \\.m=0}" "original" } }
+// { dg-final { scan-tree-dump "a2 = {\\.n=42, \\.m=0}" "original" { xfail 
*-*-* } } }
+// { dg-final { scan-tree-dump "a3 = {\\.n=42, \\.m=0}" "original" { xfail 
*-*-* } } }
+// { dg-final { scan-tree-dump "a4 = {\\.n=42, \\.m=0}" "original" { xfail 
*-*-* } } }
+// { dg-final { scan-tree-dump "a5 = {\\.n=42, \\.m=0}" "original" { xfail 
*-*-* } } }




Re: [PATCH] c++: constant non-copy-init is manifestly constant [PR108243]


On 2/21/23 15:18, Patrick Palka wrote:

On Mon, 20 Feb 2023, Patrick Palka wrote:


According to [basic.start.static]/2 and [expr.const]/2, a variable
with static storage duration initialized with a constant initializer
has constant initialization, and such an initializer is manifestly
constant-evaluated.

We're already getting this right with copy initialization because in
that case check_initializer would consistently call store_init_value
(which for TREE_STATIC variables calls fold_non_dependent_init with
m_c_e=true).

But for direct (or default) initialization, we don't always call
store_init_value.  We instead however always call maybe_constant_init
from expand_default_init[1], albeit with m_c_e=false which means we
don't always get the "manifestly constant-evaluated" part right for
direct- or default-init.

This patch fixes this by simply passing m_c_e=true to this call to
maybe_constant_init for static storage duration variables, mirroring
what store_init_value basically does.

[1]: this maybe_constant_init call isn't reached in the copy-init
case because there init is a CONSTRUCTOR rather than a TREE_LIST so
expand_default_init exits early returning an INIT_EXPR.  This INIT_EXPR
is ultimately what causes us to consistently hit the store_init_value
code path from check_initializer in the copy-init case.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?  Would it be suitable to backport this to the 12 branch since
it should only affect C++20 code?

PR c++/108243

gcc/cp/ChangeLog:

* init.cc (expand_default_init): Pass m_c_e=true instead of
=false to maybe_constant_init when initializing a variable
with static storage duration.

gcc/testsuite/ChangeLog:

* g++.dg/cpp2a/is-constant-evaluated14.C: New test.
---
  gcc/cp/init.cc|   5 +-
  .../g++.dg/cpp2a/is-constant-evaluated14.C| 140 ++
  2 files changed, 144 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/is-constant-evaluated14.C

diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index 52e96fbe590..705a5b3bdb6 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -2203,7 +2203,10 @@ expand_default_init (tree binfo, tree true_exp, tree 
exp, tree init, int flags,
tree fn = get_callee_fndecl (rval);
if (fn && DECL_DECLARED_CONSTEXPR_P (fn))
{
- tree e = maybe_constant_init (rval, exp);
+ bool manifestly_const_eval = false;
+ if (VAR_P (exp) && TREE_STATIC (exp))
+   manifestly_const_eval = true;
+ tree e = maybe_constant_init (rval, exp, manifestly_const_eval);
  if (TREE_CONSTANT (e))
rval = cp_build_init_expr (exp, e);
}


Hmm, alternatively we could just override manifestly_const_eval to true
from maybe_constant_init for static storage duration variables, like so.
I guess this approach might be preferable since it potentially benefits
all maybe_constant_init callers?


That does look better.

OK (perhaps with a local variable to hold the mce_value).


-- >8 --

gcc/cp/ChangeLog:

* constexpr.cc (maybe_constant_init_1): Override
manifestly_const_eval to true if is_static.

diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc
index aa2c14355f8..8ae83a6eadf 100644
--- a/gcc/cp/constexpr.cc
+++ b/gcc/cp/constexpr.cc
@@ -8760,7 +8760,8 @@ maybe_constant_init_1 (tree t, tree decl, bool 
allow_non_constant,
bool is_static = (decl && DECL_P (decl)
&& (TREE_STATIC (decl) || DECL_EXTERNAL (decl)));
t = cxx_eval_outermost_constant_expr (t, allow_non_constant, !is_static,
-   mce_value (manifestly_const_eval),
+   (is_static ? mce_true
+: mce_value 
(manifestly_const_eval)),
false, decl);
  }
if (TREE_CODE (t) == TARGET_EXPR)


diff --git a/gcc/testsuite/g++.dg/cpp2a/is-constant-evaluated14.C 
b/gcc/testsuite/g++.dg/cpp2a/is-constant-evaluated14.C
new file mode 100644
index 000..365bca3fd9a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp2a/is-constant-evaluated14.C
@@ -0,0 +1,140 @@
+// PR c++/108243
+// Verify a variable with static storage duration initialized with a
+// constant initializer has constant initialization, and the initializer
+// is manifestly constant-evaluated.
+// { dg-do run { target c++11 } }
+// { dg-additional-options "-fdump-tree-original" }
+
+#include 
+
+struct A {
+  constexpr A(int n) : n(n), m(__builtin_is_constant_evaluated()) { }
+  constexpr A() : A(42) { }
+  void verify_mce() const {
+if (m != 1) __builtin_abort();
+  }
+  int n;
+  int m;
+};
+
+A a1 = {42};
+A a2{42};
+A a3(42);
+A a4;
+A a5{};
+
+void f() {
+  static A a1 = {42};
+  static A a2{42};
+  static A a3(42);
+  static A a4;
+  static A a5{};
+  for (auto& a : {a1, a2, a3, a4, a5})
+a.verify_mce();
+}

Re: [PATCH] c++, v3: Emit fundamental tinfos for _Float16/decltype(0.0bf16) types on ia32 with -mno-sse2 [PR108883]


On 3/2/23 06:20, Jakub Jelinek wrote:

Hi!

On Wed, Mar 01, 2023 at 05:50:47PM -0500, Jason Merrill wrote:

And then there is a question whether we want to emit rtti for
_Float{16,32,64,128}, _Float{32,64,128}x and decltype(0.0bf16) regardless
of whether the target supports them at all or not.
Emitting them always would have an advantage, if say bfloat16_t support
isn't added for aarch64 for GCC 13 (it is still pending review), we wouldn't
need to deal with symbol versioning for it in GCC 14 or later.
On the other side, on some arches some types are very unlikely to be
supported.  And e.g. _Float128x isn't supported on any arch right now.


A good point.  Incidentally, it seems problematic for embedded users that
all the fundamental type_infos are emitted in the same .o, making it hard to
link in only the ones you care about.  And new floating-point variants add
to that problem.  So perhaps until that is addressed, it's better to avoid
adding a bunch more on targets that don't support them.


Ok, so here is a variant of the patch which still drops the fallback_* stuff,
but for float*_type_node doesn't do the automatic fallback in generic code
and leaves those to a target hook.

So far lightly tested on x86_64-linux -m32/-m64:

2023-03-02  Jakub Jelinek  

PR target/108883
gcc/
* target.h (emit_support_tinfos_callback): New typedef.
* targhooks.h (default_emit_support_tinfos): Declare.
* targhooks.cc (default_emit_support_tinfos): New function.
* target.def (emit_support_tinfos): New target hook.
* doc/tm.texi.in (emit_support_tinfos): Document it.
* doc/tm.texi: Regenerated.
* config/i386/i386.cc (ix86_emit_support_tinfos): New function.
(TARGET_EMIT_SUPPORT_TINFOS): Redefine.
gcc/cp/
* cp-tree.h (enum cp_tree_index): Remove CPTI_FALLBACK_DFLOAT*_TYPE
enumerators.
(fallback_dfloat32_type, fallback_dfloat64_type,
fallback_dfloat128_type): Remove.
* rtti.cc (emit_support_tinfo_1): If not emitted already, call
emit_tinfo_decl and remove from unemitted_tinfo_decls right away.
(emit_support_tinfos): Move &dfloat*_type_node from fundamentals array
into new fundamentals_with_fallback array.  Call emit_support_tinfo_1
on elements of that array too, with the difference that if
the type is NULL, use a fallback REAL_TYPE for it temporarily.
Drop the !targetm.decimal_float_supported_p () handling.  Call
targetm.emit_support_tinfos at the end.
* mangle.cc (write_builtin_type): Remove references to
fallback_dfloat*_type.  Handle bfloat16_type_node mangling.

--- gcc/target.h.jj 2023-02-17 12:45:08.056638510 +0100
+++ gcc/target.h2023-03-02 12:06:59.248146213 +0100
@@ -260,6 +260,8 @@ enum poly_value_estimate_kind
POLY_VALUE_LIKELY
  };
  
+typedef void (*emit_support_tinfos_callback) (tree);

+
  extern bool verify_type_context (location_t, type_context_kind, const_tree,
 bool = false);
  
--- gcc/targhooks.h.jj	2023-01-02 09:32:50.422880177 +0100

+++ gcc/targhooks.h 2023-03-02 12:06:22.559686384 +0100
@@ -98,6 +98,8 @@ extern int default_builtin_vectorization
  
  extern tree default_builtin_reciprocal (tree);
  
+extern void default_emit_support_tinfos (emit_support_tinfos_callback);

+
  extern HOST_WIDE_INT default_static_rtx_alignment (machine_mode);
  extern HOST_WIDE_INT default_constant_alignment (const_tree, HOST_WIDE_INT);
  extern HOST_WIDE_INT constant_alignment_word_strings (const_tree,
--- gcc/targhooks.cc.jj 2023-01-02 09:32:52.591848839 +0100
+++ gcc/targhooks.cc2023-03-02 12:01:39.576868114 +0100
@@ -752,6 +752,11 @@ default_builtin_reciprocal (tree)
return NULL_TREE;
  }
  
+void

+default_emit_support_tinfos (emit_support_tinfos_callback)
+{
+}
+
  bool
  hook_bool_CUMULATIVE_ARGS_arg_info_false (cumulative_args_t,
  const function_arg_info &)
--- gcc/target.def.jj   2023-02-22 15:58:50.252996452 +0100
+++ gcc/target.def  2023-03-02 12:01:52.002684436 +0100
@@ -2606,6 +2606,19 @@ types.",
   const char *, (const_tree type),
   hook_constcharptr_const_tree_null)
  
+/* Temporarily add conditional target specific types for the purpose of

+   emitting C++ fundamental type tinfos.  */
+DEFHOOK
+(emit_support_tinfos,
+ "If your target defines any fundamental types which depend on ISA flags,\n\
+they might need C++ tinfo symbols in libsupc++/libstdc++ regardless of\n\
+ISA flags the library is compiled with.\n\
+This hook allows creating tinfo symbols even for those cases, by temporarily\n\
+creating corresponding fundamental type trees, calling the @var{callback}\n\


"each corresponding fundamental type tree"?

OK with that change.


+function on it and setting the type back to @code{nullptr}.",
+ void, (emit_support_tinfos_callback callback),
+ default_emit_support_tinfos)
+
  /* Make any adjustments to libfunc names n

[PATCH] LoongArch: Stop -mfpu from silently breaking ABI

In the toolchain convention, we describe -mfpu= as:

"Selects the allowed set of basic floating-point instructions and
registers. This option should not change the FP calling convention
unless it's necessary."

Though not explicitly stated, the rationale of this rule is to allow
combinations like "-mabi=lp64s -mfpu=64".  This will be useful for
running applications with LP64S/F ABI on a double-float-capable
LoongArch hardware and using a math library with LP64S/F ABI but native
double float HW instructions, for a better performance.

And now a case in Linux kernel has again proven the usefulness of this
kind of combination.  The AMDGPU DCN kernel driver needs to perform some
floating-point operation, but the entire kernel uses LP64S ABI.  So the
translation units of the AMDGPU DCN driver need to be compiled with
-mfpu=64 (the kernel lacks soft-FP routines in libgcc), but -mabi=lp64s
(or you can't link it with the other part of the kernel).

Unfortunately, currently GCC uses TARGET_{HARD,SOFT,DOUBLE}_FLOAT to
determine the floating-point calling convention.  This causes "-mfpu=64"
to silently allow using $fa* to pass parameters and return values EVEN IF
-mabi=lp64s is used.  To make things worse, the generated object file
has SOFT-FLOAT set in the eflags field so the linker will happily link
it with other LP64S ABI object files, but obviously this will lead to
bad results at runtime.

The fix is simple: use TARGET_*_FLOAT_ABI instead.  But then it causes
"-mabi=lp64s -march=loongarch64" to generate code like:

  movgr2fr.d $fa0, $a0
  frecip.d   $fa0, $fa0
  movfr2gr.d $a0, $fa0

The problem here is "loongarch64" is never strictly defined.  So we
consider "loongarch64" a "64-bit LoongArch CPU with the simplest FPU
needed by the ABI", and if -march=loongarch64 is used but -mfpu is not
explicitly given, we set -mfpu to the simplest such FPU.

I consider this a bug fix: the behavior difference from the toolchain
convention doc is a bug, and generating object files with SOFT-FLOAT
flag but parameters/return values passed through FPRs is definitely a
bug.

Bootstrapped and regtested on loongarch64-linux-gnu.  Ok for trunk?  I'm
not sure if it's a good idea to backport this into gcc-12 though.

gcc/ChangeLog:

* config/loongarch/loongarch.h (FP_RETURN): Use
TARGET_*_FLOAT_ABI instead of TARGET_*_FLOAT.
(UNITS_PER_FP_ARG): Likewise.
* config/loongarch/loongarch-opts.cc (loongarch_config_target):
If -march=loongarch64 and -mfpu not explicitly used, guess FPU
capability from ABI.

gcc/testsuite/ChangeLog:

* gcc.target/loongarch/flt-abi-isa-1.c: New test.
* gcc.target/loongarch/flt-abi-isa-2.c: New test.
* gcc.target/loongarch/flt-abi-isa-3.c: New test.
* gcc.target/loongarch/flt-abi-isa-4.c: New test.
* gcc.target/loongarch/flt-abi-isa-5.c: New test.
* gcc.target/loongarch/flt-abi-isa-6.c: New test.
* gcc.target/loongarch/flt-abi-isa-7.c: New test.
* gcc.target/loongarch/flt-abi-isa-8.c: New test.
* gcc.target/loongarch/flt-abi-isa-9.c: New test.
* gcc.target/loongarch/flt-abi-isa-10.c: New test.
---
 gcc/config/loongarch/loongarch-opts.cc | 18 ++
 gcc/config/loongarch/loongarch.h   |  4 ++--
 .../gcc.target/loongarch/flt-abi-isa-1.c   | 12 
 .../gcc.target/loongarch/flt-abi-isa-10.c  |  7 +++
 .../gcc.target/loongarch/flt-abi-isa-2.c   | 11 +++
 .../gcc.target/loongarch/flt-abi-isa-3.c   | 11 +++
 .../gcc.target/loongarch/flt-abi-isa-4.c   | 12 
 .../gcc.target/loongarch/flt-abi-isa-5.c   |  7 +++
 .../gcc.target/loongarch/flt-abi-isa-6.c   | 11 +++
 .../gcc.target/loongarch/flt-abi-isa-7.c   |  5 +
 .../gcc.target/loongarch/flt-abi-isa-8.c   |  7 +++
 .../gcc.target/loongarch/flt-abi-isa-9.c   |  7 +++
 12 files changed, 110 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-10.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-3.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-4.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-5.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-6.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-7.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-8.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/flt-abi-isa-9.c

diff --git a/gcc/config/loongarch/loongarch-opts.cc 
b/gcc/config/loongarch/loongarch-opts.cc
index a52e25236ea..bea77da93e9 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -251,6 +251,24 @@ config_target_isa:
 ((t.cpu_arch == CPU_NATIVE && constrained.arch) ?
 

Re: [PATCH] c++: ICE with -Wmismatched-tags and member template [PR106259]


On 3/1/23 17:33, Marek Polacek wrote:

On Wed, Mar 01, 2023 at 04:44:12PM -0500, Jason Merrill wrote:

On 3/1/23 16:40, Marek Polacek wrote:

On Wed, Mar 01, 2023 at 04:30:16PM -0500, Jason Merrill wrote:

On 3/1/23 15:33, Marek Polacek wrote:

-Wmismatched-tags warns about the (harmless) struct/class mismatch.
For, e.g.,

 template<typename T> struct A { };
 class A<int> a;

it works by adding A<T> to the class2loc hash table while parsing the
class-head and then, while parsing the elaborated-type-specifier, we
add A<int>.  At the end of c_parse_file we go through the table and
warn about the class-key mismatches.  In this PR we crash though; we
have

 template<typename T> struct A {
   template<typename U> struct W { };
 };
 struct A<int>::W<int> w; // #1

where while parsing A and #1 we've stashed
  A<T>
  A<T>::W<U>
  A<int>::W<int>
into class2loc.  Then in class_decl_loc_t::diag_mismatched_tags TYPE
is A<int>::W<int>, and specialization_of gets us A<int>::W<U>, which
is not in class2loc, so we crash on gcc_assert (cdlguide).  But it's
OK not to have found A<int>::W<U>, we should just look one "level" up,
that is, A<T>::W<U>.

It's important to handle class specializations, so e.g.

 template<>
 struct A {
   template
   class W { };
 };

where W's class-key is different than in the primary template above,
so we should warn depending on whether we're looking into A
or into a different instantiation.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

PR c++/106259

gcc/cp/ChangeLog:

* parser.cc (class_decl_loc_t::diag_mismatched_tags): If the first
lookup of SPEC didn't find anything, try to look for
most_general_template.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wmismatched-tags-11.C: New test.
---
gcc/cp/parser.cc  | 30 +++
.../g++.dg/warn/Wmismatched-tags-11.C | 23 ++
2 files changed, 47 insertions(+), 6 deletions(-)
create mode 100644 gcc/testsuite/g++.dg/warn/Wmismatched-tags-11.C

diff --git a/gcc/cp/parser.cc b/gcc/cp/parser.cc
index 1a124f5395e..b528ee7b1d9 100644
--- a/gcc/cp/parser.cc
+++ b/gcc/cp/parser.cc
@@ -34473,14 +34473,32 @@ class_decl_loc_t::diag_mismatched_tags (tree 
type_decl)
 be (and inevitably is) at index zero.  */
  tree spec = specialization_of (type);
  cdlguide = class2loc.get (spec);
+  /* It's possible that we didn't find SPEC.  Consider:
+
+  template struct A {
+template struct W { };
+  };
+  struct A::W w; // #1
+
+where while parsing A and #1 we've stashed
+  A
+  A::W
+  A::W
+into CLASS2LOC.  If TYPE is A::W, specialization_of
+will yield A::W which may be in CLASS2LOC if we had
+an A class specialization, but otherwise won't be in it.
+So try to look up A::W.  */
+  if (!cdlguide)
+   {
+ spec = DECL_TEMPLATE_RESULT (most_general_template (spec));


Would it make sense to only look at most_general_template, not A::W
at all?


I think that would break with class specialization, as in...


+template struct A {
+  template
+  struct W { };
+};
+
+template<>
+struct A {
+  template
+  class W { };
+};
+
+void
+g ()
+{
+  struct A::W w1; // { dg-warning "mismatched" }


...this, where we should first look into A, and only if not
found, go to A.


I'd expect the


   /* Stop if we run into an explicitly specialized class template.  */


code in most_general_template to avoid that problem.


Ah, I had no idea it does that.  The unconditional most_general_template
works fine for the new test, but some of the existing tests then fail.
Reduced:

templatestruct S2; // #1
template  class S2; // #2

extern class  S2 s2ci; // #3
extern struct S2 s2ci; // { dg-warning "\\\[-Wmismatched-tags" }

where the unconditional most_general_template changes spec from
"class S2" to "struct S2" (both of which are in class2loc).
So it regresses the diagnostic, complaining that #3 should have "struct"
since #1 has "struct".  I think we want to keep the current diagnostic,
saying that the last line should have "class" since the specialization
in line #2 has "class".


Makes sense, the patch is OK.

Jason



Re: [PATCH] vect: Don't apply masks to operations on invariants [PR108979]




> Am 02.03.2023 um 15:28 schrieb Richard Sandiford via Gcc-patches 
> :
> 
> The loop body in the testcase contains an operation on invariants.
> SLP detects this and can hoist/schedule the operation outside of
> the loop.  However, after the fix for PR96373, we would try to
> apply a loop mask to this operation, even though the mask is
> defined in the loop.
> 
> The patch does what Richi suggested in the PR: suppress the
> masking for externs and constants.
> 
> Tested on aarch64-linux-gnu.  OK to install?

Ok.

Richard 

> Richard
> 
> 
> gcc/
>PR tree-optimization/108979
>* tree-vect-stmts.cc (vectorizable_operation): Don't mask
>operations on invariants.
> 
> gcc/testsuite/
>PR tree-optimization/108979
>* gfortran.dg/vect/pr108979.f90: New test.
> ---
> gcc/testsuite/gfortran.dg/vect/pr108979.f90 | 21 +
> gcc/tree-vect-stmts.cc  | 25 -
> 2 files changed, 40 insertions(+), 6 deletions(-)
> create mode 100644 gcc/testsuite/gfortran.dg/vect/pr108979.f90
> 
> diff --git a/gcc/testsuite/gfortran.dg/vect/pr108979.f90 
> b/gcc/testsuite/gfortran.dg/vect/pr108979.f90
> new file mode 100644
> index 000..623eb67826f
> --- /dev/null
> +++ b/gcc/testsuite/gfortran.dg/vect/pr108979.f90
> @@ -0,0 +1,21 @@
> +! { dg-do compile }
> +! { dg-additional-options "-fnon-call-exceptions" }
> +! { dg-additional-options "-march=armv8.2-a+sve" { target aarch64*-*-* } }
> +
> +MODULE hfx_contract_block
> +  INTEGER, PARAMETER :: dp=8
> +CONTAINS
> +  SUBROUTINE block_2_1_2_1(kbd,kbc,kad,kac,pbd,pbc,pad,pac,prim,scale)
> +REAL(KIND=dp) :: kbd(1*1), kbc(1*2), kad(2*1), kac(2*2), pbd(1*1), &
> +  pbc(1*2), pad(2*1), pac(2*2), prim(2*1*2*1), scale
> +  DO md = 1,1
> +DO mc = 1,2
> +  DO mb = 1,1
> +DO ma = 1,2
> +  kac((mc-1)*2+ma) = kac((mc-1)*2+ma)-tmp*p_bd
> +END DO
> +  END DO
> +END DO
> +  END DO
> +  END SUBROUTINE block_2_1_2_1
> +END MODULE hfx_contract_block
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 77ad8b78506..b56457617c0 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -6254,6 +6254,8 @@ vectorizable_operation (vec_info *vinfo,
>  "use not simple.\n");
>   return false;
> }
> +  bool is_invariant = (dt[0] == vect_external_def
> +   || dt[0] == vect_constant_def);
>   /* If op0 is an external or constant def, infer the vector type
>  from the scalar type.  */
>   if (!vectype)
> @@ -6307,6 +6309,8 @@ vectorizable_operation (vec_info *vinfo,
>  "use not simple.\n");
>  return false;
>}
> +  is_invariant &= (dt[1] == vect_external_def
> +   || dt[1] == vect_constant_def);
>   if (vectype2
>  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
>return false;
> @@ -6321,6 +6325,8 @@ vectorizable_operation (vec_info *vinfo,
>  "use not simple.\n");
>  return false;
>}
> +  is_invariant &= (dt[2] == vect_external_def
> +   || dt[2] == vect_constant_def);
>   if (vectype3
>  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
>return false;
> @@ -6426,16 +6432,23 @@ vectorizable_operation (vec_info *vinfo,
>   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
>   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : 
> NULL);
>   internal_fn cond_fn = get_conditional_internal_fn (code);
> -  bool could_trap = gimple_could_trap_p (stmt);
> +
> +  /* If operating on inactive elements could generate spurious traps,
> + we need to restrict the operation to active lanes.  Note that this
> + specifically doesn't apply to unhoisted invariants, since they
> + operate on the same value for every lane.
> +
> + Similarly, if this operation is part of a reduction, a fully-masked
> + loop should only change the active lanes of the reduction chain,
> + keeping the inactive lanes as-is.  */
> +  bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
> +|| reduc_idx >= 0);
> 
>   if (!vec_stmt) /* transformation not required.  */
> {
> -  /* If this operation is part of a reduction, a fully-masked loop
> - should only change the active lanes of the reduction chain,
> - keeping the inactive lanes as-is.  */
>   if (loop_vinfo
>  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
> -  && (could_trap || reduc_idx >= 0))
> +  && mask_out_inactive)
>{
>  if (cond_fn == IFN_LAST
>  || !direct_internal_fn_supported_p (cond_fn, vectype,
> @@ -6578,7 +6591,7 @@ vectorizable_operation (vec_info *vinfo,
>   vop1 = ((op_type == binary_op || op_type == ternary_op)
>  ? vec_oprnds1[i] : NULL_TREE);
>   vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
> -  if (masked_loop_p && (reduc_idx >= 0 

[PATCH] s390: libatomic: Fix 16 byte atomic {cas,load,store}

This is a follow-up to commit a4c6bd0821099f6b8c0f64a96ffd9d01a025c413
introducing a runtime check for alignment for 16 byte atomic
compare-exchange, load, and store.

Bootstrapped and regtested on s390.
Ok for mainline and gcc-{12,11,10}?

libatomic/ChangeLog:

* config/s390/cas_n.c: New file.
* config/s390/load_n.c: New file.
* config/s390/store_n.c: New file.
---
 libatomic/config/s390/cas_n.c   | 65 +
 libatomic/config/s390/load_n.c  | 57 +
 libatomic/config/s390/store_n.c | 54 +++
 3 files changed, 176 insertions(+)
 create mode 100644 libatomic/config/s390/cas_n.c
 create mode 100644 libatomic/config/s390/load_n.c
 create mode 100644 libatomic/config/s390/store_n.c

diff --git a/libatomic/config/s390/cas_n.c b/libatomic/config/s390/cas_n.c
new file mode 100644
index 000..44b7152ca5d
--- /dev/null
+++ b/libatomic/config/s390/cas_n.c
@@ -0,0 +1,65 @@
+/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
+
+   This file is part of the GNU Atomic Library (libatomic).
+
+   Libatomic is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+#include 
+
+
+/* Analog to config/s390/exch_n.c.  */
+
+#if !DONE && N == 16
+bool
+SIZE(libat_compare_exchange) (UTYPE *mptr, UTYPE *eptr, UTYPE newval,
+ int smodel, int fmodel UNUSED)
+{
+  if (!((uintptr_t)mptr & 0xf))
+{
+  return __atomic_compare_exchange_n (
+   (UTYPE *)__builtin_assume_aligned (mptr, 16), eptr, newval, false,
+   __ATOMIC_SEQ_CST, __ATOMIC_RELAXED);
+}
+  else
+{
+  UTYPE oldval;
+  UWORD magic;
+  bool ret;
+
+  pre_seq_barrier (smodel);
+  magic = protect_start (mptr);
+
+  oldval = *mptr;
+  ret = (oldval == *eptr);
+  if (ret)
+   *mptr = newval;
+  else
+   *eptr = oldval;
+
+  protect_end (mptr, magic);
+  post_seq_barrier (smodel);
+
+  return ret;
+}
+}
+#define DONE 1
+#endif /* N == 16 */
+
+#include "../../cas_n.c"
diff --git a/libatomic/config/s390/load_n.c b/libatomic/config/s390/load_n.c
new file mode 100644
index 000..335d2f8b2c3
--- /dev/null
+++ b/libatomic/config/s390/load_n.c
@@ -0,0 +1,57 @@
+/* Copyright (C) 2018-2023 Free Software Foundation, Inc.
+
+   This file is part of the GNU Atomic Library (libatomic).
+
+   Libatomic is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   Libatomic is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+   more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   .  */
+
+#include 
+
+
+/* Analog to config/s390/exch_n.c.  */
+
+#if !DONE && N == 16
+UTYPE
+SIZE(libat_load) (UTYPE *mptr, int smodel)
+{
+  if (!((uintptr_t)mptr & 0xf))
+{
+  return __atomic_load_n ((UTYPE *)__builtin_assume_aligned (mptr, 16),
+ __ATOMIC_SEQ_CST);
+}
+  else
+{
+  UTYPE ret;
+  UWORD magic;
+
+  pre_seq_barrier (smodel);
+  magic = protect_start (mptr);
+
+  ret = *mptr;
+
+  protect_end (mptr, magic);
+  post_seq_barrier (smodel);
+
+  return ret;
+}
+}
+#define DONE 1
+#endif /* N == 16 */
+
+#include "../../load_n.c"
diff --git a/libatomic/config/s390/store_n.c b/libatomic/config/s390/store_n.c
new file mode 100644
index 000..9e5b2b8213d
--- /dev/null
+++ b/l

Re: [PATCH] amdgcn: Enable SIMD vectorization of math functions


Hello

I've made the suggested changes. Should I hold off on committing this 
until GCC 13 has been branched off?


Kwok

On 01/03/2023 10:01 am, Andrew Stubbs wrote:

On 28/02/2023 23:01, Kwok Cheung Yeung wrote:

Hello

This patch implements the TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION 
target hook for the AMD GCN architecture, such that when vectorized, 
calls to builtin standard math functions such as asinf, exp, pow etc. 
are converted to calls to the recently added vectorized math functions 
for GCN in Newlib. The -fno-math-errno flag is required in addition to 
the usual vectorization optimization flags for this to occur, and some 
of the math functions (the larger double-precision ones) require a 
large stack size to function properly.


This patch requires the GCN vector math functions in Newlib to 
function - these were included in the recent 4.3.0.20230120 snapshot. 
As this was a minimum requirement starting from the patch 'amdgcn, 
libgomp: Manually allocated stacks', this should not be a problem.


I have added new testcases in the testsuite that compare the output of 
the vectorized math functions against the scalar, passing if they are 
sufficiently close. With the testcase for standalone GCN (without 
libgomp) in gcc.target/gcn/, there is a problem since gcn-run 
currently cannot set the stack size correctly in DejaGnu testing, so I 
have made it a compile test for now - it is still useful to check that 
calls to the correct functions are being made. The runtime correctness 
is still covered by the libgomp test.


Okay for trunk?


The main part of the patch is OK, with the small changes below.

Others have pointed out that "omp declare simd" exists, but you and I 
have been all through that verbally, long ago, and as Tobias says the 
offload compiler cannot rely on markup in the host compiler's header 
files to solve this problem.



@@ -7324,6 +7429,11 @@ gcn_dwarf_register_span (rtx rtl)
   gcn_simd_clone_compute_vecsize_and_simdlen
 #undef  TARGET_SIMD_CLONE_USABLE
 #define TARGET_SIMD_CLONE_USABLE gcn_simd_clone_usable
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
+  gcn_vectorize_builtin_vectorized_function
+#undef TARGET_LIBC_HAS_FUNCTION
+#define TARGET_LIBC_HAS_FUNCTION gcn_libc_has_function
 #undef  TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
 #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
   gcn_small_register_classes_for_mode_p


Please keep these in alphabetical order.

+/* Ideally this test should be run, but the math routines require a 
large
+   stack and gcn-run currently does not respect the stack-size 
parameter.  */

+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fno-math-errno 
-mstack-size=300 -fdump-tree-vect" } */


This isn't ideal. The dg-set-target-env-var directive (I think this is 
it?) can set GCN_STACK_SIZE, which gcn-run does honour, but I realise 
that doesn't work with remote test targets (like ours).


I suggest adding an additional test that sets the envvar and #includes 
the code from this one; one test to scan the dumps, one test to run it. 
Like this  (untested, syntax uncertain).


/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -fno-math-errno" } */
/* { dg-set-target-env-var "GCN_STACK_SIZE" "300" } */
#include "simd-math-1.c"

The run test will get skipped in our test environment (and anyone else 
using remote), but the libgomp test should make up for that.


Andrew

From 0b43ef3c2d6afd4aecfc03fd1d2df675626e017b Mon Sep 17 00:00:00 2001
From: Kwok Cheung Yeung 
Date: Tue, 28 Feb 2023 14:15:47 +
Subject: [PATCH] amdgcn: Enable SIMD vectorization of math functions

Calls to vectorized versions of routines in the math library will now
be inserted when vectorizing code containing supported math functions.

2023-02-28  Kwok Cheung Yeung  
Paul-Antoine Arras  

gcc/
* builtins.cc (mathfn_built_in_explicit): New.
* config/gcn/gcn.cc: Include case-cfn-macros.h.
(mathfn_built_in_explicit): Add prototype.
(gcn_vectorize_builtin_vectorized_function): New.
(gcn_libc_has_function): New.
(TARGET_LIBC_HAS_FUNCTION): Define.
(TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION): Define.

gcc/testsuite/
* gcc.target/gcn/simd-math-1.c: New testcase.
* gcc.target/gcn/simd-math-2.c: New testcase.

libgomp/
* testsuite/libgomp.c/simd-math-1.c: New testcase.
---
 gcc/builtins.cc|   8 +
 gcc/config/gcn/gcn.cc  | 110 +++
 gcc/testsuite/gcc.target/gcn/simd-math-1.c | 206 +++
 gcc/testsuite/gcc.target/gcn/simd-math-2.c |   8 +
 libgomp/testsuite/libgomp.c/simd-math-1.c  | 217 +
 5 files changed, 549 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/gcn/simd-math-1.c
 create mode 100644 gcc/testsuite/gcc.target/gcn/simd-math-2.c
 create mode 100644 libgomp/te

RE: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision adjustment

Oops, it looks like I missed that part about the assertion. Thank you for the coaching.
I have added and tested the changes below at the end of emit_mode_adjustments, 
but it looks like we may have other problems with the size, the precision and the C 
types.

It looks like I need to hold this PATCH for a while until we reach a conclusion. 
Feel free to let me know if anything here is mistaken or misleading.

+ 
+  for_all_modes (c, m)
+printf ("  gcc_checking_assert (!mode_size[E_%smode].is_constant()"
+   " || mode_size[E_%smode].coeffs[0] != -1);\n", m->name, m->name);
+

Thank you and have a nice day!

Pan


-Original Message-
From: Richard Sandiford  
Sent: Thursday, March 2, 2023 5:44 PM
To: Li, Pan2 
Cc: gcc-patches@gcc.gnu.org; juzhe.zh...@rivai.ai; kito.ch...@sifive.com; 
rguent...@suse.de
Subject: Re: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision adjustment

pan2...@intel.com writes:
> From: Pan Li 
>
>   Fix the bug of the rvv bool mode precision with the adjustment.
>   The bits size of vbool*_t will be adjusted to
>   [1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
>   adjusted mode precision of vbool*_t will help underlying pass to
>   make the right decision for both the correctness and optimization.
>
>   Given below sample code:
>   void test_1(int8_t * restrict in, int8_t * restrict out)
>   {
> vbool8_t v2 = *(vbool8_t*)in;
> vbool16_t v5 = *(vbool16_t*)in;
> *(vbool16_t*)(out + 200) = v5;
> *(vbool8_t*)(out + 100) = v2;
>   }
>
>   Before the precision adjustment:
>   addia4,a1,100
>   vsetvli a5,zero,e8,m1,ta,ma
>   addia1,a1,200
>   vlm.v   v24,0(a0)
>   vsm.v   v24,0(a4)
>   // Need one vsetvli and vlm.v for correctness here.
>   vsm.v   v24,0(a1)
>
>   After the precision adjustment:
>   csrrt0,vlenb
>   sllit1,t0,1
>   csrra3,vlenb
>   sub sp,sp,t1
>   sllia4,a3,1
>   add a4,a4,sp
>   sub a3,a4,a3
>   vsetvli a5,zero,e8,m1,ta,ma
>   addia2,a1,200
>   vlm.v   v24,0(a0)
>   vsm.v   v24,0(a3)
>   addia1,a1,100
>   vsetvli a4,zero,e8,mf2,ta,ma
>   csrrt0,vlenb
>   vlm.v   v25,0(a3)
>   vsm.v   v25,0(a2)
>   sllit1,t0,1
>   vsetvli a5,zero,e8,m1,ta,ma
>   vsm.v   v24,0(a1)
>   add sp,sp,t1
>   jr  ra
>
>   However, there may be some optimization opportunities after
>   the mode precision adjustment. They can be taken care of in
>   the RISC-V backend in separate PR(s).
>
>   PR 108185
>   PR 108654
>
> gcc/ChangeLog:
>
>   * config/riscv/riscv-modes.def (ADJUST_PRECISION):
>   * config/riscv/riscv.cc (riscv_v_adjust_precision):
>   * config/riscv/riscv.h (riscv_v_adjust_precision):
>   * genmodes.cc (ADJUST_PRECISION):
>   (emit_mode_adjustments):
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/riscv/pr108185-1.c: New test.
>   * gcc.target/riscv/pr108185-2.c: New test.
>   * gcc.target/riscv/pr108185-3.c: New test.
>   * gcc.target/riscv/pr108185-4.c: New test.
>   * gcc.target/riscv/pr108185-5.c: New test.
>   * gcc.target/riscv/pr108185-6.c: New test.
>   * gcc.target/riscv/pr108185-7.c: New test.
>   * gcc.target/riscv/pr108185-8.c: New test.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/config/riscv/riscv-modes.def|  8 +++
>  gcc/config/riscv/riscv.cc   | 12 
>  gcc/config/riscv/riscv.h|  1 +
>  gcc/genmodes.cc | 20 +-
>  gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++  
> gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++  
> gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++  
> gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++  
> gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++  
> gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++  
> gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++  
> gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +
>  12 files changed, 592 insertions(+), 2 deletions(-)  create mode 
> 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
>
> diff --git a/gcc/config/riscv/riscv-modes.def 
> b/gcc/config/riscv/riscv-modes.def
> index d5305efa8a6..110bddce851 100644
> --- a/gcc/config/riscv/riscv-modes.def
> +++ b/gcc/config/riscv/riscv-modes.def
> @@ -72,6

[PATCH] vect: Don't apply masks to operations on invariants [PR108979]

The loop body in the testcase contains an operation on invariants.
SLP detects this and can hoist/schedule the operation outside of
the loop.  However, after the fix for PR96373, we would try to
apply a loop mask to this operation, even though the mask is
defined in the loop.

The patch does what Richi suggested in the PR: suppress the
masking for externs and constants.

Tested on aarch64-linux-gnu.  OK to install?

Richard


gcc/
PR tree-optimization/108979
* tree-vect-stmts.cc (vectorizable_operation): Don't mask
operations on invariants.

gcc/testsuite/
PR tree-optimization/108979
* gfortran.dg/vect/pr108979.f90: New test.
---
 gcc/testsuite/gfortran.dg/vect/pr108979.f90 | 21 +
 gcc/tree-vect-stmts.cc  | 25 -
 2 files changed, 40 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gfortran.dg/vect/pr108979.f90

diff --git a/gcc/testsuite/gfortran.dg/vect/pr108979.f90 
b/gcc/testsuite/gfortran.dg/vect/pr108979.f90
new file mode 100644
index 000..623eb67826f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/vect/pr108979.f90
@@ -0,0 +1,21 @@
+! { dg-do compile }
+! { dg-additional-options "-fnon-call-exceptions" }
+! { dg-additional-options "-march=armv8.2-a+sve" { target aarch64*-*-* } }
+
+MODULE hfx_contract_block
+  INTEGER, PARAMETER :: dp=8
+CONTAINS
+  SUBROUTINE block_2_1_2_1(kbd,kbc,kad,kac,pbd,pbc,pad,pac,prim,scale)
+REAL(KIND=dp) :: kbd(1*1), kbc(1*2), kad(2*1), kac(2*2), pbd(1*1), &
+  pbc(1*2), pad(2*1), pac(2*2), prim(2*1*2*1), scale
+  DO md = 1,1
+DO mc = 1,2
+  DO mb = 1,1
+DO ma = 1,2
+  kac((mc-1)*2+ma) = kac((mc-1)*2+ma)-tmp*p_bd
+END DO
+  END DO
+END DO
+  END DO
+  END SUBROUTINE block_2_1_2_1
+END MODULE hfx_contract_block
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 77ad8b78506..b56457617c0 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -6254,6 +6254,8 @@ vectorizable_operation (vec_info *vinfo,
  "use not simple.\n");
   return false;
 }
+  bool is_invariant = (dt[0] == vect_external_def
+  || dt[0] == vect_constant_def);
   /* If op0 is an external or constant def, infer the vector type
  from the scalar type.  */
   if (!vectype)
@@ -6307,6 +6309,8 @@ vectorizable_operation (vec_info *vinfo,
  "use not simple.\n");
  return false;
}
+  is_invariant &= (dt[1] == vect_external_def
+  || dt[1] == vect_constant_def);
   if (vectype2
  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2)))
return false;
@@ -6321,6 +6325,8 @@ vectorizable_operation (vec_info *vinfo,
  "use not simple.\n");
  return false;
}
+  is_invariant &= (dt[2] == vect_external_def
+  || dt[2] == vect_constant_def);
   if (vectype3
  && maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3)))
return false;
@@ -6426,16 +6432,23 @@ vectorizable_operation (vec_info *vinfo,
   int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
   vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
   internal_fn cond_fn = get_conditional_internal_fn (code);
-  bool could_trap = gimple_could_trap_p (stmt);
+
+  /* If operating on inactive elements could generate spurious traps,
+ we need to restrict the operation to active lanes.  Note that this
+ specifically doesn't apply to unhoisted invariants, since they
+ operate on the same value for every lane.
+
+ Similarly, if this operation is part of a reduction, a fully-masked
+ loop should only change the active lanes of the reduction chain,
+ keeping the inactive lanes as-is.  */
+  bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
+   || reduc_idx >= 0);
 
   if (!vec_stmt) /* transformation not required.  */
 {
-  /* If this operation is part of a reduction, a fully-masked loop
-should only change the active lanes of the reduction chain,
-keeping the inactive lanes as-is.  */
   if (loop_vinfo
  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
- && (could_trap || reduc_idx >= 0))
+ && mask_out_inactive)
{
  if (cond_fn == IFN_LAST
  || !direct_internal_fn_supported_p (cond_fn, vectype,
@@ -6578,7 +6591,7 @@ vectorizable_operation (vec_info *vinfo,
   vop1 = ((op_type == binary_op || op_type == ternary_op)
  ? vec_oprnds1[i] : NULL_TREE);
   vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
-  if (masked_loop_p && (reduc_idx >= 0 || could_trap))
+  if (masked_loop_p && mask_out_inactive)
{
  tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
  

Re: [PATCH] simplify-rtx: Fix VOIDmode operand handling in simplify_subreg [PR108805]


Committed attached patch.

On 02/03/2023 10:13, Richard Sandiford wrote:

"Andre Vieira (lists)"  writes:

Hey both,

Sorry about that, don't know how I missed those. Just running a test on
that now and will commit when it's done. I assume the comment and 0 ->
byte change can be seen as obvious, especially since it was supposed to
be in my original patch...


Thanks.  And yeah, agree it counts as obvious.

Richard


On 27/02/2023 15:46, Richard Sandiford wrote:

Uros Bizjak  writes:

On Fri, Feb 17, 2023 at 8:38 AM Richard Biener  wrote:


On Thu, 16 Feb 2023, Uros Bizjak wrote:


simplify_subreg can return VOIDmode const_int operand and will
cause ICE in simplify_gen_subreg when this operand is passed to it.

The patch prevents VOIDmode temporary from entering simplify_gen_subreg.
We can't process const_int operand any further, since outermode
is not an integer mode here.


But if it's a CONST_INT then we know it's of int_outermode, no? That is,
doesn't simplify_subreg (mode, ...) always return something in 'mode'
and thus we can always pass just 'mode' as third argument to the
following simplify_gen_subreg call?


You are right. I am testing the attached patch that works too.


Thanks for this, it's the correct fix.  But as noted in
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/610920.html,
the final 0 is also wrong for big-endian.  Andre?

Richard



Uros.

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 0a1dd88b0a8..3955929bb70 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7665,7 +7665,7 @@ simplify_context::simplify_subreg (machine_mode 
outermode, rtx op,
   {
 rtx tem = simplify_subreg (int_outermode, op, innermode, byte);
 if (tem)
-   return simplify_gen_subreg (outermode, tem, GET_MODE (tem), 0);
+   return simplify_gen_subreg (outermode, tem, int_outermode, 0);
   }
   
 /* If OP is a vector comparison and the subreg is not changing the
diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 
2c82256af664bf2bc43172fc8fb4dfb2849e64b1..3b33afa24617f3185872ddc43284e4c9cd073510
 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7667,10 +7667,10 @@ simplify_context::simplify_subreg (machine_mode 
outermode, rtx op,
}
 }
 
-  /* Try simplifying a SUBREG expression of a non-integer OUTERMODE by using a
- NEW_OUTERMODE of the same size instead, other simplifications rely on
- integer to integer subregs and we'd potentially miss out on optimizations
- otherwise.  */
+  /* If the outer mode is not integral, try taking a subreg with the equivalent
+ integer outer mode and then bitcasting the result.
+ Other simplifications rely on integer to integer subregs and we'd
+ potentially miss out on optimizations otherwise.  */
   if (known_gt (GET_MODE_SIZE (innermode),
GET_MODE_SIZE (outermode))
   && SCALAR_INT_MODE_P (innermode)
@@ -7680,7 +7680,7 @@ simplify_context::simplify_subreg (machine_mode 
outermode, rtx op,
 {
   rtx tem = simplify_subreg (int_outermode, op, innermode, byte);
   if (tem)
-   return simplify_gen_subreg (outermode, tem, int_outermode, 0);
+   return simplify_gen_subreg (outermode, tem, int_outermode, byte);
 }
 
   /* If OP is a vector comparison and the subreg is not changing the


Re: [PATCH] libiberty: fix memory leak in pex-win32.c and refactor

Thanks for the review.

What is the next step please?

Thanks,
Costas

On Thu, 2 Mar 2023 at 10:08, Richard Biener 
wrote:

> On Thu, Mar 2, 2023 at 10:21 AM Costas Argyris 
> wrote:
> >
> > I forgot to mention that:
> >
> > 1) The CreateProcess documentation
> >
> >
> https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createprocessa
> >
> > doesn't mention anything about taking ownership of this or any other
> buffer passed to it.
>
> Thanks - thus the patch is OK.
>
> Thanks,
> Richard.
>
> > 2) The cmdline buffer gets created by the argv_to_cmdline function
> >
> > https://github.com/gcc-mirror/gcc/blob/master/libiberty/pex-win32.c#L339
> >
> > which has this comment right above it:
> >
> > /* Return a Windows command-line from ARGV.  It is the caller's
> >responsibility to free the string returned.  */
> >
> > Thanks,
> > Costas
> >
> > On Thu, 2 Mar 2023 at 07:32, Richard Biener 
> wrote:
> >>
> >> On Wed, Mar 1, 2023 at 7:14 PM Costas Argyris via Gcc-patches
> >>  wrote:
> >> >
> >> > Hi
> >> >
> >> > It seems that the win32_spawn function in libiberty/pex-win32.c is
> leaking
> >> > the cmdline buffer in 2/3 exit scenarios (it is only free'd in 1/3).
>   The
> >> > problem here is that the cleanup code is written 3 times, one at each
> exit
> >> > scenario.
> >> >
> >> > The proposed attached refactoring has the cleanup code appearing just
> once
> >> > and is executed for all exit scenarios, reducing the likelihood of
> such
> >> > leaks in the future.
> >>
> >> One could imagine that CreateProcess in case of success takes ownership
> of
> >> the buffer pointed to by cmdline?  If you can confirm it is not then
> the patch
> >> looks OK to me.
> >>
> >> Thanks,
> >> Richard.
> >>
> >> > Thanks,
> >> > Costas
>


Re: [Patch] gcc.dg/overflow-warn-9.c: exclude from LLP64

Jonathan Yong <10wa...@gmail.com> writes:
> On 3/2/23 10:44, Richard Sandiford wrote:
>>> diff --git a/gcc/testsuite/gcc.dg/overflow-warn-9.c 
>>> b/gcc/testsuite/gcc.dg/overflow-warn-9.c
>>> index 57c0f17bc91..ae588bd8491 100644
>>> --- a/gcc/testsuite/gcc.dg/overflow-warn-9.c
>>> +++ b/gcc/testsuite/gcc.dg/overflow-warn-9.c
>>> @@ -59,7 +59,8 @@ const struct Types t1 = {
>>> .ui = UINT_MAX + 1L,  /* { dg-warning "signed conversion from .long 
>>> int. to .unsigned int. changes value from .4294967296. to .0." "lp64" { 
>>> target lp64 } } */
>>> .ui = UINT_MAX + 1LU, /* { dg-warning "conversion from .long 
>>> unsigned int. to .unsigned int. changes value from .4294967296. to .0." 
>>> "lp64" { target lp64 } } */
>>>   
>>> -  .sl = LONG_MAX + 1LU, /* { dg-warning "signed conversion from .long 
>>> unsigned int. to .long int. changes value from .9223372036854775808. to 
>>> .-9223372036854775808." "not-ilp32" { target { ! ilp32 } } } */
>>> +  .sl = LONG_MAX + 1LU, /* { dg-warning "signed conversion from .long 
>>> unsigned int. to .long int. changes value from .9223372036854775808. to 
>>> .-9223372036854775808." "lp64" { target lp64 } } */
>>> /* { dg-warning "signed conversion from .long unsigned int. to .long 
>>> int. changes value from .2147483648. to .-2147483648." "ilp32" { target 
>>> ilp32 } .-1 } */
>>> +  /* { dg-warning "signed conversion from .long unsigned int. to .long 
>>> int. changes value from .2147483648. to .-2147483648." "llp64" { target 
>>> llp64 } .-2 } */
>>> .ul = ULONG_MAX + 1LU /* there should be some warning here */
>>>   };
>> 
>> OK, although in general I think it would be good to use
>> { target { ilp32 || llp64 } } for this kind of thing.
>> 
>> No need to change this patch though, just saying for the future.
>> 
>
> Thanks for reviewing, how does the "ilp32" or "llp64" before the curly 
> target brackets work?

That's just a free-form string, to make the test name unique.

Richard


Re: [Patch] gcc.dg/memchr-3.c: fix for LLP64


On 3/2/23 10:46, Richard Sandiford wrote:

diff --git a/gcc/testsuite/gcc.dg/memchr-3.c b/gcc/testsuite/gcc.dg/memchr-3.c
index c38d9cf3349..af1b26ef3ae 100644
--- a/gcc/testsuite/gcc.dg/memchr-3.c
+++ b/gcc/testsuite/gcc.dg/memchr-3.c
@@ -6,7 +6,7 @@
  typedef __INT8_TYPE__  int8_t;
  typedef __INT32_TYPE__ int32_t;
  
-extern void* memchr (const void*, int, long);

+extern void* memchr (const void*, int, long); /* { dg-warning 
"-Wbuiltin-declaration-mismatch" { target llp64 } } */
  
  struct SX

  {


OK, thanks.

Richard


Thanks, pushed to master branch.



Re: [Patch] gcc.dg/overflow-warn-9.c: exclude from LLP64


On 3/2/23 10:44, Richard Sandiford wrote:

diff --git a/gcc/testsuite/gcc.dg/overflow-warn-9.c 
b/gcc/testsuite/gcc.dg/overflow-warn-9.c
index 57c0f17bc91..ae588bd8491 100644
--- a/gcc/testsuite/gcc.dg/overflow-warn-9.c
+++ b/gcc/testsuite/gcc.dg/overflow-warn-9.c
@@ -59,7 +59,8 @@ const struct Types t1 = {
.ui = UINT_MAX + 1L,  /* { dg-warning "signed conversion from .long int. to .unsigned 
int. changes value from .4294967296. to .0." "lp64" { target lp64 } } */
.ui = UINT_MAX + 1LU, /* { dg-warning "conversion from .long unsigned int. to 
.unsigned int. changes value from .4294967296. to .0." "lp64" { target lp64 } } */
  
-  .sl = LONG_MAX + 1LU, /* { dg-warning "signed conversion from .long unsigned int. to .long int. changes value from .9223372036854775808. to .-9223372036854775808." "not-ilp32" { target { ! ilp32 } } } */

+  .sl = LONG_MAX + 1LU, /* { dg-warning "signed conversion from .long unsigned int. to 
.long int. changes value from .9223372036854775808. to .-9223372036854775808." 
"lp64" { target lp64 } } */
/* { dg-warning "signed conversion from .long unsigned int. to .long int. changes value 
from .2147483648. to .-2147483648." "ilp32" { target ilp32 } .-1 } */
+  /* { dg-warning "signed conversion from .long unsigned int. to .long int. changes value 
from .2147483648. to .-2147483648." "llp64" { target llp64 } .-2 } */
.ul = ULONG_MAX + 1LU /* there should be some warning here */
  };


OK, although in general I think it would be good to use
{ target { ilp32 || llp64 } } for this kind of thing.

No need to change this patch though, just saying for the future.



Thanks for reviewing, how does the "ilp32" or "llp64" before the curly 
target brackets work?


Pushed existing patch to master branch.



Re: [PATCH] wwwdocs: Document several further C++23 changes

On Thu, Mar 02, 2023 at 12:46:42PM +0100, Jakub Jelinek wrote:
> Hi!
> 
> Tobias mentioned on IRC that assume attribute wasn't mentioned in
> changes.html.  The P1774R8 entry was missing for C++, so I went through
> projects/cxx-status.html#cxx23 and filled in all the missing papers
> which have been implemented newly in GCC 13, plus a small note for C family
> about assume attribute.
> 
> Ok for wwwdocs?

Sure, thanks.
 
> diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
> index 410594ae..839b73d0 100644
> --- a/htdocs/gcc-13/changes.html
> +++ b/htdocs/gcc-13/changes.html
> @@ -182,6 +182,10 @@ a work-in-progress.
>   href="https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html";>-fanalyzer
>  to detect misuses of file descriptors.
>
> +  A new statement attribute for C++23  href="https://wg21.link/p1774r8";>P1774R8 Portable
> +  assumptions support also in C or older C++:
> + href="https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html#index-assume-statement-attribute";>__attribute__((assume(EXPR)));
> +  
>  
>  
>  C
> @@ -290,6 +294,29 @@ a work-in-progress.
>   Operator You Are Looking For
>   (https://gcc.gnu.org/PR106644";>PR106644)
>
> +   https://wg21.link/p2362r3";>P2362R3, Remove 
> non-encodable
> +  wide character literals and multicharacter wide character literals
> + (https://gcc.gnu.org/PR106647";>PR106647)
> +  
> +   https://wg21.link/p2448r2";>P2448R2, Relaxing some
> +  constexpr restrictions
> + (https://gcc.gnu.org/PR106649";>PR106649)
> +  
> +   https://wg21.link/p1467r9";>P1467R9, Extended
> +  floating-point types and standard names
> + (https://gcc.gnu.org/PR106652";>PR106652)
> +  
> +   https://wg21.link/p1774r8";>P1774R8, Portable
> +  assumptions
> + (https://gcc.gnu.org/PR106654";>PR106654)
> +  
> +   https://wg21.link/p2295r6";>P2295R6, Support for
> +  UTF-8 as a portable source file encoding
> + (https://gcc.gnu.org/PR106655";>PR106655)
> +  
> +   https://wg21.link/p2589r1";>P2589R1, static operator[]
> + (https://gcc.gnu.org/PR107684";>PR107684)
> +  
>  
>
>New warnings:
> 
>   Jakub
> 

Marek



[PATCH] target/108738 - limit STV chain discovery

The following puts a hard limit on the inherently quadratic STV chain
discovery.  Without a limit for the compiler.i testcase in PR26854
we see at -O2

 machine dep reorg  : 574.45 ( 53%)

with release checking while with the proposed limit it's

 machine dep reorg  :   2.86 (  1%)

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?

Thanks,
Richard.

PR target/108738
* config/i386/i386.opt (--param x86-stv-max-visits): New param.
* doc/invoke.texi (--param x86-stv-max-visits): Document it.
* config/i386/i386-features.h (scalar_chain::max_visits): New.
(scalar_chain::build): Add bitmap parameter, return boolean.
(scalar_chain::add_insn): Likewise.
(scalar_chain::analyze_register_chain): Likewise.
* config/i386/i386-features.cc (scalar_chain::scalar_chain):
Initialize max_visits.
(scalar_chain::analyze_register_chain): When we exhaust
max_visits, abort.  Also abort when running into any
disallowed insn.
(scalar_chain::add_insn): Propagate abort.
(scalar_chain::build): Likewise.  When aborting amend
the set of disallowed insn with the insns set.
(convert_scalars_to_vector): Adjust.  Do not convert aborted
chains.
---
 gcc/config/i386/i386-features.cc | 77 +++-
 gcc/config/i386/i386-features.h  | 10 +++--
 gcc/config/i386/i386.opt |  4 ++
 gcc/doc/invoke.texi  |  4 ++
 4 files changed, 70 insertions(+), 25 deletions(-)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index eff91301009..c09abf8fc20 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -296,6 +296,8 @@ scalar_chain::scalar_chain (enum machine_mode smode_, enum 
machine_mode vmode_)
 
   n_sse_to_integer = 0;
   n_integer_to_sse = 0;
+
+  max_visits = x86_stv_max_visits;
 }
 
 /* Free chain's data.  */
@@ -354,10 +356,12 @@ scalar_chain::mark_dual_mode_def (df_ref def)
 }
 
 /* Check REF's chain to add new insns into a queue
-   and find registers requiring conversion.  */
+   and find registers requiring conversion.  Return true if OK, false
+   if the analysis was aborted.  */
 
-void
-scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref)
+bool
+scalar_chain::analyze_register_chain (bitmap candidates, df_ref ref,
+ bitmap disallowed)
 {
   df_link *chain;
   bool mark_def = false;
@@ -371,6 +375,9 @@ scalar_chain::analyze_register_chain (bitmap candidates, 
df_ref ref)
   if (!NONDEBUG_INSN_P (DF_REF_INSN (chain->ref)))
continue;
 
+  if (--max_visits == 0)
+   return false;
+
   if (!DF_REF_REG_MEM_P (chain->ref))
{
  if (bitmap_bit_p (insns, uid))
@@ -381,6 +388,10 @@ scalar_chain::analyze_register_chain (bitmap candidates, 
df_ref ref)
  add_to_queue (uid);
  continue;
}
+
+ /* If we run into parts of an aborted chain discovery abort.  */
+ if (bitmap_bit_p (disallowed, uid))
+   return false;
}
 
   if (DF_REF_REG_DEF_P (chain->ref))
@@ -401,15 +412,19 @@ scalar_chain::analyze_register_chain (bitmap candidates, 
df_ref ref)
 
   if (mark_def)
 mark_dual_mode_def (ref);
+
+  return true;
 }
 
-/* Add instruction into a chain.  */
+/* Add instruction into a chain.  Return true if OK, false if the search
+   was aborted.  */
 
-void
-scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid)
+bool
+scalar_chain::add_insn (bitmap candidates, unsigned int insn_uid,
+   bitmap disallowed)
 {
   if (!bitmap_set_bit (insns, insn_uid))
-return;
+return true;
 
   if (dump_file)
 fprintf (dump_file, "  Adding insn %d to chain #%d\n", insn_uid, chain_id);
@@ -426,22 +441,27 @@ scalar_chain::add_insn (bitmap candidates, unsigned int 
insn_uid)
   df_ref ref;
   for (ref = DF_INSN_UID_DEFS (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
 if (!HARD_REGISTER_P (DF_REF_REG (ref)))
-  analyze_register_chain (candidates, ref);
+  if (!analyze_register_chain (candidates, ref, disallowed))
+   return false;
 
   /* The operand(s) of VEC_SELECT don't need to be converted/convertible.  */
   if (def_set && GET_CODE (SET_SRC (def_set)) == VEC_SELECT)
-return;
+return true;
 
   for (ref = DF_INSN_UID_USES (insn_uid); ref; ref = DF_REF_NEXT_LOC (ref))
 if (!DF_REF_REG_MEM_P (ref))
-  analyze_register_chain (candidates, ref);
+  if (!analyze_register_chain (candidates, ref, disallowed))
+   return false;
+
+  return true;
 }
 
 /* Build new chain starting from insn INSN_UID recursively
-   adding all dependent uses and definitions.  */
+   adding all dependent uses and definitions.  Return true if OK, false
+   if the chain discovery was aborted.  */
 
-void
-scalar_chain::build (bitmap candidates, unsigned insn_uid)
+bool
+scalar_chain::bu

Re: [PATCH] debug/108772 - ICE with late debug generated with -flto


On 3/2/23 02:43, Richard Biener wrote:

On Wed, 1 Mar 2023, Jason Merrill wrote:


On 3/1/23 08:09, Jakub Jelinek wrote:

On Wed, Mar 01, 2023 at 01:07:02PM +0000, Richard Biener wrote:

When combining -g1 with -flto we run into the DIE location annotation
machinery for globals calling dwarf2out_late_global_decl but not
having any early generated DIE for function scope statics.  In
this process we'd generate a limbo DIE since also the function scope
doesn't have any early generated DIE.  The limbo handling then tries
to force a DIE for the context chain which ultimately fails and
ICEs at the std namespace decl because at -g1 we don't represent that.

The following avoids this situation by making sure to never generate
any limbo DIEs from dwarf2out_late_global_decl in the in_lto_p path
but instead for function scope globals rely on DIE generation for
the function to output a DIE for the local static (which doesn't
happen for -g1).


So the issue is that we're trying to force out a DIE for a decl that we
wouldn't have generated without -flto?  How is it avoided in the non-LTO case?


When we go rest_of_decl_compilation for this decl we defer to the
containing function to generate an early DIE but that doesn't
(because of -g1).  The call to late_global_decl that's done by
assemble_decl then does nothing because there's no early DIE.  But with
-flto we cannot completely rely on early DIE presence (not even without,
in case of cloning - but we don't clone global variables), esp. because
there's still the "supported" non-early-LTO path for non-ELF targets.

So at this point it seems to be the best thing to mimic what
rest_of_decl_compilation does and defer to dwarf2out of the
containing function to generate the DIE (or not).  For the reason
of the least amount of changes at this point in stage4 I went for
querying the DECL_CONTEXT DIE instead of right-out not handling
local_function_static () decls in this path.

If you'd prefer that, so

   if (! die && in_lto_p
   /* Function scope variables are emitted when emitting the
  DIE for the function.  */
   && ! local_function_static (decl))
 dwarf2out_decl (decl);

then I can test that variant as well which feels a bit more
consistent.


That variant is OK, thanks.

Jason



Re: [wwwdocs] gcc-13/porting_to.html: Document C++ -fexcess-precision=standard

On 3/2/23 11:32, Jakub Jelinek wrote:
> Hi!
> 
> On Fri, Feb 10, 2023 at 10:06:03AM +0100, Gerald Pfeifer wrote:
>> Yes, thank you! Two minor suggestions/questions below:
>>
>>> --- a/htdocs/gcc-13/changes.html
>>> +++ b/htdocs/gcc-13/changes.html
>>> +  -fexcess-precision=fast.  The option affects mainly
>>
>> Here I'd say "mainly affects".
>>
>>> +  IA-32/x86-64 where when defaulting to x87 math and in some cases on
>>> +  Motorola 68000 float and double expressions
>>> +  are evaluated in long double precision and S/390, 
>>> System z,
>>> +  IBM z Systems where float expressions are evaluated in
>>> +  double precision.
>>
>> The "where when" part proved a bit tricky for my brain. :-) 
>>
>> I think it is precise, but am wondering whether
>>
>>   ...IA-32/x64 using x87 math and in some cases on Motorola 68000, where
>>   float and double expressions are evaluated...
>>
>> might work? What do you think?
> 
> Thanks, committed with those tweaks.
> 
> Martin would like to see some note in porting_to.html for it too,
> here is my attempt to do so:

I like the suggested wording. Thanks for it.

Martin

> 
> diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
> index 9a9a3147..5cbeefb6 100644
> --- a/htdocs/gcc-13/porting_to.html
> +++ b/htdocs/gcc-13/porting_to.html
> @@ -120,5 +120,29 @@ the operand as an lvalue.
> }
>  
>  
> +Excess precision changes
> +GCC 13 implements in C++ excess precision 
> support
> +which has been implemented just in the C front-end before.  The new behavior 
> is
> +enabled by default in -std=c++NN modes and when
> +FLT_EVAL_METHOD is 1 or 2 affects behavior of floating point
> +constants and expressions.  E.g. for FLT_EVAL_METHOD equal
> +to 2 on ia32:
> +
> +
> +#include 
> +void foo (void) { if (1.1f + 3.3f != 1.1L + 3.3L) abort (); }
> +void bar (void) { double d = 4.2; if (d == 4.2) abort (); }
> +
> +
> +will not abort with standard excess precision, because constants and 
> expressions
> +in float or double are evaluated in precision of
> +long double and demoted only on casts or assignments, but will
> +abort with fast excess precision, where whether something is evaluated in
> +precision of long double or not depends on what evaluations are
> +done in the i387 floating point stack or are spilled from it.
> +
> +The -fexcess-precision=fast option can be used to request the
> +previous behavior.
> +
>  
>  
> 
> 
>   Jakub
> 



[PATCH] wwwdocs: Document several further C++23 changes

Hi!

Tobias mentioned on IRC that assume attribute wasn't mentioned in
changes.html.  The P1774R8 entry was missing for C++, so I went through
projects/cxx-status.html#cxx23 and filled in all the missing papers
which have been implemented newly in GCC 13, plus a small note for C family
about assume attribute.

Ok for wwwdocs?

diff --git a/htdocs/gcc-13/changes.html b/htdocs/gcc-13/changes.html
index 410594ae..839b73d0 100644
--- a/htdocs/gcc-13/changes.html
+++ b/htdocs/gcc-13/changes.html
@@ -182,6 +182,10 @@ a work-in-progress.
 https://gcc.gnu.org/onlinedocs/gcc/Static-Analyzer-Options.html";>-fanalyzer
 to detect misuses of file descriptors.
   
+  A new statement attribute for C++23 https://wg21.link/p1774r8";>P1774R8 Portable
+  assumptions support also in C or older C++:
+https://gcc.gnu.org/onlinedocs/gcc/Statement-Attributes.html#index-assume-statement-attribute";>__attribute__((assume(EXPR)));
+  
 
 
 C
@@ -290,6 +294,29 @@ a work-in-progress.
Operator You Are Looking For
(https://gcc.gnu.org/PR106644";>PR106644)
   
+   https://wg21.link/p2362r3";>P2362R3, Remove 
non-encodable
+  wide character literals and multicharacter wide character literals
+   (https://gcc.gnu.org/PR106647";>PR106647)
+  
+   https://wg21.link/p2448r2";>P2448R2, Relaxing some
+  constexpr restrictions
+   (https://gcc.gnu.org/PR106649";>PR106649)
+  
+   https://wg21.link/p1467r9";>P1467R9, Extended
+  floating-point types and standard names
+   (https://gcc.gnu.org/PR106652";>PR106652)
+  
+   https://wg21.link/p1774r8";>P1774R8, Portable
+  assumptions
+   (https://gcc.gnu.org/PR106654";>PR106654)
+  
+   https://wg21.link/p2295r6";>P2295R6, Support for
+  UTF-8 as a portable source file encoding
+   (https://gcc.gnu.org/PR106655";>PR106655)
+  
+   https://wg21.link/p2589r1";>P2589R1, static operator[]
+   (https://gcc.gnu.org/PR107684";>PR107684)
+  
 
   
   New warnings:

Jakub



[PATCH] c++, v3: Emit fundamental tinfos for _Float16/decltype(0.0bf16) types on ia32 with -mno-sse2 [PR108883]

Hi!

On Wed, Mar 01, 2023 at 05:50:47PM -0500, Jason Merrill wrote:
> > And then there is a question whether we want to emit rtti for
> > _Float{16,32,64,128}, _Float{32,64,128}x and decltype(0.0bf16) regardless
> > of whether the target supports them at all or not.
> > Emitting them always would have an advantage, if say bfloat16_t support
> > isn't added for aarch64 for GCC 13 (it is still pending review), we wouldn't
> > need to deal with symbol versioning for it in GCC 14 or later.
> > On the other side, on some arches some types are very unlikely to be
> > supported.  And e.g. _Float128x isn't supported on any arch right now.
> 
> A good point.  Incidentally, it seems problematic for embedded users that
> all the fundamental type_infos are emitted in the same .o, making it hard to
> link in only the ones you care about.  And new floating-point variants add
> to that problem.  So perhaps until that is addressed, it's better to avoid
> adding a bunch more on targets that don't support them.

Ok, so here is a variant of the patch which still drops the fallback_* stuff,
but for float*_type_node doesn't do the automatic fallback in generic code
and leaves those to a target hook.

So far lightly tested on x86_64-linux -m32/-m64:

2023-03-02  Jakub Jelinek  

PR target/108883
gcc/
* target.h (emit_support_tinfos_callback): New typedef.
* targhooks.h (default_emit_support_tinfos): Declare.
* targhooks.cc (default_emit_support_tinfos): New function.
* target.def (emit_support_tinfos): New target hook.
* doc/tm.texi.in (emit_support_tinfos): Document it.
* doc/tm.texi: Regenerated.
* config/i386/i386.cc (ix86_emit_support_tinfos): New function.
(TARGET_EMIT_SUPPORT_TINFOS): Redefine.
gcc/cp/
* cp-tree.h (enum cp_tree_index): Remove CPTI_FALLBACK_DFLOAT*_TYPE
enumerators.
(fallback_dfloat32_type, fallback_dfloat64_type,
fallback_dfloat128_type): Remove.
* rtti.cc (emit_support_tinfo_1): If not emitted already, call
emit_tinfo_decl and remove from unemitted_tinfo_decls right away.
(emit_support_tinfos): Move &dfloat*_type_node from fundamentals array
into new fundamentals_with_fallback array.  Call emit_support_tinfo_1
on elements of that array too, with the difference that if
the type is NULL, use a fallback REAL_TYPE for it temporarily.
Drop the !targetm.decimal_float_supported_p () handling.  Call
targetm.emit_support_tinfos at the end.
* mangle.cc (write_builtin_type): Remove references to
fallback_dfloat*_type.  Handle bfloat16_type_node mangling.

--- gcc/target.h.jj 2023-02-17 12:45:08.056638510 +0100
+++ gcc/target.h2023-03-02 12:06:59.248146213 +0100
@@ -260,6 +260,8 @@ enum poly_value_estimate_kind
   POLY_VALUE_LIKELY
 };
 
+typedef void (*emit_support_tinfos_callback) (tree);
+
 extern bool verify_type_context (location_t, type_context_kind, const_tree,
 bool = false);
 
--- gcc/targhooks.h.jj  2023-01-02 09:32:50.422880177 +0100
+++ gcc/targhooks.h 2023-03-02 12:06:22.559686384 +0100
@@ -98,6 +98,8 @@ extern int default_builtin_vectorization
 
 extern tree default_builtin_reciprocal (tree);
 
+extern void default_emit_support_tinfos (emit_support_tinfos_callback);
+
 extern HOST_WIDE_INT default_static_rtx_alignment (machine_mode);
 extern HOST_WIDE_INT default_constant_alignment (const_tree, HOST_WIDE_INT);
 extern HOST_WIDE_INT constant_alignment_word_strings (const_tree,
--- gcc/targhooks.cc.jj 2023-01-02 09:32:52.591848839 +0100
+++ gcc/targhooks.cc2023-03-02 12:01:39.576868114 +0100
@@ -752,6 +752,11 @@ default_builtin_reciprocal (tree)
   return NULL_TREE;
 }
 
+void
+default_emit_support_tinfos (emit_support_tinfos_callback)
+{
+}
+
 bool
 hook_bool_CUMULATIVE_ARGS_arg_info_false (cumulative_args_t,
  const function_arg_info &)
--- gcc/target.def.jj   2023-02-22 15:58:50.252996452 +0100
+++ gcc/target.def  2023-03-02 12:01:52.002684436 +0100
@@ -2606,6 +2606,19 @@ types.",
  const char *, (const_tree type),
  hook_constcharptr_const_tree_null)
 
+/* Temporarily add conditional target specific types for the purpose of
+   emitting C++ fundamental type tinfos.  */
+DEFHOOK
+(emit_support_tinfos,
+ "If your target defines any fundamental types which depend on ISA flags,\n\
+they might need C++ tinfo symbols in libsupc++/libstdc++ regardless of\n\
+ISA flags the library is compiled with.\n\
+This hook allows creating tinfo symbols even for those cases, by temporarily\n\
+creating corresponding fundamental type trees, calling the @var{callback}\n\
+function on it and setting the type back to @code{nullptr}.",
+ void, (emit_support_tinfos_callback callback),
+ default_emit_support_tinfos)
+
 /* Make any adjustments to libfunc names needed for this target.  */
 DEFHOOK
 (init_libfuncs,
--- gcc/doc/tm.texi.in.jj   2023-02-

Re: [Patch] gcc.dg/memchr-3.c: fix for LLP64

Jonathan Yong <10wa...@gmail.com> writes:
> On 2/27/23 16:55, Richard Sandiford wrote:
>> Jonathan Yong via Gcc-patches  writes:
>>> Attached patch OK?
>>>
>>>   gcc.dg/memchr-3.c: fix for LLP64
>>>
>>>   gcc/testsuite/ChangeLog:
>>>
>>>   PR middle-end/97956
>>>   * gcc.dg/memchr-3.c (memchr): fix long to size_t in
>>>   prototype.
>>>
>>>  From 194eb3d43964276beeaea14ebee4b241799cd966 Mon Sep 17 00:00:00 2001
>>> From: Jonathan Yong <10wa...@gmail.com>
>>> Date: Mon, 27 Feb 2023 10:02:32 +0000
>>> Subject: [PATCH] gcc.dg/memchr-3.c: fix for LLP64
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> PR middle-end/97956
>>> * gcc.dg/memchr-3.c (memchr): fix long to size_t in
>>> prototype.
>> 
>> It looks like the current type signature could have been a deliberate
>> part of the test.  I think we should just skip it for LLP64 instead.
>> Preapproved if you agree.
>> 
>> Thanks,
>> Richard
>> 
>
> Revised, account for the warning in LLP64.
>
>
> From 2dbfa538fe11c65914b28f94d066daee789f881a Mon Sep 17 00:00:00 2001
> From: Jonathan Yong <10wa...@gmail.com>
> Date: Mon, 27 Feb 2023 10:02:32 +0000
> Subject: [PATCH 6/7] gcc.dg/memchr-3.c: Account for LLP64 warnings
>
>   gcc/testsuite/ChangeLog:
>
>   PR middle-end/97956
>   * gcc.dg/memchr-3.c (memchr): Account for LLP64 warnings.
>
> Signed-off-by: Jonathan Yong <10wa...@gmail.com>
> ---
>  gcc/testsuite/gcc.dg/memchr-3.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/gcc.dg/memchr-3.c b/gcc/testsuite/gcc.dg/memchr-3.c
> index c38d9cf3349..af1b26ef3ae 100644
> --- a/gcc/testsuite/gcc.dg/memchr-3.c
> +++ b/gcc/testsuite/gcc.dg/memchr-3.c
> @@ -6,7 +6,7 @@
>  typedef __INT8_TYPE__  int8_t;
>  typedef __INT32_TYPE__ int32_t;
>  
> -extern void* memchr (const void*, int, long);
> +extern void* memchr (const void*, int, long); /* { dg-warning 
> "-Wbuiltin-declaration-mismatch" { target llp64 } } */
>  
>  struct SX
>  {

OK, thanks.

Richard


Re: [PATCH 1/2] gcov: Fix "do-while" structure in case statement leads to incorrect code coverage [PR93680]

On Thu, Mar 2, 2023 at 11:22 AM Xionghu Luo  wrote:
>
>
>
> On 2023/3/2 16:41, Richard Biener wrote:
> > On Thu, Mar 2, 2023 at 3:31 AM Xionghu Luo via Gcc-patches
> >  wrote:
> >>
> >> When splitting an edge with a self loop, the split edge should be placed just 
> >> next to
> >> the edge_in->src, otherwise it may generate different position latch bbs 
> >> for
> >> two consecutive self loops.  For details, please refer to:
> >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93680#c4
> >>
> >> Regression tested pass on x86_64-linux-gnu and aarch64-linux-gnu, OK for
> >> master?
> >>
> >> gcc/ChangeLog:
> >>
> >>  PR gcov/93680
> >>  * tree-cfg.cc (split_edge_bb_loc): Return edge_in->src for self 
> >> loop.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>  PR gcov/93680
> >>  * gcc.misc-tests/gcov-pr93680.c: New test.
> >>
> >> Signed-off-by: Xionghu Luo 
> >> ---
> >>   gcc/testsuite/gcc.misc-tests/gcov-pr93680.c | 24 +
> >>   gcc/tree-cfg.cc |  2 +-
> >>   2 files changed, 25 insertions(+), 1 deletion(-)
> >>   create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-pr93680.c
> >>
> >> diff --git a/gcc/testsuite/gcc.misc-tests/gcov-pr93680.c 
> >> b/gcc/testsuite/gcc.misc-tests/gcov-pr93680.c
> >> new file mode 100644
> >> index 0000000..b2bf9e626fc
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.misc-tests/gcov-pr93680.c
> >> @@ -0,0 +1,24 @@
> >> +/* { dg-options "-fprofile-arcs -ftest-coverage" } */
> >> +/* { dg-do run { target native } } */
> >> +
> >> +int f(int s, int n)
> >> +{
> >> +  int p = 0;
> >> +
> >> +  switch (s)
> >> +  {
> >> +case 0: /* count(5) */
> >> +  do { p++; } while (--n); /* count(5) */
> >> +  return p; /* count(1) */
> >> +
> >> +case 1: /* count(5) */
> >> +  do { p++; } while (--n); /* count(5) */
> >> +  return p; /* count(1) */
> >> +  }
> >> +
> >> +  return 0;
> >> +}
> >> +
> >> +int main() { f(0, 5); f(1, 5); return 0; }
> >> +
> >> +/* { dg-final { run-gcov gcov-pr93680.c } } */
> >> diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
> >> index a9fcc7fd050..6fa1d83d366 100644
> >> --- a/gcc/tree-cfg.cc
> >> +++ b/gcc/tree-cfg.cc
> >> @@ -3009,7 +3009,7 @@ split_edge_bb_loc (edge edge_in)
> >> if (dest_prev)
> >>   {
> >> edge e = find_edge (dest_prev, dest);
> >> -  if (e && !(e->flags & EDGE_COMPLEX))
> >> +  if ((e && !(e->flags & EDGE_COMPLEX)) || edge_in->src == 
> >> edge_in->dest)
> >
> > I think this should eventually apply to all backedge edge_in, correct?
> >   But of course
> > we cannot easily test for this here.
> >
> > Still since this affects ordering in the {next,prev}_bb chain only but not 
> > CFG
> > semantics I wonder how it can affect coverage?  Isn't it only by chance that
> > this block order survives?
>
> For case:
>
> 1 int f(int s, int n)
> 2 {
> 3  int p = 0;
> 4  int q = 0;
> 5
> 6  switch (s)
> 7{
> 8case 0:
> 9  do { p++; } while (--n);
> 10  return p;
> 11
> 12case 1:
> 13  do { p++; } while (--n);
> 14  return p;
> 15}
> 16
> 17  return 0;
> 18 }
> 19
> 20 int main() { f(0, 5); f(1, 5);}
>
>
> current GCC generates:
>
>  :
> ...
>
>:<= first loop
> ...
>  goto ; [INV]
>else
>  goto ; [INV]
>
> :   <= first latch bb
>goto ; [100.00%]
>
> :
> ...
>goto ; [INV]
>
> :   <= second latch bb
>
> :<= second loop
> ...
>  goto ; [INV]
>else
>  goto ; [INV]
>
>
>  and  are created by split_edge->split_edge_bb_loc, 
> is located after the loop, but  is located before the loop.
>
> First call of split_edge_bb_loc, the dest_prev is , and find_edge
> did find an edge from  to , the returned after_bb is , so
> latch  is put after the loop
>
> but second call of split_edge_bb_loc, the dest_prev is , so find_edge
> return 0, and the returned after_bb is , then the created latch 
> is put before the loop...
>
> Different latch bb positions cause different gcno output, and gcov has too
> little information to recognize this :(.  Is it reasonable to keep
> loops of this kind in the same order?
>
>
>   small.gcno:  648:  block 2:`small.c':1, 3, 4, 6
>   small.gcno:  688:0145:  36:LINES
>   small.gcno:  700:  block 3:`small.c':8, 9
>   small.gcno:  732:0145:  32:LINES
>   small.gcno:  744:  block 5:`small.c':10
> -small.gcno:  772:0145:  32:LINES
> -small.gcno:  784:  block 6:`small.c':12
> -small.gcno:  812:0145:  36:LINES
> -small.gcno:  824:  block 7:`small.c':12, 13
> +small.gcno:  772:0145:  36:LINES
> +small.gcno:  784:  block 6:`small.c':12, 13
> +small.gcno:  816:0145:  32:LINES
> +small.gcno:  828:  block 8:`small.c':14
>   small.gcno:  856:0145:  32:LINES
> -small.gcno:  868:  block 8:`small.c':14
> -small.gcno:  896:   

Re: [Patch] gcc.dg/overflow-warn-9.c: exclude from LLP64

Jonathan Yong via Gcc-patches  writes:
> On 2/28/23 03:06, Hans-Peter Nilsson wrote:
>> 
>> On Mon, 27 Feb 2023, Jonathan Yong via Gcc-patches wrote:
>> 
>>> This test is for LP64 only, exclude LLP64 too.
>>> Patch OK?
>> 
>> I may be confused, but you're not making use of the "llp64"
>> effective target, there instead excluding/including lp64 /
>> ilp32 in sets that not obviously mean "exclude LLP64".
>> 
>> To wit, how is "! ilp32" -> "lp64" and "ilp32" -> "! lp64"
>> expressing "! llp64"?
>> 
>> brgds, H-P
>
> Attached new version, hopefully it is clearer.
>
> From 91102d00dc701a65dfac5820a2bc57e1e4bed5b2 Mon Sep 17 00:00:00 2001
> From: Jonathan Yong <10wa...@gmail.com>
> Date: Mon, 27 Feb 2023 09:49:31 +0000
> Subject: [PATCH 5/7] gcc.dg/overflow-warn-9.c: Fix LLP64
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.dg/overflow-warn-9.c: Add LLP64 case.
>
> Signed-off-by: Jonathan Yong <10wa...@gmail.com>
> ---
>  gcc/testsuite/gcc.dg/overflow-warn-9.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/gcc/testsuite/gcc.dg/overflow-warn-9.c 
> b/gcc/testsuite/gcc.dg/overflow-warn-9.c
> index 57c0f17bc91..ae588bd8491 100644
> --- a/gcc/testsuite/gcc.dg/overflow-warn-9.c
> +++ b/gcc/testsuite/gcc.dg/overflow-warn-9.c
> @@ -59,7 +59,8 @@ const struct Types t1 = {
>.ui = UINT_MAX + 1L,  /* { dg-warning "signed conversion from .long 
> int. to .unsigned int. changes value from .4294967296. to .0." "lp64" { 
> target lp64 } } */
>.ui = UINT_MAX + 1LU, /* { dg-warning "conversion from .long unsigned 
> int. to .unsigned int. changes value from .4294967296. to .0." "lp64" { 
> target lp64 } } */
>  
> -  .sl = LONG_MAX + 1LU, /* { dg-warning "signed conversion from .long 
> unsigned int. to .long int. changes value from .9223372036854775808. to 
> .-9223372036854775808." "not-ilp32" { target { ! ilp32 } } } */
> +  .sl = LONG_MAX + 1LU, /* { dg-warning "signed conversion from .long 
> unsigned int. to .long int. changes value from .9223372036854775808. to 
> .-9223372036854775808." "lp64" { target lp64 } } */
>/* { dg-warning "signed conversion from .long unsigned int. to .long int. 
> changes value from .2147483648. to .-2147483648." "ilp32" { target ilp32 } 
> .-1 } */
> +  /* { dg-warning "signed conversion from .long unsigned int. to .long int. 
> changes value from .2147483648. to .-2147483648." "llp64" { target llp64 } 
> .-2 } */
>.ul = ULONG_MAX + 1LU /* there should be some warning here */
>  };

OK, although in general I think it would be good to use
{ target { ilp32 || llp64 } } for this kind of thing.

No need to change this patch though, just saying for the future.

Thanks,
Richard



Re: [PATCH] libstdc++: Limit allocations in _Rb_tree 2/2

On Thu, 2 Mar 2023 at 05:40, François Dumont via Libstdc++
 wrote:
>
> Just forget about this patch, bad idea.
>
> The key_type might have additional data not used for the comparison.
> This data would not be preserved if we were inserting the already stored
> equivalent key instead of the user-provided one.

Right. Key equivalence does not imply substitutability, or even equality.

// Example key type showing that equivalence under operator< does not imply
// equality: ordering looks only at `i`, while operator== also compares `j`.
// Two keys with the same `i` but different `j` are therefore equivalent for
// an ordered container, yet not equal (and not substitutable).
struct Key {
  int i = 0;
  int j = 0;
  // Strict weak ordering on `i` alone; `j` takes no part in the ordering.
  bool operator<(const Key& k) const { return i < k.i; }
  // Full equality: both members must match.
  bool operator==(const Key& k) const { return i == k.i && j == k.j; }
};


Re: [PATCH] MIPS: Bugfix for fix Dejagnu issues with RTL checking enabled.

"Xin Liu"  writes:
> From: Robert Suchanek 
>
> gcc/ChangeLog:
>
>* config/mips/mips.cc (mips_set_text_contents_type): Modified parameter 
>* config/mips/mips-protos.h (mips_set_text_contents_type): Likewise
>
> Signed-off-by: Xin Liu 

Thanks, pushed to trunk.  I guess this is a regression from GCC 6.

Richard

>
> ---
>  gcc/config/mips/mips-protos.h | 2 +-
>  gcc/config/mips/mips.c| 4 ++--
>  2 files changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
> index 75432677da2..fae71fe776c 100644
> --- a/gcc/config/mips/mips-protos.h
> +++ b/gcc/config/mips/mips-protos.h
> @@ -272,7 +272,7 @@ extern void mips_declare_object (FILE *, const char *, 
> const char *,
>  extern void mips_declare_object_name (FILE *, const char *, tree);
>  extern void mips_finish_declare_object (FILE *, tree, int, int);
>  extern void mips_set_text_contents_type (FILE *, const char *,
> -  unsigned long, bool);
> +  unsigned HOST_WIDE_INT, bool);
>  
>  extern bool mips_small_data_pattern_p (rtx);
>  extern rtx mips_rewrite_small_data (rtx);
> diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
> index fb903a2a630..2d87d4f3627 100644
> --- a/gcc/config/mips/mips.cc
> +++ b/gcc/config/mips/mips.cc
> @@ -11090,7 +11090,7 @@ mips_finish_declare_object (FILE *stream, tree decl, 
> int top_level, int at_end)
>  void
>  mips_set_text_contents_type (FILE *file ATTRIBUTE_UNUSED,
>const char *prefix ATTRIBUTE_UNUSED,
> -  unsigned long num ATTRIBUTE_UNUSED,
> +  unsigned HOST_WIDE_INT num ATTRIBUTE_UNUSED,
>bool function_p ATTRIBUTE_UNUSED)
>  {
>  #ifdef ASM_OUTPUT_TYPE_DIRECTIVE
> @@ -11099,7 +11099,7 @@ mips_set_text_contents_type (FILE *file 
> ATTRIBUTE_UNUSED,
>char *sname;
>rtx symbol;
>  
> -  sprintf (buf, "%lu", num);
> +  sprintf (buf, HOST_WIDE_INT_PRINT_UNSIGNED, num);
>symbol = XEXP (DECL_RTL (current_function_decl), 0);
>fnname = targetm.strip_name_encoding (XSTR (symbol, 0));
>sname = ACONCAT ((prefix, fnname, "_", buf, NULL));


Re: [PATCH v2] MIPS: Add buildtime option to set msa default

"Junxian Zhu"  writes:
> From: Junxian Zhu 
>
> Add a build-time option to decide whether the compiler will be built with 
> `-mmsa` enabled by default.
>
> gcc/ChangeLog:
>   * config.gcc: add -with-{no-}msa build option.
>   * config/mips/mips.h: Likewise.
>   * doc/install.texi: Likewise.

Thanks, pushed to trunk.  I think it's equivalent to adding an extra
mips*-*-* stanza and so suitable for stage 4.

Richard

>
> Signed-off-by: Junxian Zhu 
> ---
>  gcc/config.gcc | 19 +--
>  gcc/config/mips/mips.h |  3 ++-
>  gcc/doc/install.texi   |  8 
>  3 files changed, 27 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config.gcc b/gcc/config.gcc
> index c070e6ecd2e..da3a6d3ba1f 100644
> --- a/gcc/config.gcc
> +++ b/gcc/config.gcc
> @@ -4709,7 +4709,7 @@ case "${target}" in
>   ;;
>  
>   mips*-*-*)
> - supported_defaults="abi arch arch_32 arch_64 float fpu nan 
> fp_32 odd_spreg_32 tune tune_32 tune_64 divide llsc mips-plt synci lxc1-sxc1 
> madd4 compact-branches"
> + supported_defaults="abi arch arch_32 arch_64 float fpu nan 
> fp_32 odd_spreg_32 tune tune_32 tune_64 divide llsc mips-plt synci lxc1-sxc1 
> madd4 compact-branches msa"
>  
>   case ${with_float} in
>   "" | soft | hard)
> @@ -4871,6 +4871,21 @@ case "${target}" in
>   exit 1
>   ;;
>   esac
> +
> + case ${with_msa} in
> + yes)
> + with_msa=msa
> + ;;
> + no)
> + with_msa=no-msa
> + ;;
> + "")
> + ;;
> + *)
> + echo "Unknown msa type used in --with-msa" 1>&2
> + exit 1
> + ;;
> + esac
>   ;;
>  
>   loongarch*-*-*)
> @@ -5815,7 +5830,7 @@ case ${target} in
>  esac
>  
>  t=
> -all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 
> tune_64 schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt 
> synci tls lxc1-sxc1 madd4 isa_spec compact-branches"
> +all_defaults="abi cpu cpu_32 cpu_64 arch arch_32 arch_64 tune tune_32 
> tune_64 schedule float mode fpu nan fp_32 odd_spreg_32 divide llsc mips-plt 
> synci tls lxc1-sxc1 madd4 isa_spec compact-branches msa"
>  for option in $all_defaults
>  do
>   eval "val=\$with_"`echo $option | sed s/-/_/g`
> diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
> index fbb4372864f..13bc193b752 100644
> --- a/gcc/config/mips/mips.h
> +++ b/gcc/config/mips/mips.h
> @@ -916,7 +916,8 @@ struct mips_cpu_info {
>{"synci", "%{!msynci:%{!mno-synci:-m%(VALUE)}}" }, \
>{"lxc1-sxc1", "%{!mlxc1-sxc1:%{!mno-lxc1-sxc1:-m%(VALUE)}}" }, \
>{"madd4", "%{!mmadd4:%{!mno-madd4:-m%(VALUE)}}" }, \
> -  {"compact-branches", "%{!mcompact-branches=*:-mcompact-branches=%(VALUE)}" 
> } \
> +  {"compact-branches", "%{!mcompact-branches=*:-mcompact-branches=%(VALUE)}" 
> }, \
> +  {"msa", "%{!mmsa:%{!mno-msa:-m%(VALUE)}}" } \
>  
>  /* A spec that infers the:
> -mnan=2008 setting from a -mips argument,
> diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
> index 8ef5c1414da..718f48fbaeb 100644
> --- a/gcc/doc/install.texi
> +++ b/gcc/doc/install.texi
> @@ -1653,6 +1653,14 @@ unfused is normally expected).  Disabling these 
> instructions is the
>  only way to ensure compatible code is generated; this will incur
>  a performance penalty.
>  
> +@item --with-msa
> +On MIPS targets, make @option{-mmsa} the default when no
> +@option{-mno-msa} option is passed.
> +
> +@item --without-msa
> +On MIPS targets, make @option{-mno-msa} the default when no
> +@option{-mmsa} option is passed. This is the default.
> +
>  @item --with-mips-plt
>  On MIPS targets, make use of copy relocations and PLTs.
>  These features are extensions to the traditional


[wwwdocs] gcc-13/porting_to.html: Document C++ -fexcess-precision=standard

Hi!

On Fri, Feb 10, 2023 at 10:06:03AM +0100, Gerald Pfeifer wrote:
> Yes, thank you! Two minor suggestions/questions below:
> 
> > --- a/htdocs/gcc-13/changes.html
> > +++ b/htdocs/gcc-13/changes.html
> > +  -fexcess-precision=fast.  The option affects mainly
> 
> Here I'd say "mainly affects".
> 
> > +  IA-32/x86-64 where when defaulting to x87 math and in some cases on
> > +  Motorola 68000 float and double expressions
> > +  are evaluated in long double precision and S/390, 
> > System z,
> > +  IBM z Systems where float expressions are evaluated in
> > +  double precision.
> 
> The "where when" part proved a bit tricky for my brain. :-) 
> 
> I think it is precise, but am wondering whether
> 
>   ...IA-32/x64 using x87 math and in some cases on Motorola 68000, where
>   float and double expressions are evaluated...
> 
> might work? What do you think?

Thanks, committed with those tweaks.

Martin would like to see some note in porting_to.html for it too,
here is my attempt to do so:

diff --git a/htdocs/gcc-13/porting_to.html b/htdocs/gcc-13/porting_to.html
index 9a9a3147..5cbeefb6 100644
--- a/htdocs/gcc-13/porting_to.html
+++ b/htdocs/gcc-13/porting_to.html
@@ -120,5 +120,29 @@ the operand as an lvalue.
}
 
 
+Excess precision changes
+GCC 13 implements in C++ excess precision 
support
+which has been implemented just in the C front-end before.  The new behavior is
+enabled by default in -std=c++NN modes and when
+FLT_EVAL_METHOD is 1 or 2 affects behavior of floating point
+constants and expressions.  E.g. for FLT_EVAL_METHOD equal
+to 2 on ia32:
+
+
+#include 
+void foo (void) { if (1.1f + 3.3f != 1.1L + 3.3L) abort (); }
+void bar (void) { double d = 4.2; if (d == 4.2) abort (); }
+
+
+will not abort with standard excess precision, because constants and 
expressions
+in float or double are evaluated in precision of
+long double and demoted only on casts or assignments, but will
+abort with fast excess precision, where whether something is evaluated in
+precision of long double or not depends on what evaluations are
+done in the i387 floating point stack or are spilled from it.
+
+The -fexcess-precision=fast option can be used to request the
+previous behavior.
+
 
 


Jakub



Re: [PATCH] Avoid creating (const (reg ...)) [PR108603]

On Thu, Mar 2, 2023 at 11:21 AM Richard Sandiford via Gcc-patches
 wrote:
>
> convert_memory_address_addr_space_1 has two modes: one in which it
> tries to create a self-contained RTL expression (which might fail)
> and one in which it can emit new instructions where necessary.
>
> When handling a CONST, the function recurses into the CONST's
> operand and then constifies the result.  But that's only valid if
> the result is still a self-contained expression.  If new instructions
> have been emitted, the expression will refer to the (non-constant)
> results of those instructions.
>
> In the PR, this caused us to emit a nonsensical (const (reg ...))
> REG_EQUAL note.
>
> Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

OK.

Thanks,
Richard.

> Richard
>
>
> gcc/
> PR tree-optimization/108603
> * explow.cc (convert_memory_address_addr_space_1): Only wrap
> the result of a recursive call in a CONST if no instructions
> were emitted.
>
> gcc/testsuite/
> PR tree-optimization/108603
> * gcc.target/aarch64/sve/pr108603.c: New test.
> ---
>  gcc/explow.cc   | 11 ---
>  gcc/testsuite/gcc.target/aarch64/sve/pr108603.c |  8 
>  2 files changed, 16 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr108603.c
>
> diff --git a/gcc/explow.cc b/gcc/explow.cc
> index 83439b32abe..32e9498ee07 100644
> --- a/gcc/explow.cc
> +++ b/gcc/explow.cc
> @@ -349,9 +349,14 @@ convert_memory_address_addr_space_1 (scalar_int_mode 
> to_mode ATTRIBUTE_UNUSED,
>return temp;
>
>  case CONST:
> -  temp = convert_memory_address_addr_space_1 (to_mode, XEXP (x, 0), as,
> - true, no_emit);
> -  return temp ? gen_rtx_CONST (to_mode, temp) : temp;
> +  {
> +   auto *last = no_emit ? nullptr : get_last_insn ();
> +   temp = convert_memory_address_addr_space_1 (to_mode, XEXP (x, 0), as,
> +   true, no_emit);
> +   if (temp && (no_emit || last == get_last_insn ()))
> + return gen_rtx_CONST (to_mode, temp);
> +   return temp;
> +  }
>
>  case PLUS:
>  case MULT:
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr108603.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pr108603.c
> new file mode 100644
> index 000..a2aea9f0b12
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr108603.c
> @@ -0,0 +1,8 @@
> +/* { dg-options "-O2 -mabi=ilp32 -fdata-sections" } */
> +
> +int a[128];
> +long long *p;
> +void f() {
> +  for (long i = 0; i < sizeof(long); i++)
> +p[i] = a[i];
> +}
> --
> 2.25.1
>


Re: [PATCH 0/8] aarch64: testsuite: Fix test failures with --enable-default-pie or --enable-default-ssp

Xi Ruoyao  writes:
> Hi,
>
> This patch series fixes a lot of test failures with --enable-default-pie
> or --enable-default-ssp for AArch64 target.  Only test files are changed
> to disable PIE or SSP to satisfy the expectation of the developer who
> programmed the test.
>
> Bootstrapped and regtested on aarch64-linux-gnu.  Ok for trunk?

OK for the series.  Thanks for doing this!

Richard

> Xi Ruoyao (8):
>   aarch64: testsuite: disable PIE for aapcs64 tests [PR70150]
>   aarch64: testsuite: disable PIE for tests with large code model
> [PR70150]
>   aarch64: testsuite: disable PIE for fuse_adrp_add_1.c [PR70150]
>   aarch64: testsuite: disable stack protector for sve-pcs tests
>   aarch64: testsuite: disable stack protector for pr103147-10 tests
>   aarch64: testsuite: disable stack protector for auto-init-7.c
>   aarch64: testsuite: disable stack protector for pr104005.c
>   aarch64: testsuite: disable stack protector for tests relying on stack
> offset
>
>  gcc/testsuite/g++.target/aarch64/pr103147-10.C | 2 +-
>  gcc/testsuite/gcc.dg/tls/pr78796.c | 2 +-
>  gcc/testsuite/gcc.target/aarch64/aapcs64/aapcs64.exp   | 2 +-
>  gcc/testsuite/gcc.target/aarch64/auto-init-7.c | 2 +-
>  gcc/testsuite/gcc.target/aarch64/fuse_adrp_add_1.c | 2 +-
>  gcc/testsuite/gcc.target/aarch64/pr103147-10.c | 2 +-
>  gcc/testsuite/gcc.target/aarch64/pr104005.c| 2 +-
>  gcc/testsuite/gcc.target/aarch64/pr63304_1.c   | 2 +-
>  gcc/testsuite/gcc.target/aarch64/pr70120-2.c   | 2 +-
>  gcc/testsuite/gcc.target/aarch64/pr78733.c | 2 +-
>  gcc/testsuite/gcc.target/aarch64/pr79041-2.c   | 2 +-
>  gcc/testsuite/gcc.target/aarch64/pr94530.c | 2 +-
>  gcc/testsuite/gcc.target/aarch64/pr94577.c | 2 +-
>  gcc/testsuite/gcc.target/aarch64/reload-valid-spoff.c  | 2 +-
>  gcc/testsuite/gcc.target/aarch64/shrink_wrap_1.c   | 2 +-
>  gcc/testsuite/gcc.target/aarch64/stack-check-cfa-1.c   | 2 +-
>  gcc/testsuite/gcc.target/aarch64/stack-check-cfa-2.c   | 2 +-
>  .../gcc.target/aarch64/sve/pcs/aarch64-sve-pcs.exp | 7 ---
>  gcc/testsuite/gcc.target/aarch64/test_frame_17.c   | 2 +-
>  19 files changed, 22 insertions(+), 21 deletions(-)


Re: [PATCH] vect: Fix voluntarily-masked negative conditionals [PR108430]

On Thu, Mar 2, 2023 at 11:19 AM Richard Sandiford via Gcc-patches
 wrote:
>
> vectorizable_condition checks whether a COND_EXPR condition is used
> elsewhere with a loop mask.  If so, it applies the loop mask to the
> COND_EXPR too, to reduce the number of live masks and to increase the
> chance of combining the AND with the comparison.
>
> There is also code to do this for inverted conditions.  E.g. if
> we have a < b ? c : d and something else is conditional on !(a < b)
> (such as a load in d), we use !(a < b) ? d : c and apply the loop
> mask to !(a < b).
>
> This inversion relied on the function's bitop1/bitop2 mechanism.
> However, that mechanism is skipped if the condition is split out of
> the COND_EXPR as a separate statement.  This meant that we could end
> up using the inverse of the intended condition.
>
> There is a separate way of negating the condition when a mask
> is being applied (which is also used for EXTRACT_LAST reductions).
> This patch uses that instead.
>
> As well as the testcase, this fixes aarch64/sve/vcond_{4,17}_run.c.
>
> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

OK.

> Richard
>
>
> gcc/
> PR tree-optimization/108430
> * tree-vect-stmts.cc (vectorizable_condition): Fix handling
> of inverted condition.
>
> gcc/testsuite/
> PR tree-optimization/108430
> * gcc.target/aarch64/sve/pr108430.c: New test.
> ---
>  .../gcc.target/aarch64/sve/pr108430.c | 21 +++
>  gcc/tree-vect-stmts.cc|  3 +--
>  2 files changed, 22 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr108430.c
>
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr108430.c 
> b/gcc/testsuite/gcc.target/aarch64/sve/pr108430.c
> new file mode 100644
> index 000..e7ce0f6d793
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr108430.c
> @@ -0,0 +1,21 @@
> +/* { dg-do run { target aarch64_sve512_hw } } */
> +/* { dg-options "-O3 -msve-vector-bits=512" } */
> +
> +long d = 1;
> +static int i = 37;
> +static unsigned long a[22];
> +static unsigned short c[22];
> +static unsigned g[80];
> +static unsigned short *h = c;
> +static unsigned long *j = a;
> +int main() {
> +  for (long m = 0; m < 8; ++m)
> +d = 1;
> +  for (unsigned char p = 0; p < 17; p += 2)
> +  {
> +long t = h[p] ? i : j[p];
> +g[p] = t;
> +  }
> +  if (g[0])
> +__builtin_abort ();
> +}
> diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
> index 9e5ffbe252e..77ad8b78506 100644
> --- a/gcc/tree-vect-stmts.cc
> +++ b/gcc/tree-vect-stmts.cc
> @@ -10756,11 +10756,10 @@ vectorizable_condition (vec_info *vinfo,
>   cond.code = orig_code;
>   if (loop_vinfo->scalar_cond_masked_set.contains (cond))
> {
> - bitop1 = orig_code;
> - bitop2 = BIT_NOT_EXPR;
>   masks = &LOOP_VINFO_MASKS (loop_vinfo);
>   cond_code = cond.code;
>   swap_cond_operands = true;
> + must_invert_cmp_result = true;
> }
> }
> }
> --
> 2.25.1
>


Re: [PATCH 1/2] gcov: Fix "do-while" structure in case statement leads to incorrect code coverage [PR93680]





On 2023/3/2 16:41, Richard Biener wrote:

On Thu, Mar 2, 2023 at 3:31 AM Xionghu Luo via Gcc-patches
 wrote:


When splitting an edge with a self loop, the split edge should be placed just next to
the edge_in->src, otherwise it may generate different position latch bbs for
two consecutive self loops.  For details, please refer to:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93680#c4

Regression tested pass on x86_64-linux-gnu and aarch64-linux-gnu, OK for
master?

gcc/ChangeLog:

 PR gcov/93680
 * tree-cfg.cc (split_edge_bb_loc): Return edge_in->src for self loop.

gcc/testsuite/ChangeLog:

 PR gcov/93680
 * gcc.misc-tests/gcov-pr93680.c: New test.

Signed-off-by: Xionghu Luo 
---
  gcc/testsuite/gcc.misc-tests/gcov-pr93680.c | 24 +
  gcc/tree-cfg.cc |  2 +-
  2 files changed, 25 insertions(+), 1 deletion(-)
  create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-pr93680.c

diff --git a/gcc/testsuite/gcc.misc-tests/gcov-pr93680.c 
b/gcc/testsuite/gcc.misc-tests/gcov-pr93680.c
new file mode 100644
index 000..b2bf9e626fc
--- /dev/null
+++ b/gcc/testsuite/gcc.misc-tests/gcov-pr93680.c
@@ -0,0 +1,24 @@
+/* { dg-options "-fprofile-arcs -ftest-coverage" } */
+/* { dg-do run { target native } } */
+
+int f(int s, int n)
+{
+  int p = 0;
+
+  switch (s)
+  {
+case 0: /* count(5) */
+  do { p++; } while (--n); /* count(5) */
+  return p; /* count(1) */
+
+case 1: /* count(5) */
+  do { p++; } while (--n); /* count(5) */
+  return p; /* count(1) */
+  }
+
+  return 0;
+}
+
+int main() { f(0, 5); f(1, 5); return 0; }
+
+/* { dg-final { run-gcov gcov-pr93680.c } } */
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index a9fcc7fd050..6fa1d83d366 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -3009,7 +3009,7 @@ split_edge_bb_loc (edge edge_in)
if (dest_prev)
  {
edge e = find_edge (dest_prev, dest);
-  if (e && !(e->flags & EDGE_COMPLEX))
+  if ((e && !(e->flags & EDGE_COMPLEX)) || edge_in->src == edge_in->dest)


I think this should eventually apply to all backedge edge_in, correct?
  But of course
we cannot easily test for this here.

Still since this affects ordering in the {next,prev}_bb chain only but not CFG
semantics I wonder how it can affect coverage?  Isn't it only by chance that
this block order survives?


For case:

1 int f(int s, int n)
2 {
3  int p = 0;
4  int q = 0;
5
6  switch (s)
7{
8case 0:
9  do { p++; } while (--n);
10  return p;
11
12case 1:
13  do { p++; } while (--n);
14  return p;
15}
16
17  return 0;
18 }
19
20 int main() { f(0, 5); f(1, 5);}


current GCC generates:

 :
...

  :<= first loop
...
goto ; [INV]
  else
goto ; [INV]

   :   <= first latch bb
  goto ; [100.00%]

   :
...
  goto ; [INV]

   :   <= second latch bb

   :<= second loop
...
goto ; [INV]
  else
goto ; [INV]


 and  are created by split_edge->split_edge_bb_loc, 
is located after the loop, but  is located before the loop.

First call of split_edge_bb_loc, the dest_prev is , and find_edge
did find an edge from  to , the returned after_bb is , so
latch  is put after the loop

but second call of split_edge_bb_loc, the dest_prev is , so find_edge
return 0, and the returned after_bb is , then the created latch 
is put before the loop...

Different latch bb position caused different gcno, while gcov has poor
information and not that smart to recognize it:(, is it reasonable to keep
this kind of loops same order?


 small.gcno:  648:  block 2:`small.c':1, 3, 4, 6
 small.gcno:  688:0145:  36:LINES
 small.gcno:  700:  block 3:`small.c':8, 9
 small.gcno:  732:0145:  32:LINES
 small.gcno:  744:  block 5:`small.c':10
-small.gcno:  772:0145:  32:LINES
-small.gcno:  784:  block 6:`small.c':12
-small.gcno:  812:0145:  36:LINES
-small.gcno:  824:  block 7:`small.c':12, 13
+small.gcno:  772:0145:  36:LINES
+small.gcno:  784:  block 6:`small.c':12, 13
+small.gcno:  816:0145:  32:LINES
+small.gcno:  828:  block 8:`small.c':14
 small.gcno:  856:0145:  32:LINES
-small.gcno:  868:  block 8:`small.c':14
-small.gcno:  896:0145:  32:LINES
-small.gcno:  908:  block 9:`small.c':17
+small.gcno:  868:  block 9:`small.c':17





For the case when both edge_in->src has more than one successor and
edge_in->dest has more than one predecessor there isn't any good heuristic
to make printing the blocks in chain order "nice" (well, the backedge
one maybe).

But as said - this order shouldn't have any effect on semantics ...


 return edge_in->src;
  }
return dest_prev;
--
2.27.0



[PATCH] Avoid creating (const (reg ...)) [PR108603]

convert_memory_address_addr_space_1 has two modes: one in which it
tries to create a self-contained RTL expression (which might fail)
and one in which it can emit new instructions where necessary.

When handling a CONST, the function recurses into the CONST's
operand and then constifies the result.  But that's only valid if
the result is still a self-contained expression.  If new instructions
have been emitted, the expression will refer to the (non-constant)
results of those instructions.

In the PR, this caused us to emit a nonsensical (const (reg ...))
REG_EQUAL note.

Tested on aarch64-linux-gnu & x86_64-linux-gnu.  OK to install?

Richard


gcc/
PR tree-optimization/108603
* explow.cc (convert_memory_address_addr_space_1): Only wrap
the result of a recursive call in a CONST if no instructions
were emitted.

gcc/testsuite/
PR tree-optimization/108603
* gcc.target/aarch64/sve/pr108603.c: New test.
---
 gcc/explow.cc   | 11 ---
 gcc/testsuite/gcc.target/aarch64/sve/pr108603.c |  8 
 2 files changed, 16 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr108603.c

diff --git a/gcc/explow.cc b/gcc/explow.cc
index 83439b32abe..32e9498ee07 100644
--- a/gcc/explow.cc
+++ b/gcc/explow.cc
@@ -349,9 +349,14 @@ convert_memory_address_addr_space_1 (scalar_int_mode 
to_mode ATTRIBUTE_UNUSED,
   return temp;
 
 case CONST:
-  temp = convert_memory_address_addr_space_1 (to_mode, XEXP (x, 0), as,
- true, no_emit);
-  return temp ? gen_rtx_CONST (to_mode, temp) : temp;
+  {
+   auto *last = no_emit ? nullptr : get_last_insn ();
+   temp = convert_memory_address_addr_space_1 (to_mode, XEXP (x, 0), as,
+   true, no_emit);
+   if (temp && (no_emit || last == get_last_insn ()))
+ return gen_rtx_CONST (to_mode, temp);
+   return temp;
+  }
 
 case PLUS:
 case MULT:
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr108603.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr108603.c
new file mode 100644
index 000..a2aea9f0b12
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr108603.c
@@ -0,0 +1,8 @@
+/* { dg-options "-O2 -mabi=ilp32 -fdata-sections" } */
+
+int a[128];
+long long *p;
+void f() {
+  for (long i = 0; i < sizeof(long); i++)
+p[i] = a[i];
+}
-- 
2.25.1



Re: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

>> I realise it's a corner case.  But I don't think making GET_MODE_SIZE
>>bigger than the real size is conservatively correct.

I don't understand which corner case will risk wrong code.
Would you mind giving me some examples?

VNx8BI ABI size is the same as machine size.
The only inconsistency is VNx1BI VNx2BI VNx4BI.

These 3 ABI sizes are larger than the machine behavior.
For example, VNx1BI GET_MODE_SIZE is poly (1,1) same as VNx8BI.
I emit vsetvl e8mf8 + vlm for VNx1BI which make GCC

load 1/8 of poly (1,1) bytes from the memory, even though the VNx1BI occupy the 
whole poly (1,1) size.



juzhe.zh...@rivai.ai
 
From: Richard Sandiford
Date: 2023-03-02 17:39
To: juzhe.zhong\@rivai.ai
CC: rguenther; pan2.li; gcc-patches; incarnation.p.lee; Kito.cheng
Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
Thanks for the explanation about the sizes.
 
"juzhe.zh...@rivai.ai"  writes:
> Fortunately, we won't have aggregates, arrays of vbool*_t in the future.
> I think it's not an issue.
 
But isn't it possible to allocate a char/byte array and construct
vbool*_ts at addresses calculated by intrinsics?  E.g. I don't see
anything wrong in principle with doing:
 
#include <arm_sve.h>
 
/* Stores two predicate values into a caller-provided byte buffer:
   p2 at byte offset svcntd() from x, then p1 at the start of x.  */
void f(char *x, svbool_t p1, svbool_t p2) {
/* First store: lands svcntd() bytes past the start of the buffer.  */
*(svbool_t *)(x + svcntd()) = p2;
/* Second store: lands at the start of the buffer.  */
*(svbool_t *)(x) = p1;
}
 
If the mode size for svbool_t was too big, I think RTL DSE would be
within its rights to delete the first store.  (Precision doesn't matter,
at least not currently.)
 
There's no problem if the ABI is defined such that vbool8_t has the same
size as the GET_MODE_SIZE recorded in GCC.  (But of course, it would need
to be consistently so, even when the vector length is known at compile time.)
In that case, the difference between the size stored by the machine and the
size used by the ABI would be padding, and there is no requirement to
preserve padding.  But if the ABI size of vbool8_t matches the machine
behaviour, I think making GCC's size bigger risks wrong code.
 
I realise it's a corner case.  But I don't think making GET_MODE_SIZE
bigger than the real size is conservatively correct.
 
Thanks,
Richard
 
>
>
> juzhe.zh...@rivai.ai
>  
> From: Richard Biener
> Date: 2023-03-02 16:25
> To: juzhe.zhong
> CC: richard.sandiford; pan2.li; gcc-patches; Pan Li; kito.cheng
> Subject: Re: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
> On Thu, 2 Mar 2023, juzhe.zh...@rivai.ai wrote:
>  
>> >> Does the eventual value set by ADJUST_BYTESIZE equal the real number of
>> >> bytes loaded by vlm.v and stored by vstm.v (after the appropriate vsetvl)?
>> >> Or is the GCC size larger in some cases than the number of bytes
>> >> loaded and stored?
>> For VNx1BI,VNx2BI,VNx4BI,VNx8BI, we allocate the larger size of memory or 
>> stack for register spilling
>> according to ADJUST_BYTESIZE. 
>> After appropriate vsetvl, VNx1BI is loaded/stored 1/8 of ADJUST_BYTESIZE 
>> (vsetvl e8mf8).
>> After appropriate vsetvl, VNx2BI is loaded/stored 2/8 of ADJUST_BYTESIZE 
>> (vsetvl e8mf4).
>> After appropriate vsetvl, VNx4BI is loaded/stored 4/8 of ADJUST_BYTESIZE 
>> (vsetvl e8mf2).
>> After appropriate vsetvl, VNx8BI is loaded/stored 8/8 of ADJUST_BYTESIZE 
>> (vsetvl e8m1).
>> 
>> Note: except these 4 machine modes, all other machine modes of RVV, 
>> ADJUST_BYTESIZE
>> are equal to the real number of bytes of load/store instruction that RVV ISA 
>> define.
>> 
>> Well, as I said, it's fine that we allocated larger memory for 
>> VNx1BI,VNx2BI,VNx4BI, 
>> we can emit appropriate vsetvl to guarantee the correctness in RISC-V 
>> backend according 
>> to the machine_mode as long as GCC didn't do the incorrect elimination 
>> in middle-end.
>> 
>> Besides, poly (1,1) is 1/8 of machine vector-length which is already really 
>> a small number,
>> which is the real number bytes loaded/stored for VNx8BI.
>> You can say VNx1BI, VNx2BI, VNx4BI are consuming larger memory than we 
>> actually load/stored by appropriate vsetvl
>> since they are having same ADJUST_BYTESIZE as VNx8BI. However, I think it's 
>> totally fine so far as long as we can
>> guarantee the correctness and I think optimizing such memory storage 
>> consuming is trivial.
>> 
>> >> And does it equal the size of the corresponding LLVM machine type?
>> 
>> Well, for some reason, in case of register spilling, LLVM consume much more 
>> memory than GCC.
>> And they always do whole register load/store (a single vector register 
>> vector-length) for register spilling.
>> That's another story (I am not going to talk too much about this since it's 
>> a quite ugly implementation). 
>> They don't model the types accurately according RVV ISA for register 
>> spilling.
>> 
>> In case of normal load/store like:
>> vbool8_t v2 = *(vbool8_t*)in;  *(vbool8_t*)(out + 100) = v2;
>> This kind of load/store, their load/stores instructions of codegen are 
>> accurate.
>> Even though their instructions are accurate for load/store accessing 
>> behavior, I am not sure whether si

[PATCH] vect: Fix voluntarily-masked negative conditionals [PR108430]

vectorizable_condition checks whether a COND_EXPR condition is used
elsewhere with a loop mask.  If so, it applies the loop mask to the
COND_EXPR too, to reduce the number of live masks and to increase the
chance of combining the AND with the comparison.

There is also code to do this for inverted conditions.  E.g. if
we have a < b ? c : d and something else is conditional on !(a < b)
(such as a load in d), we use !(a < b) ? d : c and apply the loop
mask to !(a < b).

This inversion relied on the function's bitop1/bitop2 mechanism.
However, that mechanism is skipped if the condition is split out of
the COND_EXPR as a separate statement.  This meant that we could end
up using the inverse of the intended condition.

There is a separate way of negating the condition when a mask
is being applied (which is also used for EXTRACT_LAST reductions).
This patch uses that instead.

As well as the testcase, this fixes aarch64/sve/vcond_{4,17}_run.c.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


gcc/
PR tree-optimization/108430
* tree-vect-stmts.cc (vectorizable_condition): Fix handling
of inverted condition.

gcc/testsuite/
PR tree-optimization/108430
* gcc.target/aarch64/sve/pr108430.c: New test.
---
 .../gcc.target/aarch64/sve/pr108430.c | 21 +++
 gcc/tree-vect-stmts.cc|  3 +--
 2 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr108430.c

diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr108430.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr108430.c
new file mode 100644
index 000..e7ce0f6d793
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr108430.c
@@ -0,0 +1,21 @@
+/* { dg-do run { target aarch64_sve512_hw } } */
+/* { dg-options "-O3 -msve-vector-bits=512" } */
+
+long d = 1;
+static int i = 37;
+static unsigned long a[22];
+static unsigned short c[22];
+static unsigned g[80];
+static unsigned short *h = c;
+static unsigned long *j = a;
+int main() {
+  for (long m = 0; m < 8; ++m)
+d = 1;
+  for (unsigned char p = 0; p < 17; p += 2)
+  {
+long t = h[p] ? i : j[p];
+g[p] = t;
+  }
+  if (g[0])
+__builtin_abort ();
+}
diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 9e5ffbe252e..77ad8b78506 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -10756,11 +10756,10 @@ vectorizable_condition (vec_info *vinfo,
  cond.code = orig_code;
  if (loop_vinfo->scalar_cond_masked_set.contains (cond))
{
- bitop1 = orig_code;
- bitop2 = BIT_NOT_EXPR;
  masks = &LOOP_VINFO_MASKS (loop_vinfo);
  cond_code = cond.code;
  swap_cond_operands = true;
+ must_invert_cmp_result = true;
}
}
}
-- 
2.25.1



Re: [PATCH] simplify-rtx: Fix VOIDmode operand handling in simplify_subreg [PR108805]

"Andre Vieira (lists)"  writes:
> Hey both,
>
> Sorry about that, don't know how I missed those. Just running a test on 
> that now and will commit when it's done. I assume the comment and 0 -> 
> byte change can be seen as obvious, especially since it was supposed to 
> be in my original patch...

Thanks.  And yeah, agree it counts as obvious.

Richard

> On 27/02/2023 15:46, Richard Sandiford wrote:
>> Uros Bizjak  writes:
>>> On Fri, Feb 17, 2023 at 8:38 AM Richard Biener  wrote:

 On Thu, 16 Feb 2023, Uros Bizjak wrote:

> simplify_subreg can return VOIDmode const_int operand and will
> cause ICE in simplify_gen_subreg when this operand is passed to it.
>
> The patch prevents VOIDmode temporary from entering simplify_gen_subreg.
> We can't process const_int operand any further, since outermode
> is not an integer mode here.

 But if it's a CONST_INT then we know it's of int_outermode, no? That is,
 doesn't simplify_subreg (mode, ...) always return something in 'mode'
 and thus we can always pass just 'mode' as third argument to the
 following simplify_gen_subreg call?
>>>
>>> You are right. I am testing the attached patch that works too.
>> 
>> Thanks for this, it's the correct fix.  But as noted in
>> https://gcc.gnu.org/pipermail/gcc-patches/2023-January/610920.html,
>> the final 0 is also wrong for big-endian.  Andre?
>> 
>> Richard
>> 
>>>
>>> Uros.
>>>
>>> diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
>>> index 0a1dd88b0a8..3955929bb70 100644
>>> --- a/gcc/simplify-rtx.cc
>>> +++ b/gcc/simplify-rtx.cc
>>> @@ -7665,7 +7665,7 @@ simplify_context::simplify_subreg (machine_mode 
>>> outermode, rtx op,
>>>   {
>>> rtx tem = simplify_subreg (int_outermode, op, innermode, byte);
>>> if (tem)
>>> -   return simplify_gen_subreg (outermode, tem, GET_MODE (tem), 0);
>>> +   return simplify_gen_subreg (outermode, tem, int_outermode, 0);
>>>   }
>>>   
>>> /* If OP is a vector comparison and the subreg is not changing the


Re: [PATCH] libiberty: fix memory leak in pex-win32.c and refactor

On Thu, Mar 2, 2023 at 10:21 AM Costas Argyris  wrote:
>
> I forgot to mention that:
>
> 1) The CreateProcess documentation
>
> https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createprocessa
>
> doesn't mention anything about taking ownership of this or any other buffer 
> passed to it.

Thanks - thus the patch is OK.

Thanks,
Richard.

> 2) The cmdline buffer gets created by the argv_to_cmdline function
>
> https://github.com/gcc-mirror/gcc/blob/master/libiberty/pex-win32.c#L339
>
> which has this comment right above it:
>
> /* Return a Windows command-line from ARGV.  It is the caller's
>responsibility to free the string returned.  */
>
> Thanks,
> Costas
>
> On Thu, 2 Mar 2023 at 07:32, Richard Biener  
> wrote:
>>
>> On Wed, Mar 1, 2023 at 7:14 PM Costas Argyris via Gcc-patches
>>  wrote:
>> >
>> > Hi
>> >
>> > It seems that the win32_spawn function in libiberty/pex-win32.c is leaking
>> > the cmdline buffer in 2/3 exit scenarios (it is only free'd in 1/3).  The
>> > problem here is that the cleanup code is written 3 times, one at each exit
>> > scenario.
>> >
>> > The proposed attached refactoring has the cleanup code appearing just once
>> > and is executed for all exit scenarios, reducing the likelihood of such
>> > leaks in the future.
>>
>> One could imagine that CreateProcess in case of success takes ownership of
>> the buffer pointed to by cmdline?  If you can confirm it is not then the 
>> patch
>> looks OK to me.
>>
>> Thanks,
>> Richard.
>>
>> > Thanks,
>> > Costas


Re: [PATCH] simplify-rtx: Fix VOIDmode operand handling in simplify_subreg [PR108805]


Hey both,

Sorry about that, don't know how I missed those. Just running a test on 
that now and will commit when it's done. I assume the comment and 0 -> 
byte change can be seen as obvious, especially since it was supposed to 
be in my original patch...


On 27/02/2023 15:46, Richard Sandiford wrote:

Uros Bizjak  writes:

On Fri, Feb 17, 2023 at 8:38 AM Richard Biener  wrote:


On Thu, 16 Feb 2023, Uros Bizjak wrote:


simplify_subreg can return VOIDmode const_int operand and will
cause ICE in simplify_gen_subreg when this operand is passed to it.

The patch prevents VOIDmode temporary from entering simplify_gen_subreg.
We can't process const_int operand any further, since outermode
is not an integer mode here.


But if it's a CONST_INT then we know it's of int_outermode, no? That is,
doesn't simplify_subreg (mode, ...) always return something in 'mode'
and thus we can always pass just 'mode' as third argument to the
following simplify_gen_subreg call?


You are right. I am testing the attached patch that works too.


Thanks for this, it's the correct fix.  But as noted in
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/610920.html,
the final 0 is also wrong for big-endian.  Andre?

Richard



Uros.

diff --git a/gcc/simplify-rtx.cc b/gcc/simplify-rtx.cc
index 0a1dd88b0a8..3955929bb70 100644
--- a/gcc/simplify-rtx.cc
+++ b/gcc/simplify-rtx.cc
@@ -7665,7 +7665,7 @@ simplify_context::simplify_subreg (machine_mode 
outermode, rtx op,
  {
rtx tem = simplify_subreg (int_outermode, op, innermode, byte);
if (tem)
-   return simplify_gen_subreg (outermode, tem, GET_MODE (tem), 0);
+   return simplify_gen_subreg (outermode, tem, int_outermode, 0);
  }
  
/* If OP is a vector comparison and the subreg is not changing the


Re: [PATCH 2/2] gcov: Fix incorrect gimple line LOCATION [PR97923]

On Thu, 2 Mar 2023, Xionghu Luo wrote:

> 
> 
> On 2023/3/2 16:16, Richard Biener wrote:
> > On Thu, Mar 2, 2023 at 3:31 AM Xionghu Luo via Gcc-patches
> >  wrote:
> >>
> >> For a case like test.c below:
> >>
> >> 1:int foo(char c)
> >> 2:{
> >> 3:  return ((c >= 'A' && c <= 'Z')
> >> 4:   || (c >= 'a' && c <= 'z')
> >> 5:   || (c >= '0' && c <='0'));}
> >>
> >> the generated line number is incorrect for condition c>='A' of block 2:
> >> Thus correct the condition op0 location.
> >>
> >> gcno diff before and with this patch:
> >>
> >> test.gcno:  575:  block 11: 1:0001(tree)
> >> test.gcno:  583:0145:  35:LINES
> >> -test.gcno:  595:  block 2:`test.c':1, 5
> >> +test.gcno:  595:  block 2:`test.c':1, 3
> >> test.gcno:  626:0145:  31:LINES
> >> test.gcno:  638:  block 3:`test.c':3
> >> test.gcno:  665:0145:  31:LINES
> >> test.gcno:  677:  block 4:`test.c':4
> >> test.gcno:  704:0145:  31:LINES
> >> test.gcno:  716:  block 5:`test.c':4
> >> test.gcno:  743:0145:  31:LINES
> >> test.gcno:  755:  block 6:`test.c':5
> >>
> >> Also save line id in line vector for gcov debug use.
> >>
> >> Regression tested pass on x86_64-linux-gnu and aarch64-linux-gnu, OK for
> >> master?
> >>
> >> gcc/ChangeLog:
> >>
> >>  PR gcov/97923
> >>  * gcov.cc (line_info::line_info): Init id.
> >>  (solve_flow_graph): Fix typo.
> >>  (add_line_counts): Set line->id.
> >>  * gimplify.cc (shortcut_cond_r): Correct cond expr op0 location.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>  PR gcov/97923
> >>  * gcc.misc-tests/gcov-pr97923.c: New test.
> >>
> >> Signed-off-by: Xionghu Luo 
> >> ---
> >>   gcc/gcov.cc |  9 ++---
> >>   gcc/gimplify.cc |  6 --
> >>   gcc/testsuite/gcc.misc-tests/gcov-pr97923.c | 13 +
> >>   3 files changed, 23 insertions(+), 5 deletions(-)
> >>   create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-pr97923.c
> >>
> >> diff --git a/gcc/gcov.cc b/gcc/gcov.cc
> >> index 2ec7248cc0e..77ca94c71c4 100644
> >> --- a/gcc/gcov.cc
> >> +++ b/gcc/gcov.cc
> >> @@ -205,6 +205,8 @@ public:
> >> /* Execution count.  */
> >> gcov_type count;
> >>
> >> +  unsigned id;
> >> +
> >> /* Branches from blocks that end on this line.  */
> >> vector<arc_info *> branches;
> >>
> >> @@ -216,8 +218,8 @@ public:
> >> unsigned has_unexecuted_block : 1;
> >>   };
> >>
> >> -line_info::line_info (): count (0), branches (), blocks (), exists
> >> (false),
> >> -  unexceptional (0), has_unexecuted_block (0)
> >> +line_info::line_info (): count (0), id (0), branches (), blocks (),
> >> +  exists (false), unexceptional (0), has_unexecuted_block (0)
> >>   {
> >>   }
> >>
> >> @@ -2370,7 +2372,7 @@ solve_flow_graph (function_info *fn)
> >>
> >> /* If the graph has been correctly solved, every block will have a
> >>valid count.  */
> >> -  for (unsigned i = 0; ix < fn->blocks.size (); i++)
> >> +  for (unsigned i = 0; i < fn->blocks.size (); i++)
> >>   if (!fn->blocks[i].count_valid)
> >> {
> >>  fnotice (stderr, "%s:graph is unsolvable for '%s'\n",
> >> @@ -2730,6 +2732,7 @@ add_line_counts (coverage_info *coverage,
> >> function_info *fn)
> >>  }
> >>line->count += block->count;
> >>  }
> >> + line->id = ln;
> >>  }
> >>
> >>has_any_line = true;
> >> diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
> >> index ade6e335da7..341a27b033e 100644
> >> --- a/gcc/gimplify.cc
> >> +++ b/gcc/gimplify.cc
> >> @@ -3915,7 +3915,8 @@ shortcut_cond_r (tree pred, tree *true_label_p, tree
> >> *false_label_p,
> >>  false_label_p = &local_label;
> >>
> >> /* Keep the original source location on the first 'if'.  */
> >> -  t = shortcut_cond_r (TREE_OPERAND (pred, 0), NULL, false_label_p,
> >> locus);
> >> +  tree op0 = TREE_OPERAND (pred, 0);
> >> +  t = shortcut_cond_r (op0, NULL, false_label_p, EXPR_LOCATION (op0));
> >> append_to_statement_list (t, &expr);
> > 
> > The comment now no longer is true?  For the else arm we use
> > rexpr_location, why not
> > here as well?  To quote the following lines:
> > 
> >/* Set the source location of the && on the second 'if'.  */
> >new_locus = rexpr_location (pred, locus);
> >t = shortcut_cond_r (TREE_OPERAND (pred, 1), true_label_p,
> >false_label_p,
> > new_locus);
> >append_to_statement_list (t, &expr);
> 
> Thanks, should use rexpr_location with each operand like below.
> 
> 
> > 
> > with your change the location of the outer COND_EXPR is lost?  Can we
> > guarantee
> > that it's used for the first operand of a if (a && b && c)?  It would
> > be nice to expand
> > the leading comment for such a three 

Ping: [PATCH V2] extract DF/SF/SI/HI/QI subreg from parameter word on stack

Hi,

Gentle ping:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609396.html

Thanks for comments and suggestions!

I'm thinking that we may use these patches to fix some of the issues
on parm and returns.

Sorry for the late ping for this patch to ask if this is acceptable.


BR,
Jeff (Jiufu)

Jiufu Guo  writes:

> Hi,
>
> This patch fixes an issue with parameter access when the parameter
> is of struct type and passed through integer registers, and a
> floating-point member is accessed, as in the code below:
>
> typedef struct DF {double a[4]; long l; } DF;
> double foo_df (DF arg){return arg.a[3];}
>
> On ppc64le, with trunk gcc, "std 6,-24(1) ; lfd 1,-24(1)" is
> generated.  While instruction "mtvsrd 1, 6" would be enough for
> this case.
>
> This patch updates the behavior when loading floating members of a
> parameter: if that floating member is stored via integer register,
> then loading it as integer mode first, and converting it to floating
> mode.
>
> Compare with previous patch:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608872.html
> The previous version supports conversion from DImode to DF/SF; this
> version also supports conversion from DImode to SI/HI/QI modes.
>
> I also tried to enhance CSE/DSE for this issue.  But because of their
> limitations (e.g. CSE does not like new pseudos, DSE is not good
> across blocks), some cases (such as this patch's) cannot be handled.
>
> Bootstrap and regtest passes on ppc64{,le}.
> Is this ok for trunk?  Thanks for comments!
>
>
> BR,
> Jeff (Jiufu)
>
>
>   PR target/108073
>
> gcc/ChangeLog:
>
>   * expr.cc (extract_subreg_from_loading_word): New function.
>   (expand_expr_real_1): Call extract_subreg_from_loading_word.
>
> gcc/testsuite/ChangeLog:
>
>   * g++.target/powerpc/pr102024.C: Updated.
>   * gcc.target/powerpc/pr108073.c: New test.
>
> ---
>  gcc/expr.cc | 76 +
>  gcc/testsuite/g++.target/powerpc/pr102024.C |  2 +-
>  gcc/testsuite/gcc.target/powerpc/pr108073.c | 30 
>  3 files changed, 107 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr108073.c
>
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index d9407432ea5..6de4a985c8b 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -10631,6 +10631,69 @@ stmt_is_replaceable_p (gimple *stmt)
>return false;
>  }
>  
> +/* Return the content of the memory slot SOURCE as MODE.
> +   SOURCE is based on BASE. BASE is a memory block that is stored via words.
> +
> +   To get the content from SOURCE:
> +   first load the word from the memory which covers the SOURCE slot first;
> +   next return the word's subreg which offsets to SOURCE slot;
> +   then convert to MODE as necessary.  */
> +
> +static rtx
> +extract_subreg_from_loading_word (machine_mode mode, rtx source, rtx base)
> +{
> +  rtx src_base = XEXP (source, 0);
> +  poly_uint64 offset = MEM_OFFSET (source);
> +
> +  if (GET_CODE (src_base) == PLUS && CONSTANT_P (XEXP (src_base, 1)))
> +{
> +  offset += INTVAL (XEXP (src_base, 1));
> +  src_base = XEXP (src_base, 0);
> +}
> +
> +  if (!rtx_equal_p (XEXP (base, 0), src_base))
> +return NULL_RTX;
> +
> +  /* Subreg(DI,n) -> DF/SF/SI/HI/QI */
> +  poly_uint64 word_size = GET_MODE_SIZE (word_mode);
> +  poly_uint64 mode_size = GET_MODE_SIZE (mode);
> +  poly_uint64 byte_off;
> +  unsigned int start;
> +  machine_mode int_mode;
> +  if (known_ge (word_size, mode_size) && multiple_p (word_size, mode_size)
> +  && int_mode_for_mode (mode).exists (&int_mode)
> +  && can_div_trunc_p (offset, word_size, &start, &byte_off)
> +  && multiple_p (byte_off, mode_size))
> +{
> +  rtx word_mem = copy_rtx (source);
> +  PUT_MODE (word_mem, word_mode);
> +  word_mem = adjust_address (word_mem, word_mode, -byte_off);
> +
> +  rtx word_reg = gen_reg_rtx (word_mode);
> +  emit_move_insn (word_reg, word_mem);
> +
> +  poly_uint64 low_off = subreg_lowpart_offset (int_mode, word_mode);
> +  if (!known_eq (byte_off, low_off))
> + {
> +   poly_uint64 shift_bytes = known_gt (byte_off, low_off)
> +   ? byte_off - low_off
> +   : low_off - byte_off;
> +   word_reg = expand_shift (RSHIFT_EXPR, word_mode, word_reg,
> +shift_bytes * BITS_PER_UNIT, word_reg, 0);
> + }
> +
> +  rtx int_subreg = gen_lowpart (int_mode, word_reg);
> +  if (mode == int_mode)
> + return int_subreg;
> +
> +  rtx int_mode_reg = gen_reg_rtx (int_mode);
> +  emit_move_insn (int_mode_reg, int_subreg);
> +  return gen_lowpart (mode, int_mode_reg);
> +}
> +
> +  return NULL_RTX;
> +}
> +
>  rtx
>  expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
>   enum expand_modifier modifier, rtx *alt_rtl,
> @@ -11812,6 +11875,19 @@ expand_expr_real_1 (tree exp, rtx target, 
> machine_mode tmode,
>   && modifier != 

Re: [PATCH 2/2] gcov: Fix incorrect gimple line LOCATION [PR97923]





On 2023/3/2 16:16, Richard Biener wrote:

On Thu, Mar 2, 2023 at 3:31 AM Xionghu Luo via Gcc-patches
 wrote:


For a case like test.c below:

1:int foo(char c)
2:{
3:  return ((c >= 'A' && c <= 'Z')
4:   || (c >= 'a' && c <= 'z')
5:   || (c >= '0' && c <='0'));}

the generated line number is incorrect for condition c>='A' of block 2:
Thus correct the condition op0 location.

gcno diff before and with this patch:

test.gcno:  575:  block 11: 1:0001(tree)
test.gcno:  583:0145:  35:LINES
-test.gcno:  595:  block 2:`test.c':1, 5
+test.gcno:  595:  block 2:`test.c':1, 3
test.gcno:  626:0145:  31:LINES
test.gcno:  638:  block 3:`test.c':3
test.gcno:  665:0145:  31:LINES
test.gcno:  677:  block 4:`test.c':4
test.gcno:  704:0145:  31:LINES
test.gcno:  716:  block 5:`test.c':4
test.gcno:  743:0145:  31:LINES
test.gcno:  755:  block 6:`test.c':5

Also save line id in line vector for gcov debug use.

Regression tested pass on x86_64-linux-gnu and aarch64-linux-gnu, OK for
master?

gcc/ChangeLog:

 PR gcov/97923
 * gcov.cc (line_info::line_info): Init id.
 (solve_flow_graph): Fix typo.
 (add_line_counts): Set line->id.
 * gimplify.cc (shortcut_cond_r): Correct cond expr op0 location.

gcc/testsuite/ChangeLog:

 PR gcov/97923
 * gcc.misc-tests/gcov-pr97923.c: New test.

Signed-off-by: Xionghu Luo 
---
  gcc/gcov.cc |  9 ++---
  gcc/gimplify.cc |  6 --
  gcc/testsuite/gcc.misc-tests/gcov-pr97923.c | 13 +
  3 files changed, 23 insertions(+), 5 deletions(-)
  create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-pr97923.c

diff --git a/gcc/gcov.cc b/gcc/gcov.cc
index 2ec7248cc0e..77ca94c71c4 100644
--- a/gcc/gcov.cc
+++ b/gcc/gcov.cc
@@ -205,6 +205,8 @@ public:
/* Execution count.  */
gcov_type count;

+  unsigned id;
+
/* Branches from blocks that end on this line.  */
vector<arc_info *> branches;

@@ -216,8 +218,8 @@ public:
unsigned has_unexecuted_block : 1;
  };

-line_info::line_info (): count (0), branches (), blocks (), exists (false),
-  unexceptional (0), has_unexecuted_block (0)
+line_info::line_info (): count (0), id (0), branches (), blocks (),
+  exists (false), unexceptional (0), has_unexecuted_block (0)
  {
  }

@@ -2370,7 +2372,7 @@ solve_flow_graph (function_info *fn)

/* If the graph has been correctly solved, every block will have a
   valid count.  */
-  for (unsigned i = 0; ix < fn->blocks.size (); i++)
+  for (unsigned i = 0; i < fn->blocks.size (); i++)
  if (!fn->blocks[i].count_valid)
{
 fnotice (stderr, "%s:graph is unsolvable for '%s'\n",
@@ -2730,6 +2732,7 @@ add_line_counts (coverage_info *coverage, function_info 
*fn)
 }
   line->count += block->count;
 }
+ line->id = ln;
 }

   has_any_line = true;
diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index ade6e335da7..341a27b033e 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -3915,7 +3915,8 @@ shortcut_cond_r (tree pred, tree *true_label_p, tree 
*false_label_p,
 false_label_p = &local_label;

/* Keep the original source location on the first 'if'.  */
-  t = shortcut_cond_r (TREE_OPERAND (pred, 0), NULL, false_label_p, locus);
+  tree op0 = TREE_OPERAND (pred, 0);
+  t = shortcut_cond_r (op0, NULL, false_label_p, EXPR_LOCATION (op0));
append_to_statement_list (t, &expr);


The comment now no longer is true?  For the else arm we use
rexpr_location, why not
here as well?  To quote the following lines:

   /* Set the source location of the && on the second 'if'.  */
   new_locus = rexpr_location (pred, locus);
   t = shortcut_cond_r (TREE_OPERAND (pred, 1), true_label_p, false_label_p,
new_locus);
   append_to_statement_list (t, &expr);


Thanks, should use rexpr_location with each operand like below.




with your change the location of the outer COND_EXPR is lost?  Can we guarantee
that it's used for the first operand of a if (a && b && c)?  It would
be nice to expand
the leading comment for such a three operand case and explain how it's supposed
to work.


I tested the three operand case, it will iteratively call shortcut_cond_r and
also works as expected.  Seems the outer COND_EXPR is useless if we do the
following conversion?


   if (TREE_CODE (pred) == TRUTH_ANDIF_EXPR)
 {
   location_t new_locus;

   /* Turn if (a && b) into

 if (a); else goto no;
 if (b) goto yes; else goto no;
 (no:) */

   if (false_label_p == NULL)
false_label_p = &local_label;

-  /* Keep the original source location on the first 'if'.  */
-  tree op0 = TREE_OPERAND (pred, 0);
-  t = shortcut_cond_r (op0, NULL, fa

Re: [PATCH v2] RISC-V: Bugfix for rvv bool mode precision adjustment

pan2...@intel.com writes:
> From: Pan Li 
>
>   Fix the bug of the rvv bool mode precision with the adjustment.
>   The bits size of vbool*_t will be adjusted to
>   [1, 2, 4, 8, 16, 32, 64] according to the rvv spec 1.0 isa. The
>   adjusted mode precision of vbool*_t will help underlying pass to
>   make the right decision for both the correctness and optimization.
>
>   Given below sample code:
>   void test_1(int8_t * restrict in, int8_t * restrict out)
>   {
> vbool8_t v2 = *(vbool8_t*)in;
> vbool16_t v5 = *(vbool16_t*)in;
> *(vbool16_t*)(out + 200) = v5;
> *(vbool8_t*)(out + 100) = v2;
>   }
>
>   Before the precision adjustment:
>   addi    a4,a1,100
>   vsetvli a5,zero,e8,m1,ta,ma
>   addi    a1,a1,200
>   vlm.v   v24,0(a0)
>   vsm.v   v24,0(a4)
>   // Need one vsetvli and vlm.v for correctness here.
>   vsm.v   v24,0(a1)
>
>   After the precision adjustment:
>   csrr    t0,vlenb
>   slli    t1,t0,1
>   csrr    a3,vlenb
>   sub     sp,sp,t1
>   slli    a4,a3,1
>   add     a4,a4,sp
>   sub     a3,a4,a3
>   vsetvli a5,zero,e8,m1,ta,ma
>   addi    a2,a1,200
>   vlm.v   v24,0(a0)
>   vsm.v   v24,0(a3)
>   addi    a1,a1,100
>   vsetvli a4,zero,e8,mf2,ta,ma
>   csrr    t0,vlenb
>   vlm.v   v25,0(a3)
>   vsm.v   v25,0(a2)
>   slli    t1,t0,1
>   vsetvli a5,zero,e8,m1,ta,ma
>   vsm.v   v24,0(a1)
>   add     sp,sp,t1
>   jr      ra
>
>   However, there may be some optimization opportunities after
>   the mode precision adjustment. They can be taken care of in
>   the RISC-V backend in subsequent separate PR(s).
>
>   PR 108185
>   PR 108654
>
> gcc/ChangeLog:
>
>   * config/riscv/riscv-modes.def (ADJUST_PRECISION):
>   * config/riscv/riscv.cc (riscv_v_adjust_precision):
>   * config/riscv/riscv.h (riscv_v_adjust_precision):
>   * genmodes.cc (ADJUST_PRECISION):
>   (emit_mode_adjustments):
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/riscv/pr108185-1.c: New test.
>   * gcc.target/riscv/pr108185-2.c: New test.
>   * gcc.target/riscv/pr108185-3.c: New test.
>   * gcc.target/riscv/pr108185-4.c: New test.
>   * gcc.target/riscv/pr108185-5.c: New test.
>   * gcc.target/riscv/pr108185-6.c: New test.
>   * gcc.target/riscv/pr108185-7.c: New test.
>   * gcc.target/riscv/pr108185-8.c: New test.
>
> Signed-off-by: Pan Li 
> ---
>  gcc/config/riscv/riscv-modes.def|  8 +++
>  gcc/config/riscv/riscv.cc   | 12 
>  gcc/config/riscv/riscv.h|  1 +
>  gcc/genmodes.cc | 20 +-
>  gcc/testsuite/gcc.target/riscv/pr108185-1.c | 68 ++
>  gcc/testsuite/gcc.target/riscv/pr108185-2.c | 68 ++
>  gcc/testsuite/gcc.target/riscv/pr108185-3.c | 68 ++
>  gcc/testsuite/gcc.target/riscv/pr108185-4.c | 68 ++
>  gcc/testsuite/gcc.target/riscv/pr108185-5.c | 68 ++
>  gcc/testsuite/gcc.target/riscv/pr108185-6.c | 68 ++
>  gcc/testsuite/gcc.target/riscv/pr108185-7.c | 68 ++
>  gcc/testsuite/gcc.target/riscv/pr108185-8.c | 77 +
>  12 files changed, 592 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-4.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-5.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-6.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-7.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/pr108185-8.c
>
> diff --git a/gcc/config/riscv/riscv-modes.def 
> b/gcc/config/riscv/riscv-modes.def
> index d5305efa8a6..110bddce851 100644
> --- a/gcc/config/riscv/riscv-modes.def
> +++ b/gcc/config/riscv/riscv-modes.def
> @@ -72,6 +72,14 @@ ADJUST_BYTESIZE (VNx16BI, riscv_vector_chunks * 
> riscv_bytes_per_vector_chunk);
>  ADJUST_BYTESIZE (VNx32BI, riscv_vector_chunks * 
> riscv_bytes_per_vector_chunk);
>  ADJUST_BYTESIZE (VNx64BI, riscv_v_adjust_nunits (VNx64BImode, 8));
>  
> +ADJUST_PRECISION (VNx1BI, riscv_v_adjust_precision (VNx1BImode, 1));
> +ADJUST_PRECISION (VNx2BI, riscv_v_adjust_precision (VNx2BImode, 2));
> +ADJUST_PRECISION (VNx4BI, riscv_v_adjust_precision (VNx4BImode, 4));
> +ADJUST_PRECISION (VNx8BI, riscv_v_adjust_precision (VNx8BImode, 8));
> +ADJUST_PRECISION (VNx16BI, riscv_v_adjust_precision (VNx16BImode, 16));
> +ADJUST_PRECISION (VNx32BI, riscv_v_adjust_precision (VNx32BImode, 32));
> +ADJUST_PRECISION (VNx64BI, riscv_v_adjust_precision (VNx64BImode, 64));
> +
>  /*
> | Mode| MIN_VLEN=32 | MIN_VLEN=32 | MIN_VLEN=64 | MIN_VLEN=64 |
> | | LMUL

Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

Thanks for the explanation about the sizes.

"juzhe.zh...@rivai.ai"  writes:
> Fortunately, we won't have aggregates, arrays of vbool*_t in the future.
> I think it's not an issue.

But isn't it possible to allocate a char/byte array and construct
vbool*_ts at addresses calculated by intrinsics?  E.g. I don't see
anything wrong in principle with doing:

#include <arm_sve.h>

void f(char *x, svbool_t p1, svbool_t p2) {
*(svbool_t *)(x + svcntd()) = p2;
*(svbool_t *)(x) = p1;
}

If the mode size for svbool_t was too big, I think RTL DSE would be
within its rights to delete the first store.  (Precision doesn't matter,
at least not currently.)

There's no problem if the ABI is defined such that vbool8_t has the same
size as the GET_MODE_SIZE recorded in GCC.  (But of course, it would need
to be consistently so, even when the vector length is known at compile time.)
In that case, the difference between the size stored by the machine and the
size used by the ABI would be padding, and there is no requirement to
preserve padding.  But if the ABI size of vbool8_t matches the machine
behaviour, I think making GCC's size bigger risks wrong code.

I realise it's a corner case.  But I don't think making GET_MODE_SIZE
bigger than the real size is conservatively correct.

Thanks,
Richard

>
>
> juzhe.zh...@rivai.ai
>  
> From: Richard Biener
> Date: 2023-03-02 16:25
> To: juzhe.zhong
> CC: richard.sandiford; pan2.li; gcc-patches; Pan Li; kito.cheng
> Subject: Re: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
> On Thu, 2 Mar 2023, juzhe.zh...@rivai.ai wrote:
>  
>> >> Does the eventual value set by ADJUST_BYTESIZE equal the real number of
>> >> bytes loaded by vlm.v and stored by vstm.v (after the appropriate vsetvl)?
>> >> Or is the GCC size larger in some cases than the number of bytes
>> >> loaded and stored?
>> For VNx1BI,VNx2BI,VNx4BI,VNx8BI, we allocate the larger size of memory or 
>> stack for register spilling
>> according to ADJUST_BYTESIZE. 
>> After appropriate vsetvl, VNx1BI is loaded/stored 1/8 of ADJUST_BYTESIZE 
>> (vsetvl e8mf8).
>> After appropriate vsetvl, VNx2BI is loaded/stored 2/8 of ADJUST_BYTESIZE 
>> (vsetvl e8mf2).
>> After appropriate vsetvl, VNx4BI is loaded/stored 4/8 of ADJUST_BYTESIZE 
>> (vsetvl e8mf4).
>> After appropriate vsetvl, VNx8BI is loaded/stored 8/8 of ADJUST_BYTESIZE 
>> (vsetvl e8m1).
>> 
>> Note: except these 4 machine modes, all other machine modes of RVV, 
>> ADJUST_BYTESIZE
>> are equal to the real number of bytes of load/store instruction that RVV ISA 
>> define.
>> 
>> Well, as I said, it's fine that we allocated larger memory for 
>> VNx1BI,VNx2BI,VNx4BI, 
>> we can emit appropriate vsetvl to guarantee the correctness in RISC-V 
>> backend according 
>> to the machine_mode as long as GCC didn't do the incorrect elimination 
>> in middle-end.
>> 
>> Besides, poly (1,1) is 1/8 of machine vector-length which is already really 
>> a small number,
>> which is the real number bytes loaded/stored for VNx8BI.
>> You can say VNx1BI, VNx2BI, VNx4BI are consuming larger memory than we 
>> actually load/stored by appropriate vsetvl
>> since they are having same ADJUST_BYTESIZE as VNx8BI. However, I think it's 
>> totally fine so far as long as we can
>> gurantee the correctness and I think optimizing such memory storage 
>> consuming is trivial.
>> 
>> >> And does it equal the size of the corresponding LLVM machine type?
>> 
>> Well, for some reason, in case of register spilling, LLVM consume much more 
>> memory than GCC.
>> And they always do whole register load/store (a single vector register 
>> vector-length) for register spilling.
>> That's another story (I am not going to talk too much about this since it's 
>> a quite ugly implementation). 
>> They don't model the types accurately according RVV ISA for register 
>> spilling.
>> 
>> In case of normal load/store like:
>> vbool8_t v2 = *(vbool8_t*)in;  *(vbool8_t*)(out + 100) = v2;
>> This kind of load/store, their load/stores instructions of codegen are 
>> accurate.
>> Even though their instructions are accurate for load/store accessing 
>> behavior, I am not sure whether size 
>> of their machine type is accurate.
>> 
>> For example, in IR presentation: VNx1BI of GCC is represented as vscale x 1 
>> x i1
>>   VNx2BI of GCC is represented as vscale x 2 x i1
>> in LLVM IR.
>> I am not sure the bytesize of  vscale x 1 x i1 and vscale x 2 x i1.
>> I didn't take a deep a look at it.
>> 
>> I think this question is not that important, no matter whether VNx1BI and 
>> VNx2BI are modeled accurately in case of ADUST_BYTESIZE
>> in GCC or  vscale x 1 x i1 and vscale x 2 x i1 are modeled accurately in 
>> case of  their bytesize,
>> I think as long as we can emit appropriate vsetvl + vlm/vsm, it's totally 
>> fine for RVV  even though in some case, their memory allocation
>> is not accurate in compiler.
>  
> I'm not sure how it works for variable-length types but isn't
> sizeof (vbool8_t) part of the ABI 

Ping: [PATCH V4] Use reg mode to move sub blocks for parameters and returns

Hi,

Ping this patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-January/609394.html

Thanks for any comments and suggestions!


BR,
Jeff (Jiufu)


Jiufu Guo  writes:

> Hi,
>
> When assigning a parameter to a variable, or assigning a variable to
> return value with struct type, "block move" may be used to expand
> the assignment if the parameter/return is passing through registers and
> the parameter/return has BLK mode.
> For this kind of case, when moving the blocks, it would be better to use
> the nature mode of the registers.
> This would raise more opportunities for other optimization passes(cse,
> dse, xprop).
>
> As the example code (like code in PR65421):
>
> typedef struct SA {double a[3];} A;
> A ret_arg_pt (A *a) {return *a;} // on ppc64le, expect only 3 lfd(s)
> A ret_arg (A a) {return a;} // just empty fun body
> void st_arg (A a, A *p) {*p = a;} //only 3 stfd(s)
>
> This patches check the "from" and "to" of an assignment in
> "expand_assignment", if it is about param/ret which may passing via
> register, then use the register nature mode to move sub-blocks for
> the assignment.
>
> This patch may be still useful even if we change the behavior of
> parameter setup or adopt SRA-like code in expander.
>
> Comparing with previous version:
> https://gcc.gnu.org/pipermail/gcc-patches/2022-December/608081.html
> This patch update the code slightly and merged/added test cases.
> And I checked the cases with large struct or non-homogeneous struct
> to confirm it does not degrade the code.
>
> Bootstrap and regtest pass on ppc64{,le} and x86_64.
> Is this ok for trunk?
>
> BR,
> Jeff (Jiufu)
>
>   PR target/65421
>
> gcc/ChangeLog:
>
>   * cfgexpand.cc (expand_used_vars): Update to mark DECL_USEDBY_RETURN_P
>   for returns.
>   * expr.cc (move_sub_blocks): New function.
>   (expand_assignment): Update to call move_sub_blocks for returns or
>   parameters.
>   * function.cc (assign_parm_setup_block): Update to mark
>   DECL_REGS_TO_STACK_P for parameter.
>   * tree-core.h (struct tree_decl_common): Add comment.
>   * tree.h (DECL_USEDBY_RETURN_P): New define.
>   (DECL_REGS_TO_STACK_P): New define.
>
> gcc/testsuite/ChangeLog:
>
>   * gcc.target/powerpc/pr65421-1.c: New test.
>   * gcc.target/powerpc/pr65421.c: New test.
>
> ---
>  gcc/cfgexpand.cc | 14 
>  gcc/expr.cc  | 77 
>  gcc/function.cc  |  3 +
>  gcc/tree-core.h  |  4 +-
>  gcc/tree.h   |  9 +++
>  gcc/testsuite/gcc.target/powerpc/pr65421-1.c |  6 ++
>  gcc/testsuite/gcc.target/powerpc/pr65421.c   | 33 +
>  7 files changed, 145 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421-1.c
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr65421.c
>
> diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
> index dd29c03..09b8ec64cea 100644
> --- a/gcc/cfgexpand.cc
> +++ b/gcc/cfgexpand.cc
> @@ -2158,6 +2158,20 @@ expand_used_vars (bitmap forced_stack_vars)
>  frame_phase = off ? align - off : 0;
>}
>  
> +  /* Collect VARs on returns.  */
> +  if (DECL_RESULT (current_function_decl))
> +{
> +  edge_iterator ei;
> +  edge e;
> +  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
> + if (greturn *ret = safe_dyn_cast <greturn *> (last_stmt (e->src)))
> +   {
> + tree val = gimple_return_retval (ret);
> + if (val && VAR_P (val))
> +   DECL_USEDBY_RETURN_P (val) = 1;
> +   }
> +}
> +
>/* Set TREE_USED on all variables in the local_decls.  */
>FOR_EACH_LOCAL_DECL (cfun, i, var)
>  TREE_USED (var) = 1;
> diff --git a/gcc/expr.cc b/gcc/expr.cc
> index d9407432ea5..afcec6f3c10 100644
> --- a/gcc/expr.cc
> +++ b/gcc/expr.cc
> @@ -5559,6 +5559,51 @@ mem_ref_refers_to_non_mem_p (tree ref)
>return non_mem_decl_p (base);
>  }
>  
> +/* Sub routine of expand_assignment, invoked when assigning from a
> +   parameter or assigning to a return val on struct type which may
> +   be passed through registers.  The mode of register is used to
> +   move the content for the assignment.
> +
> +   This routine generates code for expression FROM which is BLKmode,
> +   and move the generated content to TO_RTX by sub-blocks in SUB_MODE.  */
> +
> +static void
> +move_sub_blocks (rtx to_rtx, tree from, machine_mode sub_mode, bool 
> nontemporal)
> +{
> +  gcc_assert (MEM_P (to_rtx));
> +
> +  HOST_WIDE_INT size = MEM_SIZE (to_rtx).to_constant ();
> +  HOST_WIDE_INT sub_size = GET_MODE_SIZE (sub_mode).to_constant ();
> +  HOST_WIDE_INT len = size / sub_size;
> +
> +  /* It would be not profitable to move through sub-modes, if the size does
> + not meet register mode.  */
> +  if ((size % sub_size) != 0)
> +{
> +  push_temp_slots ();
> +  rtx result = store_expr (from, to_rtx, 0, nontemporal, false);
> +  prese

Re: [PATCH] libiberty: fix memory leak in pex-win32.c and refactor

I forgot to mention that:

1) The CreateProcess documentation

https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createprocessa

doesn't mention anything about taking ownership of this or any other buffer
passed to it.

2) The cmdline buffer gets created by the argv_to_cmdline function

https://github.com/gcc-mirror/gcc/blob/master/libiberty/pex-win32.c#L339

which has this comment right above it:

/* Return a Windows command-line from ARGV.  It is the caller's
   responsibility to free the string returned.  */

Thanks,
Costas

On Thu, 2 Mar 2023 at 07:32, Richard Biener 
wrote:

> On Wed, Mar 1, 2023 at 7:14 PM Costas Argyris via Gcc-patches
>  wrote:
> >
> > Hi
> >
> > It seems that the win32_spawn function in libiberty/pex-win32.c is
> leaking
> > the cmdline buffer in 2/3 exit scenarios (it is only free'd in 1/3).
> The
> > problem here is that the cleanup code is written 3 times, one at each
> exit
> > scenario.
> >
> > The proposed attached refactoring has the cleanup code appearing just
> once
> > and is executed for all exit scenarios, reducing the likelihood of such
> > leaks in the future.
>
> One could imagine that CreateProcess in case of success takes ownership of
> the buffer pointed to by cmdline?  If you can confirm it is not then the
> patch
> looks OK to me.
>
> Thanks,
> Richard.
>
> > Thanks,
> > Costas
>


Re: [PATCH 1/2] gcov: Fix "do-while" structure in case statement leads to incorrect code coverage [PR93680]

On Thu, Mar 2, 2023 at 3:31 AM Xionghu Luo via Gcc-patches
 wrote:
>
> When splitting edge with self loop, the split edge should be placed just next 
> to
> the edge_in->src, otherwise it may generate different position latch bbs for
> two consecutive self loops.  For details, please refer to:
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93680#c4
>
> Regression tested pass on x86_64-linux-gnu and aarch64-linux-gnu, OK for
> master?
>
> gcc/ChangeLog:
>
> PR gcov/93680
> * tree-cfg.cc (split_edge_bb_loc): Return edge_in->src for self loop.
>
> gcc/testsuite/ChangeLog:
>
> PR gcov/93680
> * gcc.misc-tests/gcov-pr93680.c: New test.
>
> Signed-off-by: Xionghu Luo 
> ---
>  gcc/testsuite/gcc.misc-tests/gcov-pr93680.c | 24 +
>  gcc/tree-cfg.cc |  2 +-
>  2 files changed, 25 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/gcc.misc-tests/gcov-pr93680.c
>
> diff --git a/gcc/testsuite/gcc.misc-tests/gcov-pr93680.c 
> b/gcc/testsuite/gcc.misc-tests/gcov-pr93680.c
> new file mode 100644
> index 00000000000..b2bf9e626fc
> --- /dev/null
> +++ b/gcc/testsuite/gcc.misc-tests/gcov-pr93680.c
> @@ -0,0 +1,24 @@
> +/* { dg-options "-fprofile-arcs -ftest-coverage" } */
> +/* { dg-do run { target native } } */
> +
> +int f(int s, int n)
> +{
> +  int p = 0;
> +
> +  switch (s)
> +  {
> +case 0: /* count(5) */
> +  do { p++; } while (--n); /* count(5) */
> +  return p; /* count(1) */
> +
> +case 1: /* count(5) */
> +  do { p++; } while (--n); /* count(5) */
> +  return p; /* count(1) */
> +  }
> +
> +  return 0;
> +}
> +
> +int main() { f(0, 5); f(1, 5); return 0; }
> +
> +/* { dg-final { run-gcov gcov-pr93680.c } } */
> diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
> index a9fcc7fd050..6fa1d83d366 100644
> --- a/gcc/tree-cfg.cc
> +++ b/gcc/tree-cfg.cc
> @@ -3009,7 +3009,7 @@ split_edge_bb_loc (edge edge_in)
>if (dest_prev)
>  {
>edge e = find_edge (dest_prev, dest);
> -  if (e && !(e->flags & EDGE_COMPLEX))
> +  if ((e && !(e->flags & EDGE_COMPLEX)) || edge_in->src == edge_in->dest)

I think this should eventually apply to all backedge edge_in, correct?
 But of course
we cannot easily test for this here.

Still since this affects ordering in the {next,prev}_bb chain only but not CFG
semantics I wonder how it can affect coverage?  Isn't it only by chance that
this block order survives?

For the case when both edge_in->src has more than one successor and
edge_in->dest has more than one predecessor there isn't any good heuristic
to make printing the blocks in chain order "nice" (well, the backedge
one maybe).

But as said - this order shouldn't have any effect on semantics ...

> return edge_in->src;
>  }
>return dest_prev;
> --
> 2.27.0
>


Re: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment

Fortunately, we won't have aggregates, arrays of vbool*_t in the future.
I think it's not an issue.


juzhe.zh...@rivai.ai
 
From: Richard Biener
Date: 2023-03-02 16:25
To: juzhe.zhong
CC: richard.sandiford; pan2.li; gcc-patches; Pan Li; kito.cheng
Subject: Re: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
On Thu, 2 Mar 2023, juzhe.zh...@rivai.ai wrote:
 
> >> Does the eventual value set by ADJUST_BYTESIZE equal the real number of
> >> bytes loaded by vlm.v and stored by vstm.v (after the appropriate vsetvl)?
> >> Or is the GCC size larger in some cases than the number of bytes
> >> loaded and stored?
> For VNx1BI,VNx2BI,VNx4BI,VNx8BI, we allocate the larger size of memory or 
> stack for register spilling
> according to ADJUST_BYTESIZE. 
> After appropriate vsetvl, VNx1BI is loaded/stored 1/8 of ADJUST_BYTESIZE 
> (vsetvl e8mf8).
> After appropriate vsetvl, VNx2BI is loaded/stored 2/8 of ADJUST_BYTESIZE 
> (vsetvl e8mf4).
> After appropriate vsetvl, VNx4BI is loaded/stored 4/8 of ADJUST_BYTESIZE 
> (vsetvl e8mf2).
> After appropriate vsetvl, VNx8BI is loaded/stored 8/8 of ADJUST_BYTESIZE 
> (vsetvl e8m1).
> 
> Note: except these 4 machine modes, all other machine modes of RVV, 
> ADJUST_BYTESIZE
> are equal to the real number of bytes of load/store instruction that RVV ISA 
> define.
> 
> Well, as I said, it's fine that we allocated larger memory for 
> VNx1BI,VNx2BI,VNx4BI, 
> we can emit appropriate vsetvl to guarantee the correctness in RISC-V backend 
> according 
> to the machine_mode as long as GCC didn't do the incorrect elimination 
> in middle-end.
> 
> Besides, poly (1,1) is 1/8 of machine vector-length which is already really a 
> small number,
> which is the real number bytes loaded/stored for VNx8BI.
> You can say VNx1BI, VNx2BI, VNx4BI are consuming larger memory than we 
> actually load/stored by appropriate vsetvl
> since they are having same ADJUST_BYTESIZE as VNx8BI. However, I think it's 
> totally fine so far as long as we can
> gurantee the correctness and I think optimizing such memory storage consuming 
> is trivial.
> 
> >> And does it equal the size of the corresponding LLVM machine type?
> 
> Well, for some reason, in case of register spilling, LLVM consume much more 
> memory than GCC.
> And they always do whole register load/store (a single vector register 
> vector-length) for register spilling.
> That's another story (I am not going to talk too much about this since it's a 
> quite ugly implementation). 
> They don't model the types accurately according RVV ISA for register spilling.
> 
> In case of normal load/store like:
> vbool8_t v2 = *(vbool8_t*)in;  *(vbool8_t*)(out + 100) = v2;
> This kind of load/store, their load/stores instructions of codegen are 
> accurate.
> Even though their instructions are accurate for load/store accessing 
> behavior, I am not sure whether size 
> of their machine type is accurate.
> 
> For example, in IR presentation: VNx1BI of GCC is represented as vscale x 1 x 
> i1
>   VNx2BI of GCC is represented as vscale x 2 x i1
> in LLVM IR.
> I am not sure the bytesize of  vscale x 1 x i1 and vscale x 2 x i1.
> I didn't take a deep a look at it.
> 
> I think this question is not that important, no matter whether VNx1BI and 
> VNx2BI are modeled accurately in case of ADJUST_BYTESIZE
> in GCC or  vscale x 1 x i1 and vscale x 2 x i1 are modeled accurately in case 
> of  their bytesize,
> I think as long as we can emit appropriate vsetvl + vlm/vsm, it's totally 
> fine for RVV  even though in some case, their memory allocation
> is not accurate in compiler.
 
I'm not sure how it works for variable-length types but isn't
sizeof (vbool8_t) part of the ABI and thus its TYPE_SIZE / GET_MODE_SIZE
are relevant there?  It might of course be that you can never have
these types as part of aggregates, arrays or objects of them address-taken
in which case the issue is moot?
 
Richard.
 
> 
> juzhe.zh...@rivai.ai
>  
> From: Richard Sandiford
> Date: 2023-03-02 00:14
> To: Li\, Pan2
> CC: juzhe.zhong\@rivai.ai; rguenther; gcc-patches; Pan Li; kito.cheng
> Subject: Re: [PATCH] RISC-V: Bugfix for rvv bool mode precision adjustment
> "Li, Pan2"  writes:
> > Thanks all for so much valuable and helpful materials.
> >
> > As I understand (Please help to correct me if any mistake.), for the VNx*BI 
> > (aka, 1, 2, 4, 8, 16, 32, 64),
> > the precision and mode size need to be adjusted as below.
> >
> > Precision size [1, 2, 4, 8, 16, 32, 64]
> > Mode size [1, 1, 1, 1, 2, 4, 8]
> >
> > Given that, if we ignore the self-test failure, only the adjust_precision 
> > part is able to fix the bug I mentioned.
> > The genmode will first get the precision, and then leverage the mode_size = 
> > exact_div / 8 to generate.
> > Meanwhile, it also provides the adjust_mode_size after the mode_size 
> > generation.
> >
> > The riscv parts has the mode_size_adjust already and the value of mode_size 
> > will be overridden by the adjustments.
>  
> Ah, OK! 

[PATCH v4 9/9] riscv: thead: Add support for the XTheadMemPair ISA extension

From: Christoph Müllner 

The XTheadMemPair ISA extension allows to pair two loads or stores:
* th.ldd (2x LD)
* th.lwd (2x LW)
* th.lwud (2x LWU)
* th.sdd (2x SD)
* th.swd (2x SW)

The displacement of these instructions is quite limited:
* Displacement := imm2 << shamt
* imm2 is a 2-bit unsigned value {0..3}
* shamt is 4 for th.ldd/th.sdd and 3 otherwise
But even with this small displacement we can identify many candidates.

The merge of the two loads/stores is realized in form of peephole2
passes that support instruction reordering.
The CFA expansion (save/restore registers on/from stack) is not
processed by the peephole2 pass and, therefore, needs special-treatment.
Many ideas of this patch are inspired by similar/equal approaches
in other backends.

gcc/ChangeLog:

* config.gcc: Add thead.o to RISC-V extra_objs.
* config/riscv/peephole.md: Add mempair peephole passes.
* config/riscv/riscv-protos.h (riscv_split_64bit_move_p): New
prototype.
(th_mempair_operands_p): Likewise.
(th_mempair_order_operands): Likewise.
(th_mempair_prepare_save_restore_operands): Likewise.
(th_mempair_save_restore_regs): Likewise.
(th_mempair_output_move): Likewise.
* config/riscv/riscv.cc (riscv_save_reg): Move code.
(riscv_restore_reg): Move code.
(riscv_for_each_saved_reg): Add code to emit mempair insns.
* config/riscv/t-riscv: Add thead.cc.
* config/riscv/thead.md (*th_mempair_load_2):
New insn.
(*th_mempair_store_2): Likewise.
(*th_mempair_load_extendsidi2): Likewise.
(*th_mempair_load_zero_extendsidi2): Likewise.
* config/riscv/thead.cc: New file.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/xtheadmempair-1.c: New test.
* gcc.target/riscv/xtheadmempair-2.c: New test.
* gcc.target/riscv/xtheadmempair-3.c: New test.

Changes in v4:
- Unify function prefix ("th_")
- Move th_* functions to thead.cc
- Minimize code in CFA code in riscv.cc

Changes in v3:
- Don't emit instructions during peephole2, but emit parallel INSNs
- Add proper checks for the INSN patterns to avoid ICEs or illegal
  instructions reported by the assembler
- Don't insert any `add` instructions
- Rework the constraint handling
- Simplify the output function
- Restructure and simplify CFA processing
- Add debug notes to CFA instructions
- Emit parallel INSNs in the CFA code (same as peephole2)
- Drop tests that target reordering
- Drop tests that are irrelevant (e.g. unrolled loops)
- Add tests for all possible displacements and all instructions
- Add tests for CFA

Signed-off-by: Christoph Müllner 
---
 gcc/config.gcc|   1 +
 gcc/config/riscv/peephole.md  |  56 +++
 gcc/config/riscv/riscv-protos.h   |  14 +
 gcc/config/riscv/riscv.cc |  88 ++--
 gcc/config/riscv/t-riscv  |   4 +
 gcc/config/riscv/thead.cc | 427 ++
 gcc/config/riscv/thead.md |  52 +++
 .../gcc.target/riscv/xtheadmempair-1.c|  98 
 .../gcc.target/riscv/xtheadmempair-2.c|  84 
 .../gcc.target/riscv/xtheadmempair-3.c|  29 ++
 10 files changed, 824 insertions(+), 29 deletions(-)
 create mode 100644 gcc/config/riscv/thead.cc
 create mode 100644 gcc/testsuite/gcc.target/riscv/xtheadmempair-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/xtheadmempair-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/xtheadmempair-3.c

diff --git a/gcc/config.gcc b/gcc/config.gcc
index c070e6ecd2e..ad0b6d3302a 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -531,6 +531,7 @@ riscv*)
cpu_type=riscv
extra_objs="riscv-builtins.o riscv-c.o riscv-sr.o 
riscv-shorten-memrefs.o riscv-selftests.o riscv-v.o riscv-vsetvl.o"
extra_objs="${extra_objs} riscv-vector-builtins.o 
riscv-vector-builtins-shapes.o riscv-vector-builtins-bases.o"
+   extra_objs="${extra_objs} thead.o"
d_target_objs="riscv-d.o"
extra_headers="riscv_vector.h"
target_gtfiles="$target_gtfiles 
\$(srcdir)/config/riscv/riscv-vector-builtins.cc"
diff --git a/gcc/config/riscv/peephole.md b/gcc/config/riscv/peephole.md
index 0ef0c04410b..67e7046d7e6 100644
--- a/gcc/config/riscv/peephole.md
+++ b/gcc/config/riscv/peephole.md
@@ -38,3 +38,59 @@ (define_peephole2
 {
   operands[5] = GEN_INT (INTVAL (operands[2]) - INTVAL (operands[5]));
 })
+
+;; XTheadMemPair: merge two SI or DI loads
+(define_peephole2
+  [(set (match_operand:GPR 0 "register_operand" "")
+   (match_operand:GPR 1 "memory_operand" ""))
+   (set (match_operand:GPR 2 "register_operand" "")
+   (match_operand:GPR 3 "memory_operand" ""))]
+  "TARGET_XTHEADMEMPAIR
+  && th_mempair_operands_p (operands, true, mode)"
+  [(parallel [(set (match_dup 0) (match_dup 1))
+ (set (match_dup 2) (match_dup 3))])]
+{
+  th_mempair_order_operands (operands, true, mod

[PATCH v4 7/9] riscv: thead: Add support for the XTheadMac ISA extension

From: Christoph Müllner 

The XTheadMac ISA extension provides multiply-accumulate/subtract
instructions:
* mula/mulaw/mulah
* muls/mulsw/mulsh

To benefit from middle-end passes, we expand the following named
patterns in riscv.md (as they are not T-Head-specific):
* maddhisi4
* msubhisi4

gcc/ChangeLog:

* config/riscv/riscv.md (maddhisi4): New expand.
(msubhisi4): New expand.
* config/riscv/thead.md (*th_mula): New pattern.
(*th_mulawsi): New pattern.
(*th_mulawsi2): New pattern.
(*th_maddhisi4): New pattern.
(*th_sextw_maddhisi4): New pattern.
(*th_muls): New pattern.
(*th_mulswsi): New pattern.
(*th_mulswsi2): New pattern.
(*th_msubhisi4): New pattern.
(*th_sextw_msubhisi4): New pattern.

gcc/testsuite/ChangeLog:

* gcc.target/riscv/xtheadmac-mula-muls.c: New test.

Co-Developed-by: Xianmiao Qu 
Signed-off-by: Xianmiao Qu 
Signed-off-by: Christoph Müllner 

Changed in v2:
- Add missing prefix in one INSN
---
 gcc/config/riscv/riscv.md |  18 +++
 gcc/config/riscv/thead.md | 121 ++
 .../gcc.target/riscv/xtheadmac-mula-muls.c|  43 +++
 3 files changed, 182 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/xtheadmac-mula-muls.c

diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 5562e5621fa..112c93f733e 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -3106,6 +3106,24 @@ (define_expand "extzv"
 FAIL;
 })
 
+(define_expand "maddhisi4"
+  [(set (match_operand:SI 0 "register_operand")
+   (plus:SI
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand"))
+  (sign_extend:SI (match_operand:HI 2 "register_operand")))
+ (match_operand:SI 3 "register_operand")))]
+  "TARGET_XTHEADMAC"
+)
+
+(define_expand "msubhisi4"
+  [(set (match_operand:SI 0 "register_operand")
+   (minus:SI
+ (match_operand:SI 3 "register_operand")
+ (mult:SI (sign_extend:SI (match_operand:HI 1 "register_operand"))
+  (sign_extend:SI (match_operand:HI 2 "register_operand")))))]
+  "TARGET_XTHEADMAC"
+)
+
 (include "bitmanip.md")
 (include "sync.md")
 (include "peephole.md")
diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md
index 88b6a95e993..ce709ca79a4 100644
--- a/gcc/config/riscv/thead.md
+++ b/gcc/config/riscv/thead.md
@@ -138,3 +138,124 @@ (define_insn "*th_cond_gpr_mov"
th.mveqz\t%0,%z3,%1"
   [(set_attr "type" "condmove")
(set_attr "mode" "")])
+
+;; XTheadMac
+
+(define_insn "*th_mula<mode>"
+  [(set (match_operand:X 0 "register_operand" "=r")
+ (plus:X (mult:X (match_operand:X 1 "register_operand" "r")
+ (match_operand:X 2 "register_operand" "r"))
+ (match_operand:X 3 "register_operand" "0")))]
+  "TARGET_XTHEADMAC"
+  "th.mula\\t%0,%1,%2"
+  [(set_attr "type" "imul")
+   (set_attr "mode" "<MODE>")]
+)
+
+(define_insn "*th_mulawsi"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (sign_extend:DI
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 2 "register_operand" "r"))
+  (match_operand:SI 3 "register_operand" "0"))))]
+  "TARGET_XTHEADMAC && TARGET_64BIT"
+  "th.mulaw\\t%0,%1,%2"
+  [(set_attr "type" "imul")
+   (set_attr "mode" "SI")]
+)
+
+(define_insn "*th_mulawsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+   (match_operand:SI 2 "register_operand" "r"))
+  (match_operand:SI 3 "register_operand" "0")))]
+  "TARGET_XTHEADMAC && TARGET_64BIT"
+  "th.mulaw\\t%0,%1,%2"
+  [(set_attr "type" "imul")
+   (set_attr "mode" "SI")]
+)
+
+(define_insn "*th_maddhisi4"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI
+   (mult:SI
+ (sign_extend:SI (match_operand:HI 1 "register_operand" " r"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" " r")))
+   (match_operand:SI 3 "register_operand" " 0")))]
+  "TARGET_XTHEADMAC"
+  "th.mulah\\t%0,%1,%2"
+  [(set_attr "type" "imul")
+   (set_attr "mode" "SI")]
+)
+
+(define_insn "*th_sextw_maddhisi4"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+   (sign_extend:DI
+ (plus:SI
+   (mult:SI
+ (sign_extend:SI (match_operand:HI 1 "register_operand" " r"))
+ (sign_extend:SI (match_operand:HI 2 "register_operand" " r")))
+   (match_operand:SI 3 "register_operand" " 0"))))]
+  "TARGET_XTHEADMAC && TARGET_64BIT"
+  "th.mulah\\t%0,%1,%2"
+  [(set_attr "type" "imul")
+   (set_attr "mode" "SI")]
+)
+
+(define_insn "*th_muls"
+  [(set (match_operand:X 0 "register_operand" "=r")
+ (minus:X (match_operand:X 3 "register_operand" "0")
+  (mult:X (ma

  1   2   >