[PATCH v5 02/12] LoongArch Port: gcc build

2022-01-21 Thread Chenghua Xu
From: chenglulu 

gcc/

* common/config/loongarch/loongarch-common.cc: New file.
* config/loongarch/genopts/genstr.sh: New file.
* config/loongarch/genopts/loongarch-strings: New file.
* config/loongarch/genopts/loongarch.opt.in: New file.
* config/loongarch/loongarch-str.h: New file.
* config/loongarch/gnu-user.h: New file.
* config/loongarch/linux.h: New file.
* config/loongarch/loongarch-cpu.cc: New file.
* config/loongarch/loongarch-cpu.h: New file.
* config/loongarch/loongarch-def.c: New file.
* config/loongarch/loongarch-def.h: New file.
* config/loongarch/loongarch-driver.cc: New file.
* config/loongarch/loongarch-driver.h: New file.
* config/loongarch/loongarch-opts.cc: New file.
* config/loongarch/loongarch-opts.h: New file.
* config/loongarch/loongarch.opt: New file.
* config/loongarch/t-linux: New file.
* config/loongarch/t-loongarch: New file.
* config.gcc: Add LoongArch support.
* configure.ac: Add LoongArch support.
---
 .../config/loongarch/loongarch-common.cc  |  73 +++
 gcc/config.gcc| 410 -
 gcc/config/loongarch/genopts/genstr.sh|  91 +++
 .../loongarch/genopts/loongarch-strings   |  58 ++
 gcc/config/loongarch/genopts/loongarch.opt.in | 189 ++
 gcc/config/loongarch/gnu-user.h   |  84 +++
 gcc/config/loongarch/linux.h  |  50 ++
 gcc/config/loongarch/loongarch-cpu.cc | 206 +++
 gcc/config/loongarch/loongarch-cpu.h  |  30 +
 gcc/config/loongarch/loongarch-def.c  | 164 +
 gcc/config/loongarch/loongarch-def.h  | 151 +
 gcc/config/loongarch/loongarch-driver.cc  | 187 ++
 gcc/config/loongarch/loongarch-driver.h   |  69 +++
 gcc/config/loongarch/loongarch-opts.cc| 580 ++
 gcc/config/loongarch/loongarch-opts.h |  86 +++
 gcc/config/loongarch/loongarch-str.h  |  57 ++
 gcc/config/loongarch/loongarch.opt| 189 ++
 gcc/config/loongarch/t-linux  |  53 ++
 gcc/config/loongarch/t-loongarch  |  59 ++
 gcc/configure.ac  |  33 +-
 20 files changed, 2814 insertions(+), 5 deletions(-)
 create mode 100644 gcc/common/config/loongarch/loongarch-common.cc
 create mode 100755 gcc/config/loongarch/genopts/genstr.sh
 create mode 100644 gcc/config/loongarch/genopts/loongarch-strings
 create mode 100644 gcc/config/loongarch/genopts/loongarch.opt.in
 create mode 100644 gcc/config/loongarch/gnu-user.h
 create mode 100644 gcc/config/loongarch/linux.h
 create mode 100644 gcc/config/loongarch/loongarch-cpu.cc
 create mode 100644 gcc/config/loongarch/loongarch-cpu.h
 create mode 100644 gcc/config/loongarch/loongarch-def.c
 create mode 100644 gcc/config/loongarch/loongarch-def.h
 create mode 100644 gcc/config/loongarch/loongarch-driver.cc
 create mode 100644 gcc/config/loongarch/loongarch-driver.h
 create mode 100644 gcc/config/loongarch/loongarch-opts.cc
 create mode 100644 gcc/config/loongarch/loongarch-opts.h
 create mode 100644 gcc/config/loongarch/loongarch-str.h
 create mode 100644 gcc/config/loongarch/loongarch.opt
 create mode 100644 gcc/config/loongarch/t-linux
 create mode 100644 gcc/config/loongarch/t-loongarch

diff --git a/gcc/common/config/loongarch/loongarch-common.cc 
b/gcc/common/config/loongarch/loongarch-common.cc
new file mode 100644
index 000..1d6f370a013
--- /dev/null
+++ b/gcc/common/config/loongarch/loongarch-common.cc
@@ -0,0 +1,73 @@
+/* Common hooks for LoongArch.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "common/common-target.h"
+#include "common/common-target-def.h"
+#include "opts.h"
+#include "flags.h"
+#include "diagnostic-core.h"
+
+#undef TARGET_OPTION_OPTIMIZATION_TABLE
+#define TARGET_OPTION_OPTIMIZATION_TABLE loongarch_option_optimization_table
+
+/* Set default optimization options.  */
+static const struct default_options loongarch_option_optimization_table[] =
+{
+  { OPT_LEVELS_ALL, OPT_fasynchronous_unwind_tables, NULL, 1 },
+  { OPT_LEVELS_NONE, 0, NULL, 0 }
+};
+
+/* Implement TARGET_HANDLE_OPTION.  */
+
+static bool
+loongarch_

[PATCH v5 10/12] LoongArch Port: libgomp

2022-01-21 Thread Chenghua Xu
From: chenglulu 

libgomp/

* configure.tgt: Add LoongArch triplet.
---
 libgomp/configure.tgt | 4 
 1 file changed, 4 insertions(+)

diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt
index d4f1e741b5a..2cd7272fcd8 100644
--- a/libgomp/configure.tgt
+++ b/libgomp/configure.tgt
@@ -56,6 +56,10 @@ if test x$enable_linux_futex = xyes; then
config_path="linux/ia64 linux posix"
;;
 
+loongarch*-*-linux*)
+   config_path="linux posix"
+   ;;
+
 mips*-*-linux*)
config_path="linux/mips linux posix"
;;
-- 
2.27.0



[PATCH v5 09/12] LoongArch Port: Regenerate libgcc/configure.

2022-01-21 Thread Chenghua Xu
From: chenglulu 

---
 libgcc/configure | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/libgcc/configure b/libgcc/configure
index 4919a56f518..ce04c4f529f 100755
--- a/libgcc/configure
+++ b/libgcc/configure
@@ -2412,6 +2412,9 @@ case "${host}" in
# sets the default TLS model and affects inlining.
PICFLAG=-fPIC
;;
+loongarch*-*-*)
+   PICFLAG=-fpic
+   ;;
 mips-sgi-irix6*)
# PIC is the default.
;;
@@ -5066,7 +5069,7 @@ $as_echo "$libgcc_cv_cfi" >&6; }
 # word size rather than the address size.
 cat > conftest.c <

[PATCH v5 07/12] LoongArch Port: Builtin macros.

2022-01-21 Thread Chenghua Xu
From: chenglulu 

gcc/

* config/loongarch/loongarch-c.cc: New file.
---
 gcc/config/loongarch/loongarch-c.cc | 109 
 1 file changed, 109 insertions(+)
 create mode 100644 gcc/config/loongarch/loongarch-c.cc

diff --git a/gcc/config/loongarch/loongarch-c.cc 
b/gcc/config/loongarch/loongarch-c.cc
new file mode 100644
index 000..7ca0649c78b
--- /dev/null
+++ b/gcc/config/loongarch/loongarch-c.cc
@@ -0,0 +1,109 @@
+/* LoongArch-specific code for C family languages.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+   Contributed by Loongson Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "c-family/c-common.h"
+#include "cpplib.h"
+
+#define preprocessing_asm_p() (cpp_get_options (pfile)->lang == CLK_ASM)
+#define builtin_define(TXT) cpp_define (pfile, TXT)
+#define builtin_assert(TXT) cpp_assert (pfile, TXT)
+
+/* Define preprocessor macros for the -march and -mtune options.
+   PREFIX is either _LOONGARCH_ARCH or _LOONGARCH_TUNE, CPU_TYPE is
+   the index of the selected processor in loongarch_cpu_strings.  If
+   its canonical name is "foo", define PREFIX to be "foo", and define
+   an additional macro PREFIX_FOO.  */
+#define LARCH_CPP_SET_PROCESSOR(PREFIX, CPU_TYPE)  \
+  do   \
+{  \
+  char *macro, *p; \
+  int cpu_type = (CPU_TYPE);   \
+   \
+  macro = concat ((PREFIX), "_",   \
+ loongarch_cpu_strings[cpu_type], NULL);   \
+  for (p = macro; *p != 0; p++)\
+   *p = TOUPPER (*p);  \
+   \
+  builtin_define (macro);  \
+  builtin_define_with_value ((PREFIX), \
+loongarch_cpu_strings[cpu_type], 1);   \
+  free (macro);\
+}  \
+  while (0)
+
+void
+loongarch_cpu_cpp_builtins (cpp_reader *pfile)
+{
+  builtin_assert ("machine=loongarch");
+  builtin_assert ("cpu=loongarch");
+  builtin_define ("__loongarch__");
+
+  LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_ARCH", __ACTUAL_ARCH);
+  LARCH_CPP_SET_PROCESSOR ("_LOONGARCH_TUNE", __ACTUAL_TUNE);
+
+  /* Base architecture / ABI.  */
+  if (TARGET_64BIT)
+{
+  builtin_define ("__loongarch_grlen=64");
+  builtin_define ("__loongarch64");
+}
+
+  if (TARGET_ABI_LP64)
+{
+  builtin_define ("_ABILP64=3");
+  builtin_define ("_LOONGARCH_SIM=_ABILP64");
+  builtin_define ("__loongarch_lp64");
+}
+
+  /* These defines reflect the ABI in use, not whether the
+ FPU is directly accessible.  */
+  if (TARGET_DOUBLE_FLOAT_ABI)
+builtin_define ("__loongarch_double_float=1");
+  else if (TARGET_SINGLE_FLOAT_ABI)
+builtin_define ("__loongarch_single_float=1");
+
+  if (TARGET_DOUBLE_FLOAT_ABI || TARGET_SINGLE_FLOAT_ABI)
+builtin_define ("__loongarch_hard_float=1");
+  else
+builtin_define ("__loongarch_soft_float=1");
+
+
+  /* ISA Extensions.  */
+  if (TARGET_DOUBLE_FLOAT)
+builtin_define ("__loongarch_frlen=64");
+  else if (TARGET_SINGLE_FLOAT)
+builtin_define ("__loongarch_frlen=32");
+  else
+builtin_define ("__loongarch_frlen=0");
+
+  /* Native Data Sizes.  */
+  builtin_define_with_int_value ("_LOONGARCH_SZINT", INT_TYPE_SIZE);
+  builtin_define_with_int_value ("_LOONGARCH_SZLONG", LONG_TYPE_SIZE);
+  builtin_define_with_int_value ("_LOONGARCH_SZPTR", POINTER_SIZE);
+  builtin_define_with_int_value ("_LOONGARCH_FPSET", 32 / MAX_FPRS_PER_FMT);
+  builtin_define_with_int_value ("_LOONGARCH_SPFPSET", 32);
+
+}
-- 
2.27.0



[PATCH v5 03/12] LoongArch Port: Regenerate gcc/configure.

2022-01-21 Thread Chenghua Xu
From: chenglulu 

---
 gcc/configure | 66 ++-
 1 file changed, 60 insertions(+), 6 deletions(-)

diff --git a/gcc/configure b/gcc/configure
index bd4d4721868..3823bc4e783 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -5442,6 +5442,9 @@ case "${target}" in
# sets the default TLS model and affects inlining.
PICFLAG_FOR_TARGET=-fPIC
;;
+loongarch*-*-*)
+   PICFLAG_FOR_TARGET=-fpic
+   ;;
 mips-sgi-irix6*)
# PIC is the default.
;;
@@ -7963,6 +7966,9 @@ else
 mips*-*-*)
   enable_fixed_point=yes
   ;;
+loongarch*-*-*)
+  enable_fixed_point=yes
+  ;;
 *)
   { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: fixed-point is not 
supported for this target, ignored" >&5
 $as_echo "$as_me: WARNING: fixed-point is not supported for this target, 
ignored" >&2;}
@@ -19659,7 +19665,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19662 "configure"
+#line 19668 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -19765,7 +19771,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19768 "configure"
+#line 19774 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -25548,6 +25554,17 @@ foo:   data8   25
movlr24 = @tprel(foo#)'
tls_as_opt=--fatal-warnings
;;
+  loongarch*-*-*)
+conftest_s='
+   .section .tdata,"awT",@progbits
+x: .word 2
+   .text
+   la.tls.gd $a0,x
+   bl __tls_get_addr'
+   tls_first_major=0
+   tls_first_minor=0
+   tls_as_opt='--fatal-warnings'
+   ;;
   microblaze*-*-*)
 conftest_s='
.section .tdata,"awT",@progbits
@@ -28770,6 +28787,43 @@ $as_echo "#define HAVE_AS_MARCH_ZIFENCEI 1" 
>>confdefs.h
 fi
 
 ;;
+  loongarch*-*-*)
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for 
.dtprelword support" >&5
+$as_echo_n "checking assembler for .dtprelword support... " >&6; }
+if ${gcc_cv_as_loongarch_dtprelword+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  gcc_cv_as_loongarch_dtprelword=no
+  if test x$gcc_cv_as != x; then
+$as_echo '' > conftest.s
+if { ac_try='$gcc_cv_as $gcc_cv_as_flags 2,18,0 -o conftest.o conftest.s 
>&5'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+then
+   .section .tdata,"awT",@progbits
+x:
+   .word 2
+   .text
+   .dtprelword x+0x8000
+else
+  echo "configure: failed program was" >&5
+  cat conftest.s >&5
+fi
+rm -f conftest.o conftest.s
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$gcc_cv_as_loongarch_dtprelword" >&5
+$as_echo "$gcc_cv_as_loongarch_dtprelword" >&6; }
+
+if test $gcc_cv_as_loongarch_dtprelword != yes; then
+
+$as_echo "#define HAVE_AS_DTPRELWORD 1" >>confdefs.h
+
+fi
+;;
 s390*-*-*)
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for 
.gnu_attribute support" >&5
 $as_echo_n "checking assembler for .gnu_attribute support... " >&6; }
@@ -28933,11 +28987,11 @@ fi
 ;;
 esac
 
-# Mips and HP-UX need the GNU assembler.
+# Mips, LoongArch and HP-UX need the GNU assembler.
 # Linux on IA64 might be able to use the Intel assembler.
 
 case "$target" in
-  mips*-*-* | *-*-hpux* )
+  mips*-*-* | loongarch*-*-* | *-*-hpux* )
 if test x$gas_flag = xyes \
|| test x"$host" != x"$build" \
|| test ! -x "$gcc_cv_as" \
@@ -29374,8 +29428,8 @@ esac
 # ??? Once 2.11 is released, probably need to add first known working
 # version to the per-target configury.
 case "$cpu_type" in
-  aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \
-  | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \
+  aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | loongarch | 
m32c \
+  | m68k | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | 
sparc \
   | tilegx | tilepro | visium | xstormy16 | xtensa)
 insn="nop"
 ;;
-- 
2.27.0



[PATCH v5 01/12] LoongArch Port: Regenerate configure

2022-01-21 Thread Chenghua Xu
From: chenglulu 

* config/picflag.m4: Default add build option '-fpic' for LoongArch.
* configure: Add LoongArch tuples.
* configure.ac: Likewise.
---
 config/picflag.m4 |  3 +++
 configure | 10 +-
 configure.ac  | 10 +-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/config/picflag.m4 b/config/picflag.m4
index 8b106f9af88..0aefcf619bf 100644
--- a/config/picflag.m4
+++ b/config/picflag.m4
@@ -44,6 +44,9 @@ case "${$2}" in
# sets the default TLS model and affects inlining.
$1=-fPIC
;;
+loongarch*-*-*)
+   $1=-fpic
+   ;;
 mips-sgi-irix6*)
# PIC is the default.
;;
diff --git a/configure b/configure
index 9c2d7df1bb2..87548f0da96 100755
--- a/configure
+++ b/configure
@@ -3060,7 +3060,7 @@ case "${ENABLE_GOLD}" in
   # Check for target supported by gold.
   case "${target}" in
 i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
-| aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*)
+| aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*)
  configdirs="$configdirs gold"
  if test x${ENABLE_GOLD} = xdefault; then
default_ld=gold
@@ -3646,6 +3646,9 @@ case "${target}" in
   i[3456789]86-*-*)
 libgloss_dir=i386
 ;;
+  loongarch*-*-*)
+libgloss_dir=loongarch
+;;
   m68hc11-*-*|m6811-*-*|m68hc12-*-*|m6812-*-*)
 libgloss_dir=m68hc11
 ;;
@@ -4030,6 +4033,11 @@ case "${target}" in
   wasm32-*-*)
 noconfigdirs="$noconfigdirs ld"
 ;;
+  loongarch*-*-linux*)
+;;
+  loongarch*-*-*)
+noconfigdirs="$noconfigdirs gprof"
+;;
 esac
 
 # If we aren't building newlib, then don't build libgloss, since libgloss
diff --git a/configure.ac b/configure.ac
index 68cc5cc31fe..55362afeeae 100644
--- a/configure.ac
+++ b/configure.ac
@@ -353,7 +353,7 @@ case "${ENABLE_GOLD}" in
   # Check for target supported by gold.
   case "${target}" in
 i?86-*-* | x86_64-*-* | sparc*-*-* | powerpc*-*-* | arm*-*-* \
-| aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-*)
+| aarch64*-*-* | tilegx*-*-* | mips*-*-* | s390*-*-* | loongarch*-*-*)
  configdirs="$configdirs gold"
  if test x${ENABLE_GOLD} = xdefault; then
default_ld=gold
@@ -899,6 +899,9 @@ case "${target}" in
   i[[3456789]]86-*-*)
 libgloss_dir=i386
 ;;
+  loongarch*-*-*)
+libgloss_dir=loongarch
+;;
   m68hc11-*-*|m6811-*-*|m68hc12-*-*|m6812-*-*)
 libgloss_dir=m68hc11
 ;;
@@ -1283,6 +1286,11 @@ case "${target}" in
   wasm32-*-*)
 noconfigdirs="$noconfigdirs ld"
 ;;
+  loongarch*-*-linux*)
+;;
+  loongarch*-*-*)
+noconfigdirs="$noconfigdirs gprof"
+;;
 esac
 
 # If we aren't building newlib, then don't build libgloss, since libgloss
-- 
2.27.0



[PATCH v5 00/12] Add LoongArch support

2022-01-21 Thread Chenghua Xu
The LoongArch architecture (LoongArch) is an Instruction Set
Architecture (ISA) that has a Reduced Instruction Set Computer (RISC)
style.
The documents are on
https://loongson.github.io/LoongArch-Documentation/README-EN.html

The ELF ABI Documents are on:
https://loongson.github.io/LoongArch-Documentation/LoongArch-ELF-ABI-EN.html

The binutils has been merged into trunk:
https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=560b3fe208255ae909b4b1c88ba9c28b09043307

Note: We split -mabi= into -mabi=lp64d/f/s. The new options are not yet
supported by upstream binutils,
so this GCC port requires the following patch to be applied to binutils in order to build:
https://github.com/loongson/binutils-gdb/commit/aacb0bf860f02aa5a7dcb76dd0e392bf871c7586
(it will be submitted upstream after the GCC side is confirmed)


We have compiled more than 300 CLFS packages with this compiler.
The CLFS are currently used on Cfarm machines gcc400 and gcc401.

We know it is stage 4, but I think it is OK for a new port.
The upstream kernel side is waiting for an approval from the GCC side;
if it is blocked by stage 4, an approval for GCC 13 would be appreciated.


Thanks.



changelog:

v1 -> v2
1. Split patch set.
2. Change some code style.
3. Add -mabi=lp64d/f/s options.
4. Change GLIBC_DYNAMIC_LINKER_LP64 name.

v2 -> v3
1. Change some code style.
2. Bug fix.

v3 -> v4
1. Change some code style.
2. Bug fix.
3. Delete some builtin macros.

v4 -> v5
1. delete wrong insn zero_extendsidi2_internal.
2. Adjust some build options.
3. Change some .c files to .cc.


*** BLURB HERE ***

chenglulu (12):
  LoongArch Port: Regenerate configure
  LoongArch Port: gcc build
  LoongArch Port: Regenerate gcc/configure.
  LoongArch Port: Machine Decsription files.
  LoongArch Port: Machine description C files and .h files.
  LoongArch Port: Builtin functions.
  LoongArch Port: Builtin macros.
  LoongArch Port: libgcc
  LoongArch Port: Regenerate libgcc/configure.
  LoongArch Port: libgomp
  LoongArch Port: gcc/testsuite
  LoongArch Port: Add doc.

 config/picflag.m4 |3 +
 configure |   10 +-
 configure.ac  |   10 +-
 contrib/config-list.mk|5 +-
 .../config/loongarch/loongarch-common.cc  |   73 +
 gcc/config.gcc|  410 +-
 gcc/config/host-linux.cc  |2 +
 gcc/config/loongarch/constraints.md   |  212 +
 gcc/config/loongarch/generic.md   |  132 +
 gcc/config/loongarch/genopts/genstr.sh|   91 +
 .../loongarch/genopts/loongarch-strings   |   58 +
 gcc/config/loongarch/genopts/loongarch.opt.in |  189 +
 gcc/config/loongarch/gnu-user.h   |   84 +
 gcc/config/loongarch/la464.md |  132 +
 gcc/config/loongarch/larchintrin.h|  413 ++
 gcc/config/loongarch/linux.h  |   50 +
 gcc/config/loongarch/loongarch-builtins.cc|  511 ++
 gcc/config/loongarch/loongarch-c.cc   |  109 +
 gcc/config/loongarch/loongarch-cpu.cc |  206 +
 gcc/config/loongarch/loongarch-cpu.h  |   30 +
 gcc/config/loongarch/loongarch-def.c  |  164 +
 gcc/config/loongarch/loongarch-def.h  |  151 +
 gcc/config/loongarch/loongarch-driver.cc  |  187 +
 gcc/config/loongarch/loongarch-driver.h   |   69 +
 gcc/config/loongarch/loongarch-ftypes.def |  106 +
 gcc/config/loongarch/loongarch-modes.def  |   29 +
 gcc/config/loongarch/loongarch-opts.cc|  580 ++
 gcc/config/loongarch/loongarch-opts.h |   86 +
 gcc/config/loongarch/loongarch-protos.h   |  241 +
 gcc/config/loongarch/loongarch-str.h  |   57 +
 gcc/config/loongarch/loongarch-tune.h |   72 +
 gcc/config/loongarch/loongarch.cc | 6326 +
 gcc/config/loongarch/loongarch.h  | 1271 
 gcc/config/loongarch/loongarch.md | 3702 ++
 gcc/config/loongarch/loongarch.opt|  189 +
 gcc/config/loongarch/predicates.md|  553 ++
 gcc/config/loongarch/sync.md  |  574 ++
 gcc/config/loongarch/t-linux  |   53 +
 gcc/config/loongarch/t-loongarch  |   59 +
 gcc/configure |   66 +-
 gcc/configure.ac  |   33 +-
 gcc/doc/install.texi  |   47 +-
 gcc/doc/invoke.texi   |  201 +
 gcc/doc/md.texi   |   55 +
 gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C|2 +-
 gcc/testsuite/g++.old-deja/g++.abi/ptrmem.C   |2 +-
 gcc/testsuite/g++.old-deja/g++.pt/ptrmem6.C   |2 +-
 gcc/testsuite/gcc.dg/20020312-2.c |2 +
 gcc/testsuite/gcc.dg/loop-8.c |2 +-
 .../torture/stackalign/builtin-apply-2.c  |2 +-
 gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-3.c |2 +-
 .../gcc.target/loongarch/loongarch.exp|   40 +
 .../gcc.target/loongarch/tst-asm-const.c

[PATCH] fortran: Extend -fconvert= option for ppc64le r16_ieee and r16_ibm

2022-01-21 Thread Jakub Jelinek via Gcc-patches
Hi!

This patch on top of the previously posted option handling changes patch
allows specifying -fconvert=swap,r16_ieee etc. (but will error on it
when not on powerpc64le because in the library such swapping is only
implemented for HAVE_REAL_17).

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-01-21  Jakub Jelinek  

* lang.opt (fconvert=): Add EnumSet property and mention also
r16_ieee and r16_ibm arguments.
(big-endian, little-endian, native, swap): Add Set(1) property.
(r16_ieee, r16_ibm): New EnumValue entries with Set(2) property.
* trans-types.cc (gfc_init_kinds): Emit gfc_fatal_error for
-fconvert=r16_ieee or -fconvert=r16_ibm when R16_IEEE <=> R16_IBM
conversions aren't supported.

--- gcc/fortran/lang.opt.jj 2022-01-11 23:49:52.167824673 +0100
+++ gcc/fortran/lang.opt2022-01-21 15:15:15.494099716 +0100
@@ -421,23 +421,29 @@ Fortran
 Produce a warning at runtime if a array temporary has been created for a 
procedure argument.
 
 fconvert=
-Fortran RejectNegative Joined Enum(gfc_convert) Var(flag_convert) 
Init(GFC_FLAG_CONVERT_NATIVE)
--fconvert=   The endianness used for 
unformatted files.
+Fortran RejectNegative Joined Enum(gfc_convert) EnumSet Var(flag_convert) 
Init(GFC_FLAG_CONVERT_NATIVE)
+-fconvert=  The 
endianness used for unformatted files.
 
 Enum
 Name(gfc_convert) Type(enum gfc_convert) UnknownError(Unrecognized option to 
endianness value: %qs)
 
 EnumValue
-Enum(gfc_convert) String(big-endian) Value(GFC_FLAG_CONVERT_BIG)
+Enum(gfc_convert) String(big-endian) Value(GFC_FLAG_CONVERT_BIG) Set(1)
 
 EnumValue
-Enum(gfc_convert) String(little-endian) Value(GFC_FLAG_CONVERT_LITTLE)
+Enum(gfc_convert) String(little-endian) Value(GFC_FLAG_CONVERT_LITTLE) Set(1)
 
 EnumValue
-Enum(gfc_convert) String(native) Value(GFC_FLAG_CONVERT_NATIVE)
+Enum(gfc_convert) String(native) Value(GFC_FLAG_CONVERT_NATIVE) Set(1)
 
 EnumValue
-Enum(gfc_convert) String(swap) Value(GFC_FLAG_CONVERT_SWAP)
+Enum(gfc_convert) String(swap) Value(GFC_FLAG_CONVERT_SWAP) Set(1)
+
+EnumValue
+Enum(gfc_convert) String(r16_ieee) Value(GFC_FLAG_CONVERT_R16_IEEE) Set(2)
+
+EnumValue
+Enum(gfc_convert) String(r16_ibm) Value(GFC_FLAG_CONVERT_R16_IBM) Set(2)
 
 fcray-pointer
 Fortran Var(flag_cray_pointer)
--- gcc/fortran/trans-types.cc.jj   2022-01-18 11:58:59.579982099 +0100
+++ gcc/fortran/trans-types.cc  2022-01-21 20:26:29.438558960 +0100
@@ -527,6 +527,9 @@ gfc_init_kinds (void)
  }
  }
 }
+  else if ((flag_convert & (GFC_CONVERT_R16_IEEE | GFC_CONVERT_R16_IBM)) != 0)
+gfc_fatal_error ("%<-fconvert=r16_ieee%> or %<-fconvert=r16_ibm%> not "
+"supported on this architecture");
 
   /* Choose the default integer kind.  We choose 4 unless the user directs us
  otherwise.  Even if the user specified that the default integer kind is 8,

Jakub



[PATCH] options: Fix up -fsanitize-coverage= [PR104158]

2022-01-21 Thread Jakub Jelinek via Gcc-patches
Hi!

This is an incremental patch to fix up -fsanitize-coverage= option
handling: allow -fno-sanitize-coverage= again, allow both
suboptions together in one option, and make
-fsanitize-coverage=trace-pc -fsanitize-coverage=trace-cmp
actually enable both suboptions rather than just the last one.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2022-01-21  Jakub Jelinek  

PR sanitizer/104158
* common.opt (flag_sanitize_coverage): Remove Variable entry.
(fsanitize-coverage=): Remove RejectNegative property, add
Var(flag_sanitize_coverage) and EnumSet properties.
(trace-pc): Add Set(1) property.
(trace-cmp): Add Set(2) property.
* opts.cc (common_handle_option): Don't handle
OPT_fsanitize_coverage_.

* gcc.dg/spellcheck-options-24.c: New test.
* gcc.dg/sancov/pr104158-1.c: New test.
* gcc.dg/sancov/pr104158-2.c: New test.
* gcc.dg/sancov/pr104158-3.c: New test.
* gcc.dg/sancov/pr104158-4.c: New test.
* gcc.dg/sancov/pr104158-5.c: New test.
* gcc.dg/sancov/pr104158-6.c: New test.
* gcc.dg/sancov/pr104158-7.c: New test.

--- gcc/common.opt.jj   2022-01-18 11:58:58.975990728 +0100
+++ gcc/common.opt  2022-01-21 17:03:46.261364018 +0100
@@ -223,10 +223,6 @@ unsigned int flag_sanitize
 Variable
 unsigned int flag_sanitize_recover = (SANITIZE_UNDEFINED | 
SANITIZE_UNDEFINED_NONDEFAULT | SANITIZE_KERNEL_ADDRESS | 
SANITIZE_KERNEL_HWADDRESS) & ~(SANITIZE_UNREACHABLE | SANITIZE_RETURN)
 
-; What the coverage sanitizers should instrument
-Variable
-unsigned int flag_sanitize_coverage
-
 ; Flag whether a prefix has been added to dump_base_name
 Variable
 bool dump_base_name_prefixed = false
@@ -1076,17 +1072,17 @@ Common Driver Joined
 Select what to sanitize.
 
 fsanitize-coverage=
-Common Joined RejectNegative Enum(sanitize_coverage)
+Common Joined Enum(sanitize_coverage) Var(flag_sanitize_coverage) EnumSet
 Select type of coverage sanitization.
 
 Enum
 Name(sanitize_coverage) Type(int)
 
 EnumValue
-Enum(sanitize_coverage) String(trace-pc) Value(SANITIZE_COV_TRACE_PC)
+Enum(sanitize_coverage) String(trace-pc) Value(SANITIZE_COV_TRACE_PC) Set(1)
 
 EnumValue
-Enum(sanitize_coverage) String(trace-cmp) Value(SANITIZE_COV_TRACE_CMP)
+Enum(sanitize_coverage) String(trace-cmp) Value(SANITIZE_COV_TRACE_CMP) Set(2)
 
 fasan-shadow-offset=
 Common Joined RejectNegative Var(common_deferred_options) Defer
--- gcc/opts.cc.jj  2022-01-20 11:30:45.595577895 +0100
+++ gcc/opts.cc 2022-01-21 19:44:56.770505499 +0100
@@ -2621,10 +2621,6 @@ common_handle_option (struct gcc_options
  &= ~(SANITIZE_UNDEFINED | SANITIZE_UNDEFINED_NONDEFAULT);
   break;
 
-case OPT_fsanitize_coverage_:
-  opts->x_flag_sanitize_coverage = value;
-  break;
-
 case OPT_O:
 case OPT_Os:
 case OPT_Ofast:
--- gcc/testsuite/gcc.dg/spellcheck-options-24.c.jj 2022-01-21 
20:14:19.549779106 +0100
+++ gcc/testsuite/gcc.dg/spellcheck-options-24.c2022-01-21 
20:14:38.12955 +0100
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-fsanitize-coverage=trace-pc,tracecmp" } */
+
+/* { dg-error "unrecognized argument in option 
'-fsanitize-coverage=trace-pc,tracecmp'" "" { target *-*-* } 0 } */
+/* { dg-message "valid arguments to '-fsanitize-coverage=' are: trace-cmp 
trace-pc; did you mean 'trace-cmp'?" "" { target *-*-* } 0 } */
--- gcc/testsuite/gcc.dg/sancov/pr104158-1.c.jj 2022-01-21 20:08:17.878843831 
+0100
+++ gcc/testsuite/gcc.dg/sancov/pr104158-1.c2022-01-21 20:03:48.824612895 
+0100
@@ -0,0 +1,11 @@
+/* PR sanitizer/104158 */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize-coverage=trace-cmp -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump "__sanitizer_cov_trace_cmp" "optimized" } } */
+/* { dg-final { scan-tree-dump-not "__sanitizer_cov_trace_pc" "optimized" } } 
*/
+
+int
+foo (int a, int b)
+{
+  return a == b;
+}
--- gcc/testsuite/gcc.dg/sancov/pr104158-2.c.jj 2022-01-21 20:08:17.879843817 
+0100
+++ gcc/testsuite/gcc.dg/sancov/pr104158-2.c2022-01-21 20:04:18.116201478 
+0100
@@ -0,0 +1,11 @@
+/* PR sanitizer/104158 */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize-coverage=trace-pc -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-not "__sanitizer_cov_trace_cmp" "optimized" } } 
*/
+/* { dg-final { scan-tree-dump "__sanitizer_cov_trace_pc" "optimized" } } */
+
+int
+foo (int a, int b)
+{
+  return a == b;
+}
--- gcc/testsuite/gcc.dg/sancov/pr104158-3.c.jj 2022-01-21 20:08:17.880843803 
+0100
+++ gcc/testsuite/gcc.dg/sancov/pr104158-3.c2022-01-21 20:05:42.747016252 
+0100
@@ -0,0 +1,11 @@
+/* PR sanitizer/104158 */
+/* { dg-do compile } */
+/* { dg-options "-fsanitize-coverage=trace-cmp,trace-pc -fdump-tree-optimized" 
} */
+/* { dg-final { scan-tree-dump "__sanitizer_cov_trace_cmp" "optimized" } } */
+/* { dg-final { scan-tree-dump "__sanitizer_cov_trace_pc" "optimized" } } */
+
+int
+foo (int a, int b)
+{
+  

[PATCH] options: Add EnumSet and Set property support [PR104158]

2022-01-21 Thread Jakub Jelinek via Gcc-patches
Hi!

The following patch is infrastructure support for at least 3 different
options that need changes:
1) PR104158 talks about a regression with the -fsanitize-coverage=
   option; in GCC 11 and older and on trunk prior to r12-1177, this
   option behaved similarly to -f{,no-}sanitize{,-recover}= options,
   namely that the option allows negative and argument of the option
   is a list of strings, each of them has some enumerator and
   -fsanitize-coverage= enabled those bits in the underlying
   flag_sanitize_coverage, while -fno-sanitize-coverage= disabled them.
   So, -fsanitize-coverage=trace-pc,trace-cmp was equivalent to
   -fsanitize-coverage=trace-pc -fsanitize-coverage=trace-cmp and both
   set flag_sanitize_coverage to
   (SANITIZE_COV_TRACE_PC | SANITIZE_COV_TRACE_CMP)
   Also, e.g.
   -fsanitize-coverage=trace-pc,trace-cmp -fno-sanitize-coverage=trace-pc
   would in the end set flag_sanitize_coverage to
   SANITIZE_COV_TRACE_CMP (first set both bits, then subtract one)
   The r12-1177 change, I think done to improve argument misspelling
   diagnostic, changed the option incompatibly in multiple ways,
   -fno-sanitize-coverage= is now rejected, only a single argument
   is allowed, not multiple and
   -fsanitize-coverage=trace-pc -fsanitize-coverage=trace-cmp
   enables just SANITIZE_COV_TRACE_CMP and not both (each option
   overrides the previous value)
2) Thomas Koenig wants to extend Fortran -fconvert= option for the
   ppc64le real(kind=16) swapping support; currently the option
   accepts -fconvert={native,swap,big-endian,little-endian} and the
   intent is to add support for -fconvert=r16_ibm and -fconvert=r16_ieee
   (that alone is just normal Enum), but also to handle
   -fconvert=swap,r16_ieee or -fconvert=r16_ieee,big-endian but not
   -fconvert=big-endian,little-endian - the
   native/swap/big-endian/little-endian are one mutually exclusive set
   and r16_ieee/r16_ibm another one.
   See https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587943.html
   and thread around that.
3) Similarly Marek Polacek wants to extend the -Wbidi-chars= option,
   such that it will handle not just the current
   -Wbidi-chars={none,bidirectional,any}, but also -Wbidi-chars=ucn
   and bidirectional,ucn and ucn,any etc.  Again two separate sets,
   one none/bidirectional/any and another one ucn.
   See https://gcc.gnu.org/pipermail/gcc-patches/2022-January/588960.html

The following patch adds framework for this and I'll post incremental
patches for 1) and 2).
As I've tried to document, such options are marked by additional
EnumSet property on the option and in that case all the EnumValues
in the Enum referenced from it must use a new Set property with set
number (initially I wanted to just mark the last enumerator in each mutually
exclusive set, but optionlist is sorted and so it doesn't really work
well).  So e.g. for the Fortran -fconvert=, one specifies:
fconvert=
Fortran RejectNegative Joined Enum(gfc_convert) EnumSet Var(flag_convert) 
Init(GFC_FLAG_CONVERT_NATIVE)
-fconvert=  The 
endianness used for unformatted files.

Enum
Name(gfc_convert) Type(enum gfc_convert) UnknownError(Unrecognized option to 
endianness value: %qs)

EnumValue
Enum(gfc_convert) String(big-endian) Value(GFC_FLAG_CONVERT_BIG) Set(1)

EnumValue
Enum(gfc_convert) String(little-endian) Value(GFC_FLAG_CONVERT_LITTLE) Set(1)

EnumValue
Enum(gfc_convert) String(native) Value(GFC_FLAG_CONVERT_NATIVE) Set(1)

EnumValue
Enum(gfc_convert) String(swap) Value(GFC_FLAG_CONVERT_SWAP) Set(1)

EnumValue
Enum(gfc_convert) String(r16_ieee) Value(GFC_FLAG_CONVERT_R16_IEEE) Set(2)

EnumValue
Enum(gfc_convert) String(r16_ibm) Value(GFC_FLAG_CONVERT_R16_IBM) Set(2)

and this says to the option handling code that
1) if only one arg is specified to one instance of the option, it can be any
of those 6
2) if two args are specified, one has to be from the first 4 and another
from the last 2, in any order
3) at most 2 args may be specified (there are just 2 sets)

There is a requirement on the Value values checked in self-test, the
values from one set ORed together must be disjoint from values from
another set ORed together.  In the Fortran case, the first 4 are 0-3
so mask is 3, and the last 2 are 4 and 8, so mask is 12.
When say -fconvert=big-endian is specified, it sets the first set
to GFC_FLAG_CONVERT_BIG (2) but doesn't modify whatever value the
other set had, so e.g.
-fconvert=big-endian -fconvert=r16_ieee
-fconvert=r16_ieee -fconvert=big-endian
-fconvert=r16_ieee,big-endian
-fconvert=big-endian,r16_ieee
all behave the same.

Also, with the EnumSet support, it is now possible to allow
not specifying RejectNegative - we can set some set's value and
then clear it and set it again to some other value etc.

I think with the 2) patch I achieve what we want for Fortran, for 1)
the only behavior change from gcc 11 is that
-fsanitize-coverage=trace-cmp,trace-cmp is now rejected.
This is mainly from the desire to disallow
-fconvert=big-endian,little-endian o

Re: [PATCH] libgccjit: Add support for bitcasts [PR104071]

2022-01-21 Thread Antoni Boucher via Gcc-patches
Hi.
Here's the updated patch.

See comments below.

Le mardi 18 janvier 2022 à 18:06 -0500, David Malcolm a écrit :
> On Mon, 2022-01-17 at 19:30 -0500, Antoni Boucher via Gcc-patches
> wrote:
> > I was missing the define, so I added it.
> > Here's the new patch with it.
> 
> Thanks for the patch.
> 
> > Le lundi 17 janvier 2022 à 17:18 -0500, Antoni Boucher via Jit a
> > écrit :
> > > Hi.
> > > This patch adds support for bitcasts in libgccjit.
> > > 
> > > It passes the JIT tests, but since I added a function in tree.c,
> > > I
> > > wonder if I should run the whole testsuite.
> 
> We're in stage 4 for GCC 12 now, so we need to be especially careful
> and conservative about every change.  A strict reading on the rules
> is
> that we shouldn't be adding new features - but if they're confined to
> libgccjit we may be able to get release manager approval.

Ok, if the 4 patches currently being reviewed (and listed here:
https://github.com/antoyo/libgccjit-patches) were included in gcc 12,
I'd be able to build rustc_codegen_gcc with an unpatched gcc.

It is to be noted however, that I'll need more patches for future work.
Off the top of my head, I'll at least need a patch for the inline
attribute, try/catch and target-specific builtins.
The last 2 features will probably take some time to implement, so I'll
let you judge if you think it's worth merging the 4 patches currently
being reviewed for gcc 12.

> 
> > > 
> > > David, you can now disregard my question in my email about 128-
> > > bit
> > > integers regarding my issue with initialize_sizetypes being
> > > called
> > > multiple times because this patch fixes this issue.
> > > It turns out there was a cache of types that needed to be cleared
> > > when
> > > you initialize the JIT.
> > > 
> > > The check for sizes is pending, because it requires the updates
> > > to
> > > get_size I made in my patch for 128-bit integers.
> 
> Sorry, I seem to have mislaid that patch; do you have the "Subject"
> line handy?

I recently sent an email with that patch updated, but here's the
subject line:
[PATCH] libgccjit: Add support for sized integer types, including 128-
bit integers [PR95325]

> 
> Do you have a list of the patches I need to review?

Yes, on this repo:
https://github.com/antoyo/libgccjit-patches

They are outdated but I can update them if you want.

> 
> As for this patch, overall I like it, but there are various nits...
> 
> > > 
> > > Thanks for the review!
> 
> > 2022-01-17  Antoni Boucher 
> > 
> > gcc/jit/
> > PR target/104071
> 
> Should be "jit" rather than "target".
> 
> Various source files are now .cc rather than .c after yesterday's big
> renaming.
> 
> > * docs/topics/compatibility.rst (LIBGCCJIT_ABI_20): New ABI
> > tag.
> > * docs/topics/expressions.rst: Add documentation for the
> > function gcc_jit_context_new_bitcast.
> > * dummy-frontend.c: clear the cache of non-standard integer
> > types to avoid having issues with some optimizations of
> > bitcast where the SSA_NAME will have a size of a cached
> > integer type that should have been invalidated, causing a
> > comparison of integer constant to fail.
> > * jit-playback.c: New function (new_bitcast).
> > * jit-playback.h: New function (new_bitcast).
> > * jit-recording.c: New functions (new_bitcast,
> > bitcast::replay_into, bitcast::visit_children,
> > bitcast::make_debug_string, bitcast::write_reproducer).
> > * jit-recording.h: New class (bitcast) and new function
> > (new_bitcast, bitcast::replay_into,
> > bitcast::visit_children,
> > bitcast::make_debug_string, bitcast::write_reproducer,
> > bitcast::get_precedence).
> > * libgccjit.c: New function (gcc_jit_context_new_bitcast)
> > * libgccjit.h: New function (gcc_jit_context_new_bitcast)
> > * libgccjit.map (LIBGCCJIT_ABI_20): New ABI tag.
> > 
> > gcc/testsuite/
> > PR target/104071
> > * jit.dg/all-non-failing-tests.h: Add new test-bitcast.
> > * jit.dg/test-bitcast.c: New test.
> > 
> > gcc/
> > PR target/104071
> > * tree.c: New function
> > (clear_nonstandard_integer_type_cache).
> > * tree.h: New function
> > (clear_nonstandard_integer_type_cache).
> > ---
> >  gcc/jit/docs/topics/compatibility.rst    |  9 +++
> >  gcc/jit/docs/topics/expressions.rst  | 17 +
> >  gcc/jit/dummy-frontend.c |  2 +
> >  gcc/jit/jit-playback.c   | 13 
> >  gcc/jit/jit-playback.h   |  5 ++
> >  gcc/jit/jit-recording.c  | 66
> > 
> >  gcc/jit/jit-recording.h  | 32 ++
> >  gcc/jit/libgccjit.c  | 28 +
> >  gcc/jit/libgccjit.h  | 15 +
> >  gcc/jit/libgccjit.map    |  6 ++
> >  gcc/testsuite/jit.dg/all-non-failing-tests.

Re: Ping: [PATCH] PR 103763, Fix fold-vec-splat-floatdouble on power10.

2022-01-21 Thread David Edelsohn via Gcc-patches
On Fri, Jan 21, 2022 at 2:56 PM Michael Meissner  wrote:
>
> Ping patch
> https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587924.html
>
> | Date: Fri, 7 Jan 2022 16:05:53 -0500
> | From: Michael Meissner 
> | Subject: [PATCH] PR 103763, Fix fold-vec-splat-floatdouble on power10.
> | Message-ID: 

This patch is okay.

Thanks, David


Re: [PATCH v3 07/15] arm: Implement MVE predicates as vectors of booleans

2022-01-21 Thread Christophe Lyon via Gcc-patches
Hi Andre,

On Fri, Jan 21, 2022 at 12:23 PM Andre Vieira (lists) via Gcc-patches <
gcc-patches@gcc.gnu.org> wrote:

> Hi Christophe,
>
> On 13/01/2022 14:56, Christophe Lyon via Gcc-patches wrote:
> > diff --git a/gcc/config/arm/arm-simd-builtin-types.def
> b/gcc/config/arm/arm-simd-builtin-types.def
> > index 6ba6f211531..920c2a68e4c 100644
> > --- a/gcc/config/arm/arm-simd-builtin-types.def
> > +++ b/gcc/config/arm/arm-simd-builtin-types.def
> > @@ -51,3 +51,7 @@
> > ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
> > ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
> > ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
> > +
> > +  ENTRY (Pred1x16_t, V16BI, unsigned, 16, uint16, 21)
> > +  ENTRY (Pred2x8_t, V8BI, unsigned, 8, uint16, 21)
> > +  ENTRY (Pred4x4_t, V4BI, unsigned, 4, uint16, 21)
>
> I'm trying to lower masked loads and when I tried to use the
> arm_simd_types[Pred1x16_t].itype as the mask type I noticed the
> TYPE_SIZE of that is 256, rather than the expected 16. Instead I used
> truth_type_for (arm_simd_types[Uint8x16_t].itype) and that gives me a
> compatible vector of booleans. So the itype for Pred1x16_t seems wrong
> to me.
>
>  How about:
ENTRY (Pred1x16_t, V16BI, predicate, 16, pred1, 21)
ENTRY (Pred2x8_t, V8BI, predicate, 8, pred1, 21)
ENTRY (Pred4x4_t, V4BI, predicate, 4, pred1, 21)

Christophe


Re: [PATCH v2] x86: Properly disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread Jakub Jelinek via Gcc-patches
On Fri, Jan 21, 2022 at 01:57:57PM -0800, H.J. Lu wrote:
> Revert x86 changes in
> 
> commit c163647ffbc9a20c8feb6e079dbecccfe016c82e
> Author: Soren Tempel 
> Date:   Fri Jan 21 19:22:46 2022 +
> 
> Disable -fsplit-stack support on non-glibc targets
> 
> and change ix86_supports_split_stack to return true only on glibc.
> 
>   PR bootstrap/104170
>   * common/config/i386/i386-common.cc (ix86_supports_split_stack):
>   Return true only on glibc.
>   * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN):
>   Revert commit c163647ffbc.
>   * config/i386/gnu.h (TARGET_LIBC_PROVIDES_SSP): Likewise.

Ok, thanks.

>  gcc/common/config/i386/i386-common.cc | 17 +++--
>  gcc/config/i386/gnu-user-common.h |  5 ++---
>  gcc/config/i386/gnu.h |  5 +
>  3 files changed, 14 insertions(+), 13 deletions(-)
> 
> diff --git a/gcc/common/config/i386/i386-common.cc 
> b/gcc/common/config/i386/i386-common.cc
> index cae0b880d79..7496d179892 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -1714,16 +1714,21 @@ ix86_option_init_struct (struct gcc_options *opts)
> field in the TCB, so they cannot be used together.  */
>  
>  static bool
> -ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
> +ix86_supports_split_stack (bool report,
>  struct gcc_options *opts ATTRIBUTE_UNUSED)
>  {
> +#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
> +  if (opts->x_linux_libc != LIBC_GLIBC)
> +#endif
> +{
> +  if (report)
> + error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
> +  return false;
> +}
> +
>bool ret = true;
>  
> -#ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
> -  if (report)
> -error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
> -  ret = false;
> -#else
> +#ifdef TARGET_THREAD_SPLIT_STACK_OFFSET
>if (!HAVE_GAS_CFI_PERSONALITY_DIRECTIVE)
>  {
>if (report)
> diff --git a/gcc/config/i386/gnu-user-common.h 
> b/gcc/config/i386/gnu-user-common.h
> index 7525f788a9c..23b54c5be52 100644
> --- a/gcc/config/i386/gnu-user-common.h
> +++ b/gcc/config/i386/gnu-user-common.h
> @@ -66,8 +66,7 @@ along with GCC; see the file COPYING3.  If not see
>  #define STACK_CHECK_STATIC_BUILTIN 1
>  
>  /* We only build the -fsplit-stack support in libgcc if the
> -   assembler has full support for the CFI directives and
> -   targets glibc.  */
> -#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE && OPTION_GLIBC
> +   assembler has full support for the CFI directives.  */
> +#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE
>  #define TARGET_CAN_SPLIT_STACK
>  #endif
> diff --git a/gcc/config/i386/gnu.h b/gcc/config/i386/gnu.h
> index daa505a5d45..401e60c9a02 100644
> --- a/gcc/config/i386/gnu.h
> +++ b/gcc/config/i386/gnu.h
> @@ -35,10 +35,7 @@ along with GCC.  If not, see 
> .
> crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
>  #endif
>  
> -/* -fsplit-stack uses a field in the TCB at a fixed offset. This
> -   field is only available for glibc.  Disable -fsplit-stack for
> -   other libc implementations to avoid silent TCB corruptions.  */
> -#if defined (TARGET_LIBC_PROVIDES_SSP) && OPTION_GLIBC
> +#ifdef TARGET_LIBC_PROVIDES_SSP
>  
>  /* i386 glibc provides __stack_chk_guard in %gs:0x14.  */
>  #define TARGET_THREAD_SSP_OFFSET0x14
> -- 
> 2.34.1

Jakub



[PATCH v2] x86: Properly disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread H.J. Lu via Gcc-patches
On Fri, Jan 21, 2022 at 10:42:03PM +0100, Jakub Jelinek wrote:
> On Fri, Jan 21, 2022 at 01:31:32PM -0800, H.J. Lu wrote:
> > On Fri, Jan 21, 2022 at 09:18:41PM +0100, Jakub Jelinek via Gcc-patches 
> > wrote:
> > > On Fri, Jan 21, 2022 at 08:16:11PM +0100, soeren--- via Gcc-patches wrote:
> > > > gcc/ChangeLog:
> > > > 
> > > > * common/config/s390/s390-common.c (s390_supports_split_stack):
> > > > Only support split-stack on glibc targets.
> > > > * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): 
> > > > Ditto.
> > > > * config/i386/gnu.h (defined): Ditto.
> > > 
> > > Besides breaking bootstrap, this doesn't do what it talks about:
> > > 
> > > > --- a/gcc/config/i386/gnu.h
> > > > +++ b/gcc/config/i386/gnu.h
> > > > @@ -35,7 +35,10 @@ along with GCC.  If not, see 
> > > > .
> > > > crti.o%s 
> > > > %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
> > > >  #endif
> > > >  
> > > > -#ifdef TARGET_LIBC_PROVIDES_SSP
> > > > +/* -fsplit-stack uses a field in the TCB at a fixed offset. This
> > > > +   field is only available for glibc.  Disable -fsplit-stack for
> > > > +   other libc implementations to avoid silent TCB corruptions.  */
> > > > +#if defined (TARGET_LIBC_PROVIDES_SSP) && OPTION_GLIBC
> > > >  
> > > >  /* i386 glibc provides __stack_chk_guard in %gs:0x14.  */
> > > >  #define TARGET_THREAD_SSP_OFFSET0x14
> > > 
> > > Because this doesn't disable just -fsplit-stack support, but also
> > > -fstack-protector*.
> > > Does that one work on musl?
> > > I think common/config/i386/i386-common.c (ix86_supports_split_stack)
> > > should have been changed instead of the config/i386/gnu*.h headers.
> > > 
> > 
> > Like this?
> > 
> > 
> > H.J.
> > ---
> > Revert x86 changes in
> > 
> > commit c163647ffbc9a20c8feb6e079dbecccfe016c82e
> > Author: Soren Tempel 
> > Date:   Fri Jan 21 19:22:46 2022 +
> > 
> > Disable -fsplit-stack support on non-glibc targets
> > 
> > and change ix86_supports_split_stack to return true only on glibc.
> > 
> > PR bootstrap/104170
> > * common/config/i386/i386-common.cc (ix86_supports_split_stack):
> > Return true only on glibc.
> > * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN):
> > Revert commit c163647ffbc.
> > * config/i386/gnu.h (TARGET_LIBC_PROVIDES_SSP): Likewise.
> > ---
> >  gcc/common/config/i386/i386-common.cc | 4 ++--
> >  gcc/config/i386/gnu-user-common.h | 5 ++---
> >  gcc/config/i386/gnu.h | 5 +
> >  3 files changed, 5 insertions(+), 9 deletions(-)
> > 
> > diff --git a/gcc/common/config/i386/i386-common.cc 
> > b/gcc/common/config/i386/i386-common.cc
> > index cae0b880d79..78e6ff730aa 100644
> > --- a/gcc/common/config/i386/i386-common.cc
> > +++ b/gcc/common/config/i386/i386-common.cc
> > @@ -1715,9 +1715,9 @@ ix86_option_init_struct (struct gcc_options *opts)
> >  
> >  static bool
> >  ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
> > -  struct gcc_options *opts ATTRIBUTE_UNUSED)
> > +  struct gcc_options *opts)
> >  {
> > -  bool ret = true;
> > +  bool ret = opts->x_linux_libc == LIBC_GLIBC;
> >  
> >  #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
> >if (report)
> 
> Almost.
> I'd think you should honor report and drop ATTRIBUTE_UNUSED from both args.
> 
> So instead:
>   bool ret = true;
> 
>   if (opts->x_linux_libc != LIBC_GLIBC)
> {
>   if (report)
>   error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
>   return false;
> }
> #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
> ...
> 

Here is the v2 patch.


H.J.
---
Revert x86 changes in

commit c163647ffbc9a20c8feb6e079dbecccfe016c82e
Author: Soren Tempel 
Date:   Fri Jan 21 19:22:46 2022 +

Disable -fsplit-stack support on non-glibc targets

and change ix86_supports_split_stack to return true only on glibc.

PR bootstrap/104170
* common/config/i386/i386-common.cc (ix86_supports_split_stack):
Return true only on glibc.
* config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN):
Revert commit c163647ffbc.
* config/i386/gnu.h (TARGET_LIBC_PROVIDES_SSP): Likewise.
---
 gcc/common/config/i386/i386-common.cc | 17 +++--
 gcc/config/i386/gnu-user-common.h |  5 ++---
 gcc/config/i386/gnu.h |  5 +
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index cae0b880d79..7496d179892 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1714,16 +1714,21 @@ ix86_option_init_struct (struct gcc_options *opts)
field in the TCB, so they cannot be used together.  */
 
 static bool
-ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
+ix86_supports_split_stack (bool report,
   struct gcc_options *opts ATTRIBUTE_UNU

Re: [PATCH] x86: Properly disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread Jakub Jelinek via Gcc-patches
On Fri, Jan 21, 2022 at 01:31:32PM -0800, H.J. Lu wrote:
> On Fri, Jan 21, 2022 at 09:18:41PM +0100, Jakub Jelinek via Gcc-patches wrote:
> > On Fri, Jan 21, 2022 at 08:16:11PM +0100, soeren--- via Gcc-patches wrote:
> > > gcc/ChangeLog:
> > > 
> > >   * common/config/s390/s390-common.c (s390_supports_split_stack):
> > >   Only support split-stack on glibc targets.
> > >   * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
> > >   * config/i386/gnu.h (defined): Ditto.
> > 
> > Besides breaking bootstrap, this doesn't do what it talks about:
> > 
> > > --- a/gcc/config/i386/gnu.h
> > > +++ b/gcc/config/i386/gnu.h
> > > @@ -35,7 +35,10 @@ along with GCC.  If not, see 
> > > .
> > > crti.o%s 
> > > %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
> > >  #endif
> > >  
> > > -#ifdef TARGET_LIBC_PROVIDES_SSP
> > > +/* -fsplit-stack uses a field in the TCB at a fixed offset. This
> > > +   field is only available for glibc.  Disable -fsplit-stack for
> > > +   other libc implementations to avoid silent TCB corruptions.  */
> > > +#if defined (TARGET_LIBC_PROVIDES_SSP) && OPTION_GLIBC
> > >  
> > >  /* i386 glibc provides __stack_chk_guard in %gs:0x14.  */
> > >  #define TARGET_THREAD_SSP_OFFSET0x14
> > 
> > Because this doesn't disable just -fsplit-stack support, but also
> > -fstack-protector*.
> > Does that one work on musl?
> > I think common/config/i386/i386-common.c (ix86_supports_split_stack)
> > should have been changed instead of the config/i386/gnu*.h headers.
> > 
> 
> Like this?
> 
> 
> H.J.
> ---
> Revert x86 changes in
> 
> commit c163647ffbc9a20c8feb6e079dbecccfe016c82e
> Author: Soren Tempel 
> Date:   Fri Jan 21 19:22:46 2022 +
> 
> Disable -fsplit-stack support on non-glibc targets
> 
> and change ix86_supports_split_stack to return true only on glibc.
> 
>   PR bootstrap/104170
>   * common/config/i386/i386-common.cc (ix86_supports_split_stack):
>   Return true only on glibc.
>   * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN):
>   Revert commit c163647ffbc.
>   * config/i386/gnu.h (TARGET_LIBC_PROVIDES_SSP): Likewise.
> ---
>  gcc/common/config/i386/i386-common.cc | 4 ++--
>  gcc/config/i386/gnu-user-common.h | 5 ++---
>  gcc/config/i386/gnu.h | 5 +
>  3 files changed, 5 insertions(+), 9 deletions(-)
> 
> diff --git a/gcc/common/config/i386/i386-common.cc 
> b/gcc/common/config/i386/i386-common.cc
> index cae0b880d79..78e6ff730aa 100644
> --- a/gcc/common/config/i386/i386-common.cc
> +++ b/gcc/common/config/i386/i386-common.cc
> @@ -1715,9 +1715,9 @@ ix86_option_init_struct (struct gcc_options *opts)
>  
>  static bool
>  ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
> -struct gcc_options *opts ATTRIBUTE_UNUSED)
> +struct gcc_options *opts)
>  {
> -  bool ret = true;
> +  bool ret = opts->x_linux_libc == LIBC_GLIBC;
>  
>  #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
>if (report)

Almost.
I'd think you should honor report and drop ATTRIBUTE_UNUSED from both args.

So instead:
  bool ret = true;

  if (opts->x_linux_libc != LIBC_GLIBC)
{
  if (report)
error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
  return false;
}
#ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
...

Jakub



Re: [PATCH] c-family: Fix up a -Wformat regression [PR104148]

2022-01-21 Thread Joseph Myers
On Fri, 21 Jan 2022, Jakub Jelinek via Gcc-patches wrote:

> 2022-01-21  Jakub Jelinek  
> 
>   PR c++/104148
>   * c-common.h (check_function_arguments_recurse): Add for_format
>   arg.
>   * c-common.cc (check_function_nonnull): Pass false to
>   check_function_arguments_recurse's last argument.
>   (check_function_arguments_recurse): Add for_format argument,
>   if true, don't stop on warning_suppressed_p.
>   * c-format.cc (check_format_info): Pass true to
>   check_function_arguments_recurse's last argument.
> 
>   * c-c++-common/Wformat-pr104148.c: New test.

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] Mark XXSPLTIW/XXSPLTIDP as prefixed -- PR 104136

2022-01-21 Thread Segher Boessenkool
Hi!

On Fri, Jan 21, 2022 at 02:49:26PM -0500, Michael Meissner wrote:
> If you compile module_advect_em.F90 with -Ofast -mcpu=power10, one module
> is large enough that we can't use a single conditional jump to span the
> function.  Instead, GCC has to reverse the condition, and do a conditional
> jump around an unconditional branch.  It turns out when xxspltiw and
> xxspltidp instructions were generated, they were not marked as being
> prefixed (i.e. length of 12 bytes instead of 4 bytes).

(The prefixed insn itself is 8B, but there can be 4B more because
prefixed insns cannot cross 64B boundaries, necessitating an extra nop
insn or other 4B padding).

> This meant the
> calculations for the branch length were off, which in turn meant the
> assembler raised an error because it couldn't do the conditional jump.

That is the most common symptom, yup.  But there are other problems as
well (other correctness problems -- it obviously does not help
performance either).

> The fix is to explicitly set the prefixed attribute when we are loading up
> vector constants with the xxspltiw or xxspltidp instructions.

That attribute should be set on *all* xxsplti{w,dp} insns, and more in
general on all insns that are always prefixed.  The maybe_prefixed
attribute is only for insns for which a prefixed as well as a not
prefixed version exists, the prefixed version with a "p" prefixed to the
mnemonic.

> I have removed the code that sets the prefixed attribute for xxspltiw,
> xxspltidp, and xxsplti32dx instructions, since it no longer will be invoked.

Great cleanup / simplification!

> I have also explicitly set the prefixed attribute for load SF and DF mode
> constants with xxspltiw and xxspltidp.  Previously, it was not set on these
> insns, but when the insn was split to get the XXSPLTIW/XXSPLTIDP forms, those
> forms already had the prefixed attribute set.

So now we have more correct information before the insn is split.  Good.

> -  (eq_attr "type" "vecperm")
> -  (if_then_else (match_test "prefixed_xxsplti_p (insn)")
>  (const_string "yes")
>  (const_string "no"))]

Excellent to see this go :-)

> +   (set_attr "prefixed"
> + "*,  *, *,  *, *, *,
> +  *,  *, *,  *, *, *,
> +  *,  *, *,  *, yes")])

You could do some formula that computes it from isa==p10 btw.  But wrap
that in some helper, "is can have prefixed" or something.

Not really worth it unless you need this often, the four we have now
(which could be two perhaps, by merging each pair of patterns again)
isn't enough to warrant the extra indirection.

Okay for trunk.  Also fine for backports if you need them.

Thanks!


Segher


[PATCH] x86: Properly disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread H.J. Lu via Gcc-patches
On Fri, Jan 21, 2022 at 09:18:41PM +0100, Jakub Jelinek via Gcc-patches wrote:
> On Fri, Jan 21, 2022 at 08:16:11PM +0100, soeren--- via Gcc-patches wrote:
> > gcc/ChangeLog:
> > 
> > * common/config/s390/s390-common.c (s390_supports_split_stack):
> > Only support split-stack on glibc targets.
> > * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
> > * config/i386/gnu.h (defined): Ditto.
> 
> Besides breaking bootstrap, this doesn't do what it talks about:
> 
> > --- a/gcc/config/i386/gnu.h
> > +++ b/gcc/config/i386/gnu.h
> > @@ -35,7 +35,10 @@ along with GCC.  If not, see 
> > .
> > crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
> >  #endif
> >  
> > -#ifdef TARGET_LIBC_PROVIDES_SSP
> > +/* -fsplit-stack uses a field in the TCB at a fixed offset. This
> > +   field is only available for glibc.  Disable -fsplit-stack for
> > +   other libc implementations to avoid silent TCB corruptions.  */
> > +#if defined (TARGET_LIBC_PROVIDES_SSP) && OPTION_GLIBC
> >  
> >  /* i386 glibc provides __stack_chk_guard in %gs:0x14.  */
> >  #define TARGET_THREAD_SSP_OFFSET0x14
> 
> Because this doesn't disable just -fsplit-stack support, but also
> -fstack-protector*.
> Does that one work on musl?
> I think common/config/i386/i386-common.c (ix86_supports_split_stack)
> should have been changed instead of the config/i386/gnu*.h headers.
> 

Like this?


H.J.
---
Revert x86 changes in

commit c163647ffbc9a20c8feb6e079dbecccfe016c82e
Author: Soren Tempel 
Date:   Fri Jan 21 19:22:46 2022 +

Disable -fsplit-stack support on non-glibc targets

and change ix86_supports_split_stack to return true only on glibc.

PR bootstrap/104170
* common/config/i386/i386-common.cc (ix86_supports_split_stack):
Return true only on glibc.
* config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN):
Revert commit c163647ffbc.
* config/i386/gnu.h (TARGET_LIBC_PROVIDES_SSP): Likewise.
---
 gcc/common/config/i386/i386-common.cc | 4 ++--
 gcc/config/i386/gnu-user-common.h | 5 ++---
 gcc/config/i386/gnu.h | 5 +
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/gcc/common/config/i386/i386-common.cc 
b/gcc/common/config/i386/i386-common.cc
index cae0b880d79..78e6ff730aa 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -1715,9 +1715,9 @@ ix86_option_init_struct (struct gcc_options *opts)
 
 static bool
 ix86_supports_split_stack (bool report ATTRIBUTE_UNUSED,
-  struct gcc_options *opts ATTRIBUTE_UNUSED)
+  struct gcc_options *opts)
 {
-  bool ret = true;
+  bool ret = opts->x_linux_libc == LIBC_GLIBC;
 
 #ifndef TARGET_THREAD_SPLIT_STACK_OFFSET
   if (report)
diff --git a/gcc/config/i386/gnu-user-common.h 
b/gcc/config/i386/gnu-user-common.h
index 7525f788a9c..23b54c5be52 100644
--- a/gcc/config/i386/gnu-user-common.h
+++ b/gcc/config/i386/gnu-user-common.h
@@ -66,8 +66,7 @@ along with GCC; see the file COPYING3.  If not see
 #define STACK_CHECK_STATIC_BUILTIN 1
 
 /* We only build the -fsplit-stack support in libgcc if the
-   assembler has full support for the CFI directives and
-   targets glibc.  */
-#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE && OPTION_GLIBC
+   assembler has full support for the CFI directives.  */
+#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE
 #define TARGET_CAN_SPLIT_STACK
 #endif
diff --git a/gcc/config/i386/gnu.h b/gcc/config/i386/gnu.h
index daa505a5d45..401e60c9a02 100644
--- a/gcc/config/i386/gnu.h
+++ b/gcc/config/i386/gnu.h
@@ -35,10 +35,7 @@ along with GCC.  If not, see .
crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
 #endif
 
-/* -fsplit-stack uses a field in the TCB at a fixed offset. This
-   field is only available for glibc.  Disable -fsplit-stack for
-   other libc implementations to avoid silent TCB corruptions.  */
-#if defined (TARGET_LIBC_PROVIDES_SSP) && OPTION_GLIBC
+#ifdef TARGET_LIBC_PROVIDES_SSP
 
 /* i386 glibc provides __stack_chk_guard in %gs:0x14.  */
 #define TARGET_THREAD_SSP_OFFSET0x14
-- 
2.34.1



Re: [PATCH v2] Disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread Sören Tempel via Gcc-patches
"H.J. Lu"  wrote:
> OPTION_GLIBC can't be used here since OPTION_GLIBC is
> evaluated at run-time:
> 
> https://gcc.gnu.org/pipermail/gcc-regression/2022-January/076271.html

Oops, my bad, sorry! This accidentally broke in one of the two cleanup
commits. Originally I just used TARGET_GLIBC_MAJOR in PATCH v1. Would
that be acceptable?

Greetings,
Sören


Re: [PATCH v3] Disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread Jakub Jelinek via Gcc-patches
On Fri, Jan 21, 2022 at 08:16:11PM +0100, soeren--- via Gcc-patches wrote:
> gcc/ChangeLog:
> 
>   * common/config/s390/s390-common.c (s390_supports_split_stack):
>   Only support split-stack on glibc targets.
>   * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
>   * config/i386/gnu.h (defined): Ditto.

Besides breaking bootstrap, this doesn't do what it talks about:

> --- a/gcc/config/i386/gnu.h
> +++ b/gcc/config/i386/gnu.h
> @@ -35,7 +35,10 @@ along with GCC.  If not, see 
> .
> crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
>  #endif
>  
> -#ifdef TARGET_LIBC_PROVIDES_SSP
> +/* -fsplit-stack uses a field in the TCB at a fixed offset. This
> +   field is only available for glibc.  Disable -fsplit-stack for
> +   other libc implementations to avoid silent TCB corruptions.  */
> +#if defined (TARGET_LIBC_PROVIDES_SSP) && OPTION_GLIBC
>  
>  /* i386 glibc provides __stack_chk_guard in %gs:0x14.  */
>  #define TARGET_THREAD_SSP_OFFSET0x14

Because this doesn't disable just -fsplit-stack support, but also
-fstack-protector*.
Does that one work on musl?
I think common/config/i386/i386-common.c (ix86_supports_split_stack)
should have been changed instead of the config/i386/gnu*.h headers.

Jakub



[pushed] c++: explain failing static_assert

2022-01-21 Thread Jason Merrill via Gcc-patches
While looking at another bug I wanted the compiler to tell me what the two
unequal values were.

Tested x86_64-pc-linux-gnu, applying to trunk.

gcc/cp/ChangeLog:

* semantics.cc (find_failing_clause): Return expr if not
decomposable.
(finish_static_assert): Show constant values in failing
comparison.

gcc/testsuite/ChangeLog:

* g++.dg/template/explicit-args6.C: Add expected message.
---
 gcc/cp/semantics.cc   | 33 +++
 .../g++.dg/template/explicit-args6.C  |  2 ++
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/gcc/cp/semantics.cc b/gcc/cp/semantics.cc
index 8e22944c2dc..640b2d75471 100644
--- a/gcc/cp/semantics.cc
+++ b/gcc/cp/semantics.cc
@@ -11062,9 +11062,10 @@ find_failing_clause_r (tree expr)
 static tree
 find_failing_clause (tree expr)
 {
-  if (TREE_CODE (expr) != TRUTH_ANDIF_EXPR)
-return NULL_TREE;
-  return find_failing_clause_r (expr);
+  if (TREE_CODE (expr) == TRUTH_ANDIF_EXPR)
+if (tree e = find_failing_clause_r (expr))
+  expr = e;
+  return expr;
 }
 
 /* Build a STATIC_ASSERT for a static assertion with the condition
@@ -11134,9 +11135,9 @@ finish_static_assert (tree condition, tree message, 
location_t location,
  tree bad = find_failing_clause (orig_condition);
  /* If not, or its location is unusable, fall back to the previous
 location.  */
- location_t cloc = location;
- if (cp_expr_location (bad) != UNKNOWN_LOCATION)
-   cloc = cp_expr_location (bad);
+ location_t cloc = cp_expr_loc_or_loc (bad, location);
+ /* Nobody wants to see the artificial (bool) cast.  */
+ bad = tree_strip_nop_conversions (bad);
 
   /* Report the error. */
  if (len == 0)
@@ -11144,16 +11145,22 @@ finish_static_assert (tree condition, tree message, 
location_t location,
  else
error_at (cloc, "static assertion failed: %s",
  TREE_STRING_POINTER (message));
- if (show_expr_p)
-   inform (cloc, "%qE evaluates to false",
-   /* Nobody wants to see the artificial (bool) cast.  */
-   (bad ? tree_strip_nop_conversions (bad) : orig_condition));
 
  /* Actually explain the failure if this is a concept check or a
 requires-expression.  */
- if (concept_check_p (orig_condition)
- || TREE_CODE (orig_condition) == REQUIRES_EXPR)
-   diagnose_constraints (location, orig_condition, NULL_TREE);
+ if (concept_check_p (bad)
+ || TREE_CODE (bad) == REQUIRES_EXPR)
+   diagnose_constraints (location, bad, NULL_TREE);
+ else if (COMPARISON_CLASS_P (bad)
+  && ARITHMETIC_TYPE_P (TREE_TYPE (TREE_OPERAND (bad, 0))))
+   {
+ tree op0 = fold_non_dependent_expr (TREE_OPERAND (bad, 0));
+ tree op1 = fold_non_dependent_expr (TREE_OPERAND (bad, 1));
+ tree cond = build2 (TREE_CODE (bad), boolean_type_node, op0, op1);
+ inform (cloc, "the comparison reduces to %qE", cond);
+   }
+ else if (show_expr_p)
+   inform (cloc, "%qE evaluates to false", bad);
}
   else if (condition && condition != error_mark_node)
{
diff --git a/gcc/testsuite/g++.dg/template/explicit-args6.C 
b/gcc/testsuite/g++.dg/template/explicit-args6.C
index d853564e3be..311a7678923 100644
--- a/gcc/testsuite/g++.dg/template/explicit-args6.C
+++ b/gcc/testsuite/g++.dg/template/explicit-args6.C
@@ -20,6 +20,8 @@ constexpr unsigned
 frob()
 {
   static_assert(N == 1, "user-friendly diagnostic"); // { dg-error 
"user-friendly" }
+  // dg-message { "-1 == 1" "" { target *-*-* } .-1 }
+
   // narrowing check, reject negative values
   return unsigned{N};  // { dg-prune-output "narrowing" }
 } // { dg-prune-output "flows off the end" }

base-commit: c163647ffbc9a20c8feb6e079dbecccfe016c82e
prerequisite-patch-id: 6e305598074684612bebec8a36ecf37de96519f8
-- 
2.27.0



Re: [PATCH v3] Disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread H.J. Lu via Gcc-patches
On Fri, Jan 21, 2022 at 11:47 AM H.J. Lu  wrote:
>
> On Fri, Jan 21, 2022 at 11:23 AM Richard Sandiford via Gcc-patches
>  wrote:
> >
> > soe...@soeren-tempel.net writes:
> > > From: Sören Tempel 
> > >
> > > The -fsplit-stack option requires the pthread_t TCB definition in the
> > > libc to provide certain struct fields at specific hardcoded offsets. As
> > > far as I know, only glibc provides these fields at the required offsets.
> > > Most notably, musl libc does not have these fields. However, since gcc
> > > accesses the fields using a fixed offset, this does not cause a
> > > compile-time error, but instead results in a silent memory corruption at
> > > run-time with musl libc. For example, on s390x libgcc's
> > > __stack_split_initialize CTOR will overwrite the cancel field in the
> > > pthread_t TCB on musl.
> > >
> > > The -fsplit-stack option is used within the gcc code base itself by
> > > gcc-go (if available). On musl-based systems with split-stack support
> > > (i.e. s390x or x86) this causes Go programs compiled with gcc-go to
> > > misbehave at run-time.
> > >
> > > This patch fixes gcc-go on musl by disabling -fsplit-stack in gcc itself
> > > since it is not supported on non-glibc targets anyhow. This is achieved
> > > by checking if gcc targets a glibc-based system. This check has been
> > > added for x86 and s390x, the rs6000 config already checks for
> > > TARGET_GLIBC_MAJOR. Other architectures do not have split-stack
> > > support. With this patch applied, the gcc-go configure script will
> > > detect that -fsplit-stack support is not available and will not use it.
> > >
> > > See https://www.openwall.com/lists/musl/2012/10/16/12
> > >
> > > This patch was written under the assumption that glibc is the only libc
> > > implementation which supports the required fields at the required
> > > offsets in the pthread_t TCB. The patch has been tested on Alpine Linux
> > > Edge on the s390x and x86 architectures by bootstrapping Google's Go
> > > implementation with gcc-go.
> > >
> > > Signed-off-by: Sören Tempel 
> > >
> > > gcc/ChangeLog:
> > >
> > >   * common/config/s390/s390-common.c (s390_supports_split_stack):
> > >   Only support split-stack on glibc targets.
> > >   * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
> > >   * config/i386/gnu.h (defined): Ditto.
> >
> > Thanks, pushed to trunk.
>
> This broke GCC bootstrap on Linux/i686:
>
> https://gcc.gnu.org/pipermail/gcc-regression/2022-January/076271.html
>

I opened:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104170

H.J.


Ping, important: [PATCH] Use system default for long double if not specified on PowerPC.

2022-01-21 Thread Michael Meissner via Gcc-patches
Ping patch.
https://gcc.gnu.org/pipermail/gcc-patches/2022-January/588292.html

| Date: Wed, 12 Jan 2022 13:04:19 -0500
| From: Michael Meissner 
| Subject: [PATCH] Use system default for long double if not specified on 
PowerPC.
| Message-ID: 

I believe this patch will be very important when Linux distributions start
moving to using IEEE 128-bit floating point format.

Note, the patch was made before changing the .c files to .cc.  Here is the
patch after the renaming.

[PATCH] Use system default for long double if not specified on PowerPC.

If the user did not specify a default long double format, default to the long
double format used by the compiler that is building GCC.  This patch will
allow compilers built on a distribution that has changed the 128-bit floating
point format to use the default used on the system.

I did a normal bootstrap and make check regression on a little
endian power9 system and there were no regressions.

In addition, I built a compiler where I configured the default to use IEEE
128-bit floating point for long double.  I then used that compiler to
build a bootstrap with this patch applied and I did not set the floating
point format.  I verified that the compiler built with this patch defaults
long double to be IEEE 128-bit.

Can I apply this patch to the trunk for GCC 12?

gcc/
2022-01-20  Michael Meissner  

* config/rs6000/rs6000.cc (TARGET_IEEEQUAD_DEFAULT): If the
compiler used to build the current compiler defaults to IEEE
128-bit long double,  make that the default for this build.
---
 gcc/config/rs6000/rs6000.cc | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 81a4ede0e8e..0a9d62b3dfd 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -91,14 +91,22 @@
  explicitly redefine TARGET_IEEEQUAD and TARGET_IEEEQUAD_DEFAULT to 0, so
  those systems will not pick up this default.  This needs to be after all
  of the include files, so that POWERPC_LINUX and POWERPC_FREEBSD are
- properly defined.  */
+ properly defined.
+
+ If we are being built by a compiler that uses IEEE 128-bit as the default
+ long double and no explicit long double format was selected, then also
+ default long double to IEEE 128-bit.  */
 #ifndef TARGET_IEEEQUAD_DEFAULT
 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
 #define TARGET_IEEEQUAD_DEFAULT 1
 #else
+#ifdef __LONG_DOUBLE_IEEE128__
+#define TARGET_IEEEQUAD_DEFAULT 1
+#else
 #define TARGET_IEEEQUAD_DEFAULT 0
 #endif
 #endif
+#endif
 
 /* Don't enable PC-relative addressing if the target does not support it.  */
 #ifndef PCREL_SUPPORTED_BY_OS

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Ping: [PATCH] PR 103763, Fix fold-vec-splat-floatdouble on power10.

2022-01-21 Thread Michael Meissner via Gcc-patches
Ping patch
https://gcc.gnu.org/pipermail/gcc-patches/2022-January/587924.html

| Date: Fri, 7 Jan 2022 16:05:53 -0500
| From: Michael Meissner 
| Subject: [PATCH] PR 103763, Fix fold-vec-splat-floatdouble on power10.
| Message-ID: 

-- 
Michael Meissner, IBM
PO Box 98, Ayer, Massachusetts, USA, 01432
email: meiss...@linux.ibm.com


Re: [PATCH v2] Disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread H.J. Lu via Gcc-patches
On Sat, Dec 18, 2021 at 4:20 AM soeren--- via Gcc-patches
 wrote:
>
> From: Sören Tempel 
>
> The -fsplit-stack option requires the pthread_t TCB definition in the
> libc to provide certain struct fields at specific hardcoded offsets. As
> far as I know, only glibc provides these fields at the required offsets.
> Most notably, musl libc does not have these fields. However, since gcc
> accesses the fields using a fixed offset, this does not cause a
> compile-time error, but instead results in a silent memory corruption at
> run-time with musl libc. For example, on s390x libgcc's
> __stack_split_initialize CTOR will overwrite the cancel field in the
> pthread_t TCB on musl.
>
> The -fsplit-stack option is used within the gcc code base itself by
> gcc-go (if available). On musl-based systems with split-stack support
> (i.e. s390x or x86) this causes Go programs compiled with gcc-go to
> misbehave at run-time.
>
> This patch fixes gcc-go on musl by disabling -fsplit-stack in gcc itself
> since it is not supported on non-glibc targets anyhow. This is achieved
> by checking if gcc targets a glibc-based system. This check has been
> added for x86 and s390x, the rs6000 config already checks for
> TARGET_GLIBC_MAJOR. Other architectures do not have split-stack
> support. With this patch applied, the gcc-go configure script will
> detect that -fsplit-stack support is not available and will not use it.
>
> See https://www.openwall.com/lists/musl/2012/10/16/12
>
> This patch was written under the assumption that glibc is the only libc
> implementation which supports the required fields at the required
> offsets in the pthread_t TCB. The patch has been tested on Alpine Linux
> Edge on the s390x and x86 architectures by bootstrapping Google's Go
> implementation with gcc-go.
>
> Signed-off-by: Sören Tempel 
>
> gcc/ChangeLog:
>
> * common/config/s390/s390-common.c (s390_supports_split_stack):
> Only support split-stack on glibc targets.
> * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
> * config/i386/gnu.h (defined): Ditto.
> ---
> This version of the patch addresses feedback by Andrew Pinski and uses
> OPTION_GLIBC as well as opts->x_linux_libc == LIBC_GLIBC to detect glibc
> targets (instead of relying on TARGET_GLIBC_MAJOR).
>
>  gcc/common/config/s390/s390-common.c | 11 +--
>  gcc/config/i386/gnu-user-common.h|  5 +++--
>  gcc/config/i386/gnu.h|  6 +-
>  3 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/common/config/s390/s390-common.c 
> b/gcc/common/config/s390/s390-common.c
> index b6bc8501742..fc86e0bc5e7 100644
> --- a/gcc/common/config/s390/s390-common.c
> +++ b/gcc/common/config/s390/s390-common.c
> @@ -116,13 +116,20 @@ s390_handle_option (struct gcc_options *opts 
> ATTRIBUTE_UNUSED,
>
>  /* -fsplit-stack uses a field in the TCB, available with glibc-2.23.
> We don't verify it, since earlier versions just have padding at
> -   its place, which works just as well.  */
> +   its place, which works just as well. For other libc implementations
> +   we disable the feature entirely to avoid corrupting the TCB.  */
>
>  static bool
>  s390_supports_split_stack (bool report ATTRIBUTE_UNUSED,
>struct gcc_options *opts ATTRIBUTE_UNUSED)
>  {
> -  return true;
> +  if (opts->x_linux_libc == LIBC_GLIBC) {
> +return true;
> +  } else {
> +if (report)
> +  error("%<-fsplit-stack%> currently only supported on GNU/Linux");
> +return false;
> +  }
>  }
>
>  #undef TARGET_DEFAULT_TARGET_FLAGS
> diff --git a/gcc/config/i386/gnu-user-common.h 
> b/gcc/config/i386/gnu-user-common.h
> index 00226f5a455..6e13315b5a3 100644
> --- a/gcc/config/i386/gnu-user-common.h
> +++ b/gcc/config/i386/gnu-user-common.h
> @@ -66,7 +66,8 @@ along with GCC; see the file COPYING3.  If not see
>  #define STACK_CHECK_STATIC_BUILTIN 1
>
>  /* We only build the -fsplit-stack support in libgcc if the
> -   assembler has full support for the CFI directives.  */
> -#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE
> +   assembler has full support for the CFI directives and
> +   targets glibc.  */
> +#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE && OPTION_GLIBC

OPTION_GLIBC can't be used here since OPTION_GLIBC is
evaluated at run-time:

https://gcc.gnu.org/pipermail/gcc-regression/2022-January/076271.html

>  #define TARGET_CAN_SPLIT_STACK
>  #endif
> diff --git a/gcc/config/i386/gnu.h b/gcc/config/i386/gnu.h
> index 25fbc07f58c..adfe817201e 100644
> --- a/gcc/config/i386/gnu.h
> +++ b/gcc/config/i386/gnu.h
> @@ -35,7 +35,11 @@ along with GCC.  If not, see 
> .
> crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
>  #endif
>
> -#ifdef TARGET_LIBC_PROVIDES_SSP
> +/* -fsplit-stack uses a field in the TCB at a fixed offset. This
> +   field is only available for glibc. Disable -fsplit-stack for
> +   other libc implementation to avoid silent TCB corruptions.  */
> 

[PATCH] Mark XXSPLTIW/XXSPLTIDP as prefixed -- PR 104136

2022-01-21 Thread Michael Meissner via Gcc-patches
Mark XXSPLTIW/XXSPLTIDP as prefixed -- PR 104136

If you compile module_advect_em.F90 with -Ofast -mcpu=power10, one module
is large enough that we can't use a single conditional jump to span the
function.  Instead, GCC has to reverse the condition, and do a conditional
jump around an unconditional branch.  It turns out when xxspltiw and
xxspltidp instructions were generated, they were not marked as being
prefixed (i.e. length of 12 bytes instead of 4 bytes).  This meant the
calculations for the branch length were off, which in turn meant the
assembler raised an error because it couldn't do the conditional jump.

The fix is to explicitly set the prefixed attribute when we are loading up
vector constants with the xxspltiw or xxspltidp instructions.

I have removed the code that sets the prefixed attribute for xxspltiw,
xxspltidp, and xxsplti32dx instructions, since it no longer will be invoked.

I have also explicitly set the prefixed attribute for loads of SF and DF mode
constants with xxspltiw and xxspltidp.  Previously, it was not set on these
insns, but when the insn was split to get the XXSPLTIW/XXSPLTIDP forms, those
forms already had the prefixed attribute set.

I have tested this by doing bootstraps and make check on a power8 big endian
system using --with-cpu=power8, power9 little endian system using
--with-cpu=power9, and a power10 little endian system using
--with-cpu=power10.  There were no new errors with this patch.

I have also built a full spec 2017 rate build for power10 using the -Ofast
compilation option, and it now built the entire suite.

Can I install this patch to the trunk?

gcc/
2022-01-21  Michael Meissner  

PR target/104136
* config/rs6000/rs6000-protos.h (prefixed_xxsplti_p): Delete.
* config/rs6000/rs6000.cc (prefixed_xxsplti_p): Delete.
* config/rs6000/rs6000.md (prefixed attribute): Delete section
that sets the prefixed attribute for xxspltiw, xxspltidp, and
xxsplti32dx instructions.
(movsf_hardfloat): Explicitly set the prefixed attribute
when xxspltiw and xxspltidp instructions are generated.
(mov_hardfloat32): Likewise.
(mov_hardfloat64): Likewise.
* config/rs6000/vsx.md (vsx_mov_64bit): Explicitly set the
prefixed attribute for xxspltiw and xxspltidp instructions.
(vsx_mov_32bit): Likewise.
---
 gcc/config/rs6000/rs6000-protos.h |  1 -
 gcc/config/rs6000/rs6000.cc   | 38 ---
 gcc/config/rs6000/rs6000.md   | 24 ---
 gcc/config/rs6000/vsx.md  | 12 +-
 4 files changed, 27 insertions(+), 48 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-protos.h 
b/gcc/config/rs6000/rs6000-protos.h
index e322ac0c199..3ea01023609 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -199,7 +199,6 @@ enum non_prefixed_form reg_to_non_prefixed (rtx reg, 
machine_mode mode);
 extern bool prefixed_load_p (rtx_insn *);
 extern bool prefixed_store_p (rtx_insn *);
 extern bool prefixed_paddi_p (rtx_insn *);
-extern bool prefixed_xxsplti_p (rtx_insn *);
 extern void rs6000_asm_output_opcode (FILE *);
 extern void output_pcrel_opt_reloc (rtx);
 extern void rs6000_final_prescan_insn (rtx_insn *, rtx [], int);
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index b34962da27d..7b8a3b5299a 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -26617,44 +26617,6 @@ prefixed_paddi_p (rtx_insn *insn)
   return (iform == INSN_FORM_PCREL_EXTERNAL || iform == INSN_FORM_PCREL_LOCAL);
 }
 
-/* Whether an instruction is a prefixed XXSPLTI* instruction.  This is called
-   from the prefixed attribute processing.  */
-
-bool
-prefixed_xxsplti_p (rtx_insn *insn)
-{
-  rtx set = single_set (insn);
-  if (!set)
-return false;
-
-  rtx dest = SET_DEST (set);
-  rtx src = SET_SRC (set);
-  machine_mode mode = GET_MODE (dest);
-
-  if (!REG_P (dest) && !SUBREG_P (dest))
-return false;
-
-  if (GET_CODE (src) == UNSPEC)
-{
-  int unspec = XINT (src, 1);
-  return (unspec == UNSPEC_XXSPLTIW
- || unspec == UNSPEC_XXSPLTIDP
- || unspec == UNSPEC_XXSPLTI32DX);
-}
-
-  vec_const_128bit_type vsx_const;
-  if (vec_const_128bit_to_bytes (src, mode, &vsx_const))
-{
-  if (constant_generates_xxspltiw (&vsx_const))
-   return true;
-
-  if (constant_generates_xxspltidp (&vsx_const))
-   return true;
-}
-
-  return false;
-}
-
 /* Whether the next instruction needs a 'p' prefix issued before the
instruction is printed out.  */
 static bool prepend_p_to_next_insn;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 59531b6d07e..4e221189028 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -314,11 +314,6 @@ (define_attr "prefixed" "no,yes"
 
 (eq_attr "type" "integer,add")
 (if_then_else (match_test "prefixed_paddi_p (insn)")
-  

Re: [PATCH v3] Disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread H.J. Lu via Gcc-patches
On Fri, Jan 21, 2022 at 11:23 AM Richard Sandiford via Gcc-patches
 wrote:
>
> soe...@soeren-tempel.net writes:
> > From: Sören Tempel 
> >
> > The -fsplit-stack option requires the pthread_t TCB definition in the
> > libc to provide certain struct fields at specific hardcoded offsets. As
> > far as I know, only glibc provides these fields at the required offsets.
> > Most notably, musl libc does not have these fields. However, since gcc
> > accesses the fields using a fixed offset, this does not cause a
> > compile-time error, but instead results in a silent memory corruption at
> > run-time with musl libc. For example, on s390x libgcc's
> > __stack_split_initialize CTOR will overwrite the cancel field in the
> > pthread_t TCB on musl.
> >
> > The -fsplit-stack option is used within the gcc code base itself by
> > gcc-go (if available). On musl-based systems with split-stack support
> > (i.e. s390x or x86) this causes Go programs compiled with gcc-go to
> > misbehave at run-time.
> >
> > This patch fixes gcc-go on musl by disabling -fsplit-stack in gcc itself
> > since it is not supported on non-glibc targets anyhow. This is achieved
> > by checking if gcc targets a glibc-based system. This check has been
> > added for x86 and s390x, the rs6000 config already checks for
> > TARGET_GLIBC_MAJOR. Other architectures do not have split-stack
> > support. With this patch applied, the gcc-go configure script will
> > detect that -fsplit-stack support is not available and will not use it.
> >
> > See https://www.openwall.com/lists/musl/2012/10/16/12
> >
> > This patch was written under the assumption that glibc is the only libc
> > implementation which supports the required fields at the required
> > offsets in the pthread_t TCB. The patch has been tested on Alpine Linux
> > Edge on the s390x and x86 architectures by bootstrapping Google's Go
> > implementation with gcc-go.
> >
> > Signed-off-by: Sören Tempel 
> >
> > gcc/ChangeLog:
> >
> >   * common/config/s390/s390-common.c (s390_supports_split_stack):
> >   Only support split-stack on glibc targets.
> >   * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
> >   * config/i386/gnu.h (defined): Ditto.
>
> Thanks, pushed to trunk.

This broke GCC bootstrap on Linux/i686:

https://gcc.gnu.org/pipermail/gcc-regression/2022-January/076271.html

> Richard
>
> > ---
> > This version of the patch fixes a few codingstyle violations pointed out
> > to me by Richard Sandiford, it does not include any functional changes
> > compared to previous versions of this patch.
> >
> >  gcc/common/config/s390/s390-common.cc | 14 ++
> >  gcc/config/i386/gnu-user-common.h |  5 +++--
> >  gcc/config/i386/gnu.h |  5 -
> >  3 files changed, 17 insertions(+), 7 deletions(-)
> >
> > diff --git a/gcc/common/config/s390/s390-common.cc 
> > b/gcc/common/config/s390/s390-common.cc
> > index 6ed2f89f3d0..547b0826f93 100644
> > --- a/gcc/common/config/s390/s390-common.cc
> > +++ b/gcc/common/config/s390/s390-common.cc
> > @@ -116,13 +116,19 @@ s390_handle_option (struct gcc_options *opts 
> > ATTRIBUTE_UNUSED,
> >
> >  /* -fsplit-stack uses a field in the TCB, available with glibc-2.23.
> > We don't verify it, since earlier versions just have padding at
> > -   its place, which works just as well.  */
> > +   its place, which works just as well.  For other libc implementations
> > +   we disable the feature entirely to avoid corrupting the TCB.  */
> >
> >  static bool
> > -s390_supports_split_stack (bool report ATTRIBUTE_UNUSED,
> > -struct gcc_options *opts ATTRIBUTE_UNUSED)
> > +s390_supports_split_stack (bool report,
> > +struct gcc_options *opts)
> >  {
> > -  return true;
> > +  if (opts->x_linux_libc == LIBC_GLIBC)
> > +return true;
> > +
> > +  if (report)
> > +error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
> > +  return false;
> >  }
> >
> >  #undef TARGET_DEFAULT_TARGET_FLAGS
> > diff --git a/gcc/config/i386/gnu-user-common.h 
> > b/gcc/config/i386/gnu-user-common.h
> > index 23b54c5be52..7525f788a9c 100644
> > --- a/gcc/config/i386/gnu-user-common.h
> > +++ b/gcc/config/i386/gnu-user-common.h
> > @@ -66,7 +66,8 @@ along with GCC; see the file COPYING3.  If not see
> >  #define STACK_CHECK_STATIC_BUILTIN 1
> >
> >  /* We only build the -fsplit-stack support in libgcc if the
> > -   assembler has full support for the CFI directives.  */
> > -#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE
> > +   assembler has full support for the CFI directives and
> > +   targets glibc.  */
> > +#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE && OPTION_GLIBC
> >  #define TARGET_CAN_SPLIT_STACK
> >  #endif
> > diff --git a/gcc/config/i386/gnu.h b/gcc/config/i386/gnu.h
> > index 401e60c9a02..daa505a5d45 100644
> > --- a/gcc/config/i386/gnu.h
> > +++ b/gcc/config/i386/gnu.h
> > @@ -35,7 +35,10 @@ along with GCC.  If not, see 
> > .
> > crti.

[pushed] c++: class array new checking [PR104084]

2022-01-21 Thread Jason Merrill via Gcc-patches
My patch for PR20040 made us stop exiting early from build_new_1 in
cases of trivial initialization if there's a class operator delete; as a
result, code later in the function needs to handle this case properly.

Tested x86_64-pc-linux-gnu, applying to trunk.

PR c++/104084
PR c++/20040

gcc/cp/ChangeLog:

* init.cc (build_new_1): Only pull out TARGET_EXPR_INITIAL if
alloc_expr is a TARGET_EXPR.

gcc/testsuite/ChangeLog:

* g++.dg/init/new50.C: New test.
---
 gcc/cp/init.cc| 2 +-
 gcc/testsuite/g++.dg/init/new50.C | 9 +
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/init/new50.C

diff --git a/gcc/cp/init.cc b/gcc/cp/init.cc
index 668a84d969e..1f047831b6d 100644
--- a/gcc/cp/init.cc
+++ b/gcc/cp/init.cc
@@ -3786,7 +3786,7 @@ build_new_1 (vec **placement, tree type, 
tree nelts,
   if (cookie_expr)
 rval = build2 (COMPOUND_EXPR, TREE_TYPE (rval), cookie_expr, rval);
 
-  if (rval == data_addr)
+  if (rval == data_addr && TREE_CODE (alloc_expr) == TARGET_EXPR)
 /* If we don't have an initializer or a cookie, strip the TARGET_EXPR
and return the call (which doesn't need to be adjusted).  */
 rval = TARGET_EXPR_INITIAL (alloc_expr);
diff --git a/gcc/testsuite/g++.dg/init/new50.C 
b/gcc/testsuite/g++.dg/init/new50.C
new file mode 100644
index 000..981d23106f3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/init/new50.C
@@ -0,0 +1,9 @@
+// PR c++/104084
+
+int nothrow;
+struct MaxAlignedAllocable {
+  void *operator new[](__SIZE_TYPE__, int);
+  void operator delete[](void *);
+  long Resize_size;
+  void Resize() { new (nothrow) MaxAlignedAllocable[Resize_size]; }
+};

base-commit: c163647ffbc9a20c8feb6e079dbecccfe016c82e
-- 
2.27.0



Re: [PATCH v3] Disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread Richard Sandiford via Gcc-patches
soe...@soeren-tempel.net writes:
> From: Sören Tempel 
>
> The -fsplit-stack option requires the pthread_t TCB definition in the
> libc to provide certain struct fields at specific hardcoded offsets. As
> far as I know, only glibc provides these fields at the required offsets.
> Most notably, musl libc does not have these fields. However, since gcc
> accesses the fields using a fixed offset, this does not cause a
> compile-time error, but instead results in a silent memory corruption at
> run-time with musl libc. For example, on s390x libgcc's
> __stack_split_initialize CTOR will overwrite the cancel field in the
> pthread_t TCB on musl.
>
> The -fsplit-stack option is used within the gcc code base itself by
> gcc-go (if available). On musl-based systems with split-stack support
> (i.e. s390x or x86) this causes Go programs compiled with gcc-go to
> misbehave at run-time.
>
> This patch fixes gcc-go on musl by disabling -fsplit-stack in gcc itself
> since it is not supported on non-glibc targets anyhow. This is achieved
> by checking if gcc targets a glibc-based system. This check has been
> added for x86 and s390x, the rs6000 config already checks for
> TARGET_GLIBC_MAJOR. Other architectures do not have split-stack
> support. With this patch applied, the gcc-go configure script will
> detect that -fsplit-stack support is not available and will not use it.
>
> See https://www.openwall.com/lists/musl/2012/10/16/12
>
> This patch was written under the assumption that glibc is the only libc
> implementation which supports the required fields at the required
> offsets in the pthread_t TCB. The patch has been tested on Alpine Linux
> Edge on the s390x and x86 architectures by bootstrapping Google's Go
> implementation with gcc-go.
>
> Signed-off-by: Sören Tempel 
>
> gcc/ChangeLog:
>
>   * common/config/s390/s390-common.c (s390_supports_split_stack):
>   Only support split-stack on glibc targets.
>   * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
>   * config/i386/gnu.h (defined): Ditto.

Thanks, pushed to trunk.

Richard

> ---
> This version of the patch fixes a few codingstyle violations pointed out
> to me by Richard Sandiford, it does not include any functional changes
> compared to previous versions of this patch.
>
>  gcc/common/config/s390/s390-common.cc | 14 ++
>  gcc/config/i386/gnu-user-common.h |  5 +++--
>  gcc/config/i386/gnu.h |  5 -
>  3 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/gcc/common/config/s390/s390-common.cc 
> b/gcc/common/config/s390/s390-common.cc
> index 6ed2f89f3d0..547b0826f93 100644
> --- a/gcc/common/config/s390/s390-common.cc
> +++ b/gcc/common/config/s390/s390-common.cc
> @@ -116,13 +116,19 @@ s390_handle_option (struct gcc_options *opts 
> ATTRIBUTE_UNUSED,
>  
>  /* -fsplit-stack uses a field in the TCB, available with glibc-2.23.
> We don't verify it, since earlier versions just have padding at
> -   its place, which works just as well.  */
> +   its place, which works just as well.  For other libc implementations
> +   we disable the feature entirely to avoid corrupting the TCB.  */
>  
>  static bool
> -s390_supports_split_stack (bool report ATTRIBUTE_UNUSED,
> -struct gcc_options *opts ATTRIBUTE_UNUSED)
> +s390_supports_split_stack (bool report,
> +struct gcc_options *opts)
>  {
> -  return true;
> +  if (opts->x_linux_libc == LIBC_GLIBC)
> +return true;
> +
> +  if (report)
> +error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
> +  return false;
>  }
>  
>  #undef TARGET_DEFAULT_TARGET_FLAGS
> diff --git a/gcc/config/i386/gnu-user-common.h 
> b/gcc/config/i386/gnu-user-common.h
> index 23b54c5be52..7525f788a9c 100644
> --- a/gcc/config/i386/gnu-user-common.h
> +++ b/gcc/config/i386/gnu-user-common.h
> @@ -66,7 +66,8 @@ along with GCC; see the file COPYING3.  If not see
>  #define STACK_CHECK_STATIC_BUILTIN 1
>  
>  /* We only build the -fsplit-stack support in libgcc if the
> -   assembler has full support for the CFI directives.  */
> -#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE
> +   assembler has full support for the CFI directives and
> +   targets glibc.  */
> +#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE && OPTION_GLIBC
>  #define TARGET_CAN_SPLIT_STACK
>  #endif
> diff --git a/gcc/config/i386/gnu.h b/gcc/config/i386/gnu.h
> index 401e60c9a02..daa505a5d45 100644
> --- a/gcc/config/i386/gnu.h
> +++ b/gcc/config/i386/gnu.h
> @@ -35,7 +35,10 @@ along with GCC.  If not, see 
> .
> crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
>  #endif
>  
> -#ifdef TARGET_LIBC_PROVIDES_SSP
> +/* -fsplit-stack uses a field in the TCB at a fixed offset. This
> +   field is only available for glibc.  Disable -fsplit-stack for
> +   other libc implementations to avoid silent TCB corruptions.  */
> +#if defined (TARGET_LIBC_PROVIDES_SSP) && OPTION_GLIBC
>  
>  /* i386 g

[PATCH v3] Disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread soeren--- via Gcc-patches
From: Sören Tempel 

The -fsplit-stack option requires the pthread_t TCB definition in the
libc to provide certain struct fields at specific hardcoded offsets. As
far as I know, only glibc provides these fields at the required offsets.
Most notably, musl libc does not have these fields. However, since gcc
accesses the fields using a fixed offset, this does not cause a
compile-time error, but instead results in a silent memory corruption at
run-time with musl libc. For example, on s390x libgcc's
__stack_split_initialize CTOR will overwrite the cancel field in the
pthread_t TCB on musl.

The -fsplit-stack option is used within the gcc code base itself by
gcc-go (if available). On musl-based systems with split-stack support
(i.e. s390x or x86) this causes Go programs compiled with gcc-go to
misbehave at run-time.

This patch fixes gcc-go on musl by disabling -fsplit-stack in gcc itself
since it is not supported on non-glibc targets anyhow. This is achieved
by checking if gcc targets a glibc-based system. This check has been
added for x86 and s390x, the rs6000 config already checks for
TARGET_GLIBC_MAJOR. Other architectures do not have split-stack
support. With this patch applied, the gcc-go configure script will
detect that -fsplit-stack support is not available and will not use it.

See https://www.openwall.com/lists/musl/2012/10/16/12

This patch was written under the assumption that glibc is the only libc
implementation which supports the required fields at the required
offsets in the pthread_t TCB. The patch has been tested on Alpine Linux
Edge on the s390x and x86 architectures by bootstrapping Google's Go
implementation with gcc-go.

Signed-off-by: Sören Tempel 

gcc/ChangeLog:

* common/config/s390/s390-common.c (s390_supports_split_stack):
Only support split-stack on glibc targets.
* config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
* config/i386/gnu.h (defined): Ditto.
---
This version of the patch fixes a few coding-style violations pointed out
to me by Richard Sandiford; it does not include any functional changes
compared to previous versions of this patch.

 gcc/common/config/s390/s390-common.cc | 14 ++
 gcc/config/i386/gnu-user-common.h |  5 +++--
 gcc/config/i386/gnu.h |  5 -
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/gcc/common/config/s390/s390-common.cc 
b/gcc/common/config/s390/s390-common.cc
index 6ed2f89f3d0..547b0826f93 100644
--- a/gcc/common/config/s390/s390-common.cc
+++ b/gcc/common/config/s390/s390-common.cc
@@ -116,13 +116,19 @@ s390_handle_option (struct gcc_options *opts 
ATTRIBUTE_UNUSED,
 
 /* -fsplit-stack uses a field in the TCB, available with glibc-2.23.
We don't verify it, since earlier versions just have padding at
-   its place, which works just as well.  */
+   its place, which works just as well.  For other libc implementations
+   we disable the feature entirely to avoid corrupting the TCB.  */
 
 static bool
-s390_supports_split_stack (bool report ATTRIBUTE_UNUSED,
-  struct gcc_options *opts ATTRIBUTE_UNUSED)
+s390_supports_split_stack (bool report,
+  struct gcc_options *opts)
 {
-  return true;
+  if (opts->x_linux_libc == LIBC_GLIBC)
+return true;
+
+  if (report)
+error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
+  return false;
 }
 
 #undef TARGET_DEFAULT_TARGET_FLAGS
diff --git a/gcc/config/i386/gnu-user-common.h 
b/gcc/config/i386/gnu-user-common.h
index 23b54c5be52..7525f788a9c 100644
--- a/gcc/config/i386/gnu-user-common.h
+++ b/gcc/config/i386/gnu-user-common.h
@@ -66,7 +66,8 @@ along with GCC; see the file COPYING3.  If not see
 #define STACK_CHECK_STATIC_BUILTIN 1
 
 /* We only build the -fsplit-stack support in libgcc if the
-   assembler has full support for the CFI directives.  */
-#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE
+   assembler has full support for the CFI directives and
+   targets glibc.  */
+#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE && OPTION_GLIBC
 #define TARGET_CAN_SPLIT_STACK
 #endif
diff --git a/gcc/config/i386/gnu.h b/gcc/config/i386/gnu.h
index 401e60c9a02..daa505a5d45 100644
--- a/gcc/config/i386/gnu.h
+++ b/gcc/config/i386/gnu.h
@@ -35,7 +35,10 @@ along with GCC.  If not, see .
crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
 #endif
 
-#ifdef TARGET_LIBC_PROVIDES_SSP
+/* -fsplit-stack uses a field in the TCB at a fixed offset. This
+   field is only available for glibc.  Disable -fsplit-stack for
+   other libc implementations to avoid silent TCB corruptions.  */
+#if defined (TARGET_LIBC_PROVIDES_SSP) && OPTION_GLIBC
 
 /* i386 glibc provides __stack_chk_guard in %gs:0x14.  */
 #define TARGET_THREAD_SSP_OFFSET0x14


Re: [PATCH] rs6000: Support vector float/double for vec_sldw

2022-01-21 Thread Bill Schmidt via Gcc-patches
Thanks!  Pushed as r12-6806 with the testcase adjusted.

Bill

On 1/21/22 11:47 AM, Segher Boessenkool wrote:
> Hi!
>
> On Fri, Jan 21, 2022 at 11:31:34AM -0600, Bill Schmidt wrote:
>> It was recently discovered that Clang supports a couple of variants of 
>> vec_sldw that
>> GCC does not.  After some discussion, we decided that these variants are 
>> reasonable,
>> and GCC will also support them.  This patch adds that support.
> As we discussed, this is reasonable only because we already allow
> non-integer inputs (and outputs) for all(?) other permute class
> instructions.
>
>> I updated an existing test and discovered it wasn't actually checking for 
>> generation
>> of the xxsldwi instruction, so I added that check as well.
> It can always generate vsldoi instead, which is a strict superset (if
> all registers used are VRs).  They will not likely be here, because
> these are such simple functions, but that is a bit fragile.
>
>>  * gcc.target/powerpc/builtins-4.c: Add two test variants.  Adjust
>>  assembler counts.
> Is there any justification for the new counts?
>
> ... Ah, it didn't count the sld's at all before.  Okay.
>
>> @@ -161,6 +175,6 @@ test_sll_vuill_vuill_vuc (vector unsigned long long int 
>> x,
>>  /* { dg-final { scan-assembler-times "xvnabssp"  1 } } */
>>  /* { dg-final { scan-assembler-times "xvnabsdp"  1 } } */
>>  /* { dg-final { scan-assembler-times "vslo"  4 } } */
>> -/* { dg-final { scan-assembler-times "xxlor" 30 } } */
>> +/* { dg-final { scan-assembler-times "xxlor" 32 } } */
> This will need modification for the phase of the moon.  It also does not
> even test only xxlor insn (also xxlorc insns, for example).
>
>> +/* { dg-final { scan-assembler-times "xxsldwi"   10 } } */
> Okay if you make this
>   \mxxsldwi\M
> or even
>   \m(?:xxsldwi|vsldoi)\M
>
> Thanks!
>
>
> Segher


[pushed] [PR103676] LRA: Calculate and exclude some start hard registers for reload pseudos

2022-01-21 Thread Vladimir Makarov via Gcc-patches

The following patch solves

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103676

The patch was successfully bootstrapped and tested on x86_64, aarch64, 
and ppc64.
commit 85419ac59724b7ce710ebb4acf03dbd747edeea3
Author: Vladimir N. Makarov 
Date:   Fri Jan 21 13:34:32 2022 -0500

[PR103676] LRA: Calculate and exclude some start hard registers for reload pseudos

LRA and the old reload pass use only one register class for reload pseudos even if
operand constraints contain more than one register class.  Let us consider
constraint 'lh' for thumb arm which means low and high thumb registers.
Reload pseudo for such constraint will have general reg class (union of
low and high reg classes).  Assigning the last low register to the reload
pseudo is wrong if the pseudo is of DImode as it requires two hard regs.
But it is considered OK if we use general reg class.  The following patch
solves this problem for LRA.

gcc/ChangeLog:

PR target/103676
* ira.h (struct target_ira): Add member
x_ira_exclude_class_mode_regs.
(ira_exclude_class_mode_regs): New macro.
* lra.h (lra_create_new_reg): Add arg exclude_start_hard_regs and
move from here ...
* lra-int.h: ... to here.
(lra_create_new_reg_with_unique_value): Add arg
exclude_start_hard_regs.
(class lra_reg): Add member exclude_start_hard_regs.
* lra-assigns.cc (find_hard_regno_for_1): Setup
impossible_start_hard_regs from exclude_start_hard_regs.
* lra-constraints.cc (get_reload_reg): Add arg exclude_start_hard_regs and pass
it lra_create_new_reg[_with_unique_value].
(match_reload): Ditto.
(check_and_process_move): Pass NULL
exclude_start_hard_regs to lra_create_new_reg_with_unique_value.
(goal_alt_exclude_start_hard_regs): New static variable.
(process_addr_reg, simplify_operand_subreg): Pass NULL
exclude_start_hard_regs to lra_create_new_reg_with_unique_value
and get_reload_reg.
(process_alt_operands): Setup goal_alt_exclude_start_hard_regs.
Use this_alternative_exclude_start_hard_regs additionally to find
winning operand alternative.
(base_to_reg, base_plus_disp_to_reg, index_part_to_reg): Pass NULL
exclude_start_hard_regs to lra_create_new_reg.
(process_address_1, emit_inc): Ditto.
(curr_insn_transform): Pass exclude_start_hard_regs value to
lra_create_new_reg, get_reload_reg, match_reload.
(inherit_reload_reg, split_reg): Pass NULL exclude_start_hard_regs
to lra_create_new_reg.
(process_invariant_for_inheritance): Ditto.
* lra-remat.cc (update_scratch_ops): Ditto.
* lra.cc (lra_create_new_reg_with_unique_value): Add arg
exclude_start_hard_regs.  Setup the corresponding member of
lra reg info.
(lra_create_new_reg): Add arg exclude_start_hard_regs and pass it
to lra_create_new_reg_with_unique_value.
(initialize_lra_reg_info_element): Initialize member
exclude_start_hard_regs.
(get_scratch_reg): Pass NULL to lra_create_new_reg.
* ira.cc (setup_prohibited_class_mode_regs): Rename to
setup_prohibited_and_exclude_class_mode_regs and calculate
ira_exclude_class_mode_regs.

gcc/testsuite/ChangeLog:

PR target/103676
* g++.target/arm/pr103676.C: New.

diff --git a/gcc/ira.cc b/gcc/ira.cc
index f294f035d74..e3b3c549120 100644
--- a/gcc/ira.cc
+++ b/gcc/ira.cc
@@ -1465,10 +1465,11 @@ setup_reg_class_nregs (void)
 
 
 
-/* Set up IRA_PROHIBITED_CLASS_MODE_REGS and IRA_CLASS_SINGLETON.
-   This function is called once IRA_CLASS_HARD_REGS has been initialized.  */
+/* Set up IRA_PROHIBITED_CLASS_MODE_REGS, IRA_EXCLUDE_CLASS_MODE_REGS, and
+   IRA_CLASS_SINGLETON.  This function is called once IRA_CLASS_HARD_REGS has
+   been initialized.  */
 static void
-setup_prohibited_class_mode_regs (void)
+setup_prohibited_and_exclude_class_mode_regs (void)
 {
   int j, k, hard_regno, cl, last_hard_regno, count;
 
@@ -1480,6 +1481,7 @@ setup_prohibited_class_mode_regs (void)
 	  count = 0;
 	  last_hard_regno = -1;
 	  CLEAR_HARD_REG_SET (ira_prohibited_class_mode_regs[cl][j]);
+	  CLEAR_HARD_REG_SET (ira_exclude_class_mode_regs[cl][j]);
 	  for (k = ira_class_hard_regs_num[cl] - 1; k >= 0; k--)
 	{
 	  hard_regno = ira_class_hard_regs[cl][k];
@@ -1492,6 +1494,10 @@ setup_prohibited_class_mode_regs (void)
 		  last_hard_regno = hard_regno;
 		  count++;
 		}
+	  else
+		{
+		  SET_HARD_REG_BIT (ira_exclude_class_mode_regs[cl][j], hard_regno);
+		}
 	}
 	  ira_class_singleton[cl][j] = (count == 1 ? last_hard_regno : -1);
 	}
@@ -1707,7 +1713,7 @@ ira_init (void)
   setup_alloc

Re: [PATCH v3] c++: ICE with noexcept and canonical types [PR101715]

2022-01-21 Thread Jason Merrill via Gcc-patches

On 1/21/22 12:42, Marek Polacek wrote:

On Fri, Jan 21, 2022 at 09:27:17AM -0500, Jason Merrill wrote:

On 1/20/22 20:03, Marek Polacek wrote:

@@ -2815,12 +2816,23 @@ fixup_deferred_exception_variants (tree type, tree 
raises)
cp_cv_quals var_quals = TYPE_QUALS (variant);
cp_ref_qualifier rqual = type_memfn_rqual (variant);
+   /* If VARIANT would become a dup (cp_check_qualified_type-wise)
+  of an existing variant in the variant list of TYPE after its
+  exception specification has been parsed, elide it.  Otherwise,
+  build_cp_fntype_variant could use it, leading to "canonical
+  types differ for identical types."  */
tree v = TYPE_MAIN_VARIANT (type);
for (; v; v = TYPE_NEXT_VARIANT (v))
  if (TYPE_CANONICAL (v) == v


I think we want to drop the TYPE_CANONICAL check here, and below change

TYPE_CANONICAL (variant) = v;

to

TYPE_CANONICAL (variant) = TYPE_CANONICAL (v);


OK.  I couldn't really find a way to test it; clang++ rejected
my attempts with "error: exception specifications are not allowed in
typedefs" so I'm not sure if I want to add such tests even though we
happen to accept it currently.
  

so that this also works for e.g. signatures involving typedefs.


+ && v != variant


I think we don't need this check since we haven't changed
TYPE_RAISES_EXCEPTIONS yet.


And variant will never be the main variant, because of the

   if (TYPE_RAISES_EXCEPTIONS (variant) == original)

check.  Ok, so the following should be enough:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK, thanks.


-- >8 --
This is a "canonical types differ for identical types" ICE, which started
with r11-4682.  It's a bit tricky to explain.  Consider:

   template <typename T> struct S {
     S<T> bar() noexcept(T::value);  // #1
     S<T> foo() noexcept(T::value);  // #2
   };

   template <typename T> S<T> S<T>::foo() noexcept(T::value) {}  // #3

We ICE because #3 and #2 have the same type, but their canonical types
differ: TYPE_CANONICAL (#3) == #2 but TYPE_CANONICAL (#2) == #1.

The member functions #1 and #2 have the same type.  However, since their
noexcept-specifier is deferred, when parsing them, we create a variant for
both of them, because DEFERRED_PARSE cannot be compared.  In other words,
build_cp_fntype_variant's

   tree v = TYPE_MAIN_VARIANT (type);
   for (; v; v = TYPE_NEXT_VARIANT (v))
 if (cp_check_qualified_type (v, type, type_quals, rqual, raises, late))
   return v;

will *not* find an existing variant when creating a method_type for #2, so we
have to create a new one.

But then we perform delayed parsing and call fixup_deferred_exception_variants
for #1 and #2.  f_d_e_v will replace TYPE_RAISES_EXCEPTIONS with the newly
parsed noexcept-specifier.  It also sets TYPE_CANONICAL (#2) to #1.  Both
noexcepts turned out to be the same, so now we have two equivalent variants in
the list!  I.e.,

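+-----------------+  +-----------------+  +-----------------+
|      main       |  |       #2        |  |       #1        |
| S<T> S<T>::(S*) |->| S<T> S<T>::(S*) |->| S<T> S<T>::(S*) |->NULL
|        -        |  |  noex(T::value) |  |  noex(T::value) |
+-----------------+  +-----------------+  +-----------------+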

Then we get to #3.  As for #1 and #2, grokdeclarator calls build_memfn_type,
which ends up calling build_cp_fntype_variant, which will use the loop
above to look for an existing variant.  The first one that matches
cp_check_qualified_type will be used, so we use #2 rather than #1, and the
TYPE_CANONICAL mismatch follows.  Hopefully that makes sense.

As for the fix, I didn't think I could rewrite the method_type #2 with #1
because the type may have escaped via decltype.  So my approach is to
elide #2 from the list, so when looking for a matching variant, we always
find #1 (#2 remains live though, which admittedly sounds sort of dodgy).

PR c++/101715

gcc/cp/ChangeLog:

* tree.cc (fixup_deferred_exception_variants): Remove duplicate
variants after parsing the exception specifications.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept72.C: New test.
* g++.dg/cpp0x/noexcept73.C: New test.
---
  gcc/cp/tree.cc  | 22 --
  gcc/testsuite/g++.dg/cpp0x/noexcept72.C | 21 +
  gcc/testsuite/g++.dg/cpp0x/noexcept73.C | 13 +
  3 files changed, 50 insertions(+), 6 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept72.C
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept73.C

diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc
index bcd44e73921..f88006aec4f 100644
--- a/gcc/cp/tree.cc
+++ b/gcc/cp/tree.cc
@@ -2804,8 +2804,9 @@ fixup_deferred_exception_variants (tree type, tree raises)
  
/* Though sucky, this walk will process the canonical variants

   first.  */
+  tree prev = NULL_TREE;
for (tree variant = TYPE_MAIN_VARIANT (type)

Strengthen a few OpenACC test cases

2022-01-21 Thread Thomas Schwinge
Hi!

Pushed to master branch commit 087e545747ca9ee977e84326877b0ce1bc4c383a
"Strengthen a few OpenACC test cases", see attached.


Grüße
 Thomas


-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955
>From 087e545747ca9ee977e84326877b0ce1bc4c383a Mon Sep 17 00:00:00 2001
From: Thomas Schwinge 
Date: Fri, 21 Jan 2022 12:48:28 +0100
Subject: [PATCH] Strengthen a few OpenACC test cases

Rather than rubber-stamp whatever requested vs. actual device kernel launch
configuration happens, actually (again) verify the requested values (modulo
expected variations).

This better highlights that "AMD GCN has an upper limit of 'num_workers(16)'",
and the deficiency that "AMD GCN uses the autovectorizer for the vector
dimension: the use of a function call in vector-partitioned code [...] is not
currently supported".

And, this removes several instances of race conditions, where variables are
concurrently written to in OpenACC gang-redundant mode.

	libgomp/
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c: Strengthen.
	* testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-v-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-w-2.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-red-wv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/loop-wv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-gwv-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-v-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-w-1.c: Likewise.
	* testsuite/libgomp.oacc-c-c++-common/routine-wv-1.c: Likewise.
---
 .../libgomp.oacc-c-c++-common/loop-gwv-1.c| 25 ++
 .../libgomp.oacc-c-c++-common/loop-gwv-2.c| 34 +++
 .../loop-red-gwv-1.c  | 22 +---
 .../libgomp.oacc-c-c++-common/loop-red-v-1.c  | 11 --
 .../libgomp.oacc-c-c++-common/loop-red-v-2.c  | 11 --
 .../libgomp.oacc-c-c++-common/loop-red-w-1.c  | 13 +--
 .../libgomp.oacc-c-c++-common/loop-red-w-2.c  | 13 +--
 .../libgomp.oacc-c-c++-common/loop-red-wv-1.c | 19 ---
 .../libgomp.oacc-c-c++-common/loop-v-1.c  | 13 +--
 .../libgomp.oacc-c-c++-common/loop-w-1.c  | 13 +--
 .../libgomp.oacc-c-c++-common/loop-wv-1.c | 19 ---
 .../libgomp.oacc-c-c++-common/routine-gwv-1.c | 21 +---
 .../libgomp.oacc-c-c++-common/routine-v-1.c   | 13 +--
 .../libgomp.oacc-c-c++-common/routine-w-1.c   | 13 +--
 .../libgomp.oacc-c-c++-common/routine-wv-1.c  | 19 ---
 15 files changed, 202 insertions(+), 57 deletions(-)

diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
index e5ed2ab7006..d3f6ea24e7e 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-1.c
@@ -19,9 +19,12 @@ int main ()
 
   for (ix = 0; ix < N;ix++)
 ary[ix] = -1;
-  
-#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) \
-	copy(ary) copy(ondev) copyout(gangsize, workersize, vectorsize)
+
+#define NG 32
+#define NW 32
+#define VL 32
+#pragma acc parallel num_gangs(NG) num_workers(NW) vector_length(VL) \
+	copy(ary) copy(ondev)
   /* { dg-note {variable 'ix' declared in block isn't candidate for adjusting OpenACC privatization level: not addressable} "" { target *-*-* } .-2 } */
   {
 #pragma acc loop gang worker vector
@@ -45,11 +48,19 @@ int main ()
 	else
 	  ary[ix] = ix;
   }
-
-gangsize = __builtin_goacc_parlevel_size (GOMP_DIM_GANG);
-workersize = __builtin_goacc_parlevel_size (GOMP_DIM_WORKER);
-vectorsize = __builtin_goacc_parlevel_size (GOMP_DIM_VECTOR);
   }
+  gangsize = NG;
+  workersize = NW;
+  vectorsize = VL;
+#ifdef ACC_DEVICE_TYPE_radeon
+  /* AMD GCN has an upper limit of 'num_workers(16)'.  */
+  if (workersize > 16)
+workersize = 16;
+  /* AMD GCN uses the autovectorizer for the vector dimension: the use
+ of a function call in vector-partitioned code in this test is not
+ currently supported.  */
+  vectorsize = 1;
+#endif
 
   for (ix = 0; ix < N; ix++)
 {
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
index e73ed6064eb..4b761f0f624 100644
--- a/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/loop-gwv-2.c
@@ -46,1

Re: [PATCH] rs6000: Support vector float/double for vec_sldw

2022-01-21 Thread Segher Boessenkool
Hi!

On Fri, Jan 21, 2022 at 11:31:34AM -0600, Bill Schmidt wrote:
> It was recently discovered that Clang supports a couple of variants of 
> vec_sldw that
> GCC does not.  After some discussion, we decided that these variants are 
> reasonable,
> and GCC will also support them.  This patch adds that support.

As we discussed, this is reasonable only because we already allow
non-integer inputs (and outputs) for all(?) other permute class
instructions.

> I updated an existing test and discovered it wasn't actually checking for 
> generation
> of the xxsldwi instruction, so I added that check as well.

It can always generate vsldoi instead, which is a strict superset (if
all registers used are VRs).  They will not likely be here, because
these are such simple functions, but that is a bit fragile.

>   * gcc.target/powerpc/builtins-4.c: Add two test variants.  Adjust
>   assembler counts.

Is there any justification for the new counts?

... Ah, it didn't count the sld's at all before.  Okay.

> @@ -161,6 +175,6 @@ test_sll_vuill_vuill_vuc (vector unsigned long long int x,
>  /* { dg-final { scan-assembler-times "xvnabssp"  1 } } */
>  /* { dg-final { scan-assembler-times "xvnabsdp"  1 } } */
>  /* { dg-final { scan-assembler-times "vslo"  4 } } */
> -/* { dg-final { scan-assembler-times "xxlor" 30 } } */
> +/* { dg-final { scan-assembler-times "xxlor" 32 } } */

This will need modification for the phase of the moon.  It also does not
even test only xxlor insn (also xxlorc insns, for example).

> +/* { dg-final { scan-assembler-times "xxsldwi"   10 } } */

Okay if you make this
  \mxxsldwi\M
or even
  \m(?:xxsldwi|vsldoi)\M

Thanks!


Segher


Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-01-21 Thread Tobias Burnus

On 21.01.22 18:15, Thomas Schwinge wrote:

 source-gcc/libgomp/testsuite/libgomp.fortran/allocate-1.f90:11:47:

11 | integer(c_int) function is_64bit_aligned (a) bind(C)
   |   1
 Warning: Variable ‘a’ at (1) is a dummy argument of the BIND(C) procedure 
‘is_64bit_aligned’ but may not be C interoperable [-Wc-binding-type]

Is that something to worry about?


I think it is not very elegant – but should be okay.

On the Fortran side:

integer(c_int) function is_64bit_aligned (a) bind(C)
  import :: c_int
  integer  :: a
end

that matches  'int is_64bit_aligned (int *a);'
While 'integer' in principle may not be 'int',
the call by reference makes this independent of the
actually used integer kind.

HOWEVER: That interface is not used! While it
defines that interface in 'module m', there is
no 'use m' in 'subroutine foo'.

(or alternatively: 'foo' being after 'contains' inside
the 'module m' - and then 'use m' in the main program)



That means that 'is_64bit_aligned(...)' gets implicitly
typed as 'integer' with unknown arguments, which get
passed by value. By gfortran convention, that function
has a trailing underscore.

That matches the C side, which has such an underscore:

int
is_64bit_aligned_ (uintptr_t a)
{
  return ( (a & 0x3f) == 0);
}

With pass by reference, a pointer is passed, which
should be handled by 'uintptr_t'.

 * * *

Side remark: I really recommend 'implicit none'
when writing Fortran code - which disables implicit
typing. I personally have started to use
  implicit none (type, external)
which also rejects 'call something()' unless
'something' has been explicitly declared, e.g. by
an interface block.

 * * *

Side remark: A Fortran-only variant has been used in
libgomp/testsuite/libgomp.fortran/alloc-11.f90:

if (mod (TRANSFER (p, iptr), 64) /= 0)

As an optimization, also 'iand(..., z'3f') == 0' would work ;-)

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


[PATCH v3] c++: ICE with noexcept and canonical types [PR101715]

2022-01-21 Thread Marek Polacek via Gcc-patches
On Fri, Jan 21, 2022 at 09:27:17AM -0500, Jason Merrill wrote:
> On 1/20/22 20:03, Marek Polacek wrote:
> > @@ -2815,12 +2816,23 @@ fixup_deferred_exception_variants (tree type, tree 
> > raises)
> > cp_cv_quals var_quals = TYPE_QUALS (variant);
> > cp_ref_qualifier rqual = type_memfn_rqual (variant);
> > +   /* If VARIANT would become a dup (cp_check_qualified_type-wise)
> > +  of an existing variant in the variant list of TYPE after its
> > +  exception specification has been parsed, elide it.  Otherwise,
> > +  build_cp_fntype_variant could use it, leading to "canonical
> > +  types differ for identical types."  */
> > tree v = TYPE_MAIN_VARIANT (type);
> > for (; v; v = TYPE_NEXT_VARIANT (v))
> >   if (TYPE_CANONICAL (v) == v
> 
> I think we want to drop the TYPE_CANONICAL check here, and below change
> 
> TYPE_CANONICAL (variant) = v;
> 
> to
> 
> TYPE_CANONICAL (variant) = TYPE_CANONICAL (v);

OK.  I couldn't really find a way to test it; clang++ rejected
my attempts with "error: exception specifications are not allowed in
typedefs" so I'm not sure if I want to add such tests even though we
happen to accept it currently.
 
> so that this also works for e.g. signatures involving typedefs.
> 
> > + && v != variant
> 
> I think we don't need this check since we haven't changed
> TYPE_RAISES_EXCEPTIONS yet.

And variant will never be the main variant, because of the

  if (TYPE_RAISES_EXCEPTIONS (variant) == original)

check.  Ok, so the following should be enough:

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
This is a "canonical types differ for identical types" ICE, which started
with r11-4682.  It's a bit tricky to explain.  Consider:

  template <typename T> struct S {
    S<T> bar() noexcept(T::value);  // #1
    S<T> foo() noexcept(T::value);  // #2
  };

  template <typename T> S<T> S<T>::foo() noexcept(T::value) {}  // #3

We ICE because #3 and #2 have the same type, but their canonical types
differ: TYPE_CANONICAL (#3) == #2 but TYPE_CANONICAL (#2) == #1.

The member functions #1 and #2 have the same type.  However, since their
noexcept-specifier is deferred, when parsing them, we create a variant for
both of them, because DEFERRED_PARSE cannot be compared.  In other words,
build_cp_fntype_variant's

  tree v = TYPE_MAIN_VARIANT (type);
  for (; v; v = TYPE_NEXT_VARIANT (v))
if (cp_check_qualified_type (v, type, type_quals, rqual, raises, late))
  return v;

will *not* find an existing variant when creating a method_type for #2, so we
have to create a new one.

But then we perform delayed parsing and call fixup_deferred_exception_variants
for #1 and #2.  f_d_e_v will replace TYPE_RAISES_EXCEPTIONS with the newly
parsed noexcept-specifier.  It also sets TYPE_CANONICAL (#2) to #1.  Both
noexcepts turned out to be the same, so now we have two equivalent variants in
the list!  I.e.,

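+-----------------+  +-----------------+  +-----------------+
|      main       |  |       #2        |  |       #1        |
| S<T> S<T>::(S*) |->| S<T> S<T>::(S*) |->| S<T> S<T>::(S*) |->NULL
|        -        |  |  noex(T::value) |  |  noex(T::value) |
+-----------------+  +-----------------+  +-----------------+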

Then we get to #3.  As for #1 and #2, grokdeclarator calls build_memfn_type,
which ends up calling build_cp_fntype_variant, which will use the loop
above to look for an existing variant.  The first one that matches
cp_check_qualified_type will be used, so we use #2 rather than #1, and the
TYPE_CANONICAL mismatch follows.  Hopefully that makes sense.

As for the fix, I didn't think I could rewrite the method_type #2 with #1
because the type may have escaped via decltype.  So my approach is to
elide #2 from the list, so when looking for a matching variant, we always
find #1 (#2 remains live though, which admittedly sounds sort of dodgy).

PR c++/101715

gcc/cp/ChangeLog:

* tree.cc (fixup_deferred_exception_variants): Remove duplicate
variants after parsing the exception specifications.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept72.C: New test.
* g++.dg/cpp0x/noexcept73.C: New test.
---
 gcc/cp/tree.cc  | 22 --
 gcc/testsuite/g++.dg/cpp0x/noexcept72.C | 21 +
 gcc/testsuite/g++.dg/cpp0x/noexcept73.C | 13 +
 3 files changed, 50 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept72.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept73.C

diff --git a/gcc/cp/tree.cc b/gcc/cp/tree.cc
index bcd44e73921..f88006aec4f 100644
--- a/gcc/cp/tree.cc
+++ b/gcc/cp/tree.cc
@@ -2804,8 +2804,9 @@ fixup_deferred_exception_variants (tree type, tree raises)
 
   /* Though sucky, this walk will process the canonical variants
  first.  */
+  tree prev = NULL_TREE;
   for (tree variant = TYPE_MAIN_VARIANT (type);
-   variant; variant = TYPE_NEXT_VARIANT (va

[PATCH] rs6000: Support vector float/double for vec_sldw

2022-01-21 Thread Bill Schmidt via Gcc-patches
Hi,

It was recently discovered that Clang supports a couple of variants of vec_sldw 
that
GCC does not.  After some discussion, we decided that these variants are 
reasonable,
and GCC will also support them.  This patch adds that support.

I updated an existing test and discovered it wasn't actually checking for 
generation
of the xxsldwi instruction, so I added that check as well.

Bootstrapped and tested on powerpc64le-linux-gnu with no regressions.  Is this 
okay
for trunk?

Thanks!
Bill


2022-01-21  Bill Schmidt  

gcc/
* config/rs6000/rs6000-overload.def (VEC_SLDW): Add instances for
vector float and vector double.

gcc/testsuite/
* gcc.target/powerpc/builtins-4.c: Add two test variants.  Adjust
assembler counts.
---
 gcc/config/rs6000/rs6000-overload.def |  4 +++
 gcc/testsuite/gcc.target/powerpc/builtins-4.c | 34 +--
 2 files changed, 28 insertions(+), 10 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-overload.def 
b/gcc/config/rs6000/rs6000-overload.def
index dea6f5d4258..cdc703e9764 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -3405,6 +3405,10 @@
 XXSLDWI_2DI  XXSLDWI_VSLL
   vull __builtin_vec_sldw (vull, vull, const int);
 XXSLDWI_2DI  XXSLDWI_VULL
+  vf __builtin_vec_sldw (vf, vf, const int);
+XXSLDWI_4SF  XXSLDWI_VF
+  vd __builtin_vec_sldw (vd, vd, const int);
+XXSLDWI_2DF  XXSLDWI_VD
 
 [VEC_SLL, vec_sll, __builtin_vec_sll]
   vsc __builtin_vec_sll (vsc, vuc);
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-4.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-4.c
index 4e3b543f242..df012e9b7d6 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-4.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-4.c
@@ -119,6 +119,18 @@ test_vul_sldw_vul_vul (vector unsigned long long x,
return vec_sldw (x, y, 3);
 }
 
+vector float
+test_vf_sldw_vf_vf (vector float x, vector float y)
+{
+  return vec_sldw (x, y, 3);
+}
+
+vector double
+test_vd_sldw_vd_vd (vector double x, vector double y)
+{
+  return vec_sldw (x, y, 1);
+}
+
 vector signed int long long
 test_sll_vsill_vsill_vuc (vector signed long long int x,
  vector unsigned char y)
@@ -146,14 +158,16 @@ test_sll_vuill_vuill_vuc (vector unsigned long long int x,
  test_slo_vsll_slo_vsll_vuc1 vslo
  test_slo_vull_slo_vull_vsc1 vslo
  test_slo_vull_slo_vull_vuc1 vslo
- test_vsc_sldw_vsc_vsc 1 xxlor
- test_vuc_sldw_vuc_vuc 1 xxlor
- test_vssi_sldw_vssi_vssi  1 xxlor
- test_vusi_sldw_vusi_vusi  1 xxlor
- test_vsi_sldw_vsi_vsi 1 xxlor
- test_vui_sldw_vui_vui 1 xxlor
- test_vsl_sldw_vsl_vsl 1 xxlor
- test_vul_sldw_vul_vul 1 xxlor
+ test_vsc_sldw_vsc_vsc 1 xxlor, 1 xxsldwi
+ test_vuc_sldw_vuc_vuc 1 xxlor, 1 xxsldwi
+ test_vssi_sldw_vssi_vssi  1 xxlor, 1 xxsldwi
+ test_vusi_sldw_vusi_vusi  1 xxlor, 1 xxsldwi
+ test_vsi_sldw_vsi_vsi 1 xxlor, 1 xxsldwi
+ test_vui_sldw_vui_vui 1 xxlor, 1 xxsldwi
+ test_vsl_sldw_vsl_vsl 1 xxlor, 1 xxsldwi
+ test_vul_sldw_vul_vul 1 xxlor, 1 xxsldwi
+ test_vf_sldw_vf_vf1 xxlor, 1 xxsldwi
+ test_vd_sldw_vd_vd1 xxlor, 1 xxsldwi
  test_sll_vsill_vsill_vuc  1 vsl
  test_sll_vuill_vuill_vuc  1 vsl  */
 
@@ -161,6 +175,6 @@ test_sll_vuill_vuill_vuc (vector unsigned long long int x,
 /* { dg-final { scan-assembler-times "xvnabssp"  1 } } */
 /* { dg-final { scan-assembler-times "xvnabsdp"  1 } } */
 /* { dg-final { scan-assembler-times "vslo"  4 } } */
-/* { dg-final { scan-assembler-times "xxlor" 30 } } */
+/* { dg-final { scan-assembler-times "xxlor" 32 } } */
 /* { dg-final { scan-assembler-times {\mvsl\M}   5 } } */
-
+/* { dg-final { scan-assembler-times "xxsldwi"   10 } } */
-- 
2.27.0




Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2022-01-21 Thread Thomas Schwinge
Hi Abid!

On 2022-01-11T22:31:54+, Hafiz Abid Qadeer  wrote:
> From d1fb55bff497a20e6feefa50bd03890e7a903c0e Mon Sep 17 00:00:00 2001
> From: Hafiz Abid Qadeer 
> Date: Fri, 24 Sep 2021 10:04:12 +0100
> Subject: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).
>
> This patch adds support for OpenMP 5.0 allocate clause for fortran. It does 
> not
> yet support the allocator-modifier as specified in OpenMP 5.1. The allocate
> clause is already supported in C/C++.

> libgomp/ChangeLog:
>
>   * testsuite/libgomp.fortran/allocate-1.c: New test.
>   * testsuite/libgomp.fortran/allocate-1.f90: New test.

I'm seeing this test case randomly/non-deterministically FAIL to execute,
differently on different systems and runs, for example:

libgomp:
libgomp:
libgomp: Out of memory allocating 4 bytesOut of memory allocating 4 bytes
libgomp:
libgomp:
libgomp: Out of memory allocating 168 bytes

libgomp: Out of memory allocating 4 bytes

libgomp: Out of memory allocating 4 bytes

libgomp: Out of memory allocating 4 bytes

I'd assume there's some concurrency issue: the problem disappears if I
manually specify a lowerish 'OMP_NUM_THREADS', and conversely, on a
system where I don't normally see the FAILs, I can trigger them with a
largish 'OMP_NUM_THREADS', such as 'OMP_NUM_THREADS=18' and higher.

For example:

Thread 10 "a.out" hit Breakpoint 1, omp_aligned_alloc (alignment=4, size=4, 
allocator=6326576) at [...]/source-gcc/libgomp/allocator.c:318
318   if (allocator_data)
(gdb) print *allocator_data
$1 = {memspace = omp_default_mem_space, alignment = 64, pool_size = 8192, 
used_pool_size = 8188, fb_data = omp_null_allocator, sync_hint = 3, access = 7, 
fallback = 12, pinned = 0, partition = 15}

Given the high 'used_pool_size', is that to be expected, and the test
case shouldn't be requesting "so much" memory?  Or might the problem
actually be in 'libgomp/allocator.c' (not touched by your commit)?

All but Thread 10 are in 'gomp_team_barrier_wait_end' -- should memory
have been released at that point?

(gdb) thread apply 10 bt

Thread 10 (Thread 0x732e2700 (LWP 1601318)):
#0  omp_aligned_alloc (alignment=4, size=4, allocator=6326576) at 
[...]/source-gcc/libgomp/allocator.c:320
#1  0x7790b4db in GOMP_alloc (alignment=4, size=4, 
allocator=6326576) at [...]/source-gcc/libgomp/allocator.c:364
#2  0x00401f3f in foo_._omp_fn.3 () at 
source-gcc/libgomp/testsuite/libgomp.fortran/allocate-1.f90:136
#3  0x778f31e6 in gomp_thread_start (xdata=) at 
[...]/source-gcc/libgomp/team.c:129
#4  0x7789e609 in start_thread (arg=) at 
pthread_create.c:477
#5  0x777c5293 in clone () at 
../sysdeps/unix/sysv/linux/x86_64/clone.S:95
(gdb) thread apply 1 bt

Thread 1 (Thread 0x772ec1c0 (LWP 1601309)):
#0  futex_wait (val=96, addr=) at 
[...]/source-gcc/libgomp/config/linux/x86/futex.h:97
#1  do_wait (val=96, addr=) at 
[...]/source-gcc/libgomp/config/linux/wait.h:67
#2  gomp_team_barrier_wait_end (bar=, state=96) at 
[...]/source-gcc/libgomp/config/linux/bar.c:112
#3  0x00401f53 in foo_._omp_fn.3 () at 
source-gcc/libgomp/testsuite/libgomp.fortran/allocate-1.f90:136
#4  0x778ea4f2 in GOMP_parallel (fn=0x401e6b , 
data=0x7fffd450, num_threads=18, flags=0) at 
[...]/source-gcc/libgomp/parallel.c:178
#5  0x004012ab in foo (x=42, p=..., q=..., px=2, h=6326576, fl=0) 
at source-gcc/libgomp/testsuite/libgomp.fortran/allocate-1.f90:122
#6  0x004018e9 in MAIN__ () at 
source-gcc/libgomp/testsuite/libgomp.fortran/allocate-1.f90:326

Manually compiling the test case, I see a lot of '-Wtabs' diagnostics
(can be ignored, I suppose), but also:

source-gcc/libgomp/testsuite/libgomp.fortran/allocate-1.f90:11:47:

   11 | integer(c_int) function is_64bit_aligned (a) bind(C)
  |   1
Warning: Variable ‘a’ at (1) is a dummy argument of the BIND(C) procedure 
‘is_64bit_aligned’ but may not be C interoperable [-Wc-binding-type]

Is that something to worry about?

And:

source-gcc/libgomp/testsuite/libgomp.fortran/allocate-1.f90:31:19:

   31 |   integer  :: n, n1, n2, n3, n4
  |   1
Warning: Unused variable ‘n1’ declared at (1) [-Wunused-variable]
source-gcc/libgomp/testsuite/libgomp.fortran/allocate-1.f90:18:27:

   18 | subroutine foo (x, p, q, px, h, fl)
  |   1
Warning: Unused dummy argument ‘px’ at (1) [-Wunused-dummy-argument]

For reference, quoting below the new Fortran test case.


Grüße
 Thomas


> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.fortran/allocate-1.c
> @@ -0,0 +1,7 @@
> +#include <stdint.h>
> +
> +int
> +is_64bit_aligned_ (uintptr_t a)
> +{
> +  return ( (a & 0x3f) == 0);
> +}

> --- /dev/null
> +++ b/libgomp/testsuite/libgomp.fortran/allocate-1.f90
> @@ -0,0 +1,333 @@
> +!

[pushed] c++: [[no_unique_address]] and virtual base [PR104139]

2022-01-21 Thread Jason Merrill via Gcc-patches
Fixing a thinko in my patch for 103681: when computing the size of a virtual
base, it would help to use its binfo instead of the one for the derived
class.

Tested x86_64-pc-linux-gnu, applying to trunk.

PR c++/104139
PR c++/103681

gcc/cp/ChangeLog:

* class.cc (end_of_class): Use base_binfo.

gcc/testsuite/ChangeLog:

* g++.dg/abi/no_unique_address2.C: Adjust to detect this on x86-64.
---
 gcc/cp/class.cc   |  4 +--
 gcc/testsuite/g++.dg/abi/no_unique_address2.C | 27 +++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc
index e5cc6f10e3b..5db3722ae46 100644
--- a/gcc/cp/class.cc
+++ b/gcc/cp/class.cc
@@ -6414,8 +6414,8 @@ end_of_class (tree t, eoc_mode mode)
   {
if (mode == eoc_nv_or_dsize)
  /* For dsize, don't count trailing empty bases.  */
- offset = size_binop (PLUS_EXPR, BINFO_OFFSET (binfo),
-  CLASSTYPE_SIZE_UNIT (BINFO_TYPE (binfo)));
+ offset = size_binop (PLUS_EXPR, BINFO_OFFSET (base_binfo),
+  CLASSTYPE_SIZE_UNIT (BINFO_TYPE (base_binfo)));
else
  offset = end_of_base (base_binfo);
if (tree_int_cst_lt (result, offset))
diff --git a/gcc/testsuite/g++.dg/abi/no_unique_address2.C 
b/gcc/testsuite/g++.dg/abi/no_unique_address2.C
index bef6d5b43be..3bb3f76ac92 100644
--- a/gcc/testsuite/g++.dg/abi/no_unique_address2.C
+++ b/gcc/testsuite/g++.dg/abi/no_unique_address2.C
@@ -41,3 +41,30 @@ struct B4
 #define SA(X) static_assert ((X), #X)
 SA (sizeof (B2) == sizeof (B1));
 SA (sizeof (B3) == sizeof (B4));
+
+namespace N2
+{
+  // C as big as _vptr to test PR c++/104139
+  struct C
+  {
+long c;
+  };
+
+  struct D: virtual C
+  {
+virtual void f();
+  };
+
+  struct B3: D
+  {
+char c2;
+  };
+
+  struct B4
+  {
+D d [[no_unique_address]];
+char c2;
+  };
+
+  SA (sizeof (B3) == sizeof (B4));
+}

base-commit: 45cae5b6392496028f35c5948f7fae0af81d135b
-- 
2.27.0



Re: [PATCH] libstdc++: detect mold linker.

2022-01-21 Thread Jonathan Wakely via Gcc-patches
On Fri, 21 Jan 2022 at 16:11, Martin Liška  wrote:
>
> Hi.
>
> This adds linker detection for mold in libstdc++-v3.
>
> Ready to be installed?

Yes, OK (but please CC the libstdc++ list, not just me).



> Thanks,
> Martin
>
> libstdc++-v3/ChangeLog:
>
> * acinclude.m4: Detect features for ld.mold linker.
> * configure: Regenerate.
> ---
>   libstdc++-v3/acinclude.m4 |  8 +-
>   libstdc++-v3/configure| 52 +++
>   2 files changed, 49 insertions(+), 11 deletions(-)
>
> diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
> index d996477254c..1a7d7a96050 100644
> --- a/libstdc++-v3/acinclude.m4
> +++ b/libstdc++-v3/acinclude.m4
> @@ -172,6 +172,7 @@ dnl  LD (as a side effect of testing)
>   dnl Sets:
>   dnl  with_gnu_ld
>   dnl  glibcxx_ld_is_gold (set to "no" or "yes")
> +dnl  glibcxx_ld_is_mold (set to "no" or "yes")
>   dnl  glibcxx_gnu_ld_version (possibly)
>   dnl
>   dnl The last will be a single integer, e.g., version 1.23.45.0.67.89 will
> @@ -204,11 +205,14 @@ AC_DEFUN([GLIBCXX_CHECK_LINKER_FEATURES], [
> # Start by getting the version number.  I think the libtool test already
> # does some of this, but throws away the result.
> glibcxx_ld_is_gold=no
> +  glibcxx_ld_is_mold=no
> if test x"$with_gnu_ld" = x"yes"; then
>   AC_MSG_CHECKING([for ld version])
>   changequote(,)
>   if $LD --version 2>/dev/null | grep 'GNU gold' >/dev/null 2>&1; then
> glibcxx_ld_is_gold=yes
> +elif $LD --version 2>/dev/null | grep 'mold' >/dev/null 2>&1; then
> +  glibcxx_ld_is_mold=yes
>   fi
>   ldver=`$LD --version 2>/dev/null |
>sed -e 's/[. ][0-9]\{8\}$//;s/.* \([^ ]\{1,\}\)$/\1/; q'`
> @@ -220,7 +224,7 @@ AC_DEFUN([GLIBCXX_CHECK_LINKER_FEATURES], [
>
> # Set --gc-sections.
> glibcxx_have_gc_sections=no
> -  if test "$glibcxx_ld_is_gold" = "yes"; then
> +  if test "$glibcxx_ld_is_gold" = "yes" || test "$glibcxx_ld_is_mold" = 
> "yes" ; then
>   if $LD --help 2>/dev/null | grep gc-sections >/dev/null 2>&1; then
> glibcxx_have_gc_sections=yes
>   fi
> @@ -3796,6 +3800,8 @@ changequote([,])dnl
>   enable_symvers=no
> elif test $glibcxx_ld_is_gold = yes ; then
>   : All versions of gold support symbol versioning.
> +  elif test $glibcxx_ld_is_mold = yes ; then
> +: All versions of mold support symbol versioning.
> elif test $glibcxx_gnu_ld_version -lt $glibcxx_min_gnu_ld_version ; then
>   # The right tools, the right setup, but too old.  Fallbacks?
>   AC_MSG_WARN(=== Linker version $glibcxx_gnu_ld_version is too old for)
> diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
> index 4c20c669144..0b551b864ce 100755
> --- a/libstdc++-v3/configure
> +++ b/libstdc++-v3/configure
> @@ -22157,12 +22157,15 @@ with_gnu_ld=$lt_cv_prog_gnu_ld
> # Start by getting the version number.  I think the libtool test already
> # does some of this, but throws away the result.
> glibcxx_ld_is_gold=no
> +  glibcxx_ld_is_mold=no
> if test x"$with_gnu_ld" = x"yes"; then
>   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld version" >&5
>   $as_echo_n "checking for ld version... " >&6; }
>
>   if $LD --version 2>/dev/null | grep 'GNU gold' >/dev/null 2>&1; then
> glibcxx_ld_is_gold=yes
> +elif $LD --version 2>/dev/null | grep 'mold' >/dev/null 2>&1; then
> +  glibcxx_ld_is_mold=yes
>   fi
>   ldver=`$LD --version 2>/dev/null |
>sed -e 's/[. ][0-9]\{8\}$//;s/.* \([^ ]\{1,\}\)$/\1/; q'`
> @@ -22175,7 +22178,7 @@ $as_echo "$glibcxx_gnu_ld_version" >&6; }
>
> # Set --gc-sections.
> glibcxx_have_gc_sections=no
> -  if test "$glibcxx_ld_is_gold" = "yes"; then
> +  if test "$glibcxx_ld_is_gold" = "yes" || test "$glibcxx_ld_is_mold" = 
> "yes" ; then
>   if $LD --help 2>/dev/null | grep gc-sections >/dev/null 2>&1; then
> glibcxx_have_gc_sections=yes
>   fi
> @@ -29371,12 +29374,15 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
> # Start by getting the version number.  I think the libtool test already
> # does some of this, but throws away the result.
> glibcxx_ld_is_gold=no
> +  glibcxx_ld_is_mold=no
> if test x"$with_gnu_ld" = x"yes"; then
>   { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld version" >&5
>   $as_echo_n "checking for ld version... " >&6; }
>
>   if $LD --version 2>/dev/null | grep 'GNU gold' >/dev/null 2>&1; then
> glibcxx_ld_is_gold=yes
> +elif $LD --version 2>/dev/null | grep 'mold' >/dev/null 2>&1; then
> +  glibcxx_ld_is_mold=yes
>   fi
>   ldver=`$LD --version 2>/dev/null |
>sed -e 's/[. ][0-9]\{8\}$//;s/.* \([^ ]\{1,\}\)$/\1/; q'`
> @@ -29389,7 +29395,7 @@ $as_echo "$glibcxx_gnu_ld_version" >&6; }
>
> # Set --gc-sections.
> glibcxx_have_gc_sections=no
> -  if test "$glibcxx_ld_is_gold" = "yes"; then
> +  if test "$glibcxx_ld_is_gold" = "yes" || test "$glibcxx_ld_is_mold" = 

[PATCH] PR101260 regcprop: Add mode change check for copy reg

2022-01-21 Thread Andreas Krebbel via Gcc-patches
When propagating a multi-word register into an access with a smaller
mode the can_change_mode backend hook is already consulted for the
original register.  This however is also required for the intermediate
copy in copy_regno which might use a different register class.

Bootstrapped on x86_64 and s390x. No testsuite regressions.

Ok for mainline?

gcc/ChangeLog:

PR rtl-optimization/101260
* regcprop.cc (maybe_mode_change): Invoke mode_change_ok also for
copy_regno.
---
 gcc/regcprop.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/regcprop.cc b/gcc/regcprop.cc
index 1a9bcf0a1ad..8e966f2b5ac 100644
--- a/gcc/regcprop.cc
+++ b/gcc/regcprop.cc
@@ -426,7 +426,8 @@ maybe_mode_change (machine_mode orig_mode, machine_mode 
copy_mode,
 
   if (orig_mode == new_mode)
 return gen_raw_REG (new_mode, regno);
-  else if (mode_change_ok (orig_mode, new_mode, regno))
+  else if (mode_change_ok (orig_mode, new_mode, regno)
+  && mode_change_ok (copy_mode, new_mode, copy_regno))
 {
   int copy_nregs = hard_regno_nregs (copy_regno, copy_mode);
   int use_nregs = hard_regno_nregs (copy_regno, new_mode);
-- 
2.34.1



Re: [PATCH] libgccjit: Add support for sized integer types, including 128-bit integers [PR95325]

2022-01-21 Thread Antoni Boucher via Gcc-patches
David: this is the email I was talking about in my other email.
Here's the updated patch.

By the way, I find the usage of NUM_GCC_JIT_TYPES brittle. Would it be
better to switch to a new enum value for that instead?

See comments below.

Le jeudi 20 mai 2021 à 15:25 -0400, David Malcolm a écrit :
> On Tue, 2021-05-18 at 14:53 +0200, Jakub Jelinek via Jit wrote:
> > On Tue, May 18, 2021 at 08:23:56AM -0400, Antoni Boucher via Gcc-
> > patches wrote:
> > > Hello.
> > > This patch adds support for sized integer types.
> > > Maybe it should check whether the size of a byte for the current
> > > platform is 8 bits and do other checks so that they're only
> > > available
> > > when it makes sense.
> > > What do you think?
> > 
> > Not a review, just a comment.  The 128-bit integral types are
> > available
> > only on some targets, the test e.g. the C/C++ FE do for those is
> > targetm.scalar_mode_supported_p (TImode)
> > and so even libgccjit shouldn't provide those types
> > unconditionally.
> > Similarly for the tests (though it could be guarded with e.g
> > #ifdef __SIZEOF_INT128__
> > in that case).
> > Also, while currently all in tree targets have BITS_PER_UNIT 8 and
> > therefore QImode is 8-bit, HImode 16-bit, SImode 32-bit and DImode
> > 64-
> > bit,
> > in the past and maybe in the future there can be targets that could
> > have
> > e.g. 16-bit or 32-bit QImode and then there wouldn't be any
> > uint8_t/int8_t
> > and int16_t would be intQImode_type_node etc.
> >   uint16_type_node = make_or_reuse_type (16, 1);
> >   uint32_type_node = make_or_reuse_type (32, 1);
> >   uint64_type_node = make_or_reuse_type (64, 1);
> >   if (targetm.scalar_mode_supported_p (TImode))
> >     uint128_type_node = make_or_reuse_type (128, 1);
> > are always with the given precisions, perhaps jit should use
> > signed_type_for (uint16_type_node) etc.?
> 
> I seem to have mislaid Antoni's original email (sorry), so I'll reply
> to Jakub's.
> 
> > 2021-05-18  Antoni Boucher  
> > 
> >     gcc/jit/
> >     PR target/95325
> >     * jit-playback.c: Add support for the sized integer
> > types.
> >     * jit-recording.c: Add support for the sized integer
> > types.
> >     * libgccjit.h (GCC_JIT_TYPE_UINT8_T,
> > GCC_JIT_TYPE_UINT16_T,
> >     GCC_JIT_TYPE_UINT32_T, GCC_JIT_TYPE_UINT64_T,
> >     GCC_JIT_TYPE_UINT128_T, GCC_JIT_TYPE_INT8_T,
> > GCC_JIT_TYPE_INT16_T,
> >     GCC_JIT_TYPE_INT32_T, GCC_JIT_TYPE_INT64_T,
> > GCC_JIT_TYPE_INT128_T):
> >     New enum variants for gcc_jit_types.
> >     gcc/testsuite/
> >     PR target/95325
> >     * jit.dg/test-types.c: Add tests for sized integer
> > types.
> 
> First a high-level question, why not use (or extend)
> gcc_jit_context_get_int_type instead?

If I remember correctly, I believe I had some issues with this
function, like having it return sometimes long long, and other times
long for the same size. Maybe that was an issue with a global variable
not cleaned up.

> 
> Do we really need to extend enum gcc_jit_types?  Is this a quality-
> of-
> life thing for users of the library?
> 
> That said, recording::context::get_int_type is currently a bit of a
> hack, and could probably be improved by using the new enum
> values
> the patch adds.
> 
> IIRC, libgccjit.c does type-checking by comparing recording::type
> pointer values; does this patch give us multiple equivalent types
> that
> ought to compare as equal?
> 
> If a user gets a type via GCC_JIT_TYPE_INT and gets "another" type
> via
> GCC_JIT_TYPE_INT32_T and they happen to be the same on the current
> target, should libgccjit complain if you use "int" when you meant
> "int32_t", or accept it?

I updated the function compatible_types to make them compare as equal.
I believe that it's not used everywhere though, so a cast will be
necessary in some cases.

> 
> Various comments inline below...
> 
> > diff --git a/gcc/jit/jit-playback.c b/gcc/jit/jit-playback.c
> > index c6136301243..40630aa1ab8 100644
> > --- a/gcc/jit/jit-playback.c
> > +++ b/gcc/jit/jit-playback.c
> > @@ -193,6 +193,27 @@ get_tree_node_for_type (enum gcc_jit_types
> > type_)
> >  case GCC_JIT_TYPE_UNSIGNED_INT:
> >    return unsigned_type_node;
> >  
> > +    case GCC_JIT_TYPE_UINT8_T:
> > +  return unsigned_intQI_type_node;
> > +    case GCC_JIT_TYPE_UINT16_T:
> > +  return uint16_type_node;
> > +    case GCC_JIT_TYPE_UINT32_T:
> > +  return uint32_type_node;
> > +    case GCC_JIT_TYPE_UINT64_T:
> > +  return uint64_type_node;
> > +    case GCC_JIT_TYPE_UINT128_T:
> > +  return uint128_type_node;
> > +    case GCC_JIT_TYPE_INT8_T:
> > +  return intQI_type_node;
> > +    case GCC_JIT_TYPE_INT16_T:
> > +  return intHI_type_node;
> > +    case GCC_JIT_TYPE_INT32_T:
> > +  return intSI_type_node;
> > +    case GCC_JIT_TYPE_INT64_T:
> > +  return intDI_type_node;
> > +    case GCC_JIT_TYPE_INT128_T:
> > +  return intTI_type_node;

[PATCH] libstdc++: detect mold linker.

2022-01-21 Thread Martin Liška

Hi.

This adds linker detection for mold in libstdc++-v3.

Ready to be installed?
Thanks,
Martin

libstdc++-v3/ChangeLog:

* acinclude.m4: Detect features for ld.mold linker.
* configure: Regenerate.
---
 libstdc++-v3/acinclude.m4 |  8 +-
 libstdc++-v3/configure| 52 +++
 2 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index d996477254c..1a7d7a96050 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -172,6 +172,7 @@ dnl  LD (as a side effect of testing)
 dnl Sets:
 dnl  with_gnu_ld
 dnl  glibcxx_ld_is_gold (set to "no" or "yes")
+dnl  glibcxx_ld_is_mold (set to "no" or "yes")
 dnl  glibcxx_gnu_ld_version (possibly)
 dnl
 dnl The last will be a single integer, e.g., version 1.23.45.0.67.89 will
@@ -204,11 +205,14 @@ AC_DEFUN([GLIBCXX_CHECK_LINKER_FEATURES], [
   # Start by getting the version number.  I think the libtool test already
   # does some of this, but throws away the result.
   glibcxx_ld_is_gold=no
+  glibcxx_ld_is_mold=no
   if test x"$with_gnu_ld" = x"yes"; then
 AC_MSG_CHECKING([for ld version])
 changequote(,)
 if $LD --version 2>/dev/null | grep 'GNU gold' >/dev/null 2>&1; then
   glibcxx_ld_is_gold=yes
+elif $LD --version 2>/dev/null | grep 'mold' >/dev/null 2>&1; then
+  glibcxx_ld_is_mold=yes
 fi
 ldver=`$LD --version 2>/dev/null |
   sed -e 's/[. ][0-9]\{8\}$//;s/.* \([^ ]\{1,\}\)$/\1/; q'`
@@ -220,7 +224,7 @@ AC_DEFUN([GLIBCXX_CHECK_LINKER_FEATURES], [
 
   # Set --gc-sections.

   glibcxx_have_gc_sections=no
-  if test "$glibcxx_ld_is_gold" = "yes"; then
+  if test "$glibcxx_ld_is_gold" = "yes" || test "$glibcxx_ld_is_mold" = "yes" 
; then
 if $LD --help 2>/dev/null | grep gc-sections >/dev/null 2>&1; then
   glibcxx_have_gc_sections=yes
 fi
@@ -3796,6 +3800,8 @@ changequote([,])dnl
 enable_symvers=no
   elif test $glibcxx_ld_is_gold = yes ; then
 : All versions of gold support symbol versioning.
+  elif test $glibcxx_ld_is_mold = yes ; then
+: All versions of mold support symbol versioning.
   elif test $glibcxx_gnu_ld_version -lt $glibcxx_min_gnu_ld_version ; then
 # The right tools, the right setup, but too old.  Fallbacks?
 AC_MSG_WARN(=== Linker version $glibcxx_gnu_ld_version is too old for)
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 4c20c669144..0b551b864ce 100755
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -22157,12 +22157,15 @@ with_gnu_ld=$lt_cv_prog_gnu_ld
   # Start by getting the version number.  I think the libtool test already
   # does some of this, but throws away the result.
   glibcxx_ld_is_gold=no
+  glibcxx_ld_is_mold=no
   if test x"$with_gnu_ld" = x"yes"; then
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld version" >&5
 $as_echo_n "checking for ld version... " >&6; }
 
 if $LD --version 2>/dev/null | grep 'GNU gold' >/dev/null 2>&1; then

   glibcxx_ld_is_gold=yes
+elif $LD --version 2>/dev/null | grep 'mold' >/dev/null 2>&1; then
+  glibcxx_ld_is_mold=yes
 fi
 ldver=`$LD --version 2>/dev/null |
   sed -e 's/[. ][0-9]\{8\}$//;s/.* \([^ ]\{1,\}\)$/\1/; q'`
@@ -22175,7 +22178,7 @@ $as_echo "$glibcxx_gnu_ld_version" >&6; }
 
   # Set --gc-sections.

   glibcxx_have_gc_sections=no
-  if test "$glibcxx_ld_is_gold" = "yes"; then
+  if test "$glibcxx_ld_is_gold" = "yes" || test "$glibcxx_ld_is_mold" = "yes" 
; then
 if $LD --help 2>/dev/null | grep gc-sections >/dev/null 2>&1; then
   glibcxx_have_gc_sections=yes
 fi
@@ -29371,12 +29374,15 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
   # Start by getting the version number.  I think the libtool test already
   # does some of this, but throws away the result.
   glibcxx_ld_is_gold=no
+  glibcxx_ld_is_mold=no
   if test x"$with_gnu_ld" = x"yes"; then
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for ld version" >&5
 $as_echo_n "checking for ld version... " >&6; }
 
 if $LD --version 2>/dev/null | grep 'GNU gold' >/dev/null 2>&1; then

   glibcxx_ld_is_gold=yes
+elif $LD --version 2>/dev/null | grep 'mold' >/dev/null 2>&1; then
+  glibcxx_ld_is_mold=yes
 fi
 ldver=`$LD --version 2>/dev/null |
   sed -e 's/[. ][0-9]\{8\}$//;s/.* \([^ ]\{1,\}\)$/\1/; q'`
@@ -29389,7 +29395,7 @@ $as_echo "$glibcxx_gnu_ld_version" >&6; }
 
   # Set --gc-sections.

   glibcxx_have_gc_sections=no
-  if test "$glibcxx_ld_is_gold" = "yes"; then
+  if test "$glibcxx_ld_is_gold" = "yes" || test "$glibcxx_ld_is_mold" = "yes" 
; then
 if $LD --help 2>/dev/null | grep gc-sections >/dev/null 2>&1; then
   glibcxx_have_gc_sections=yes
 fi
@@ -35303,12 +35309,15 @@ done
   # Start by getting the version number.  I think the libtool test already
   # does some of this, but throws away the result.
   glibcxx_ld_is_gold=no
+  glibcxx_ld_is_mold=no
   if test x"$with_gnu_ld" = x"yes"; then

[committed] libstdc++: Fix typo in comment

2022-01-21 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


libstdc++-v3/ChangeLog:

* testsuite/20_util/shared_ptr/cons/array.cc: Fix comment.
---
 libstdc++-v3/testsuite/20_util/shared_ptr/cons/array.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/testsuite/20_util/shared_ptr/cons/array.cc 
b/libstdc++-v3/testsuite/20_util/shared_ptr/cons/array.cc
index 7dc0c619333..d69d90bd71d 100644
--- a/libstdc++-v3/testsuite/20_util/shared_ptr/cons/array.cc
+++ b/libstdc++-v3/testsuite/20_util/shared_ptr/cons/array.cc
@@ -143,7 +143,7 @@ test08()
   VERIFY( count == 0 );
 }
 
-// Converting constrcutor
+// Converting constructor
 
 void
 test09()
-- 
2.31.1



[committed] libstdc++: Ensure all feature test macros have type long [PR87193]

2022-01-21 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


This defines all the __cpp_lib_xxx macros as type long, as required by
the standard. We had an inconsistent mix of int and long, sometimes even
for the same macro name.

The __cpp_lib_experimental_xxx macros are left as type int, because
that's what it says in the relevant TS specs.

libstdc++-v3/ChangeLog:

PR libstdc++/87193
PR libstdc++/104019
* include/bits/alloc_traits.h 
(__cpp_lib_allocator_traits_is_always_equal):
Define as type long.
* include/bits/allocator.h (__cpp_lib_incomplete_container_elements):
Likewise.
* include/bits/basic_string.h (__cpp_lib_string_udls): Likewise.
* include/bits/chrono.h (__cpp_lib_chrono): Likewise.
(__cpp_lib_chrono_udls): Likewise.
* include/bits/move.h (__cpp_lib_addressof_constexpr): Likewise.
* include/bits/node_handle.h (__cpp_lib_node_extract): Likewise.
* include/bits/range_access.h (__cpp_lib_nonmember_container_access):
Likewise.
* include/bits/shared_ptr.h (__cpp_lib_enable_shared_from_this):
Likewise.
* include/bits/stl_algo.h (__cpp_lib_clamp): Likewise.
(__cpp_lib_sample): Likewise.
* include/bits/stl_algobase.h (__cpp_lib_robust_nonmodifying_seq_ops):
Likewise.
* include/bits/stl_function.h (__cpp_lib_transparent_operators):
Likewise.
* include/bits/stl_iterator.h (__cpp_lib_make_reverse_iterator):
Likewise.
* include/bits/stl_map.h (__cpp_lib_map_try_emplace):
Likewise.
* include/bits/stl_tree.h (__cpp_lib_generic_associative_lookup):
Likewise.
* include/bits/unique_ptr.h (__cpp_lib_make_unique):
Likewise.
* include/bits/unordered_map.h (__cpp_lib_unordered_map_try_emplace):
Likewise.
* include/c_global/cmath (__cpp_lib_hypot): Likewise.
* include/c_global/cstddef (__cpp_lib_byte): Likewise.
* include/std/atomic (__cpp_lib_atomic_is_always_lock_free):
Likewise.
* include/std/complex (__cpp_lib_complex_udls): Likewise.
* include/std/filesystem (__cpp_lib_filesystem): Likewise.
* include/std/functional (__cpp_lib_not_fn): Likewise.
(__cpp_lib_boyer_moore_searcher): Likewise.
* include/std/iomanip (__cpp_lib_quoted_string_io): Likewise.
* include/std/mutex (__cpp_lib_scoped_lock): Likewise.
* include/std/numeric (__cpp_lib_gcd_lcm): Likewise.
(__cpp_lib_gcd, __cpp_lib_lcm): Likewise.
* include/std/tuple (__cpp_lib_apply): Likewise.
(__cpp_lib_make_from_tuple): Likewise.
* include/std/type_traits (__cpp_lib_integral_constant_callable)
(__cpp_lib_bool_constant, __cpp_lib_logical_traits)
(__cpp_lib_is_null_pointer, __cpp_lib_transformation_trait_aliases)
(__cpp_lib_result_of_sfinae, __cpp_lib_void_t)
(__cpp_lib_is_swappable, __cpp_lib_is_invocable)
(__cpp_lib_has_unique_object_representations)
(__cpp_lib_is_aggregate): Likewise.
* include/std/version: Likewise.
* libsupc++/new (__cpp_lib_launder): Likewise.
---
 libstdc++-v3/include/bits/alloc_traits.h  |   2 +-
 libstdc++-v3/include/bits/allocator.h |   2 +-
 libstdc++-v3/include/bits/basic_string.h  |   2 +-
 libstdc++-v3/include/bits/chrono.h|   4 +-
 libstdc++-v3/include/bits/move.h  |   2 +-
 libstdc++-v3/include/bits/node_handle.h   |   2 +-
 libstdc++-v3/include/bits/range_access.h  |   2 +-
 libstdc++-v3/include/bits/shared_ptr.h|   2 +-
 libstdc++-v3/include/bits/stl_algo.h  |   4 +-
 libstdc++-v3/include/bits/stl_algobase.h  |   2 +-
 libstdc++-v3/include/bits/stl_function.h  |   2 +-
 libstdc++-v3/include/bits/stl_iterator.h  |   2 +-
 libstdc++-v3/include/bits/stl_map.h   |   2 +-
 libstdc++-v3/include/bits/stl_tree.h  |   2 +-
 libstdc++-v3/include/bits/unique_ptr.h|   2 +-
 libstdc++-v3/include/bits/unordered_map.h |   2 +-
 libstdc++-v3/include/c_global/cmath   |   2 +-
 libstdc++-v3/include/c_global/cstddef |   2 +-
 libstdc++-v3/include/std/atomic   |   2 +-
 libstdc++-v3/include/std/complex  |   2 +-
 libstdc++-v3/include/std/filesystem   |   2 +-
 libstdc++-v3/include/std/functional   |   4 +-
 libstdc++-v3/include/std/iomanip  |   2 +-
 libstdc++-v3/include/std/mutex|   2 +-
 libstdc++-v3/include/std/numeric  |   6 +-
 libstdc++-v3/include/std/tuple|   4 +-
 libstdc++-v3/include/std/type_traits  |  22 ++---
 libstdc++-v3/include/std/version  | 100 +++---
 libstdc++-v3/libsupc++/new|   2 +-
 29 files changed, 94 insertions(+), 94 deletions(-)

diff --git a/libstdc++-v3/include/bits/alloc_traits.h 
b/libstdc++-v3/include/bits/alloc_traits.h
index fa93feb7198..a4d06d3fc7a 100644
--- a/libstdc++-v3/include/bits/alloc_traits.h
+++ b/libstdc++-v3/include/bits/alloc_traits.h

[committed] libstdc++: Fix condition for __cpp_lib_shared_ptr_arrays

2022-01-21 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


I changed the preprocessor condition from <= to < in r12-6574 which
meant the macro was not defined by <version> for C++17.

libstdc++-v3/ChangeLog:

* include/std/version (__cpp_lib_shared_ptr_arrays): Fix
condition for C++17 definition.
---
 libstdc++-v3/include/std/version | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/std/version b/libstdc++-v3/include/std/version
index e3afe57c782..64759c9bf24 100644
--- a/libstdc++-v3/include/std/version
+++ b/libstdc++-v3/include/std/version
@@ -51,7 +51,7 @@
 
 #if _GLIBCXX_HOSTED
 # define __cpp_lib_allocator_traits_is_always_equal 201411
-#if __cplusplus < 201703L // N.B. updated value in C++20
+#if __cplusplus <= 201703L // N.B. updated value in C++20
 # define __cpp_lib_shared_ptr_arrays 201611L
 #endif
 #endif
-- 
2.31.1



Re: [PATCH] PR middle-end/104140: bootstrap ICE on riscv.

2022-01-21 Thread Richard Biener via Gcc-patches



> Am 21.01.2022 um 15:59 schrieb Roger Sayle :
> 
> 
> 
> This patch resolves the P1 "ice-on-valid-code" regression bootstrapping
> 
> GCC on riscv-unknown-linux-gnu caused by my recent MULT_HIGHPART_EXPR
> 
> functionality.  RISC-V differs from x86_64 and many targets by
> 
> supporting a usmulsidi3 instruction, basically a widening multiply
> 
> where one operand is signed and the other is unsigned.  Alas the
> 
> final version of my patch to recognize MULT_HIGHPART_EXPR didn't
> 
> sufficiently defend against the operands of WIDEN_MULT_EXPR having
> 
> different signedness.  This is fixed by the two-line change to
> 
> tree-ssa-math-opts.c's convert_mult_to_highpart in the patch below.
> 
> 
> 
> The majority of the rest of the patch is to the documentation
> 
> (in tree.def and generic.texi).  It turns out that WIDEN_MULT_EXPR
> 
> wasn't previously documented in generic.texi, let alone the slightly
> 
> unusual semantics of allowing mismatched (signed vs unsigned) operands.
> 
> This also clarifies that MULT_HIGHPART_EXPR currently requires the
> 
> signedness of operands to match [but this might change in future
> 
> release of GCC to support targets with usmul3_highpart].
> 
> 
> 
> The one final chunk of this patch (that is hopefully sufficiently
> 
> close to obvious for stage 4) is a similar (NULL pointer) sanity
> 
> check in riscv_cpu_cpp_builtins.  Currently running cc1 from the
> 
> command line (or from gdb) without specifying -march results in a
> 
> segmentation fault (ICE).  This is a minor annoyance tracking down
> 
> issues (in cross compilers) for riscv, and trivially fixed as below.
> 
> 
> 
> 
> 
> This patch has been tested both on x86_64-pc-linux-gnu with a full
> 
> make bootstrap and make -k check, and on a cross-compiler to
> 
> riscv-unknown-linux-gnu where I was able to confirm the new test
> 
> case now passes.  Ok for mainline?

Ok.
Thanks,
Richard 

> 
> 
> 
> 
> 2022-01-22  Roger Sayle  
> 
> 
> 
> gcc/ChangeLog
> 
> * tree-ssa-math-opts.c (convert_mult_to_highpart): Check that the
> 
> operands of the widening multiplication are either both signed or
> 
> both unsigned, and abort the conversion if mismatched.
> 
> * doc/generic.texi (WIDEN_MULT_EXPR): Describe expression node.
> 
> (MULT_HIGHPART_EXPR): Clarify that operands must have the same
> 
> signedness.
> 
> * tree.def (MULT_HIGHPART_EXPR): Document both operands must have
> 
> integer types with the same precision and signedness.
> 
> (WIDEN_MULT_EXPR): Document that operands must have integer types
> 
> with the same precision, but possibly differing signedness.
> 
> * config/riscv/riscv-c.c (riscv_cpu_cpp_builtins): Defend against
> 
> riscv_current_subset_list returning a NULL pointer (empty list).
> 
> 
> 
> gcc/testsuite/ChangeLog
> 
> * gcc.target/riscv/pr104140.c: New test case.
> 
> 
> 
> 
> 
> Thanks in advance (and sorry for the inconvenience).
> 
> Roger
> 
> --
> 
> 
> 
> 
> 
> 


[PATCH] PR middle-end/104140: bootstrap ICE on riscv.

2022-01-21 Thread Roger Sayle
 

This patch resolves the P1 "ice-on-valid-code" regression bootstrapping

GCC on riscv-unknown-linux-gnu caused by my recent MULT_HIGHPART_EXPR

functionality.  RISC-V differs from x86_64 and many targets by

supporting a usmulsidi3 instruction, basically a widening multiply

where one operand is signed and the other is unsigned.  Alas the

final version of my patch to recognize MULT_HIGHPART_EXPR didn't

sufficiently defend against the operands of WIDEN_MULT_EXPR having

different signedness.  This is fixed by the two-line change to

tree-ssa-math-opts.c's convert_mult_to_highpart in the patch below.

 

The majority of the rest of the patch is to the documentation

(in tree.def and generic.texi).  It turns out that WIDEN_MULT_EXPR

wasn't previously documented in generic.texi, let alone the slightly

unusual semantics of allowing mismatched (signed vs unsigned) operands.

This also clarifies that MULT_HIGHPART_EXPR currently requires the

signedness of operands to match [but this might change in future

release of GCC to support targets with usmul3_highpart].

 

The one final chunk of this patch (that is hopefully sufficiently

close to obvious for stage 4) is a similar (NULL pointer) sanity

check in riscv_cpu_cpp_builtins.  Currently running cc1 from the

command line (or from gdb) without specifying -march results in a

segmentation fault (ICE).  This is a minor annoyance tracking down

issues (in cross compilers) for riscv, and trivially fixed as below.

 

 

This patch has been tested both on x86_64-pc-linux-gnu with a full

make bootstrap and make -k check, and on a cross-compiler to

riscv-unknown-linux-gnu where I was able to confirm the new test

case now passes.  Ok for mainline?

 

 

2022-01-22  Roger Sayle  

 

gcc/ChangeLog

* tree-ssa-math-opts.c (convert_mult_to_highpart): Check that the

operands of the widening multiplication are either both signed or

both unsigned, and abort the conversion if mismatched.

* doc/generic.texi (WIDEN_MULT_EXPR): Describe expression node.

(MULT_HIGHPART_EXPR): Clarify that operands must have the same

signedness.

* tree.def (MULT_HIGHPART_EXPR): Document both operands must have

integer types with the same precision and signedness.

(WIDEN_MULT_EXPR): Document that operands must have integer types

with the same precision, but possibly differing signedness.

* config/riscv/riscv-c.c (riscv_cpu_cpp_builtins): Defend against

riscv_current_subset_list returning a NULL pointer (empty list).

 

gcc/testsuite/ChangeLog

* gcc.target/riscv/pr104140.c: New test case.

 

 

Thanks in advance (and sorry for the inconvenience).

Roger

--

 

 

diff --git a/gcc/config/riscv/riscv-c.c b/gcc/config/riscv/riscv-c.c
index 211472f..73c62f4 100644
--- a/gcc/config/riscv/riscv-c.c
+++ b/gcc/config/riscv/riscv-c.c
@@ -108,6 +108,9 @@ riscv_cpu_cpp_builtins (cpp_reader *pfile)
   builtin_define_with_int_value ("__riscv_arch_test", 1);
 
   const riscv_subset_list *subset_list = riscv_current_subset_list ();
+  if (!subset_list)
+return;
+
   size_t max_ext_len = 0;
 
   /* Figure out the max length of extension name for reserving buffer.   */
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index bb07775..bf27a0e 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -1318,6 +1318,7 @@ The type of the node specifies the alignment of the 
access.
 @tindex PLUS_EXPR
 @tindex MINUS_EXPR
 @tindex MULT_EXPR
+@tindex WIDEN_MULT_EXPR
 @tindex MULT_HIGHPART_EXPR
 @tindex RDIV_EXPR
 @tindex TRUNC_DIV_EXPR
@@ -1532,10 +1533,18 @@ one operand is of floating type and the other is of 
integral type.
 The behavior of these operations on signed arithmetic overflow is
 controlled by the @code{flag_wrapv} and @code{flag_trapv} variables.
 
+@item WIDEN_MULT_EXPR
+This node represents a widening multiplication.  The operands have
+integral types with same @var{b} bits of precision, producing an
+integral type result with at least @math{2@var{b}} bits of precision.
+The behaviour is equivalent to extending both operands, possibly of
+different signedness, to the result type, then multiplying them.
+
 @item MULT_HIGHPART_EXPR
 This node represents the ``high-part'' of a widening multiplication.
 For an integral type with @var{b} bits of precision, the result is
 the most significant @var{b} bits of the full @math{2@var{b}} product.
+Both operands must have the same precision and same signedness.
 
 @item RDIV_EXPR
 This node represents a floating point division operation.
diff --git a/gcc/testsuite/gcc.target/riscv/pr104140.c 
b/gcc/testsuite/gcc.target/riscv/pr104140.c
new file mode 100644
index 000..648e131
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/pr104140.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv32im -mabi=ilp32" } */
+int x;
+unsigned u, v;
+void f (void)
+{
+  long long y = x;
+  u = y * v >> 32;
+}
+void g (void) { f (); }
+
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 1b6a57b..28ed116 10

[PATCH][pushed] Enable configure detection of ld.mold.

2022-01-21 Thread Martin Liška

Hello.

The patch detects the ld.mold linker and uses it for
comdat_group=yes and gcc_cv_ld_hidden=yes.

Tested locally, I'm going to install it.

Martin

gcc/ChangeLog:

* configure.ac: Detect ld_is_mold and use it for
comdat_group=yes and gcc_cv_ld_hidden=yes.
* configure: Regenerate.
---
 gcc/configure| 18 ++
 gcc/configure.ac | 16 
 2 files changed, 34 insertions(+)

diff --git a/gcc/configure b/gcc/configure
index 3b228c3d9dc..bd4d4721868 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -23236,6 +23236,20 @@ fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_is_gold" >&5
 $as_echo "$ld_is_gold" >&6; }
 
+# Check to see if we are using mold instead of ld

+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using mold" 
>&5
+$as_echo_n "checking whether we are using mold... " >&6; }
+ld_is_mold=no
+if test x$gcc_cv_ld != x; then
+  if $gcc_cv_ld --version 2>/dev/null | sed 1q \
+ | grep "mold" > /dev/null; then
+ld_is_mold=yes
+  fi
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ld_is_mold" >&5
+$as_echo "$ld_is_mold" >&6; }
+
+
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking gold linker with split stack support as 
non default" >&5
 $as_echo_n "checking gold linker with split stack support as non default... " 
>&6; }
 # Check to see if default ld is not gold, but gold is
@@ -24118,6 +24132,8 @@ else
   gcc_cv_ld_hidden=yes
   if test x"$ld_is_gold" = xyes; then
 :
+  elif test x"$ld_is_mold" = xyes; then
+:
   elif echo "$ld_ver" | grep GNU > /dev/null; then
 if test 0"$ld_date" -lt 20020404; then
   if test -n "$ld_date"; then
@@ -25197,6 +25213,8 @@ $as_echo "$gcc_cv_as_comdat_group_group" >&6; }
 fi
 if test x"$ld_is_gold" = xyes; then
   comdat_group=yes
+elif test x"$ld_is_mold" = xyes; then
+  comdat_group=yes
 elif test $in_tree_ld = yes ; then
   comdat_group=no
   if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 16 -o 
"$gcc_cv_gld_major_version" -gt 2 \
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 472d1c8c341..1171c946e6e 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -2671,6 +2671,18 @@ if test x$gcc_cv_ld != x; then
 fi
 AC_MSG_RESULT($ld_is_gold)
 
+# Check to see if we are using mold instead of ld

+AC_MSG_CHECKING(whether we are using mold)
+ld_is_mold=no
+if test x$gcc_cv_ld != x; then
+  if $gcc_cv_ld --version 2>/dev/null | sed 1q \
+ | grep "mold" > /dev/null; then
+ld_is_mold=yes
+  fi
+fi
+AC_MSG_RESULT($ld_is_mold)
+
+
 AC_MSG_CHECKING(gold linker with split stack support as non default)
 # Check to see if default ld is not gold, but gold is
 # available and has support for split stack.  If gcc was configured
@@ -3069,6 +3081,8 @@ else
   gcc_cv_ld_hidden=yes
   if test x"$ld_is_gold" = xyes; then
 :
+  elif test x"$ld_is_mold" = xyes; then
+:
   elif echo "$ld_ver" | grep GNU > /dev/null; then
 if test 0"$ld_date" -lt 20020404; then
   if test -n "$ld_date"; then
@@ -3538,6 +3552,8 @@ else
 fi
 if test x"$ld_is_gold" = xyes; then
   comdat_group=yes
+elif test x"$ld_is_mold" = xyes; then
+  comdat_group=yes
 elif test $in_tree_ld = yes ; then
   comdat_group=no
   if test "$gcc_cv_gld_major_version" -eq 2 -a "$gcc_cv_gld_minor_version" -ge 16 -o 
"$gcc_cv_gld_major_version" -gt 2 \
--
2.34.1



Re: [PATCH v2] c++: ICE with noexcept and canonical types [PR101715]

2022-01-21 Thread Jason Merrill via Gcc-patches

On 1/20/22 20:03, Marek Polacek wrote:

On Thu, Jan 20, 2022 at 03:23:24PM -0500, Jason Merrill wrote:

On 1/18/22 11:05, Marek Polacek wrote:

On Mon, Jan 17, 2022 at 01:48:48PM -0500, Jason Merrill wrote:

On 1/14/22 19:22, Marek Polacek wrote:

This is a "canonical types differ for identical types" ICE, which started
with r11-4682.  It's a bit tricky to explain.  Consider:

 template <typename T> struct S {
   S<T> bar() noexcept(T::value);  // #1
   S<T> foo() noexcept(T::value);  // #2
 };

 template <typename T> S<T> S<T>::foo() noexcept(T::value) {}  // #3

We ICE because #3 and #2 have the same type, but their canonical types
differ: TYPE_CANONICAL (#3) == #2 but TYPE_CANONICAL (#2) == #1.

The member functions #1 and #2 have the same type.  However, since their
noexcept-specifier is deferred, when parsing them, we create a variant for
both of them, because DEFERRED_PARSE cannot be compared.  In other words,
build_cp_fntype_variant's

 tree v = TYPE_MAIN_VARIANT (type);
 for (; v; v = TYPE_NEXT_VARIANT (v))
   if (cp_check_qualified_type (v, type, type_quals, rqual, raises, late))
 return v;

will *not* find an existing variant when creating a method_type for #2, so we
have to create a new one.

But then we perform delayed parsing and call fixup_deferred_exception_variants
for #1 and #2.  f_d_e_v will replace TYPE_RAISES_EXCEPTIONS with the newly
parsed noexcept-specifier.  It also sets TYPE_CANONICAL (#2) to #1.  Both
noexcepts turned out to be the same, so now we have two equivalent variants in
the list!  I.e.,

+----------------+    +----------------+    +----------------+
|      main      |    |       #2       |    |       #1       |
|   S S::(S*)    |--->|   S S::(S*)    |--->|   S S::(S*)    |--->NULL
|       -        |    | noex(T::value) |    | noex(T::value) |
+----------------+    +----------------+    +----------------+

Then we get to #3.  As for #1 and #2, grokdeclarator calls build_memfn_type,
which ends up calling build_cp_fntype_variant, which will use the loop
above to look for an existing variant.  The first one that matches
cp_check_qualified_type will be used, so we use #2 rather than #1, and the
TYPE_CANONICAL mismatch follows.  Hopefully that makes sense.


Why doesn't the TYPE_CANONICAL (v) == v check prevent this?


In other words, I think you're asking: why did fixup_deferred_exception_variants
set TYPE_CANONICAL (#2) to #1 (which then differs from TYPE_CANONICAL (#3),
which is #2)?


I meant to ask why TYPE_CANONICAL (#3) got set to #2 instead of #1?

And to answer my own question, it's because the check I mention is in
fixup_deferred_exception_variants, and #3 doesn't go through there at all;
the loop in build_cp_fntype_variant assumes no duplicate variants, which
your patch fixes.


Right, fixup_deferred_exception_variants is only called for fn decls in
unparsed_noexcepts.


The method_type for #1 (I'll mark it as #1 here) is built with it being its own
canonical type.

The first call to fixup_deferred_exception_variants does not change it: in
there, VARIANT is #1, the loop with 'TYPE_CANONICAL (v) == v' cannot find
an existing variant that would match, so when we do

  v = build_cp_fntype_variant (TYPE_CANONICAL (variant),
   rqual, cr, false);
we get #1 so
  TYPE_CANONICAL (variant) = v;
is just
  TYPE_CANONICAL (#1) = #1;
so no change.

The second call to fixup_deferred_exception_variants: here we're working with
VARIANT #2.  Now we again scan the list of variants {main, #2, #1} where we
find a match for #2: #1.  #1's TYPE_CANONICAL is #1 as per above, so we set
  TYPE_CANONICAL (#2) = #1;
which I think is correct.


I think TYPE_CANONICAL (#3) should also be #1, not #2, which my patch attempts
to do.


Hope this explanation makes some sense, please ask away if it doesn't!


As for the fix, I didn't think I could rewrite the method_type #2 with #1
because the type may have escaped via decltype.  So my approach is to
elide #2 from the list, so when looking for a matching variant, we always
find #1 (#2 remains live though, which admittedly sounds sort of dodgy).

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk/11?

PR c++/101715

gcc/cp/ChangeLog:

* tree.c (fixup_deferred_exception_variants): Remove duplicate
variants after parsing the exception specifications.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/noexcept72.C: New test.
* g++.dg/cpp0x/noexcept73.C: New test.
---
gcc/cp/tree.c   | 16 +++-
gcc/testsuite/g++.dg/cpp0x/noexcept72.C | 21 +
gcc/testsuite/g++.dg/cpp0x/noexcept73.C | 13 +
3 files changed, 49 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept72.C
create mode 100644 gcc/testsuite/g++.dg/cpp0x/noexcept73.C

diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c
index 7f7de86b4e8..2efad49e7c1 100644
--- a/gcc/cp/tree.c
+++ b/g

Difference between 32-bit SPARCv9 and SPARCv8+?

2022-01-21 Thread John Paul Adrian Glaubitz
Hello!

I'm currently trying to solve a problem in LLVM which arises when building the 
compiler-rt
library on 32-bit SPARC [1].

More specifically, I'm getting a linker error which indicates that the target 
does not support 64-bit
atomic operations natively and has to use libatomic:

/usr/bin/ld: warning: -z gnu-version-script-compat ignored
/usr/bin/ld: 
projects/compiler-rt/lib/sanitizer_common/CMakeFiles/RTSanitizerCommon.sparc.dir/sanitizer_libignore.cpp.o:
 in function `bool 
__sanitizer::atomic_compare_exchange_strong<__sanitizer::atomic_uint64_t>(__sanitizer::atomic_uint64_t
 volatile*, __sanitizer::atomic_uint64_t::Type*, 
__sanitizer::atomic_uint64_t::Type, __sanitizer::memory_order)':
/var/lib/buildbot/workers/debian-stadler-sparc64/clang-sparc64-linux-multistage/llvm/compiler-rt/lib/sanitizer_common/sanitizer_atomic_clang.h:80:
 undefined reference to `__sync_val_compare_and_swap_8'

While playing around with the compiler options trying to find a solution, I 
made an interesting
discovery which is that GCC supports 64-bit compare and swap on SPARCv8plus but 
not on 32-bit
SPARCv9:

glaubitz@gcc202:~$ echo | gcc -mv8plus -E -dM -|grep -i SWAP
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
glaubitz@gcc202:~$ echo | gcc -mcpu=v9 -m32 -E -dM -|grep -i SWAP
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
#define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
glaubitz@gcc202:~$

Is this intentional? If yes, what is the exact difference between V8+ and 
32-bit V9?

Thanks,
Adrian

> [1] https://github.com/llvm/llvm-project/issues/53337

-- 
 .''`.  John Paul Adrian Glaubitz
: :' :  Debian Developer - glaub...@debian.org
`. `'   Freie Universitaet Berlin - glaub...@physik.fu-berlin.de
  `-GPG: 62FF 8A75 84E0 2956 9546  0006 7426 3B37 F5B5 F913


Re: [committed] aarch64: Adjust spellcheck tests for recent quotation-mark changes

2022-01-21 Thread Martin Liška

On 1/21/22 12:39, Richard Earnshaw wrote:

Martin's recent change to the way we handle quotation marks and
punctuation in error messages caused some fallout in the testsuite.
This patch updates the tests for the new output.


Thank you for the patch.

Martin


[PATCH] libstdc++: Fix aliasing violation in std::shared_ptr

2022-01-21 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux. Does anybody see a problem with this change?


The non-atomic store that sets both reference counts to zero uses a
type-punned pointer, which has undefined behaviour. We could use memset
to write 8 bytes, but we don't actually need it to be a single store
anyway. No other thread can observe the values, that's why it's safe to
use non-atomic stores in the first place. So we can just set each count
to zero.

With -fstore-merging (which is enabled by default at -O2) GCC produces
the same code for this as for memset or the type punned store. Clang
does that store merging even at -O1.

libstdc++-v3/ChangeLog:

* include/bits/shared_ptr_base.h (_Sp_counted_base<>::_M_release):
Set members to zero without type punning.
---
 libstdc++-v3/include/bits/shared_ptr_base.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libstdc++-v3/include/bits/shared_ptr_base.h 
b/libstdc++-v3/include/bits/shared_ptr_base.h
index 5b8f84b65be..b2f955b41f7 100644
--- a/libstdc++-v3/include/bits/shared_ptr_base.h
+++ b/libstdc++-v3/include/bits/shared_ptr_base.h
@@ -340,7 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
  // we are releasing the last strong reference. No other
  // threads can observe the effects of this _M_release()
  // call (e.g. calling use_count()) without a data race.
- *(long long*)(&_M_use_count) = 0;
+ _M_weak_count = _M_use_count = 0;
  _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_use_count);
  _GLIBCXX_SYNCHRONIZATION_HAPPENS_AFTER(&_M_weak_count);
  _M_dispose();
-- 
2.31.1



Re: [PATCH] tree-optimization/100089 - BB vectorization of if-converted loop bodies

2022-01-21 Thread Richard Biener via Gcc-patches
On Fri, 21 Jan 2022, Richard Sandiford wrote:

> Richard Biener  writes:
> > The PR complains that when we only partially BB vectorize an
> > if-converted loop body that this can leave unvectorized code
> > unconditionally executed and thus effectively slow down code.
> > For -O2 we already mitigated the issue by not doing BB vectorization
> > when not all if-converted stmts were covered but the issue is
> > present with -O3 as well.  Thus the following simply extends the
> > fix to cover all but the unlimited cost models.  It is after all
> > very likely that we vectorize some stmts, if only a single
> > paired store.
> >
> > Bootstrap & regtest running on x86_64-unknown-linux-gnu, OK?
> >
> > Thanks,
> > Richard.
> >
> > 2022-01-21  Richard Biener  
> >
> > PR tree-optimization/100089
> > * tree-vect-slp.cc (vect_slp_region): Reject BB vectorization
> > of if-converted loops with unvectorized COND_EXPRs for
> > all but the unlimited cost models.
> > ---
> >  gcc/tree-vect-slp.cc | 5 ++---
> >  1 file changed, 2 insertions(+), 3 deletions(-)
> >
> > diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> > index 5ffb63689f2..430da281e18 100644
> > --- a/gcc/tree-vect-slp.cc
> > +++ b/gcc/tree-vect-slp.cc
> > @@ -5907,8 +5907,7 @@ vect_slp_region (vec bbs, 
> > vec datarefs,
> > }
> >  
> >   /* When we're vectorizing an if-converted loop body with the
> > -very-cheap cost model make sure we vectorized all if-converted
> > -code.  */
> > +make sure we vectorized all if-converted code.  */
> 
> Nit: keeps too much of the old comment (“with the”).

Oops, fixed and pushed.

Richard.


[committed] libstdc++: Fix constexpr constructor for atomic<shared_ptr<T>>

2022-01-21 Thread Jonathan Wakely via Gcc-patches
Tested powerpc64le-linux, pushed to trunk.


libstdc++-v3/ChangeLog:

* include/bits/shared_ptr_atomic.h (_Sp_atomic::_Atomic_count):
Add constexpr.
(_Sp_atomic::_M_ptr): Add default member-initializer.
* testsuite/20_util/shared_ptr/atomic/atomic_shared_ptr.cc:
Check constant initialization.
* testsuite/20_util/weak_ptr/atomic_weak_ptr.cc: Likewise.
---
 libstdc++-v3/include/bits/shared_ptr_atomic.h | 4 ++--
 .../testsuite/20_util/shared_ptr/atomic/atomic_shared_ptr.cc  | 3 +++
 libstdc++-v3/testsuite/20_util/weak_ptr/atomic_weak_ptr.cc| 3 +++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/libstdc++-v3/include/bits/shared_ptr_atomic.h 
b/libstdc++-v3/include/bits/shared_ptr_atomic.h
index 35f781dc9a0..9e4df7da7f8 100644
--- a/libstdc++-v3/include/bits/shared_ptr_atomic.h
+++ b/libstdc++-v3/include/bits/shared_ptr_atomic.h
@@ -357,7 +357,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
// Ensure we can use the LSB as the lock bit.
static_assert(alignof(remove_pointer_t) > 1);
 
-   _Atomic_count() : _M_val(0) { }
+   constexpr _Atomic_count() noexcept = default;
 
explicit
_Atomic_count(__count_type&& __c) noexcept
@@ -457,7 +457,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
static constexpr uintptr_t _S_lock_bit{1};
   };
 
-  typename _Tp::element_type* _M_ptr;
+  typename _Tp::element_type* _M_ptr = nullptr;
   _Atomic_count _M_refcount;
 
   static typename _Atomic_count::pointer
diff --git 
a/libstdc++-v3/testsuite/20_util/shared_ptr/atomic/atomic_shared_ptr.cc 
b/libstdc++-v3/testsuite/20_util/shared_ptr/atomic/atomic_shared_ptr.cc
index 725e7ba5031..1f97224bf6a 100644
--- a/libstdc++-v3/testsuite/20_util/shared_ptr/atomic/atomic_shared_ptr.cc
+++ b/libstdc++-v3/testsuite/20_util/shared_ptr/atomic/atomic_shared_ptr.cc
@@ -16,6 +16,9 @@
 
 #include 
 
+// Check constexpr constructor.
+constinit std::atomic> a;
+
 void
 test_is_lock_free()
 {
diff --git a/libstdc++-v3/testsuite/20_util/weak_ptr/atomic_weak_ptr.cc 
b/libstdc++-v3/testsuite/20_util/weak_ptr/atomic_weak_ptr.cc
index e394e5562b9..b38cea768c8 100644
--- a/libstdc++-v3/testsuite/20_util/weak_ptr/atomic_weak_ptr.cc
+++ b/libstdc++-v3/testsuite/20_util/weak_ptr/atomic_weak_ptr.cc
@@ -8,6 +8,9 @@
 #include 
 #include 
 
+// Check constexpr constructor.
+constinit std::atomic> a;
+
 void
 test_is_lock_free()
 {
-- 
2.31.1



[PATCH] sanitizer/99673 - bad -Wstringop-overread diagnostic with asan

2022-01-21 Thread Richard Biener via Gcc-patches
The testcase got fixed by lowering of &MEM[ptr + CST] to ptr + CST.

Pushed.

2022-01-21  Richard Biener  

PR sanitizer/99673
* gcc.dg/asan/pr99673.c: New testcase.
---
 gcc/testsuite/gcc.dg/asan/pr99673.c | 27 +++
 1 file changed, 27 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/asan/pr99673.c

diff --git a/gcc/testsuite/gcc.dg/asan/pr99673.c 
b/gcc/testsuite/gcc.dg/asan/pr99673.c
new file mode 100644
index 000..05857fd46c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/asan/pr99673.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Wstringop-overread" } */
+
+struct B {
+  int i;
+  struct A {
+short sa[8];
+  } a[2];
+};
+
+struct C {
+  char n, ax[];
+};
+
+struct D { int i, j, k; };
+
+int f (const short[8]);
+
+void g (struct C *pc, struct D *pd, int i)
+{
+  struct B *pb = (void *)pc->ax;
+  pd->i = pb->i;
+
+  const short *psa = pb->a[i].sa;
+  if (f (psa))
+return;
+}
-- 
2.31.1


Re: [PATCH] tree-optimization/100089 - BB vectorization of if-converted loop bodies

2022-01-21 Thread Richard Sandiford via Gcc-patches
Richard Biener  writes:
> The PR complains that when we only partially BB vectorize an
> if-converted loop body that this can leave unvectorized code
> unconditionally executed and thus effectively slow down code.
> For -O2 we already mitigated the issue by not doing BB vectorization
> when not all if-converted stmts were covered but the issue is
> present with -O3 as well.  Thus the following simply extends the
> fix to cover all but the unlimited cost models.  It is after all
> very likely that we vectorize some stmts, if only a single
> paired store.
>
> Bootstrap & regtest running on x86_64-unknown-linux-gnu, OK?
>
> Thanks,
> Richard.
>
> 2022-01-21  Richard Biener  
>
>   PR tree-optimization/100089
>   * tree-vect-slp.cc (vect_slp_region): Reject BB vectorization
>   of if-converted loops with unvectorized COND_EXPRs for
>   all but the unlimited cost models.
> ---
>  gcc/tree-vect-slp.cc | 5 ++---
>  1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
> index 5ffb63689f2..430da281e18 100644
> --- a/gcc/tree-vect-slp.cc
> +++ b/gcc/tree-vect-slp.cc
> @@ -5907,8 +5907,7 @@ vect_slp_region (vec bbs, 
> vec datarefs,
>   }
>  
> /* When we're vectorizing an if-converted loop body with the
> -  very-cheap cost model make sure we vectorized all if-converted
> -  code.  */
> +  make sure we vectorized all if-converted code.  */

Nit: keeps too much of the old comment (“with the”).

But yeah, LGTM otherwise FWIW.

Thanks,
Richard

> if (!profitable_subgraphs.is_empty ()
> && orig_loop)
>   {
> @@ -5924,7 +5923,7 @@ vect_slp_region (vec bbs, 
> vec datarefs,
> gimple_set_visited (gsi_stmt (gsi), false);
> continue;
>   }
> -   if (flag_vect_cost_model != VECT_COST_MODEL_VERY_CHEAP)
> +   if (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED)
>   continue;
>  
> if (gassign *ass = dyn_cast  (gsi_stmt (gsi)))


[PATCH] tree-optimization/100089 - BB vectorization of if-converted loop bodies

2022-01-21 Thread Richard Biener via Gcc-patches
The PR complains that when we only partially BB vectorize an
if-converted loop body that this can leave unvectorized code
unconditionally executed and thus effectively slow down code.
For -O2 we already mitigated the issue by not doing BB vectorization
when not all if-converted stmts were covered but the issue is
present with -O3 as well.  Thus the following simply extends the
fix to cover all but the unlimited cost models.  It is after all
very likely that we vectorize some stmts, if only a single
paired store.

Bootstrap & regtest running on x86_64-unknown-linux-gnu, OK?

Thanks,
Richard.

2022-01-21  Richard Biener  

PR tree-optimization/100089
* tree-vect-slp.cc (vect_slp_region): Reject BB vectorization
of if-converted loops with unvectorized COND_EXPRs for
all but the unlimited cost models.
---
 gcc/tree-vect-slp.cc | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 5ffb63689f2..430da281e18 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -5907,8 +5907,7 @@ vect_slp_region (vec bbs, 
vec datarefs,
}
 
  /* When we're vectorizing an if-converted loop body with the
-very-cheap cost model make sure we vectorized all if-converted
-code.  */
+make sure we vectorized all if-converted code.  */
  if (!profitable_subgraphs.is_empty ()
  && orig_loop)
{
@@ -5924,7 +5923,7 @@ vect_slp_region (vec bbs, 
vec datarefs,
  gimple_set_visited (gsi_stmt (gsi), false);
  continue;
}
- if (flag_vect_cost_model != VECT_COST_MODEL_VERY_CHEAP)
+ if (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED)
continue;
 
  if (gassign *ass = dyn_cast  (gsi_stmt (gsi)))
-- 
2.31.1


RE: [PATCH v6 1/1] [ARM] Add support for TLS register based stack protector canary access

2022-01-21 Thread Kyrylo Tkachov via Gcc-patches


> -Original Message-
> From: Ard Biesheuvel 
> Sent: Friday, January 21, 2022 10:50 AM
> To: Kyrylo Tkachov 
> Cc: linux-harden...@vger.kernel.org; Richard Earnshaw
> ; Richard Sandiford
> ; thomas.preudho...@celest.fr; Keith
> Packard ; gcc-patches@gcc.gnu.org
> Subject: Re: [PATCH v6 1/1] [ARM] Add support for TLS register based stack
> protector canary access
> 
> On Fri, 21 Jan 2022 at 11:47, Kyrylo Tkachov 
> wrote:
> >
> > > -Original Message-
> > > From: Gcc-patches  > > bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Ard
> > > Biesheuvel via Gcc-patches
> > > Sent: Wednesday, January 19, 2022 5:44 PM
> > > To: linux-harden...@vger.kernel.org
> > > Cc: Richard Earnshaw ; Richard Sandiford
> > > ; thomas.preudho...@celest.fr; Keith
> > > Packard ; gcc-patches@gcc.gnu.org; Kyrylo
> Tkachov
> > > ; Ard Biesheuvel 
> > > Subject: [PATCH v6 1/1] [ARM] Add support for TLS register based stack
> > > protector canary access
> > >
> > > Add support for accessing the stack canary value via the TLS register,
> > > so that multiple threads running in the same address space can use
> > > distinct canary values. This is intended for the Linux kernel running in
> > > SMP mode, where processes entering the kernel are essentially threads
> > > running the same program concurrently: using a global variable for the
> > > canary in that context is problematic because it can never be rotated,
> > > and so the OS is forced to use the same value as long as it remains up.
> > >
> > > Using the TLS register to index the stack canary helps with this, as it
> > > allows each CPU to context switch the TLS register along with the rest
> > > of the process, permitting each process to use its own value for the
> > > stack canary.
> > >
> > > 2022-01-19 Ard Biesheuvel 
> > >
> > >   * config/arm/arm-opts.h (enum stack_protector_guard): New
> > >   * config/arm/arm-protos.h (arm_stack_protect_tls_canary_mem):
> > >   New
> > >   * config/arm/arm.cc (TARGET_STACK_PROTECT_GUARD): Define
> > >   (arm_option_override_internal): Handle and put in error checks
> > >   for stack protector guard options.
> > >   (arm_option_reconfigure_globals): Likewise
> > >   (arm_stack_protect_tls_canary_mem): New
> > >   (arm_stack_protect_guard): New
> > >   * config/arm/arm.md (stack_protect_set): New
> > >   (stack_protect_set_tls): Likewise
> > >   (stack_protect_test): Likewise
> > >   (stack_protect_test_tls): Likewise
> > >   (reload_tp_hard): Likewise
> > >   * config/arm/arm.opt (-mstack-protector-guard): New
> > >   (-mstack-protector-guard-offset): New.
> > >   * doc/invoke.texi: Document new options
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > >   * gcc.target/arm/stack-protector-7.c: New test.
> > >   * gcc.target/arm/stack-protector-8.c: New test.
> > >
> > > Signed-off-by: Ard Biesheuvel 
> >
> > Thanks. One final bit. Given that you're using the Signed-off-by tag this
> means that you're contributing this patch under the DCO rules.
> > Can you please confirm that you intend to contribute this patch under the
> rules in https://gcc.gnu.org/dco.html
> 
> Yes, I am making this contribution under DCO 1.1 terms.
> 
> > If you're happy with that I'll push the patch for you.
> > Thanks,
> > Kyrill
> >
> 
> Thanks!

I've now pushed it to trunk.
Thank you for your patience and responsiveness and apologies for the delay in 
the reviews.

Thanks,
Kyrill

> 
> 
> > > ---
> > >  gcc/config/arm/arm-opts.h|  6 ++
> > >  gcc/config/arm/arm-protos.h  |  2 +
> > >  gcc/config/arm/arm.cc| 55 +++
> > >  gcc/config/arm/arm.md| 71 
> > > +++-
> > >  gcc/config/arm/arm.opt   | 22 ++
> > >  gcc/doc/invoke.texi  | 11 +++
> > >  gcc/testsuite/gcc.target/arm/stack-protector-7.c | 12 
> > >  gcc/testsuite/gcc.target/arm/stack-protector-8.c |  7 ++
> > >  8 files changed, 184 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h
> > > index c50d5e56a181..24d12fafdec8 100644
> > > --- a/gcc/config/arm/arm-opts.h
> > > +++ b/gcc/config/arm/arm-opts.h
> > > @@ -69,4 +69,10 @@ enum arm_tls_type {
> > >TLS_GNU,
> > >TLS_GNU2
> > >  };
> > > +
> > > +/* Where to get the canary for the stack protector.  */
> > > +enum stack_protector_guard {
> > > +  SSP_TLSREG,  /* per-thread canary in TLS register */
> > > +  SSP_GLOBAL   /* global canary */
> > > +};
> > >  #endif
> > > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> > > index cd55a9f6ca54..881c72c988bd 100644
> > > --- a/gcc/config/arm/arm-protos.h
> > > +++ b/gcc/config/arm/arm-protos.h
> > > @@ -195,6 +195,8 @@ extern void arm_split_atomic_op (enum
> rtx_code,
> > > rtx, rtx, rtx, rtx, rtx, rtx);
> > >  extern

Re: [PATCH] Reset relations when crossing backedges.

2022-01-21 Thread Aldy Hernandez via Gcc-patches
On Fri, Jan 21, 2022 at 11:56 AM Richard Biener
 wrote:
>
> On Fri, Jan 21, 2022 at 11:30 AM Aldy Hernandez  wrote:
> >
> > On Fri, Jan 21, 2022 at 10:43 AM Richard Biener
> >  wrote:
> > >
> > > On Fri, Jan 21, 2022 at 9:30 AM Aldy Hernandez via Gcc-patches
> > >  wrote:
> > > >
> > > > As discussed in PR103721, the problem here is that we are crossing a
> > > > backedge and causing us to use relations from a previous iteration of a
> > > > loop.
> > > >
> > > > This handles the testcases in both PR103721 and PR104067 which are 
> > > > variants
> > > > of the same thing.
> > > >
> > > > Tested on x86-64 Linux with the usual regstrap as well as verifying the
> > > > thread count before and after the patch.  The number of threads is
> > > > > reduced by a minuscule amount.
> > > >
> > > > I assume we need release manager approval at this point?  OK for trunk?
> > >
> > > Not for regression fixes.
> >
> > OK, I've pushed it to fix the P1s.  We can continue refining the
> > solution in a follow-up patch.
> >
> > >
> > > Btw, I wonder whether you have to treat irreducible regions in the same
> > > way more generally - which edges are marked as backedge there depends
> > > on which edge into the region was visited first.  I also wonder how we
> >
> > Jeff, Andrew??
> >
> > > I also wonder how we guarantee that all users of the resolve mode have 
> > > backedges marked
> > > properly?  Also note that CFG manipulation routines in general do not
> > > keep backedge markings up-to-date so incremental modification and
> > > resolving queries might not mix.
> > >
> > > It's a bit unfortunate that we can't query the CFG on whether backedges
> > > are marked.
> >
> > Ughh.  The call to mark_dfs_back_edges is currently in the backward
> > threader.  Perhaps we could put it in the path_range_query
> > constructor?  That way other users of path_range_query can benefit
> > (loop_ch for instance, though it doesn't use it in a way that crosses
> > backedges so perhaps it's unaffected).  Does that sound reasonable?
>
> Hmm, I'd rather keep the burden on the callers because many already
> should have backedges marked.  What you could instead do is
> add sth like
>
>   if (flag_checking)
> {
>auto_edge_flag saved_dfs_back;
>for-each-edge-in-cfg () set saved_dfs_back flag if dfs_back is
> set, clear dfs_back
>mark_dfs_back_edges ();
>for-each-edge-in-cfg () verify the flags are set on the same
> edges and clear saved_dfs_back
> }
>
> to the path_range_query constructor.  That way we at least notice when passes
> do _not_ have the backedges marked properly.

Sounds good.  Thanks.

I've put the verifier by mark_dfs_back_edges(), since it really has
nothing to do with the path solver.  Perhaps it's useful for someone
else.

The patch triggered with the loop-ch use, so I've added a
corresponding mark_dfs_back_edges there.

OK pending tests?

Aldy
From 627237b45cb3aadbb009f4d077475a8b0c13c987 Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Fri, 21 Jan 2022 13:04:20 +0100
Subject: [PATCH] Assert that backedges are available in path solver.

gcc/ChangeLog:

	* cfganal.cc (dfs_back_edges_available_p): New.
	* cfganal.h (dfs_back_edges_available_p): New.
	* gimple-range-path.cc (path_range_query::path_range_query):
	Verify freshness of back edges.
	* tree-ssa-loop-ch.cc (ch_base::copy_headers): Call
	mark_dfs_back_edges.
	* tree-ssa-threadbackward.cc (back_threader::back_threader): Move
	path_range_query construction after backedges have been
	updated.
---
 gcc/cfganal.cc | 37 ++
 gcc/cfganal.h  |  1 +
 gcc/gimple-range-path.cc   |  2 ++
 gcc/tree-ssa-loop-ch.cc|  2 ++
 gcc/tree-ssa-threadbackward.cc |  2 +-
 5 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/gcc/cfganal.cc b/gcc/cfganal.cc
index e570d27768b..361d80da100 100644
--- a/gcc/cfganal.cc
+++ b/gcc/cfganal.cc
@@ -135,6 +135,43 @@ mark_dfs_back_edges (void)
   return found;
 }
 
+/* Return TRUE if EDGE_DFS_BACK is up to date for CFUN.  */
+
+bool
+dfs_back_edges_available_p ()
+{
+  auto_edge_flag saved_dfs_back (cfun);
+  basic_block bb;
+  edge e;
+  edge_iterator ei;
+  bool ret = true;
+
+  // Save all the back edges...
+  FOR_EACH_BB_FN (bb, cfun)
+FOR_EACH_EDGE (e, ei, bb->succs)
+  {
+	if (e->flags & EDGE_DFS_BACK)
+	  {
+	e->flags |= saved_dfs_back;
+	e->flags &= ~EDGE_DFS_BACK;
+	  }
+  }
+
+  // ...and verify that recalculating them agrees with the saved ones.
+  mark_dfs_back_edges ();
+  FOR_EACH_BB_FN (bb, cfun)
+FOR_EACH_EDGE (e, ei, bb->succs)
+  {
+	if (((e->flags & EDGE_DFS_BACK) != 0)
+	!= ((e->flags & saved_dfs_back) != 0))
+	  ret = false;
+
+	e->flags &= ~saved_dfs_back;
+  }
+
+  return ret;
+}
+
 /* Find unreachable blocks.  An unreachable block will have 0 in
the reachable bit in block->flags.  A nonzero value indicates the
block is reachable.  */
diff --git a/gcc/cfganal.h b/gcc/cf

[committed] aarch64: Adjust spellcheck tests for recent quotation-mark changes

2022-01-21 Thread Richard Earnshaw via Gcc-patches
Martin's recent change to the way we handle quotation marks and
punctuation in error messages caused some fallout in the testsuite.
This patch updates the tests for the new output.



Adjust tests for quotation-mark and punctuation changes.

gcc/testsuite:

* gcc.target/aarch64/spellcheck_1.c: Adjust tests for new output.
* gcc.target/aarch64/spellcheck_2.c: Likewise.
* gcc.target/aarch64/spellcheck_3.c: Likewise.
* gcc.target/aarch64/spellcheck_7.c: Likewise.
---
 gcc/testsuite/gcc.target/aarch64/spellcheck_1.c | 2 +-
 gcc/testsuite/gcc.target/aarch64/spellcheck_2.c | 2 +-
 gcc/testsuite/gcc.target/aarch64/spellcheck_3.c | 2 +-
 gcc/testsuite/gcc.target/aarch64/spellcheck_7.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c b/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c
index a0795c1cc53..8654674fc67 100644
--- a/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/spellcheck_1.c
@@ -4,6 +4,6 @@ __attribute__((target ("arch=armv8-a-typo"))) void
 foo ()
 {
   /* { dg-message "valid arguments are: \[^\n\r]*(; did you mean 'armv*'?)?"  "" { target *-*-* } .-1 } */
-  /* { dg-error "invalid name \\(\"armv8-a-typo\"\\) in 'target\\(\"arch=\"\\)' pragma or attribute"  "" { target *-*-* } .-2 } */
+  /* { dg-error "invalid name \\('armv8-a-typo'\\) in 'target\\(\"arch=\"\\)' pragma or attribute"  "" { target *-*-* } .-2 } */
   /* { dg-error "pragma or attribute 'target\\(\"arch=armv8-a-typo\"\\)' is not valid"  "" { target *-*-* } .-3 } */
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/spellcheck_2.c b/gcc/testsuite/gcc.target/aarch64/spellcheck_2.c
index 70096f89e0b..1cc2c0591dc 100644
--- a/gcc/testsuite/gcc.target/aarch64/spellcheck_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/spellcheck_2.c
@@ -4,6 +4,6 @@ __attribute__((target ("cpu=cortex-a57-typo"))) void
 foo ()
 {
   /* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'cortex-a57'?"  "" { target *-*-* } .-1 } */
-  /* { dg-error "invalid name \\(\"cortex-a57-typo\"\\) in 'target\\(\"cpu=\"\\)' pragma or attribute"  "" { target *-*-* } .-2 } */
+  /* { dg-error "invalid name \\('cortex-a57-typo'\\) in 'target\\(\"cpu=\"\\)' pragma or attribute"  "" { target *-*-* } .-2 } */
   /* { dg-error "pragma or attribute 'target\\(\"cpu=cortex-a57-typo\"\\)' is not valid"  "" { target *-*-* } .-3 } */
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/spellcheck_3.c b/gcc/testsuite/gcc.target/aarch64/spellcheck_3.c
index 20dff2b6e45..9953872b679 100644
--- a/gcc/testsuite/gcc.target/aarch64/spellcheck_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/spellcheck_3.c
@@ -4,6 +4,6 @@ __attribute__((target ("tune=cortex-a57-typo"))) void
 foo ()
 {
   /* { dg-message "valid arguments are: \[^\n\r]*; did you mean 'cortex-a57'?"  "" { target *-*-* } .-1 } */
-  /* { dg-error "invalid name \\(\"cortex-a57-typo\"\\) in 'target\\(\"tune=\"\\)' pragma or attribute"  "" { target *-*-* } .-2 } */
+  /* { dg-error "invalid name \\('cortex-a57-typo'\\) in 'target\\(\"tune=\"\\)' pragma or attribute"  "" { target *-*-* } .-2 } */
   /* { dg-error "pragma or attribute 'target\\(\"tune=cortex-a57-typo\"\\)' is not valid"  "" { target *-*-* } .-3 } */
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/spellcheck_7.c b/gcc/testsuite/gcc.target/aarch64/spellcheck_7.c
index 1d31950cb61..1e1cbec057f 100644
--- a/gcc/testsuite/gcc.target/aarch64/spellcheck_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/spellcheck_7.c
@@ -9,4 +9,4 @@ foo ()
 }
 
 /* { dg-error "invalid feature modifier .typo. in .-march=armv8-a\\+typo."  "" { target *-*-* } 0 } */
-/* { dg-message "valid arguments are: \[^\n\r]*;'?"  "" { target *-*-* } 0 } */
+/* { dg-message "valid arguments are: \[^\n\r]*"  "" { target *-*-* } 0 } */


Re: [PATCH v3 07/15] arm: Implement MVE predicates as vectors of booleans

2022-01-21 Thread Andre Vieira (lists) via Gcc-patches

Hi Christophe,

On 13/01/2022 14:56, Christophe Lyon via Gcc-patches wrote:

diff --git a/gcc/config/arm/arm-simd-builtin-types.def 
b/gcc/config/arm/arm-simd-builtin-types.def
index 6ba6f211531..920c2a68e4c 100644
--- a/gcc/config/arm/arm-simd-builtin-types.def
+++ b/gcc/config/arm/arm-simd-builtin-types.def
@@ -51,3 +51,7 @@
ENTRY (Bfloat16x2_t, V2BF, none, 32, bfloat16, 20)
ENTRY (Bfloat16x4_t, V4BF, none, 64, bfloat16, 20)
ENTRY (Bfloat16x8_t, V8BF, none, 128, bfloat16, 20)
+
+  ENTRY (Pred1x16_t, V16BI, unsigned, 16, uint16, 21)
+  ENTRY (Pred2x8_t, V8BI, unsigned, 8, uint16, 21)
+  ENTRY (Pred4x4_t, V4BI, unsigned, 4, uint16, 21)


I'm trying to lower masked loads and when I tried to use the 
arm_simd_types[Pred1x16_t].itype as the mask type I noticed the 
TYPE_SIZE of that is 256, rather than the expected 16. Instead I used 
truth_type_for (arm_simd_types[Uint8x16_t].itype) and that gives me a 
compatible vector of booleans. So the itype for Pred1x16_t seems wrong 
to me.




Re: [PATCH] Reset relations when crossing backedges.

2022-01-21 Thread Richard Biener via Gcc-patches
On Fri, Jan 21, 2022 at 11:30 AM Aldy Hernandez  wrote:
>
> On Fri, Jan 21, 2022 at 10:43 AM Richard Biener
>  wrote:
> >
> > On Fri, Jan 21, 2022 at 9:30 AM Aldy Hernandez via Gcc-patches
> >  wrote:
> > >
> > > As discussed in PR103721, the problem here is that we are crossing a
> > > backedge and causing us to use relations from a previous iteration of a
> > > loop.
> > >
> > > > This handles the testcases in both PR103721 and PR104067, which are
> > > > variants of the same thing.
> > >
> > > Tested on x86-64 Linux with the usual regstrap as well as verifying the
> > > thread count before and after the patch.  The number of threads is
> > > > reduced by a minuscule amount.
> > >
> > > I assume we need release manager approval at this point?  OK for trunk?
> >
> > Not for regression fixes.
>
> OK, I've pushed it to fix the P1s.  We can continue refining the
> solution in a follow-up patch.
>
> >
> > Btw, I wonder whether you have to treat irreducible regions in the same
> > way more generally - which edges are marked as backedge there depends
> > on which edge into the region was visited first.  I also wonder how we
>
> Jeff, Andrew??
>
> > > I also wonder how we guarantee that all users of the resolve mode
> > > have backedges marked properly?  Also note that CFG manipulation
> > > routines in general do not
> > keep backedge markings up-to-date so incremental modification and
> > resolving queries might not mix.
> >
> > It's a bit unfortunate that we can't query the CFG on whether backedges
> > are marked.
>
> Ughh.  The call to mark_dfs_back_edges is currently in the backward
> threader.  Perhaps we could put it in the path_range_query
> constructor?  That way other users of path_range_query can benefit
> (loop_ch for instance, though it doesn't use it in a way that crosses
> backedges so perhaps it's unaffected).  Does that sound reasonable?

Hmm, I'd rather keep the burden on the callers because many already
should have backedges marked.  What you could instead do is
add something like

  if (flag_checking)
{
   auto_edge_flag saved_dfs_back;
   for-each-edge-in-cfg () set saved_dfs_back flag if dfs_back is
set, clear dfs_back
   mark_dfs_back_edges ();
   for-each-edge-in-cfg () verify the flags are set on the same
edges and clear saved_dfs_back
}

to the path_range_query constructor.  That way we at least notice when passes
do _not_ have the backedges marked properly.

Richard.

> Aldy
>
> >
> > If you're only dealing with non-irreducible regions you can use a
> > dominance query to identify a backedge.  Oh, and irreducible regions
> > are also not marked (but at least CFG manipulation tries to conservatively
> > keep that info up-to-date).
> >
> > > gcc/ChangeLog:
> > >
> > > PR 103721/tree-optimization
> >
> > swapped, it should be PR tree-optimization/103721
> >
> > > * gimple-range-path.cc
> > > (path_range_query::relations_may_be_invalidated): New.
> > > (path_range_query::compute_ranges_in_block): Reset relations if
> > > they may be invalidated.
> > > (path_range_query::maybe_register_phi_relation): Exit if relations
> > > may be invalidated on incoming edge.
> > > (path_range_query::compute_phi_relations): Pass incoming PHI edge
> > > to maybe_register_phi_relation.
> > > * gimple-range-path.h (relations_may_be_invalidated): New.
> > > (maybe_register_phi_relation): Pass edge instead of tree.
> > > * tree-ssa-threadbackward.cc (back_threader::back_threader):
> > > * value-relation.cc (path_oracle::path_oracle): Call
> > > mark_dfs_back_edges.
> > > (path_oracle::register_relation): Add SSA names to m_registered
> > > bitmap.
> > > (path_oracle::reset_path): Clear m_registered bitmap.
> > > * value-relation.h (path_oracle::set_root_oracle): New.
> > >
> > > gcc/testsuite/ChangeLog:
> > >
> > > * gcc.dg/pr103721-2.c: New test.
> > > * gcc.dg/pr103721.c: New test.
> > > ---
> > >  gcc/gimple-range-path.cc  | 48 +++
> > >  gcc/gimple-range-path.h   |  3 +-
> > >  gcc/testsuite/gcc.dg/pr103721-2.c | 28 ++
> > >  gcc/testsuite/gcc.dg/pr103721.c   | 25 
> > >  gcc/tree-ssa-threadbackward.cc|  4 +++
> > >  gcc/value-relation.cc |  4 +--
> > >  gcc/value-relation.h  |  1 +
> > >  7 files changed, 104 insertions(+), 9 deletions(-)
> > >  create mode 100644 gcc/testsuite/gcc.dg/pr103721-2.c
> > >  create mode 100644 gcc/testsuite/gcc.dg/pr103721.c
> > >
> > > diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
> > > index a1bcca0b5fb..3ee4989f4b0 100644
> > > --- a/gcc/gimple-range-path.cc
> > > +++ b/gcc/gimple-range-path.cc
> > > @@ -400,6 +400,19 @@ path_range_query::compute_ranges_in_phis 
> > > (basic_block bb)
> > >bitmap_ior_into (m_has_cache_entry, phi_set);
> > >  }
> > >
> > > +// Return TRUE if relat

[PATCH] tree-optimization/104156 - fix unswitching compare-debug issue

2022-01-21 Thread Richard Biener via Gcc-patches
When hoisting guards the unswitching pass does not properly ignore
debug stmts when looking for uses outside of the loop of defs
produced in the skipped region.  The following rectifies this
by instead collecting them and resetting them after the transform.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

2022-01-21  Richard Biener  

PR tree-optimization/104156
* tree-ssa-loop-unswitch.cc (tree_unswitch_outer_loop):
Collect and reset debug stmts with out-of-loop uses when
hoisting guards.
(find_loop_guard): Adjust.
(empty_bb_without_guard_p): Likewise.  Ignore debug stmts.
(used_outside_loop_p): Push debug uses to a vector of
debug stmts to reset.
(hoist_guard): Adjust -fopt-info category.

* gcc.dg/loop-unswitch-6.c: New testcase.
---
 gcc/testsuite/gcc.dg/loop-unswitch-6.c | 32 +
 gcc/tree-ssa-loop-unswitch.cc  | 50 +-
 2 files changed, 66 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/loop-unswitch-6.c

diff --git a/gcc/testsuite/gcc.dg/loop-unswitch-6.c 
b/gcc/testsuite/gcc.dg/loop-unswitch-6.c
new file mode 100644
index 000..f70b629e054
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/loop-unswitch-6.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -funswitch-loops -g -fcompare-debug 
-fdump-tree-unswitch-details" } */
+
+short a, d;
+int b, c;
+static int e() {
+  int f = -2L, g = 9, h = 0;
+  for (; h < 2; h++)
+if (a <= 5) {
+  g = 0;
+  if (c && a)
+break;
+  if (c - 1)
+goto i;
+}
+  if (b) {
+int *j[] = {&f};
+if (d)
+  for (; f < 9; f++)
+if (g)
+  for (; f; f++)
+;
+  i:
+while (f) {
+  a--;
+  break;
+}
+  }
+}
+int main() { e(); }
+
+/* { dg-final { scan-tree-dump-times "Guard hoisted" 1 "unswitch" } } */
diff --git a/gcc/tree-ssa-loop-unswitch.cc b/gcc/tree-ssa-loop-unswitch.cc
index a405119b58a..2927f308234 100644
--- a/gcc/tree-ssa-loop-unswitch.cc
+++ b/gcc/tree-ssa-loop-unswitch.cc
@@ -79,9 +79,10 @@ static class loop *tree_unswitch_loop (class loop *, 
basic_block, tree);
 static bool tree_unswitch_single_loop (class loop *, int);
 static tree tree_may_unswitch_on (basic_block, class loop *);
 static bool tree_unswitch_outer_loop (class loop *);
-static edge find_loop_guard (class loop *);
-static bool empty_bb_without_guard_p (class loop *, basic_block);
-static bool used_outside_loop_p (class loop *, tree);
+static edge find_loop_guard (class loop *, vec&);
+static bool empty_bb_without_guard_p (class loop *, basic_block,
+ vec&);
+static bool used_outside_loop_p (class loop *, tree, vec&);
 static void hoist_guard (class loop *, edge);
 static bool check_exit_phi (class loop *);
 static tree get_vop_from_header (class loop *);
@@ -536,11 +537,18 @@ tree_unswitch_outer_loop (class loop *loop)
 }
 
   bool changed = false;
-  while ((guard = find_loop_guard (loop)))
+  auto_vec dbg_to_reset;
+  while ((guard = find_loop_guard (loop, dbg_to_reset)))
 {
   if (! changed)
rewrite_virtuals_into_loop_closed_ssa (loop);
   hoist_guard (loop, guard);
+  for (gimple *debug_stmt : dbg_to_reset)
+   {
+ gimple_debug_bind_reset_value (debug_stmt);
+ update_stmt (debug_stmt);
+   }
+  dbg_to_reset.truncate (0);
   changed = true;
 }
   return changed;
@@ -551,7 +559,7 @@ tree_unswitch_outer_loop (class loop *loop)
otherwise returns NULL.  */
 
 static edge
-find_loop_guard (class loop *loop)
+find_loop_guard (class loop *loop, vec &dbg_to_reset)
 {
   basic_block header = loop->header;
   edge guard_edge, te, fe;
@@ -688,7 +696,7 @@ find_loop_guard (class loop *loop)
  guard_edge = NULL;
  goto end;
}
-  if (!empty_bb_without_guard_p (loop, bb))
+  if (!empty_bb_without_guard_p (loop, bb, dbg_to_reset))
{
  if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, loc,
@@ -699,7 +707,7 @@ find_loop_guard (class loop *loop)
 }
 
   if (dump_enabled_p ())
-dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
+dump_printf_loc (MSG_NOTE, loc,
 "suitable to hoist\n");
 end:
   if (body)
@@ -713,10 +721,12 @@ end:
   are noy used outside of the loop.
KNOWN_INVARIANTS is a set of ssa names we know to be invariant, and
PROCESSED is a set of ssa names for that we already tested whether they
-   are invariant or not.  */
+   are invariant or not.  Uses in debug stmts outside of the loop are
+   pushed to DBG_TO_RESET.  */
 
 static bool
-empty_bb_without_guard_p (class loop *loop, basic_block bb)
+empty_bb_without_guard_p (class loop *loop, basic_block bb,
+ vec &dbg_to_reset)
 {
   basic_block exit_bb = single_exit (loop)->src;
   bool may_be_used_outside = (bb == exit_bb
@@ -736,7 +746,7 @@ empty_bb_with

[PATCH] tree-optimization/104152 - add missing check for SLP constant build

2022-01-21 Thread Richard Biener via Gcc-patches
This adds a missing check to verify we can actually build an
invariant vector from components when SLP vectorizing an associatable
chain.

Bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

Richard.

2022-01-21  Richard Biener  

PR tree-optimization/104152
* tree-vect-slp.cc (vect_build_slp_tree_2): Add missing
can_duplicate_and_interleave_p check.

* gcc.dg/vect/pr104152.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr104152.c | 40 
 gcc/tree-vect-slp.cc | 13 +++--
 2 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr104152.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr104152.c 
b/gcc/testsuite/gcc.dg/vect/pr104152.c
new file mode 100644
index 000..3a313c32dc3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr104152.c
@@ -0,0 +1,40 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-Ofast" } */
+/* { dg-additional-options "-march=armv8.2-a+sve" { target aarch64-*-* } } */
+
+#define M_PI 3.14f
+#define NK 24
+#define DIM 3
+
+typedef float rvec_ [3];
+
+static rvec_ v0[NK] = {
+  { 1, 0, 0 },  { 0, 1, 0 },  { 0, 0, 1 },  { 1, 1, 0 },  { 1, -1, 0 },
+  { 1, 0, 1 },  { 1, 0, -1 }, { 0, 1, 1 },  { 0, 1, -1 }, { 1, 1, 1 },
+  { 1, 1, -1 }, { 1, -1, 1 }, { -1, 1, 1 }, { 2, 0, 0 },  { 0, 2, 0 },
+  { 0, 0, 2 },  { 3, 0, 0 },  { 0, 3, 0 },  { 0, 0, 3 },  { 4, 0, 0 },
+  { 0, 4, 0 },  { 0, 0, 4 } };
+
+static inline float iprod__(const rvec_ a, const rvec_ b)
+{
+  return (a[0] * b[0] + a[1] * b[1] + a[2] * b[2]);
+}
+
+int badaboum(rvec_ cm_mol, float **tc)
+{
+  float  sx;
+  intk, d;
+  rvec_  kfac[3];
+
+  for (k = 0; k < DIM; k++)
+for (d = 0; d < DIM; d++)
+  kfac[k][d] = M_PI * v0[k][d] / v0[d][d];
+
+  for (k = 0; k < DIM; k++)
+{
+  sx = __builtin_sinf(iprod__(kfac[k], cm_mol));
+  tc[k][0] += sx * iprod__(v0[k], cm_mol);
+}
+
+  return 0;
+}
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index c3a1681d7c6..5ffb63689f2 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -1926,8 +1926,17 @@ vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node,
  if (dt == vect_constant_def
  || dt == vect_external_def)
{
- /* We can always build those.  Might want to sort last
-or defer building.  */
+ /* Check whether we can build the invariant.  If we can't
+we never will be able to.  */
+ tree type = TREE_TYPE (chains[0][n].op);
+ if (!GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
+ && (TREE_CODE (type) == BOOLEAN_TYPE
+ || !can_duplicate_and_interleave_p (vinfo, group_size,
+ type)))
+   {
+ matches[0] = false;
+ goto out;
+   }
  vec ops;
  ops.create (group_size);
  for (lane = 0; lane < group_size; ++lane)
-- 
2.31.1


Re: [PATCH v6 1/1] [ARM] Add support for TLS register based stack protector canary access

2022-01-21 Thread Ard Biesheuvel via Gcc-patches
On Fri, 21 Jan 2022 at 11:47, Kyrylo Tkachov  wrote:
>
> > -Original Message-
> > From: Gcc-patches  > bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Ard
> > Biesheuvel via Gcc-patches
> > Sent: Wednesday, January 19, 2022 5:44 PM
> > To: linux-harden...@vger.kernel.org
> > Cc: Richard Earnshaw ; Richard Sandiford
> > ; thomas.preudho...@celest.fr; Keith
> > Packard ; gcc-patches@gcc.gnu.org; Kyrylo Tkachov
> > ; Ard Biesheuvel 
> > Subject: [PATCH v6 1/1] [ARM] Add support for TLS register based stack
> > protector canary access
> >
> > Add support for accessing the stack canary value via the TLS register,
> > so that multiple threads running in the same address space can use
> > distinct canary values. This is intended for the Linux kernel running in
> > SMP mode, where processes entering the kernel are essentially threads
> > running the same program concurrently: using a global variable for the
> > canary in that context is problematic because it can never be rotated,
> > and so the OS is forced to use the same value as long as it remains up.
> >
> > Using the TLS register to index the stack canary helps with this, as it
> > allows each CPU to context switch the TLS register along with the rest
> > of the process, permitting each process to use its own value for the
> > stack canary.
> >
> > 2022-01-19 Ard Biesheuvel 
> >
> >   * config/arm/arm-opts.h (enum stack_protector_guard): New
> >   * config/arm/arm-protos.h (arm_stack_protect_tls_canary_mem):
> >   New
> >   * config/arm/arm.cc (TARGET_STACK_PROTECT_GUARD): Define
> >   (arm_option_override_internal): Handle and put in error checks
> >   for stack protector guard options.
> >   (arm_option_reconfigure_globals): Likewise
> >   (arm_stack_protect_tls_canary_mem): New
> >   (arm_stack_protect_guard): New
> >   * config/arm/arm.md (stack_protect_set): New
> >   (stack_protect_set_tls): Likewise
> >   (stack_protect_test): Likewise
> >   (stack_protect_test_tls): Likewise
> >   (reload_tp_hard): Likewise
> >   * config/arm/arm.opt (-mstack-protector-guard): New
> >   (-mstack-protector-guard-offset): New.
> >   * doc/invoke.texi: Document new options
> >
> > gcc/testsuite/ChangeLog:
> >
> >   * gcc.target/arm/stack-protector-7.c: New test.
> >   * gcc.target/arm/stack-protector-8.c: New test.
> >
> > Signed-off-by: Ard Biesheuvel 
>
> Thanks. One final bit. Given that you're using the Signed-off-by tag this 
> means that you're contributing this patch under the DCO rules.
> Can you please confirm that you intend to contribute this patch under the 
> rules in https://gcc.gnu.org/dco.html

Yes, I am making this contribution under DCO 1.1 terms.

> If you're happy with that I'll push the patch for you.
> Thanks,
> Kyrill
>

Thanks!


> > ---
> >  gcc/config/arm/arm-opts.h|  6 ++
> >  gcc/config/arm/arm-protos.h  |  2 +
> >  gcc/config/arm/arm.cc| 55 +++
> >  gcc/config/arm/arm.md| 71 +++-
> >  gcc/config/arm/arm.opt   | 22 ++
> >  gcc/doc/invoke.texi  | 11 +++
> >  gcc/testsuite/gcc.target/arm/stack-protector-7.c | 12 
> >  gcc/testsuite/gcc.target/arm/stack-protector-8.c |  7 ++
> >  8 files changed, 184 insertions(+), 2 deletions(-)
> >
> > diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h
> > index c50d5e56a181..24d12fafdec8 100644
> > --- a/gcc/config/arm/arm-opts.h
> > +++ b/gcc/config/arm/arm-opts.h
> > @@ -69,4 +69,10 @@ enum arm_tls_type {
> >TLS_GNU,
> >TLS_GNU2
> >  };
> > +
> > +/* Where to get the canary for the stack protector.  */
> > +enum stack_protector_guard {
> > +  SSP_TLSREG,  /* per-thread canary in TLS register */
> > +  SSP_GLOBAL   /* global canary */
> > +};
> >  #endif
> > diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> > index cd55a9f6ca54..881c72c988bd 100644
> > --- a/gcc/config/arm/arm-protos.h
> > +++ b/gcc/config/arm/arm-protos.h
> > @@ -195,6 +195,8 @@ extern void arm_split_atomic_op (enum rtx_code,
> > rtx, rtx, rtx, rtx, rtx, rtx);
> >  extern rtx arm_load_tp (rtx);
> >  extern bool arm_coproc_builtin_available (enum unspecv);
> >  extern bool arm_coproc_ldc_stc_legitimate_address (rtx);
> > +extern rtx arm_stack_protect_tls_canary_mem (bool);
> > +
> >
> >  #if defined TREE_CODE
> >  extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
> > diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> > index 7825e364c01e..c192894ff33e 100644
> > --- a/gcc/config/arm/arm.cc
> > +++ b/gcc/config/arm/arm.cc
> > @@ -829,6 +829,9 @@ static const struct attribute_spec
> > arm_attribute_table[] =
> >
> >  #undef TARGET_MD_ASM_ADJUST
> >  #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
> > +
> > +#undef TARGET_STACK_PROTECT_GUARD
> > +#define TAR

RE: [PATCH v6 1/1] [ARM] Add support for TLS register based stack protector canary access

2022-01-21 Thread Kyrylo Tkachov via Gcc-patches
> -Original Message-
> From: Gcc-patches  bounces+kyrylo.tkachov=arm@gcc.gnu.org> On Behalf Of Ard
> Biesheuvel via Gcc-patches
> Sent: Wednesday, January 19, 2022 5:44 PM
> To: linux-harden...@vger.kernel.org
> Cc: Richard Earnshaw ; Richard Sandiford
> ; thomas.preudho...@celest.fr; Keith
> Packard ; gcc-patches@gcc.gnu.org; Kyrylo Tkachov
> ; Ard Biesheuvel 
> Subject: [PATCH v6 1/1] [ARM] Add support for TLS register based stack
> protector canary access
> 
> Add support for accessing the stack canary value via the TLS register,
> so that multiple threads running in the same address space can use
> distinct canary values. This is intended for the Linux kernel running in
> SMP mode, where processes entering the kernel are essentially threads
> running the same program concurrently: using a global variable for the
> canary in that context is problematic because it can never be rotated,
> and so the OS is forced to use the same value as long as it remains up.
> 
> Using the TLS register to index the stack canary helps with this, as it
> allows each CPU to context switch the TLS register along with the rest
> of the process, permitting each process to use its own value for the
> stack canary.
> 
> 2022-01-19 Ard Biesheuvel 
> 
>   * config/arm/arm-opts.h (enum stack_protector_guard): New
>   * config/arm/arm-protos.h (arm_stack_protect_tls_canary_mem):
>   New
>   * config/arm/arm.cc (TARGET_STACK_PROTECT_GUARD): Define
>   (arm_option_override_internal): Handle and put in error checks
>   for stack protector guard options.
>   (arm_option_reconfigure_globals): Likewise
>   (arm_stack_protect_tls_canary_mem): New
>   (arm_stack_protect_guard): New
>   * config/arm/arm.md (stack_protect_set): New
>   (stack_protect_set_tls): Likewise
>   (stack_protect_test): Likewise
>   (stack_protect_test_tls): Likewise
>   (reload_tp_hard): Likewise
>   * config/arm/arm.opt (-mstack-protector-guard): New
>   (-mstack-protector-guard-offset): New.
>   * doc/invoke.texi: Document new options
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/arm/stack-protector-7.c: New test.
>   * gcc.target/arm/stack-protector-8.c: New test.
> 
> Signed-off-by: Ard Biesheuvel 

Thanks. One final bit. Given that you're using the Signed-off-by tag this means 
that you're contributing this patch under the DCO rules.
Can you please confirm that you intend to contribute this patch under the rules 
in https://gcc.gnu.org/dco.html
If you're happy with that I'll push the patch for you.
Thanks,
Kyrill

> ---
>  gcc/config/arm/arm-opts.h|  6 ++
>  gcc/config/arm/arm-protos.h  |  2 +
>  gcc/config/arm/arm.cc| 55 +++
>  gcc/config/arm/arm.md| 71 +++-
>  gcc/config/arm/arm.opt   | 22 ++
>  gcc/doc/invoke.texi  | 11 +++
>  gcc/testsuite/gcc.target/arm/stack-protector-7.c | 12 
>  gcc/testsuite/gcc.target/arm/stack-protector-8.c |  7 ++
>  8 files changed, 184 insertions(+), 2 deletions(-)
> 
> diff --git a/gcc/config/arm/arm-opts.h b/gcc/config/arm/arm-opts.h
> index c50d5e56a181..24d12fafdec8 100644
> --- a/gcc/config/arm/arm-opts.h
> +++ b/gcc/config/arm/arm-opts.h
> @@ -69,4 +69,10 @@ enum arm_tls_type {
>TLS_GNU,
>TLS_GNU2
>  };
> +
> +/* Where to get the canary for the stack protector.  */
> +enum stack_protector_guard {
> +  SSP_TLSREG,  /* per-thread canary in TLS register */
> +  SSP_GLOBAL   /* global canary */
> +};
>  #endif
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index cd55a9f6ca54..881c72c988bd 100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -195,6 +195,8 @@ extern void arm_split_atomic_op (enum rtx_code,
> rtx, rtx, rtx, rtx, rtx, rtx);
>  extern rtx arm_load_tp (rtx);
>  extern bool arm_coproc_builtin_available (enum unspecv);
>  extern bool arm_coproc_ldc_stc_legitimate_address (rtx);
> +extern rtx arm_stack_protect_tls_canary_mem (bool);
> +
> 
>  #if defined TREE_CODE
>  extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree);
> diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
> index 7825e364c01e..c192894ff33e 100644
> --- a/gcc/config/arm/arm.cc
> +++ b/gcc/config/arm/arm.cc
> @@ -829,6 +829,9 @@ static const struct attribute_spec
> arm_attribute_table[] =
> 
>  #undef TARGET_MD_ASM_ADJUST
>  #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
> +
> +#undef TARGET_STACK_PROTECT_GUARD
> +#define TARGET_STACK_PROTECT_GUARD arm_stack_protect_guard
> 
> 
> 
>  /* Obstack for minipool constant handling.  */
>  static struct obstack minipool_obstack;
> @@ -3176,6 +3179,26 @@ arm_option_override_internal (struct
> gcc_options *opts,
>if (TARGET_THUMB2_P (opts->x_target_flags))
>  opts->x_inline_asm_unified = true;
> 
> +  if (ar

Re: [PATCH] Reset relations when crossing backedges.

2022-01-21 Thread Aldy Hernandez via Gcc-patches
On Fri, Jan 21, 2022 at 10:43 AM Richard Biener
 wrote:
>
> On Fri, Jan 21, 2022 at 9:30 AM Aldy Hernandez via Gcc-patches
>  wrote:
> >
> > As discussed in PR103721, the problem here is that we are crossing a
> > backedge and causing us to use relations from a previous iteration of a
> > loop.
> >
> > This handles the testcases in both PR103721 and PR104067 which are variants
> > of the same thing.
> >
> > Tested on x86-64 Linux with the usual regstrap as well as verifying the
> > thread count before and after the patch.  The number of threads is
> > reduced by a minuscule amount.
> >
> > I assume we need release manager approval at this point?  OK for trunk?
>
> Not for regression fixes.

OK, I've pushed it to fix the P1s.  We can continue refining the
solution in a follow-up patch.

>
> Btw, I wonder whether you have to treat irreducible regions in the same
> way more generally - which edges are marked as backedge there depends
> on which edge into the region was visited first.  I also wonder how we

Jeff, Andrew??

> I also wonder how we guarantee that all users of the resolve mode
> have backedges marked properly?  Also note that CFG manipulation
> routines in general do not
> keep backedge markings up-to-date so incremental modification and
> resolving queries might not mix.
>
> It's a bit unfortunate that we can't query the CFG on whether backedges
> are marked.

Ughh.  The call to mark_dfs_back_edges is currently in the backward
threader.  Perhaps we could put it in the path_range_query
constructor?  That way other users of path_range_query can benefit
(loop_ch for instance, though it doesn't use it in a way that crosses
backedges so perhaps it's unaffected).  Does that sound reasonable?

Aldy

>
> If you're only dealing with non-irreducible regions you can use a
> dominance query to identify a backedge.  Oh, and irreducible regions
> are also not marked (but at least CFG manipulation tries to conservatively
> keep that info up-to-date).
>
> > gcc/ChangeLog:
> >
> > PR 103721/tree-optimization
>
> swapped, it should be PR tree-optimization/103721
>
> > * gimple-range-path.cc
> > (path_range_query::relations_may_be_invalidated): New.
> > (path_range_query::compute_ranges_in_block): Reset relations if
> > they may be invalidated.
> > (path_range_query::maybe_register_phi_relation): Exit if relations
> > may be invalidated on incoming edge.
> > (path_range_query::compute_phi_relations): Pass incoming PHI edge
> > to maybe_register_phi_relation.
> > * gimple-range-path.h (relations_may_be_invalidated): New.
> > (maybe_register_phi_relation): Pass edge instead of tree.
> > * tree-ssa-threadbackward.cc (back_threader::back_threader):
> > * value-relation.cc (path_oracle::path_oracle): Call
> > mark_dfs_back_edges.
> > (path_oracle::register_relation): Add SSA names to m_registered
> > bitmap.
> > (path_oracle::reset_path): Clear m_registered bitmap.
> > * value-relation.h (path_oracle::set_root_oracle): New.
> >
> > gcc/testsuite/ChangeLog:
> >
> > * gcc.dg/pr103721-2.c: New test.
> > * gcc.dg/pr103721.c: New test.
> > ---
> >  gcc/gimple-range-path.cc  | 48 +++
> >  gcc/gimple-range-path.h   |  3 +-
> >  gcc/testsuite/gcc.dg/pr103721-2.c | 28 ++
> >  gcc/testsuite/gcc.dg/pr103721.c   | 25 
> >  gcc/tree-ssa-threadbackward.cc|  4 +++
> >  gcc/value-relation.cc |  4 +--
> >  gcc/value-relation.h  |  1 +
> >  7 files changed, 104 insertions(+), 9 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.dg/pr103721-2.c
> >  create mode 100644 gcc/testsuite/gcc.dg/pr103721.c
> >
> > diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
> > index a1bcca0b5fb..3ee4989f4b0 100644
> > --- a/gcc/gimple-range-path.cc
> > +++ b/gcc/gimple-range-path.cc
> > @@ -400,6 +400,19 @@ path_range_query::compute_ranges_in_phis (basic_block 
> > bb)
> >bitmap_ior_into (m_has_cache_entry, phi_set);
> >  }
> >
> > +// Return TRUE if relations may be invalidated after crossing edge E.
> > +
> > +bool
> > +path_range_query::relations_may_be_invalidated (edge e)
> > +{
> > +  // As soon as the path crosses a back edge, we can encounter
> > +  // definitions of SSA_NAMEs that may have had a use in the path
> > +  // already, so this will then be a new definition.  The relation
> > +  // code is all designed around seeing things in dominator order, and
> > +  // crossing a back edge in the path violates this assumption.
> > +  return (e->flags & EDGE_DFS_BACK);
> > +}
> > +
> >  // Compute ranges defined in the current block, or exported to the
> >  // next block.
> >
> > @@ -440,6 +453,22 @@ path_range_query::compute_ranges_in_block (basic_block 
> > bb)
> >// Solve imports that are exported to the next block.
> >basic_block next = next_bb ();
> >

Re: [PATCH] Update the type of control.base after changed

2022-01-21 Thread Richard Biener via Gcc-patches
On Fri, 21 Jan 2022, Jiufu Guo wrote:

> Hi,
> 
> This patch correct the type of niter->control.base, when it is updated
> as a PLUS expr.
> During build PLUS expr, the result type should align with the type of
> the operands.
> 
> Bootstrap and regtest pass on ppc64/ppc64le and x86.
> Is this ok for trunk?

OK.

Thanks,
Richard.

> BR,
> Jiufu
> 
> 
>   PR tree-optimization/102087
> 
> gcc/ChangeLog:
> 
>   * tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
>   Correct PLUS result type.
> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/pr102087_1.c: New test.
> 
> ---
>  gcc/tree-ssa-loop-niter.c | 17 +++--
>  gcc/testsuite/gcc.dg/pr102087_1.c | 13 +
>  2 files changed, 28 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr102087_1.c
> 
> diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
> index b767056aeb0..21cc257c91b 100644
> --- a/gcc/tree-ssa-loop-niter.c
> +++ b/gcc/tree-ssa-loop-niter.c
> @@ -1579,8 +1579,21 @@ number_of_iterations_until_wrap (class loop *loop, 
> tree type, affine_iv *iv0,
>   { IVbase - STEP, +, STEP } != bound
>   Here, biasing IVbase by 1 step makes 'bound' be the value before wrap.
>   */
> -  niter->control.base = fold_build2 (MINUS_EXPR, niter_type,
> -  niter->control.base, niter->control.step);
> +  tree base_type = TREE_TYPE (niter->control.base);
> +  if (POINTER_TYPE_P (base_type))
> +{
> +  tree utype = unsigned_type_for (base_type);
> +  niter->control.base
> + = fold_build2 (MINUS_EXPR, utype,
> +fold_convert (utype, niter->control.base),
> +fold_convert (utype, niter->control.step));
> +  niter->control.base = fold_convert (base_type, niter->control.base);
> +}
> +  else
> +niter->control.base
> +  = fold_build2 (MINUS_EXPR, base_type, niter->control.base,
> +  niter->control.step);
> +
>span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
> fold_convert (niter_type, niter->control.step));
>niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
> diff --git a/gcc/testsuite/gcc.dg/pr102087_1.c 
> b/gcc/testsuite/gcc.dg/pr102087_1.c
> new file mode 100644
> index 000..ba4efe3b412
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/pr102087_1.c
> @@ -0,0 +1,13 @@
> +/* PR tree-optimization/102087 */
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -fprefetch-loop-arrays -w" { target x86_64-*-* 
> powerpc*-*-* } } */
> +
> +char **Gif_ClipImage_gfi_0;
> +int Gif_ClipImage_gfi_1, Gif_ClipImage_y, Gif_ClipImage_shift;
> +void Gif_ClipImage() {
> +  Gif_ClipImage_y = Gif_ClipImage_gfi_1 - 1;
> +  for (; Gif_ClipImage_y >= Gif_ClipImage_shift; Gif_ClipImage_y++)
> +Gif_ClipImage_gfi_0[Gif_ClipImage_shift] =
> +Gif_ClipImage_gfi_0[Gif_ClipImage_y];
> +}
> +
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Ivo Totev; HRB 36809 (AG Nuernberg)


Re: [PATCH] Reset relations when crossing backedges.

2022-01-21 Thread Richard Biener via Gcc-patches
On Fri, Jan 21, 2022 at 9:30 AM Aldy Hernandez via Gcc-patches
 wrote:
>
> As discussed in PR103721, the problem here is that we are crossing a
> backedge and causing us to use relations from a previous iteration of a
> loop.
>
> This handles the testcases in both PR103721 and PR104067 which are variants
> of the same thing.
>
> Tested on x86-64 Linux with the usual regstrap as well as verifying the
> thread count before and after the patch.  The number of threads is
> reduced by a miniscule amount.
>
> I assume we need release manager approval at this point?  OK for trunk?

Not for regression fixes.

Btw, I wonder whether you have to treat irreducible regions in the same
way more generally - which edges are marked as backedge there depends
on which edge into the region was visited first.  I also wonder how we
guarantee that all users of the resolve mode have backedges marked
properly?  Also note that CFG manipulation routines in general do not
keep backedge markings up-to-date so incremental modification and
resolving queries might not mix.

It's a bit unfortunate that we can't query the CFG on whether backedges
are marked.

If you're only dealing with non-irreducible regions you can use a
dominance query to identify a backedge.  Oh, and irreducible regions
are also not marked (but at least CFG manipulation tries to conservatively
keep that info up-to-date).

> gcc/ChangeLog:
>
> PR 103721/tree-optimization

swapped, it should be PR tree-optimization/103721

> * gimple-range-path.cc
> (path_range_query::relations_may_be_invalidated): New.
> (path_range_query::compute_ranges_in_block): Reset relations if
> they may be invalidated.
> (path_range_query::maybe_register_phi_relation): Exit if relations
> may be invalidated on incoming edge.
> (path_range_query::compute_phi_relations): Pass incoming PHI edge
> to maybe_register_phi_relation.
> * gimple-range-path.h (relations_may_be_invalidated): New.
> (maybe_register_phi_relation): Pass edge instead of tree.
> * tree-ssa-threadbackward.cc (back_threader::back_threader):
> * value-relation.cc (path_oracle::path_oracle): Call
> mark_dfs_back_edges.
> (path_oracle::register_relation): Add SSA names to m_registered
> bitmap.
> (path_oracle::reset_path): Clear m_registered bitmap.
> * value-relation.h (path_oracle::set_root_oracle): New.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.dg/pr103721-2.c: New test.
> * gcc.dg/pr103721.c: New test.
> ---
>  gcc/gimple-range-path.cc  | 48 +++
>  gcc/gimple-range-path.h   |  3 +-
>  gcc/testsuite/gcc.dg/pr103721-2.c | 28 ++
>  gcc/testsuite/gcc.dg/pr103721.c   | 25 
>  gcc/tree-ssa-threadbackward.cc|  4 +++
>  gcc/value-relation.cc |  4 +--
>  gcc/value-relation.h  |  1 +
>  7 files changed, 104 insertions(+), 9 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/pr103721-2.c
>  create mode 100644 gcc/testsuite/gcc.dg/pr103721.c
>
> diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
> index a1bcca0b5fb..3ee4989f4b0 100644
> --- a/gcc/gimple-range-path.cc
> +++ b/gcc/gimple-range-path.cc
> @@ -400,6 +400,19 @@ path_range_query::compute_ranges_in_phis (basic_block bb)
>bitmap_ior_into (m_has_cache_entry, phi_set);
>  }
>
> +// Return TRUE if relations may be invalidated after crossing edge E.
> +
> +bool
> +path_range_query::relations_may_be_invalidated (edge e)
> +{
> +  // As soon as the path crosses a back edge, we can encounter
> +  // definitions of SSA_NAMEs that may have had a use in the path
> +  // already, so this will then be a new definition.  The relation
> +  // code is all designed around seeing things in dominator order, and
> +  // crossing a back edge in the path violates this assumption.
> +  return (e->flags & EDGE_DFS_BACK);
> +}
> +
>  // Compute ranges defined in the current block, or exported to the
>  // next block.
>
> @@ -440,6 +453,22 @@ path_range_query::compute_ranges_in_block (basic_block 
> bb)
>// Solve imports that are exported to the next block.
>basic_block next = next_bb ();
>edge e = find_edge (bb, next);
> +
> +  if (m_resolve && relations_may_be_invalidated (e))
> +{
> +  if (DEBUG_SOLVER)
> +   fprintf (dump_file,
> +"Resetting relations as they may be invalidated in 
> %d->%d.\n",
> +e->src->index, e->dest->index);
> +
> +  path_oracle *p = get_path_oracle ();
> +  p->reset_path ();
> +  // ?? Instead of nuking the root oracle altogether, we could
> +  // reset the path oracle to search for relations from the top of
> +  // the loop with the root oracle.  Something for future development.
> +  p->set_root_oracle (nullptr);
> +}
> +
>EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
>  {
>tree name = ssa_name (i);
>

[PATCH v3, rs6000] Add a combine pattern for CA minus one [PR95737]

2022-01-21 Thread HAO CHEN GUI via Gcc-patches
Hi,
   This patch adds a combine pattern for "CA minus one". As CA only has two
values (0 or 1), we can convert the following pattern
  (sign_extend:DI (plus:SI (reg:SI 98 ca)
                           (const_int -1 [0xffffffffffffffff])))
to
  (plus:DI (reg:DI 98 ca)
           (const_int -1 [0xffffffffffffffff]))
   With this patch, one unnecessary sign extend is eliminated.

   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2022-01-20 Haochen Gui 

gcc/
* config/rs6000/rs6000.md (extenddi_ca_minus_one): Define.

gcc/testsuite/
* gcc.target/powerpc/pr95737.c: New.


patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6ecb0bd6142..1d8b212962f 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -2358,6 +2358,19 @@ (define_insn "subf3_carry_in_xx"
   "subfe %0,%0,%0"
   [(set_attr "type" "add")])

+(define_insn_and_split "*extenddi_ca_minus_one"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (sign_extend:DI (plus:SI (reg:SI CA_REGNO)
+(const_int -1))))]
+  ""
+  "#"
+  ""
+  [(parallel [(set (match_dup 0)
+  (plus:DI (reg:DI CA_REGNO)
+   (const_int -1)))
+ (clobber (reg:DI CA_REGNO))])]
+  ""
+)

 (define_insn "@neg2"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr95737.c 
b/gcc/testsuite/gcc.target/powerpc/pr95737.c
new file mode 100644
index 000..d4d6a4198cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr95737.c
@@ -0,0 +1,10 @@
+/* PR target/95737 */
+/* { dg-do compile { target lp64 } } */
+/* { dg-options "-O2 -mno-isel" } */
+/* { dg-final { scan-assembler-not {\mextsw\M} } } */
+
+
+unsigned long negativeLessThan (unsigned long a, unsigned long b)
+{
+   return -(a < b);
+}


[PATCH] Update the type of control.base after changed

2022-01-21 Thread Jiufu Guo via Gcc-patches
Hi,

This patch corrects the type of niter->control.base when it is updated
as a PLUS expr.
When building a PLUS expr, the result type should match the type of
the operands.

Bootstrap and regtest pass on ppc64/ppc64le and x86.
Is this ok for trunk?

BR,
Jiufu


PR tree-optimization/102087

gcc/ChangeLog:

* tree-ssa-loop-niter.c (number_of_iterations_until_wrap):
Correct PLUS result type.

gcc/testsuite/ChangeLog:

* gcc.dg/pr102087_1.c: New test.

---
 gcc/tree-ssa-loop-niter.c | 17 +++--
 gcc/testsuite/gcc.dg/pr102087_1.c | 13 +
 2 files changed, 28 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr102087_1.c

diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b767056aeb0..21cc257c91b 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -1579,8 +1579,21 @@ number_of_iterations_until_wrap (class loop *loop, tree 
type, affine_iv *iv0,
  { IVbase - STEP, +, STEP } != bound
  Here, biasing IVbase by 1 step makes 'bound' be the value before wrap.
  */
-  niter->control.base = fold_build2 (MINUS_EXPR, niter_type,
-niter->control.base, niter->control.step);
+  tree base_type = TREE_TYPE (niter->control.base);
+  if (POINTER_TYPE_P (base_type))
+{
+  tree utype = unsigned_type_for (base_type);
+  niter->control.base
+   = fold_build2 (MINUS_EXPR, utype,
+  fold_convert (utype, niter->control.base),
+  fold_convert (utype, niter->control.step));
+  niter->control.base = fold_convert (base_type, niter->control.base);
+}
+  else
+niter->control.base
+  = fold_build2 (MINUS_EXPR, base_type, niter->control.base,
+niter->control.step);
+
   span = fold_build2 (MULT_EXPR, niter_type, niter->niter,
  fold_convert (niter_type, niter->control.step));
   niter->bound = fold_build2 (PLUS_EXPR, niter_type, span,
diff --git a/gcc/testsuite/gcc.dg/pr102087_1.c 
b/gcc/testsuite/gcc.dg/pr102087_1.c
new file mode 100644
index 000..ba4efe3b412
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr102087_1.c
@@ -0,0 +1,13 @@
+/* PR tree-optimization/102087 */
+/* { dg-do compile } */
+/* { dg-options "-O3 -fprefetch-loop-arrays -w" { target x86_64-*-* 
powerpc*-*-* } } */
+
+char **Gif_ClipImage_gfi_0;
+int Gif_ClipImage_gfi_1, Gif_ClipImage_y, Gif_ClipImage_shift;
+void Gif_ClipImage() {
+  Gif_ClipImage_y = Gif_ClipImage_gfi_1 - 1;
+  for (; Gif_ClipImage_y >= Gif_ClipImage_shift; Gif_ClipImage_y++)
+Gif_ClipImage_gfi_0[Gif_ClipImage_shift] =
+Gif_ClipImage_gfi_0[Gif_ClipImage_y];
+}
+
-- 
2.25.1



Re: [PATCH] s390: Change costs for load on condition.

2022-01-21 Thread Andreas Krebbel via Gcc-patches
On 1/20/22 11:10, Robin Dapp wrote:
> Hi,
> 
> this patch is a follow-up patch to the recent ifcvt changes. It
> increased costs for a load on condition to 6.  This ensures that we
> if-convert sequences of three regular instructions (of cost 4) e.g. a
> compare and two SETs into two loads on condition (of cost 6).  With a
> cost of 5, four-insn sequences (three SETs) would also be if-converted.
> 
> The adjustment to the mov[qi/si]cc expander makes sure we if-convert a
> QImode/bool.  Before, combine would create a paradoxical subreg itself
> but need an additional insn.
> 
> Bootstrapped and regtested on s390x.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
>   * config/s390/s390.cc (s390_rtx_costs): Increase costs for load
>   on condition.
>   * config/s390/s390.md: Change mov[qi/si]cc expander.

Could you please add two tests for the sequences which are improved here?
Just to make sure we become aware once it breaks again.

Patch is ok. Thanks!

Andreas


[PATCH] Reset relations when crossing backedges.

2022-01-21 Thread Aldy Hernandez via Gcc-patches
As discussed in PR103721, the problem here is that we are crossing a
backedge, which causes us to use relations from a previous iteration of a
loop.

This handles the testcases in both PR103721 and PR104067 which are variants
of the same thing.

Tested on x86-64 Linux with the usual regstrap as well as verifying the
thread count before and after the patch.  The number of threads is
reduced by a minuscule amount.

I assume we need release manager approval at this point?  OK for trunk?

gcc/ChangeLog:

PR tree-optimization/103721
* gimple-range-path.cc
(path_range_query::relations_may_be_invalidated): New.
(path_range_query::compute_ranges_in_block): Reset relations if
they may be invalidated.
(path_range_query::maybe_register_phi_relation): Exit if relations
may be invalidated on incoming edge.
(path_range_query::compute_phi_relations): Pass incoming PHI edge
to maybe_register_phi_relation.
* gimple-range-path.h (relations_may_be_invalidated): New.
(maybe_register_phi_relation): Pass edge instead of tree.
* tree-ssa-threadbackward.cc (back_threader::back_threader):
* value-relation.cc (path_oracle::path_oracle): Call
mark_dfs_back_edges.
(path_oracle::register_relation): Add SSA names to m_registered
bitmap.
(path_oracle::reset_path): Clear m_registered bitmap.
* value-relation.h (path_oracle::set_root_oracle): New.

gcc/testsuite/ChangeLog:

* gcc.dg/pr103721-2.c: New test.
* gcc.dg/pr103721.c: New test.
---
 gcc/gimple-range-path.cc  | 48 +++
 gcc/gimple-range-path.h   |  3 +-
 gcc/testsuite/gcc.dg/pr103721-2.c | 28 ++
 gcc/testsuite/gcc.dg/pr103721.c   | 25 
 gcc/tree-ssa-threadbackward.cc|  4 +++
 gcc/value-relation.cc |  4 +--
 gcc/value-relation.h  |  1 +
 7 files changed, 104 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr103721-2.c
 create mode 100644 gcc/testsuite/gcc.dg/pr103721.c

diff --git a/gcc/gimple-range-path.cc b/gcc/gimple-range-path.cc
index a1bcca0b5fb..3ee4989f4b0 100644
--- a/gcc/gimple-range-path.cc
+++ b/gcc/gimple-range-path.cc
@@ -400,6 +400,19 @@ path_range_query::compute_ranges_in_phis (basic_block bb)
   bitmap_ior_into (m_has_cache_entry, phi_set);
 }
 
+// Return TRUE if relations may be invalidated after crossing edge E.
+
+bool
+path_range_query::relations_may_be_invalidated (edge e)
+{
+  // As soon as the path crosses a back edge, we can encounter
+  // definitions of SSA_NAMEs that may have had a use in the path
+  // already, so this will then be a new definition.  The relation
+  // code is all designed around seeing things in dominator order, and
+  // crossing a back edge in the path violates this assumption.
+  return (e->flags & EDGE_DFS_BACK);
+}
+
 // Compute ranges defined in the current block, or exported to the
 // next block.
 
@@ -440,6 +453,22 @@ path_range_query::compute_ranges_in_block (basic_block bb)
   // Solve imports that are exported to the next block.
   basic_block next = next_bb ();
   edge e = find_edge (bb, next);
+
+  if (m_resolve && relations_may_be_invalidated (e))
+{
+  if (DEBUG_SOLVER)
+   fprintf (dump_file,
+"Resetting relations as they may be invalidated in %d->%d.\n",
+e->src->index, e->dest->index);
+
+  path_oracle *p = get_path_oracle ();
+  p->reset_path ();
+  // ?? Instead of nuking the root oracle altogether, we could
+  // reset the path oracle to search for relations from the top of
+  // the loop with the root oracle.  Something for future development.
+  p->set_root_oracle (nullptr);
+}
+
   EXECUTE_IF_SET_IN_BITMAP (m_imports, 0, i, bi)
 {
   tree name = ssa_name (i);
@@ -742,9 +771,19 @@ path_range_query::range_of_stmt (irange &r, gimple *stmt, 
tree)
   return true;
 }
 
+// If possible, register the relation on the incoming edge E into PHI.
+
 void
-path_range_query::maybe_register_phi_relation (gphi *phi, tree arg)
+path_range_query::maybe_register_phi_relation (gphi *phi, edge e)
 {
+  tree arg = gimple_phi_arg_def (phi, e->dest_idx);
+
+  if (!gimple_range_ssa_p (arg))
+return;
+
+  if (relations_may_be_invalidated (e))
+return;
+
   basic_block bb = gimple_bb (phi);
   tree result = gimple_phi_result (phi);
 
@@ -754,7 +793,7 @@ path_range_query::maybe_register_phi_relation (gphi *phi, 
tree arg)
 return;
 
   if (dump_file && (dump_flags & TDF_DETAILS))
-fprintf (dump_file, "  from bb%d:", bb->index);
+fprintf (dump_file, "maybe_register_phi_relation in bb%d:", bb->index);
 
   get_path_oracle ()->killing_def (result);
   m_oracle->register_relation (entry_bb (), EQ_EXPR, arg, result);
@@ -787,10 +826,7 @@ path_range_query::compute_phi_relations (basic_block bb, 
basic_block prev)
   for (size_t i = 0; i < nargs; ++i)
if 

Re: [PATCH] s390: Split CCSmode into CCSINT and CCSFP

2022-01-21 Thread Andreas Krebbel via Gcc-patches
On 1/20/22 17:13, Robin Dapp wrote:
> Hi,
> 
> this patch splits the CCSmode into an integer and a floating point
> variant.  This allows ifcvt to consider floating point compares which
> would be rejected before because they could not be reversed.
> 
> Bootstrapped and regtested on s390x.
> 
> Is it OK?
> 
> Regards
>  Robin
> 
> --
> 
> gcc/ChangeLog:
> 
>   * config/s390/predicates.md: Add CCSINTmode and CCSFPmode.
>   * config/s390/s390-modes.def (UNORDERED): Likewise.
>   (CC_MODE): Likewise.
>   * config/s390/s390.cc (s390_cc_modes_compatible): Likewise.
>   (s390_match_ccmode_set): Likewise.
>   (s390_select_ccmode): Likewise.
>   (s390_branch_condition_mask): Likewise.
>   (s390_reverse_condition): Likewise.
>   * config/s390/s390.h (REVERSIBLE_CC_MODE): Likewise.
>   * config/s390/s390.md: Likewise.
>   * config/s390/subst.md: Likewise.

> diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
> index 33194d3f3d6..ec47416cc1b 100644
> --- a/gcc/config/s390/predicates.md
> +++ b/gcc/config/s390/predicates.md
> @@ -325,7 +325,8 @@
>  case E_CCURmode:
>return GET_CODE (op) == LTU;
>
> -case E_CCSmode:
> +case E_CCSINTmode:
> +case E_CCSFPmode:
>return GET_CODE (op) == UNGT;

Can we get an UNGT for CCSINTmode here? Shouldn't this be just GT?

>
>  case E_CCSRmode:
> @@ -370,7 +371,8 @@
>  case E_CCURmode:
>return GET_CODE (op) == GEU;
>
> -case E_CCSmode:
> +case E_CCSINTmode:
> +case E_CCSFPmode:
>return GET_CODE (op) == LE;
>
>  case E_CCSRmode:
> diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def
> index b419907960e..eafe1e12938 100644
> --- a/gcc/config/s390/s390-modes.def
> +++ b/gcc/config/s390/s390-modes.def
> @@ -48,12 +48,12 @@ CCUR: EQ  GTU  LTU NE 
> (CLGF/R)
>
>  Signed compares
>
> -CCS:  EQ  LT   GT  UNORDERED  (LTGFR, LTGR, LTR, 
> ICM/Y,
> -   LTDBR, LTDR, LTEBR, 
> LTER,
> +CCSINT: EQLT   GT  UNORDERED  (LTGFR, LTGR, LTR, 
> ICM/Y,

CC3 for signed integer compares should not occur. So perhaps '-' instead of 
UNORDERED?

> CG/R, C/R/Y, CGHI, 
> CHI,
> -   CDB/R, CD/R, CEB/R, 
> CE/R,
> -   ADB/R, AEB/R, SDB/R, 
> SEB/R,
> SRAG, SRA, SRDA)
> +CCSFP:  EQLT   GT  UNORDERED  (CDB/R, CD/R, CEB/R, 
> CE/R,
> +   LTDBR, LTDR, LTEBR, 
> LTER,
> +   ADB/R, AEB/R, SDB/R, 
> SEB/R)
>  CCSR: EQ  GT   LT  UNORDERED  (CGF/R, CH/Y)
>  CCSFPS: EQLT   GT  UNORDERED  (KEB/R, KDB/R, KXBR, 
> KDTR,
>  KXTR, WFK)
...
> @@ -2139,7 +2148,8 @@ s390_branch_condition_mask (rtx code)
>   }
>break;
>
> -case E_CCSmode:
> +case E_CCSINTmode:
> +case E_CCSFPmode:
>  case E_CCSFPSmode:
>switch (GET_CODE (code))
>   {

We will need a new switch statement for CCSINT without all the FP-only
comparison operators.

Andreas


Re: [PATCH v2] Disable -fsplit-stack support on non-glibc targets

2022-01-21 Thread Uros Bizjak via Gcc-patches
On Thu, Jan 20, 2022 at 11:52 PM Richard Sandiford
 wrote:
>
> cc:ing the x86 and s390 maintainers
>
> soeren--- via Gcc-patches  writes:
> > From: Sören Tempel 
> >
> > The -fsplit-stack option requires the pthread_t TCB definition in the
> > libc to provide certain struct fields at specific hardcoded offsets. As
> > far as I know, only glibc provides these fields at the required offsets.
> > Most notably, musl libc does not have these fields. However, since gcc
> > accesses the fields using a fixed offset, this does not cause a
> > compile-time error, but instead results in a silent memory corruption at
> > run-time with musl libc. For example, on s390x libgcc's
> > __stack_split_initialize CTOR will overwrite the cancel field in the
> > pthread_t TCB on musl.
> >
> > The -fsplit-stack option is used within the gcc code base itself by
> > gcc-go (if available). On musl-based systems with split-stack support
> > (i.e. s390x or x86) this causes Go programs compiled with gcc-go to
> > misbehave at run-time.
> >
> > This patch fixes gcc-go on musl by disabling -fsplit-stack in gcc itself
> > since it is not supported on non-glibc targets anyhow. This is achieved
> > by checking if gcc targets a glibc-based system. This check has been
> > added for x86 and s390x, the rs6000 config already checks for
> > TARGET_GLIBC_MAJOR. Other architectures do not have split-stack
> > support. With this patch applied, the gcc-go configure script will
> > detect that -fsplit-stack support is not available and will not use it.
> >
> > See https://www.openwall.com/lists/musl/2012/10/16/12
> >
> > This patch was written under the assumption that glibc is the only libc
> > implementation which supports the required fields at the required
> > offsets in the pthread_t TCB. The patch has been tested on Alpine Linux
> > Edge on the s390x and x86 architectures by bootstrapping Google's Go
> > implementation with gcc-go.
> >
> > Signed-off-by: Sören Tempel 
> >
> > gcc/ChangeLog:
> >
> >   * common/config/s390/s390-common.c (s390_supports_split_stack):
> >   Only support split-stack on glibc targets.
> >   * config/i386/gnu-user-common.h (STACK_CHECK_STATIC_BUILTIN): Ditto.
> >   * config/i386/gnu.h (defined): Ditto.

LGTM for x86 parts.

Thanks,
Uros.

> > ---
> > This version of the patch addresses feedback by Andrew Pinski and uses
> > OPTION_GLIBC as well as opts->x_linux_libc == LIBC_GLIBC to detect glibc
> > targets (instead of relying on TARGET_GLIBC_MAJOR).
> >
> >  gcc/common/config/s390/s390-common.c | 11 +--
> >  gcc/config/i386/gnu-user-common.h|  5 +++--
> >  gcc/config/i386/gnu.h|  6 +-
> >  3 files changed, 17 insertions(+), 5 deletions(-)
>
> Sorry for the slow review.  The patch LGTM bar some minor formatting
> nits below, but target maintainers should have the final say.
>
> > diff --git a/gcc/common/config/s390/s390-common.c 
> > b/gcc/common/config/s390/s390-common.c
> > index b6bc8501742..fc86e0bc5e7 100644
> > --- a/gcc/common/config/s390/s390-common.c
> > +++ b/gcc/common/config/s390/s390-common.c
> > @@ -116,13 +116,20 @@ s390_handle_option (struct gcc_options *opts 
> > ATTRIBUTE_UNUSED,
> >
> >  /* -fsplit-stack uses a field in the TCB, available with glibc-2.23.
> > We don't verify it, since earlier versions just have padding at
> > -   its place, which works just as well.  */
> > +   its place, which works just as well. For other libc implementations
>
> GCC style is to use 2 spaces after a full stop.  Same for the x86 part.
>
> > +   we disable the feature entirely to avoid corrupting the TCB.  */
> >
> >  static bool
> >  s390_supports_split_stack (bool report ATTRIBUTE_UNUSED,
> >  struct gcc_options *opts ATTRIBUTE_UNUSED)
>
> These parameters are no longer unused after the patch, so it'd be good
> to remove the attributes.
>
> >  {
> > -  return true;
> > +  if (opts->x_linux_libc == LIBC_GLIBC) {
> > +return true;
> > +  } else {
> > +if (report)
> > +  error("%<-fsplit-stack%> currently only supported on GNU/Linux");
> > +return false;
> > +  }
>
> Normal GCC formatting would be something like:
>
>   if (opts->x_linux_libc == LIBC_GLIBC)
> return true;
>
>   if (report)
> error ("%<-fsplit-stack%> currently only supported on GNU/Linux");
>   return false;
>
> Sorry for the fussy rules.
>
> Thanks,
> Richard
>
> >  }
> >
> >  #undef TARGET_DEFAULT_TARGET_FLAGS
> > diff --git a/gcc/config/i386/gnu-user-common.h 
> > b/gcc/config/i386/gnu-user-common.h
> > index 00226f5a455..6e13315b5a3 100644
> > --- a/gcc/config/i386/gnu-user-common.h
> > +++ b/gcc/config/i386/gnu-user-common.h
> > @@ -66,7 +66,8 @@ along with GCC; see the file COPYING3.  If not see
> >  #define STACK_CHECK_STATIC_BUILTIN 1
> >
> >  /* We only build the -fsplit-stack support in libgcc if the
> > -   assembler has full support for the CFI directives.  */
> > -#if HAVE_GAS_CFI_PERSONALITY_DIRECTIVE
> > +   assembler has full support for t

Re: [PATCH] Fix alignment of stack slots for overaligned types [PR103500]

2022-01-21 Thread Richard Sandiford via Gcc-patches
Richard Sandiford via Gcc-patches  writes:
> How about instead:
>
> (1) Define a new ASLK_* flag for assign_stack_local_1.
>
> (2) When the flag is set, make:
>
>   if (alignment_in_bits > MAX_SUPPORTED_STACK_ALIGNMENT)
> {
>   alignment_in_bits = MAX_SUPPORTED_STACK_ALIGNMENT;
>   alignment = MAX_SUPPORTED_STACK_ALIGNMENT / BITS_PER_UNIT;
> }
>
> increase the size by (new_align - old_align).

Er, I meant old_align - new_align here :-)

>
> (3) When the flag is set, and the new alignment is smaller than the
> original alignment, call align_dynamic_address before creating
> the MEM, as in your original patch.
>
> There are probably other details, but it looks like that should cope
> with the x86 dynamic stack realignment scheme even with the max-based
> alignment calculation discussed above.
>
> Thanks,
> Richard