Re: [PATCH] [i386]Add missing BMI function to align with clang

2021-12-20 Thread Uros Bizjak via Gcc-patches
On Tue, Dec 21, 2021 at 6:22 AM Haochen Jiang  wrote:
>
> Hi all,
>
> This patch adds missing BMI function _tzcnt_u16, _andn_u32, _andn_u64 to 
> align with clang.
>
> Regtested on x86_64-pc-linux-gnu. Ok for trunk?
>
> BRs,
> Haochen
>
> gcc/ChangeLog:
>
> * config/i386/bmiintrin.h (_tzcnt_u16): New define function.
> (_andn_u32): Ditto.
> (_andn_u64): Ditto.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/i386/bmi-1.c: Add new test for new define function.
> * gcc.target/i386/bmi-2.c: Ditto.
> * gcc.target/i386/bmi-3.c: Ditto.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/bmiintrin.h   | 18 ++
>  gcc/testsuite/gcc.target/i386/bmi-1.c |  8 +++-
>  gcc/testsuite/gcc.target/i386/bmi-2.c |  8 +++-
>  gcc/testsuite/gcc.target/i386/bmi-3.c |  8 +++-
>  4 files changed, 39 insertions(+), 3 deletions(-)
>
> diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
> index 439d81cba11..92450a644eb 100644
> --- a/gcc/config/i386/bmiintrin.h
> +++ b/gcc/config/i386/bmiintrin.h
> @@ -40,12 +40,24 @@ __tzcnt_u16 (unsigned short __X)
>return __builtin_ia32_tzcnt_u16 (__X);
>  }
>
> +extern __inline unsigned short __attribute__((__gnu_inline__, 
> __always_inline__, __artificial__))
> +_tzcnt_u16 (unsigned short __X)
> +{
> +  return __builtin_ia32_tzcnt_u16 (__X);
> +}
> +
>  extern __inline unsigned int __attribute__((__gnu_inline__, 
> __always_inline__, __artificial__))
>  __andn_u32 (unsigned int __X, unsigned int __Y)
>  {
>return ~__X & __Y;
>  }
>
> +extern __inline unsigned int __attribute__((__gnu_inline__, 
> __always_inline__, __artificial__))
> +_andn_u32 (unsigned int __X, unsigned int __Y)
> +{
> +  return __andn_u32 (__X, __Y);
> +}
> +
>  extern __inline unsigned int __attribute__((__gnu_inline__, 
> __always_inline__, __artificial__))
>  __bextr_u32 (unsigned int __X, unsigned int __Y)
>  {
> @@ -114,6 +126,12 @@ __andn_u64 (unsigned long long __X, unsigned long long 
> __Y)
>return ~__X & __Y;
>  }
>
> +extern __inline unsigned long long __attribute__((__gnu_inline__, 
> __always_inline__, __artificial__))
> +_andn_u64 (unsigned long long __X, unsigned long long __Y)
> +{
> +  return __andn_u64 (__X, __Y);
> +}
> +
>  extern __inline unsigned long long __attribute__((__gnu_inline__, 
> __always_inline__, __artificial__))
>  __bextr_u64 (unsigned long long __X, unsigned long long __Y)
>  {
> diff --git a/gcc/testsuite/gcc.target/i386/bmi-1.c 
> b/gcc/testsuite/gcc.target/i386/bmi-1.c
> index 738705e29d8..141adaac016 100644
> --- a/gcc/testsuite/gcc.target/i386/bmi-1.c
> +++ b/gcc/testsuite/gcc.target/i386/bmi-1.c
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -fno-ipa-icf -mbmi " } */
> -/* { dg-final { scan-assembler "andn\[^\\n]*eax" } } */
> +/* { dg-final { scan-assembler-times "andn\[^\\n]*eax" 2 } } */
>  /* { dg-final { scan-assembler-times "bextr\[ \\t]+\[^\\n]*eax" 2 } } */
>  /* { dg-final { scan-assembler-times "blsi\[^\\n]*eax" 2 } } */
>  /* { dg-final { scan-assembler-times "blsmsk\[^\\n]*eax" 2 } } */
> @@ -15,6 +15,12 @@ func_andn32 (unsigned int X, unsigned int Y)
>return __andn_u32(X, Y);
>  }
>
> +unsigned int
> +func_andn32_2 (unsigned int X, unsigned int Y)
> +{
> +  return _andn_u32(X, Y);
> +}
> +
>  unsigned int
>  func_bextr32 (unsigned int X, unsigned int Y)
>  {
> diff --git a/gcc/testsuite/gcc.target/i386/bmi-2.c 
> b/gcc/testsuite/gcc.target/i386/bmi-2.c
> index 6b8595eb9e1..3f9052a4991 100644
> --- a/gcc/testsuite/gcc.target/i386/bmi-2.c
> +++ b/gcc/testsuite/gcc.target/i386/bmi-2.c
> @@ -1,6 +1,6 @@
>  /* { dg-do compile { target { ! ia32  } } } */
>  /* { dg-options "-O2 -fno-ipa-icf -mbmi " } */
> -/* { dg-final { scan-assembler "andn\[^\\n]*rax" } } */
> +/* { dg-final { scan-assembler-times "andn\[^\\n]*rax" 2 } } */
>  /* { dg-final { scan-assembler-times "bextr\[ \\t]+\[^\\n]*rax" 2 } } */
>  /* { dg-final { scan-assembler-times "blsi\[^\\n]*rax" 2 } } */
>  /* { dg-final { scan-assembler-times "blsmsk\[^\\n]*rax" 2 } } */
> @@ -15,6 +15,12 @@ func_andn64 (unsigned long long X, unsigned long long Y)
>return __andn_u64 (X, Y);
>  }
>
> +unsigned long long
> +func_andn64_2 (unsigned long long X, unsigned long long Y)
> +{
> +  return _andn_u64 (X, Y);
> +}
> +
>  unsigned long long
>  func_bextr64 (unsigned long long X, unsigned long long Y)
>  {
> diff --git a/gcc/testsuite/gcc.target/i386/bmi-3.c 
> b/gcc/testsuite/gcc.target/i386/bmi-3.c
> index ddc5e0f66e2..0b91bc25bf8 100644
> --- a/gcc/testsuite/gcc.target/i386/bmi-3.c
> +++ b/gcc/testsuite/gcc.target/i386/bmi-3.c
> @@ -1,6 +1,6 @@
>  /* { dg-do compile } */
>  /* { dg-options "-O2 -mbmi " } */
> -/* { dg-final { scan-assembler "tzcntw\[^\\n]*(%|)ax" } } */
> +/* { dg-final { scan-assembler-times "tzcntw\[^\\n]*%?ax" 2 } } */
>
>  #include 
>
> @@ -9,3 +9,9 @@ func_tzcnt16 (unsigned short X)
>  {
>return __tzcnt_u16(X);
>  }
> +
> +unsigned short
> +func_tzcnt16_2 

Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2021-12-20 Thread Daniel Krügler via Gcc-patches
Am Di., 21. Dez. 2021 um 07:08 Uhr schrieb François Dumont via
Libstdc++ :
>
> Hi
>
>  Is there a chance for this patch to be integrated for next gcc
> release ?
>
> François
>

No counterargument for the acceptance, but: Shouldn't
__small_size_threshold() be a noexcept function?

- Daniel


[PATCH] [i386] Add define_insn_and_split for vpcmp{b, w, d, q} vpcmp{ph, ps, pd}.

2021-12-20 Thread liuhongt via Gcc-patches
The purpose of those define_insn_and_split:
1. Combine vpcmpuw and zero_extend into vpcmpuw.
2. Canonicalize vpcmpuw pattern so CSE can replace duplicate vpcmpuw to just 
kmov
3. Use DImode as dest of zero_extend so cprop_hardreg can eliminate redundant 
kmov.

It should partially fix the issue in PR.
Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ready to push to trunk.

gcc/ChangeLog:

PR target/103750
* config/i386/sse.md
(*_cmp3_zero_extend):
New define_insn_and_split.
(*_cmp3): Ditto.
(*_cmp3_zero_extenddi): New define_insn.
(*_cmp3_zero_extend):
New define_insn_and_split.
(*_ucmp3_zero_extend):
Ditto.
(*_ucmp3): Ditto.
(*_ucmp3_zero_extenddi): New define_insn.
(*_ucmp3_zero_extend):
New define_insn_and_split.

gcc/testsuite/ChangeLog:

* gcc.target/i386/bitwise_mask_op-3.c: Adjust test/
* g++.target/i386/pr103750-1.C: New test.
---
 gcc/config/i386/sse.md| 267 ++
 gcc/testsuite/g++.target/i386/pr103750-1.C|  50 
 .../gcc.target/i386/bitwise_mask_op-3.c   |   6 +-
 3 files changed, 320 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr103750-1.C

diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5196149ee32..fb885d58272 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -3702,6 +3702,75 @@ (define_insn 
"_cmp3"
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
+;; Those Splitters are used to canonicalize vpcmpuw pattern, so that CSE can 
transfrom
+;; duplicated vpcmpuw to vpcmpuw and kmov
+;; Choose biggest mode(DImode) as dest, so kmov can be optimized by 
cprop_hardreg.
+(define_insn_and_split 
"*_cmp3_zero_extend"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
+   (zero_extend:SWI248x
+ (unspec:
+   [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+(match_operand:SI 3 "" "n")]
+   UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+   && (GET_MODE_NUNITS (mode)
+   < GET_MODE_PRECISION (mode))"
+  "vcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "&& mode != E_DImode"
+  [(set (match_dup 0)
+   (zero_extend:DI
+ (unspec:
+   [(match_dup 1)
+(match_dup 2)
+(match_dup 3)]
+   UNSPEC_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_insn_and_split "*_cmp3"
+  [(set (match_operand: 0 "register_operand" "=k")
+   (unspec:
+ [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+  (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+  (match_operand:SI 3 "" "n")]
+ UNSPEC_PCMP))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (mode) < 64"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (zero_extend:DI
+ (unspec:
+   [(match_dup 1)
+(match_dup 2)
+(match_dup 3)]
+   UNSPEC_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
+(define_insn "*_cmp3_zero_extenddi"
+  [(set (match_operand:DI 0 "register_operand" "=k")
+   (zero_extend:DI
+ (unspec:
+   [(match_operand:V48H_AVX512VL 1 "register_operand" "v")
+(match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+(match_operand:SI 3 "" "n")]
+   UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+   && GET_MODE_NUNITS (mode) < 64"
+  "vcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  [(set_attr "type" "ssecmp")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "evex")
+   (set_attr "mode" "")])
+
 (define_insn_and_split "*_cmp3"
   [(set (match_operand: 0 "register_operand")
(not:
@@ -3735,6 +3804,72 @@ (define_insn 
"_cmp3"
(set_attr "prefix" "evex")
(set_attr "mode" "")])
 
+(define_insn_and_split 
"*_cmp3_zero_extend"
+  [(set (match_operand:SWI248x 0 "register_operand" "=k")
+   (zero_extend:SWI248x
+ (unspec:
+   [(match_operand:VI12_AVX512VL 1 "register_operand" "v")
+(match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+(match_operand:SI 3 "" "n")]
+   UNSPEC_PCMP)))]
+  "TARGET_AVX512BW
+   && (GET_MODE_NUNITS (mode)
+   < GET_MODE_PRECISION (mode))"
+  "vpcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+  "&& mode != E_DImode"
+  [(set (match_dup 0)
+   (zero_extend:DI
+ (unspec:
+   [(match_dup 1)
+(match_dup 2)
+(match_dup 3)]
+  UNSPEC_PCMP)))]
+  "operands[0] = lowpart_subreg (DImode, operands[0], mode);"
+  [(set_attr "type" "ssecmp")
+   (set_attr 

Re: [PATCH] elf: Add __libc_get_static_tls_bounds [BZ #16291]

2021-12-20 Thread Florian Weimer via Gcc-patches
* Fāng-ruì Sòng:

>> I *think* you can get what you need via existing GLIBC_PRIVATE
>> interfaces.  But in order to describe how to caox the data out of glibc,
>> I need to know what you need.
>
> Unfortunate no, not reliably. Currently _dl_get_tls_static_info
> (https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp#L207)
> is used. It has the size information but not the start address, so the
> sanitizer runtime is doing very ugly/error-prone probing of the start
> address by reading the thread pointer and hard coding the `struct
> pthread` size for various glibc versions.
> Every time `struct pthreads` increases in glibc, sanitizer runtime has to 
> adapt.
>
> __libc_get_static_tls_bounds if supported by glibc, can replace
> _dl_get_tls_static_info and the existing glibc specific GetTls hacks,
> and provide a bit more compatibility when glibc struct pthreads
> increases.

We already export the size of struct pthread, though, as a GLIBC_PRIVATE
symbol.

  const unsigned int *psize = dlvsym("_thread_db_sizeof_pthread",
 "GLIBC_PRIVATE");
  if (psize != nullptr) {
val = *psize;
atomic_store_relaxed(_descriptor_size, val);
return val;
  }

in ThreadDescriptorSize() in
compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp should work
with any further glibc changes today.  It definitely beats hard-coding
the architecture-specific struct pthread size.

Using _thread_db_sizeof_pthread does not remove all magic constants from
the code, and there is of course the _dl_get_tls_static_info symbol
usage.

This is a fairly recent change (glibc 2.34, I believe) that was
introduced to improve thread debugging without debuginfo present in the
glibc objects (which is why it's a GLIBC_PRIVATE symbol).  libthread_db
is bundled with glibc, so the _thread_db_* symbols are not gurantueed to
be stable, but there are no immediate plans to change this interface.

Thanks,
Florian



Re: [PATCH][Hashtable 6/6] PR 68303 small size optimization

2021-12-20 Thread François Dumont via Gcc-patches

Hi

    Is there a chance for this patch to be integrated for next gcc 
release ?


François


On 23/09/21 6:36 am, François Dumont wrote:

Ping ?

On 16/08/21 9:03 pm, François Dumont wrote:

On 17/07/20 2:58 pm, Jonathan Wakely wrote:

On 17/11/19 22:31 +0100, François Dumont wrote:

This is an implementation of PR 68303.

I try to use this idea as much as possible to avoid computation of 
hash codes.


Note that tests are not showing any gain. I guess hash computation 
must be quite bad to get a benefit from it. So I am only activating 
it when hash code is not cached and/or when computation is not fast.


If the tests don't show any benefit, why bother making the change?


I eventually managed to demonstrate this optimization through a 
performance test case.




Does it help the example in the PR?


No, the code attached to the PR just show what the user has done to 
put in place this optim on his side.


What I needed was a slow hash code computation compared to the equal 
operation. I realized that I had to use longer string to achieve this.


Moreover making this optim dependant on 
_Hashtable_traits::__hash_cached was just wrong as we cannot use the 
cached hash code here as the input is a key instance, not a node.


I introduce _Hashtable_hash_traits<_Hash> to offer a single 
customization point as this optim depends highly on the difference 
between a hash code computation and a comparison. Maybe I should put 
it at std namespace scope to ease partial specialization ?


Performance test results before the patch:

unordered_small_size.cc      std::unordered_set: 1st insert      
40r   32u    8s 264000112mem    0pf
unordered_small_size.cc      std::unordered_set: find/erase      
22r   22u    0s -191999808mem    0pf
unordered_small_size.cc      std::unordered_set: 2nd insert      
36r   36u    0s 191999776mem    0pf
unordered_small_size.cc      std::unordered_set: erase key       
25r   25u    0s -191999808mem    0pf
unordered_small_size.cc      std::unordered_set: 1st 
insert     404r  244u  156s -1989936256mem    0pf
unordered_small_size.cc      std::unordered_set: 
find/erase     315r  315u    0s 2061942272mem    0pf
unordered_small_size.cc      std::unordered_set: 2nd 
insert     233r  233u    0s -2061942208mem    0pf
unordered_small_size.cc      std::unordered_set: erase key 
     299r  298u    0s 2061942208mem    0pf


after the patch:

unordered_small_size.cc      std::unordered_set: 1st insert      
41r   33u    7s 264000112mem    0pf
unordered_small_size.cc      std::unordered_set: find/erase      
24r   25u    1s -191999808mem    0pf
unordered_small_size.cc      std::unordered_set: 2nd insert      
34r   34u    0s 191999776mem    0pf
unordered_small_size.cc      std::unordered_set: erase key       
25r   25u    0s -191999808mem    0pf
unordered_small_size.cc      std::unordered_set: 1st 
insert     399r  232u  165s -1989936256mem    0pf
unordered_small_size.cc      std::unordered_set: 
find/erase     196r  197u    0s 2061942272mem    0pf
unordered_small_size.cc      std::unordered_set: 2nd 
insert     221r  222u    0s -2061942208mem    0pf
unordered_small_size.cc      std::unordered_set: erase key 
     178r  178u    0s 2061942208mem    0pf


    libstdc++: Optimize operations on small size hashtable [PR 68303]

    When hasher is identified as slow and the number of elements is 
limited in the
    container use a brute-force loop on those elements to look for a 
given key using
    the key_equal functor. For the moment the default threshold below 
which the

    container is considered as small is 20.

    libstdc++-v3/ChangeLog:

    PR libstdc++/68303
    * include/bits/hashtable_policy.h
    (_Hashtable_hash_traits<_Hash>): New.
    (_Hash_code_base<>::_M_hash_code(const 
_Hash_node_value<>&)): New.

    (_Hashtable_base<>::_M_key_equals): New.
    (_Hashtable_base<>::_M_equals): Use latter.
    (_Hashtable_base<>::_M_key_equals_tr): New.
    (_Hashtable_base<>::_M_equals_tr): Use latter.
    * include/bits/hashtable.h
    (_Hashtable<>::__small_size_threshold()): New, use 
_Hashtable_hash_traits.
    (_Hashtable<>::find): Loop through elements to look for 
key if size is lower

    than __small_size_threshold().
    (_Hashtable<>::_M_emplace(true_type, _Args&&...)): Likewise.
    (_Hashtable<>::_M_insert_unique(_Kt&&, _Args&&, const 
_NodeGenerator&)): Likewise.
(_Hashtable<>::_M_compute_hash_code(const_iterator, const 
key_type&)): New.
    (_Hashtable<>::_M_emplace(const_iterator, false_type, 
_Args&&...)): Use latter.

    (_Hashtable<>::_M_find_before_node(const key_type&)): New.
    (_Hashtable<>::_M_erase(true_type, const key_type&)): Use 
latter.
    (_Hashtable<>::_M_erase(false_type, const key_type&)): 
Likewise.
    * src/c++11/hashtable_c++0x.cc: Include 
.

    * testsuite/util/testsuite_performane.h
   

[PATCH] [i386]Add missing BMI function to align with clang

2021-12-20 Thread Haochen Jiang via Gcc-patches
Hi all,

This patch adds missing BMI function _tzcnt_u16, _andn_u32, _andn_u64 to align 
with clang.

Regtested on x86_64-pc-linux-gnu. Ok for trunk?

BRs,
Haochen

gcc/ChangeLog:

* config/i386/bmiintrin.h (_tzcnt_u16): New define function.
(_andn_u32): Ditto.
(_andn_u64): Ditto.

gcc/testsuite/ChangeLog:

* gcc.target/i386/bmi-1.c: Add new test for new define function.
* gcc.target/i386/bmi-2.c: Ditto.
* gcc.target/i386/bmi-3.c: Ditto.
---
 gcc/config/i386/bmiintrin.h   | 18 ++
 gcc/testsuite/gcc.target/i386/bmi-1.c |  8 +++-
 gcc/testsuite/gcc.target/i386/bmi-2.c |  8 +++-
 gcc/testsuite/gcc.target/i386/bmi-3.c |  8 +++-
 4 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
index 439d81cba11..92450a644eb 100644
--- a/gcc/config/i386/bmiintrin.h
+++ b/gcc/config/i386/bmiintrin.h
@@ -40,12 +40,24 @@ __tzcnt_u16 (unsigned short __X)
   return __builtin_ia32_tzcnt_u16 (__X);
 }
 
+extern __inline unsigned short __attribute__((__gnu_inline__, 
__always_inline__, __artificial__))
+_tzcnt_u16 (unsigned short __X)
+{
+  return __builtin_ia32_tzcnt_u16 (__X);
+}
+
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 __andn_u32 (unsigned int __X, unsigned int __Y)
 {
   return ~__X & __Y;
 }
 
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
+_andn_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __andn_u32 (__X, __Y);
+}
+
 extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, 
__artificial__))
 __bextr_u32 (unsigned int __X, unsigned int __Y)
 {
@@ -114,6 +126,12 @@ __andn_u64 (unsigned long long __X, unsigned long long __Y)
   return ~__X & __Y;
 }
 
+extern __inline unsigned long long __attribute__((__gnu_inline__, 
__always_inline__, __artificial__))
+_andn_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __andn_u64 (__X, __Y);
+}
+
 extern __inline unsigned long long __attribute__((__gnu_inline__, 
__always_inline__, __artificial__))
 __bextr_u64 (unsigned long long __X, unsigned long long __Y)
 {
diff --git a/gcc/testsuite/gcc.target/i386/bmi-1.c 
b/gcc/testsuite/gcc.target/i386/bmi-1.c
index 738705e29d8..141adaac016 100644
--- a/gcc/testsuite/gcc.target/i386/bmi-1.c
+++ b/gcc/testsuite/gcc.target/i386/bmi-1.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -fno-ipa-icf -mbmi " } */
-/* { dg-final { scan-assembler "andn\[^\\n]*eax" } } */
+/* { dg-final { scan-assembler-times "andn\[^\\n]*eax" 2 } } */
 /* { dg-final { scan-assembler-times "bextr\[ \\t]+\[^\\n]*eax" 2 } } */
 /* { dg-final { scan-assembler-times "blsi\[^\\n]*eax" 2 } } */
 /* { dg-final { scan-assembler-times "blsmsk\[^\\n]*eax" 2 } } */
@@ -15,6 +15,12 @@ func_andn32 (unsigned int X, unsigned int Y)
   return __andn_u32(X, Y);
 }
 
+unsigned int
+func_andn32_2 (unsigned int X, unsigned int Y)
+{
+  return _andn_u32(X, Y);
+}
+
 unsigned int
 func_bextr32 (unsigned int X, unsigned int Y)
 {
diff --git a/gcc/testsuite/gcc.target/i386/bmi-2.c 
b/gcc/testsuite/gcc.target/i386/bmi-2.c
index 6b8595eb9e1..3f9052a4991 100644
--- a/gcc/testsuite/gcc.target/i386/bmi-2.c
+++ b/gcc/testsuite/gcc.target/i386/bmi-2.c
@@ -1,6 +1,6 @@
 /* { dg-do compile { target { ! ia32  } } } */
 /* { dg-options "-O2 -fno-ipa-icf -mbmi " } */
-/* { dg-final { scan-assembler "andn\[^\\n]*rax" } } */
+/* { dg-final { scan-assembler-times "andn\[^\\n]*rax" 2 } } */
 /* { dg-final { scan-assembler-times "bextr\[ \\t]+\[^\\n]*rax" 2 } } */
 /* { dg-final { scan-assembler-times "blsi\[^\\n]*rax" 2 } } */
 /* { dg-final { scan-assembler-times "blsmsk\[^\\n]*rax" 2 } } */
@@ -15,6 +15,12 @@ func_andn64 (unsigned long long X, unsigned long long Y)
   return __andn_u64 (X, Y);
 }
 
+unsigned long long
+func_andn64_2 (unsigned long long X, unsigned long long Y)
+{
+  return _andn_u64 (X, Y);
+}
+
 unsigned long long
 func_bextr64 (unsigned long long X, unsigned long long Y)
 {
diff --git a/gcc/testsuite/gcc.target/i386/bmi-3.c 
b/gcc/testsuite/gcc.target/i386/bmi-3.c
index ddc5e0f66e2..0b91bc25bf8 100644
--- a/gcc/testsuite/gcc.target/i386/bmi-3.c
+++ b/gcc/testsuite/gcc.target/i386/bmi-3.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mbmi " } */
-/* { dg-final { scan-assembler "tzcntw\[^\\n]*(%|)ax" } } */
+/* { dg-final { scan-assembler-times "tzcntw\[^\\n]*%?ax" 2 } } */
 
 #include 
 
@@ -9,3 +9,9 @@ func_tzcnt16 (unsigned short X)
 {
   return __tzcnt_u16(X);
 }
+
+unsigned short
+func_tzcnt16_2 (unsigned short X)
+{
+  return _tzcnt_u16(X);
+}
-- 
2.18.1



Re: [PATCH v8 2/2] Don't move cold code out of loop by checking bb count

2021-12-20 Thread Xionghu Luo via Gcc-patches



On 2021/12/20 15:29, Richard Biener wrote:
> On Wed, Dec 8, 2021 at 7:32 AM Xionghu Luo  wrote:
>>
>>
>>
>> On 2021/12/7 20:17, Richard Biener wrote:
> +  class loop *coldest_loop = coldest_outermost_loop[loop->num];
> +  if (loop_depth (coldest_loop) < loop_depth (outermost_loop))
> +{
> +  class loop *hotter_loop = hotter_than_inner_loop[loop->num];
> +  if (!hotter_loop
> +   || loop_depth (hotter_loop) < loop_depth (outermost_loop))
> + return outermost_loop;
> +
> +  /*  hotter_loop is between OUTERMOST_LOOP and LOOP like:
> + [loop tree root, ..., coldest_loop, ..., outermost_loop, ...,
> + hotter_loop, second_coldest_loop, ..., loop]
> + return second_coldest_loop to be the hoist target.  */
> +  class loop *aloop;
> +  for (aloop = hotter_loop->inner; aloop; aloop = aloop->next)
> + if (flow_loop_nested_p (aloop, loop))
 should be:

 if (aloop == loop || flow_loop_nested_p (aloop, loop))
>>> OK with that fixed.
>>>
>>> Are necessary prerequesites committed to avoid regressions?
>>> I guess we need to keep a watchful eye and eventually revert
>>> (or gate with a --param disabled by default) the new behavior if
>>> severe regressions are discovered.
>>>
>>> Thanks and sorry for the repeated delays.
>>> Richard.
>>>
>>
>> Thanks for your review, I learned quite a lot and gained very useful
>> comments & help through the period :)  There are still 3 patches required
>> to avoid regression or so, I've reorganized them and sent it out.
>>
>> https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586371.html
>>
>>
>> In addition, cooked the patch to add option for disable/enable it.
>> Is it OK to merge it to current patch?
> 
> Hmm, let's go without this flag for now, we can add something like this
> when we see a testcase where that makes sense (and where profile info
> is not wrecked by other bugs).
> 
> Adding a --param would be a no brainer, but for new user-facing options
> we should be more careful.
> 

Thanks.  Committed to r12-6087.


-- 
Thanks,
Xionghu


Re: [PATCH 3/3] Fix loop split incorrect count and probability

2021-12-20 Thread Xionghu Luo via Gcc-patches



On 2021/12/13 16:57, Xionghu Luo via Gcc-patches wrote:
> 
> 
> On 2021/12/9 07:47, Jeff Law wrote:
>>> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
>>> index 3f6ad046623..33128061aab 100644
>>> --- a/gcc/tree-ssa-loop-split.c
>>> +++ b/gcc/tree-ssa-loop-split.c
>>>
>>> @@ -607,6 +610,38 @@ split_loop (class loop *loop1)
>>>   tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge
>>> (loop1));
>>>   patch_loop_exit (loop1, guard_stmt, guard_next, newend,
>>> initial_true);
>>>   +    update_ssa (TODO_update_ssa);
>>> +
>>> +    /* Proportion first loop's bb counts except those dominated by true
>>> +   branch to avoid drop 1s down.  */
>>> +    basic_block *bbs1, *bbs2;
>>> +    bbs1 = get_loop_body (loop1);
>>> +    unsigned j;
>>> +    for (j = 0; j < loop1->num_nodes; j++)
>>> +  if (bbs1[j] == loop1->latch
>>> +  || !dominated_by_p (CDI_DOMINATORS, bbs1[j], true_edge->dest))
>>> +    bbs1[j]->count
>>> +  = bbs1[j]->count.apply_probability (true_edge->probability);
>>> +    free (bbs1);
>> It looks like there's two copies of this code in this patch, one in
>> split_loop and the other in do_split_loop_on_cond.  Would it make sense
>> to factor it out into its own little function?
>>
>>
>>> +
>>> +    /* Proportion second loop's bb counts except those dominated by
>>> false
>>> +   branch to avoid drop 1s down.  */
>>> +    basic_block bbi_copy = get_bb_copy (false_edge->dest);
>>> +    bbs2 = get_loop_body (loop2);
>>> +    for (j = 0; j < loop2->num_nodes; j++)
>>> +  if (bbs2[j] == loop2->latch
>>> +  || !dominated_by_p (CDI_DOMINATORS, bbs2[j], bbi_copy))
>>> +    bbs2[j]->count = bbs2[j]->count.apply_probability (
>>> +  true_edge->probability.invert ());
>>> +    free (bbs2);
>> Similarly for this block of code.
>>
>> If those can be reasonably factored out into two helper functions to be
>> called from split_loop and do_split_loop_on_cond, then this is OK with
>> the refactoring.
>>
>> jeff
> 
> 
> Thanks for the comments, updated as below.  Will commit this patchset and the
> approved patch for LIM if there are no objections:

This patch is committed to r12-6086.

> 
> 
> [PATCH v2 3/3] Fix loop split incorrect count and probability
> 
> In tree-ssa-loop-split.c, split_loop and split_loop_on_cond does two
> kind of split. split_loop only works for single loop and insert edge at
> exit when split, while split_loop_on_cond is not limited to single loop
> and insert edge at latch when split.  Both split behavior should consider
> loop count and probability update.  For split_loop, loop split condition
> is moved in front of loop1 and loop2; But split_loop_on_cond moves the
> condition between loop1 and loop2, this patch does:
>  1) profile count proportion for both original loop and copied loop
> without dropping down the true branch's count;
>  2) probability update in the two loops and between the two loops.
> 
> Regression tested pass, OK for master?
> 
> Changes diff for split_loop and split_loop_on_cond cases:
> 
> 1) diff base/loop-split.c.151t.lsplit patched/loop-split.c.152t.lsplit
> ...
> [local count: 118111600]:
>_1 = end_6(D) - beg_7(D);
>j_9 = _1 + beg2_8(D);
>if (end_6(D) > beg_7(D))
>  goto ; [89.00%]
>else
>  goto ; [11.00%]
> 
> [local count: 105119324]:
>if (j_9 >= c_11(D))
> -goto ; [100.00%]
> +goto ; [33.00%]
>else
> -goto ; [100.00%]
> +goto ; [67.00%]
> 
> -   [local count: 105119324]:
> +   [local count: 34689377]:
>_27 = end_6(D) + -1;
>_28 = beg_7(D) - end_6(D);
>_29 = j_9 + _28;
>_30 = _29 + 1;
>_31 = MAX_EXPR ;
> 
> -   [local count: 955630225]:
> +   [local count: 315357973]:
># i_18 = PHI 
># j_19 = PHI 
>printf ("a: %d %d\n", i_18, j_19);
>i_13 = i_18 + -1;
>j_14 = j_19 + -1;
>if (j_14 >= _31)
> -goto ; [89.00%]
> +goto ; [29.37%]
>else
> -goto ; [11.00%]
> +goto ; [70.63%]
> 
> -   [local count: 850510901]:
> +   [local count: 280668596]:
>goto ; [100.00%]
> 
> -   [local count: 105119324]:
> +   [local count: 70429947]:
># i_24 = PHI 
># j_25 = PHI 
> 
> [local count: 955630225]:
># i_3 = PHI 
># j_2 = PHI 
>i_22 = i_3 + -1;
>j_23 = j_2 + -1;
>if (beg_7(D) < i_22)
>  goto ; [89.00%]
>else
>  goto ; [11.00%]
> 
> -   [local count: 850510901]:
> +   [local count: 569842305]:
>goto ; [100.00%]
> 
> [local count: 105119324]:
># i_32 = PHI 
># j_33 = PHI 
>if (beg_7(D) < i_32)
>  goto ; [80.00%]
>else
>  goto ; [20.00%]
> 
> [local count: 105119324]:
> 
> [local count: 118111600]:
>return 0;
> 
>  }
> 
> 2) diff base/loop-cond-split-1.c.151t.lsplit  
> patched/loop-cond-split-1.c.151t.lsplit:
> ...
> [local count: 118111600]:
>if (n_7(D) > 0)
>  goto ; [89.00%]
>else
>  goto ; [11.00%]
> 
> [local count: 118111600]:
>return;
> 
> [local 

Re: [PATCH 2/3] Fix incorrect loop exit edge probability [PR103270]

2021-12-20 Thread Xionghu Luo via Gcc-patches



On 2021/12/16 19:18, Jan Hubicka wrote:
>>>
>>>
>>> ./contrib/analyze_brprob.py ~/workspace/tests/spec2017/dump_file_all
>>> HEURISTICS   BRANCHES  (REL)  BR. HITRATE   
>>>  HITRATE   COVERAGE COVERAGE  (REL)  predict.def  (REL) HOT 
>>> branches (>10%)
>>> noreturn call   1   0.0%  100.00%   
>>> 50.00% /  50.00%  2 2.00   0.0% 100%:1
>>> Fortran zero-sized array3   0.0%   66.67%   
>>> 41.71% /  60.50%362   362.00   0.0% 100%:3
>>> loop iv compare16   0.0%   93.75%   
>>> 98.26% /  98.76% 279847  279.85k   0.0% 93%:4
>>> __builtin_expect   35   0.0%   97.14%   
>>> 78.09% /  78.35%   17079558   17.08M   0.0%
>>> loop guard with recursion  45   0.1%   86.67%   
>>> 85.13% /  85.14% 67224244126.72G   1.3% 74%:4
>>> extra loop exit80   0.1%   58.75%   
>>> 81.49% /  89.21%  438470261  438.47M   0.1% 86%:3
>>> guess loop iv compare 235   0.3%   80.85%   
>>> 52.83% /  73.97%  148558247  148.56M   0.0% 47%:3
>>> negative return   241   0.3%   71.37%   
>>> 25.33% /  92.61%  250402383  250.40M   0.0% 69%:2
>>> loop exit with recursion  315   0.4%   74.60%   
>>> 85.07% /  85.71% 94031368589.40G   1.8% 59%:4
>>> const return  320   0.4%   51.88%   
>>> 90.45% /  95.63%  925341727  925.34M   0.2% 76%:5
>>> indirect call 377   0.5%   51.46%   
>>> 84.72% /  91.14% 21337728482.13G   0.4% 69%:1
>>> polymorphic call  410   0.5%   44.15%   
>>> 31.26% /  79.37% 32726882443.27G   0.6% 53%:2
>>> recursive call506   0.7%   39.53%   
>>> 44.97% /  83.92% 12110368061.21G   0.2% 10%:1
>>> goto  618   0.8%   64.24%   
>>> 65.37% /  83.57%  702446178  702.45M   0.1% 20%:1
>>> null return   800   1.1%   64.62%   
>>> 56.59% /  77.70%  603952067  603.95M   0.1% 28%:2
>>> continue  956   1.3%   63.70%   
>>> 65.65% /  79.97% 37803037993.78G   0.7% 52%:3
>>> loop guard   1177   1.6%   56.33%   
>>> 42.54% /  80.32% 73736014577.37G   1.4% 50%:2
>>> opcode values positive (on trees)2020   2.7%   62.38%   
>>> 64.16% /  84.44%31695571761   31.70G   6.0% 21%:2
>>> loop exit3293   4.4%   76.19%   
>>> 87.18% /  88.35%50377138963   50.38G   9.6% 18%:1
>>> loop iterations  4761   6.3%   99.98%   
>>> 84.27% /  84.27%73463634555   73.46G  13.9%
>>> pointer (on trees)   8076  10.7%   56.23%   
>>> 69.36% /  83.15%1232201   12.32G   2.3%
>>> call11396  15.1%   64.14%   
>>> 74.13% /  89.82%25197949198   25.20G   4.8% 34%:1
>>> opcode values nonequal (on trees)   12237  16.3%   70.70%   
>>> 70.86% /  83.54%36638772333   36.64G   6.9%
>>> guessed loop iterations 16760  22.3%   99.78%   
>>> 91.49% /  91.49%   162952747918  162.95G  30.9%
>>>
>>> HEURISTICS   BRANCHES  (REL)  BR. HITRATE   
>>>  HITRATE   COVERAGE COVERAGE  (REL)  predict.def  (REL) HOT 
>>> branches (>10%)
>>> no prediction   12730  16.9%   39.29%   
>>> 33.32% /  79.93%   121106031835  121.11G  23.0%
>>> first match 25261  33.6%   92.17%   
>>> 88.33% /  88.98%   296652487962  296.65G  56.3%
>>> DS theory   28333  37.7%   63.03%   
>>> 72.05% /  85.00%   109563734005  109.56G  20.8%
>>> combined75232 100.0%   73.17%   
>>> 72.32% /  86.08%   527351738575  527.35G 100.0%
>>>
>>> Loop count: 37870
>>>   avg. # of iter: 8444.77
>>>   median # of iter: 7.00
>>>   avg. (1% cutoff) # of iter: 174.68
>>>   avg. (5% cutoff) # of iter: 55.14
>>>   avg. (10% cutoff) # of iter: 35.21
>>>   avg. (20% cutoff) # of iter: 26.23
>>>   avg. (30% cutoff) # of iter: 21.70
>>
>> This is the output data collected without the patch, as can be seen, no 
>> difference on "extra loop exit".
>> But 

Re: [PATCH] rs6000: Replace UNSPECS with ss_plus/us_plus and ss_minus/us_minus

2021-12-20 Thread Xionghu Luo via Gcc-patches



On 2021/12/21 09:32, David Edelsohn wrote:
> Explicit clobbers like this help one side of the issue.  For vscr, other
> than the sat bit there is only the nj bit, and we just ignore that :-)
> 
>> This patch is okay.  Thanks for updating the machine description and
>> for cleaning up the formatting.
> x2.  Thanks!


Committed to r12-6084.

-- 
Thanks,
Xionghu


Re: [PATCH] elf: Add __libc_get_static_tls_bounds [BZ #16291]

2021-12-20 Thread Fāng-ruì Sòng via Gcc-patches
On Mon, Nov 29, 2021 at 11:30 AM Florian Weimer  wrote:
>
> * Fāng-ruì Sòng:
>
> > PING^3
>
> I think the core issue with this patch is like this:
>
> * I do not want to commit glibc to a public API that disallows future
>   changes to the way we allocate static TLS.  While static TLS objects
>   cannot move in memory, the extent of the static TLS area (minimum and
>   maximum address) is not fixed by ABI necessities.
>
> * Joseph does not want to add a GLIBC_PRIVATE ABI that is exclusively
>   used externally.
>
> I have tried repeatly to wrap my head around how the sanitizers use the
> static TLS boundary information.  Based on what I can tell, there are
> several applications:
>
> 1. Thead Sanitizer needs to know application-accessible thread-specific
>memory that is carried via the glibc thread (stack) cache from one
>thread to another one, seemingly without synchronization.  Since the
>synchronization happens internally within glibc, without that extra
>knowledge, Thread Sanitizer would report a false positive.  This
>covers only data to which the application has direct access, internal
>access by glibc does not count because it is not going to be
>instrumented.
>
> 2. Address Sanitizer needs TLS boundary information for bounds checking.
>Again this only applies to accesses that can be instrumented, so only
>objects whose addresses leak to application code count.  (Maybe this
>is a fringe use case, though: it seems to apply only to “extern
>__thread int a[];“ and similar declarations, where the declared type
>is not enough.)
>
> 3. Leak Sanitizer needs to find all per-thread pointers pointing into
>the malloc heap, within memory not itself allocated by malloc.  This
>includes the DTV, pthread_getspecific metadata, and perhaps also user
>data set by pthread_getspecific, and of course static TLS variables.
>This goes someone beyond what people would usually consider static
>TLS: the DTV and struct pthread are not really part of static TLS as
>such.  And struct pthread has several members that contain malloc'ed
>pointers.
>
> Is this a complete list of uses?

Perhaps add HWAddressSanitizer along with Address Sanitizer and Memory
Sanitizer along with Thread Sanitizer.
Then I think the list is complete.

> I *think* you can get what you need via existing GLIBC_PRIVATE
> interfaces.  But in order to describe how to caox the data out of glibc,
> I need to know what you need.

Unfortunate no, not reliably. Currently _dl_get_tls_static_info
(https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/sanitizer_common/sanitizer_linux_libcdep.cpp#L207)
is used. It has the size information but not the start address, so the
sanitizer runtime is doing very ugly/error-prone probing of the start
address by reading the thread pointer and hard coding the `struct
pthread` size for various glibc versions.
Every time `struct pthreads` increases in glibc, sanitizer runtime has to adapt.

__libc_get_static_tls_bounds if supported by glibc, can replace
_dl_get_tls_static_info and the existing glibc specific GetTls hacks,
and provide a bit more compatibility when glibc struct pthreads
increases.

> (Cc:ing a few people from a recent GCC thread.)
>
> Thanks,
> Florian
>


-- 
宋方睿


Re: [PATCH] rs6000: Replace UNSPECS with ss_plus/us_plus and ss_minus/us_minus

2021-12-20 Thread Xionghu Luo via Gcc-patches



On 2021/12/21 10:19, Xionghu Luo via Gcc-patches wrote:
> 
> 
> On 2021/12/21 09:32, David Edelsohn wrote:
>> On Mon, Dec 20, 2021 at 6:55 PM Segher Boessenkool
>>  wrote:
>>>
>>> On Mon, Dec 20, 2021 at 11:45:45AM -0500, David Edelsohn wrote:
 On Mon, Dec 20, 2021 at 3:24 AM Xionghu Luo  wrote:
> These four UNSPECS seems could be replaced with native RTL, and why
> "(set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))"
> in the RTL pattern, per ISA of VSCR bit 127(VECTOR Saturation, SAT):
>
>   This bit is sticky; that is, once set to 1 it
>   remains set to 1 until it is set to 0 by an
>   mtvscr instruction.
>
> The RTL pattern set it to 0 but final ASM doesn't present it?  And why
> not use Clobber VSCR_REGNO instead?

 The design came from the early implementation of Altivec:

 https://gcc.gnu.org/pipermail/gcc-patches/2002-May/077409.html
> 
> Thanks, I constructed a testcase for this,
> 
> cat vadds.c
> #include 
> #include 
> vector signed int foo1 (vector signed int a, vector signed int c) {
>   vector signed int ret = vec_vaddsws (a, c);
>   union {vector unsigned short v; unsigned short s[8];} u;
>   u.v = vec_mfvscr();
>   printf("foo1: vscr == { %d,%d,%d,%d,%d,%d,%d,%d }\n", u.s[0], u.s[1],
> u.s[2],
>  u.s[3], u.s[4], u.s[5], u.s[6], u.s[7]);
>   return a;
> }
> 
> vector signed int foo2 (vector signed int a, vector signed int c) {
>   vector signed int ret = vec_vaddsws (a, c);
>   union {vector unsigned short v; unsigned short s[8];} u;
>   u.v = vec_mfvscr();
>   printf("foo2: vscr == { %d,%d,%d,%d,%d,%d,%d,%d }\n", u.s[0], u.s[1],
> u.s[2],
>  u.s[3], u.s[4], u.s[5], u.s[6], u.s[7]);
>   return ret;
> }
> 
> int main ()
> {
>   vector signed int a = {0x12345678, 0x22345678, 0x32345678, 0x42345678};
>   vector signed int c = {0x12345678, 0x22345678, 0x32345678, 0x42345678};
>   vector signed int b1 = foo1 (a, c);
>   vector signed int b2 = foo2 (a, c);
>   return b1[0]+b2[0];
> }
> 
> The output is:
> 
> ./a.out
> foo1: vscr == { 0,0,0,0,0,0,0,0 }
> foo2: vscr == { 1,0,0,0,0,0,0,0 }
> 
> 
> So is this expected behavior?  Seems doesn't meet with Aldy's
> description...  (foo1's temp is optimized away so no vaddsws produced)


Just realized that foo1's "ret" is optimized way in gimple already, so
no such RTL and vaddsws generated in foo1, only foo2's vaddsws will set
VSCR explicitly.

> 
> " foo()
> {
>   vector int result;
> 
>   result = vec_adds(blah, blah);
>   __check_for_saturation__
> }
> 
> gcc, will optimize away vec_adds() because the result is a local
> variable unused later.  then when we check the saturation bit in VSCR,
> we get wrong results.
> 
> this patch explains to gcc all about VSCR, and adds it as a global
> register as well."
> 
> 

 If one later checks for saturation (reads VSCR), one needs a
 corresponding SET of the value.  It's set in an architecture-specific
 manner that isn't described to GCC, but it's set, not just clobbered
 and in an undefined state.
>>>
>>> Well.  RTL clobber and set do exactly the same thing, except with
>>> clobber it is not specified *what* value is set.  All bits are set, all
>>> bits are defined.  There is no (direct) way in RTL to say
>>> "undetermined".
>>>
>>> An RTL clobber would work just fine afaics?
>>
>> I don't know about the original intention from Aldy, but if one were
>> looking at an RTL dump and the code used the saturation bit from VSCR,
>> it might be confusing to see a CLOBBER instead of a SET.  The SET
>> documents that VSCR_REGNO is assigned a specific value; GCC doesn't
>> know about the semantics, but it's not some undefined bit pattern.
>> CLOBBER implies a trash value or a value that one will not query
>> later, i.e., one would want to SET the register to a specific value
>> before using it.

Agree that SET is better than CLOBBER.  Thank you!

>>
>>>
 The RTL does not describe that VSCR is set to the value 0.  The
 (const_int 0) is not the value set.  You can think of the (const_int
 0) as a dummy RTL argument to the VSCR UNSPEC.  UNSPEC requires at
 least one argument and the pattern doesn't try to express the
 argument, so it uses a dummy RTL constant.
>>>
>>> Yup.  Traditionally (pc) was used for this.  Nowadays (const_int 0) is
>>> not really more expensive anymore, and many people find it clearer (but
>>> not in this case it seems :-) ).
>>>
 It's part of a PARALLEL
 and the plus or minus already expresses the data dependency of the
 pattern on the input operands.
>>>
>>> But they do not describe any dependency on vscr, or output to it.  This
>>> is the same problem we have with fpscr (most FP insns use some of its
>>> fields, most set some, but there is no way to cleanly express that).
>>
>> It describes that VSCR_REGNO is set, an output. It doesn't describe
>> how it is set nor inform the compiler that the value depends on the
>> input operands in some 

Re: [PATCH] rs6000: Replace UNSPECS with ss_plus/us_plus and ss_minus/us_minus

2021-12-20 Thread Xionghu Luo via Gcc-patches



On 2021/12/21 09:32, David Edelsohn wrote:
> On Mon, Dec 20, 2021 at 6:55 PM Segher Boessenkool
>  wrote:
>>
>> On Mon, Dec 20, 2021 at 11:45:45AM -0500, David Edelsohn wrote:
>>> On Mon, Dec 20, 2021 at 3:24 AM Xionghu Luo  wrote:
 These four UNSPECS seems could be replaced with native RTL, and why
 "(set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))"
 in the RTL pattern, per ISA of VSCR bit 127(VECTOR Saturation, SAT):

   This bit is sticky; that is, once set to 1 it
   remains set to 1 until it is set to 0 by an
   mtvscr instruction.

 The RTL pattern set it to 0 but final ASM doesn't present it?  And why
 not use Clobber VSCR_REGNO instead?
>>>
>>> The design came from the early implementation of Altivec:
>>>
>>> https://gcc.gnu.org/pipermail/gcc-patches/2002-May/077409.html

Thanks, I constructed a testcase for this,

cat vadds.c
#include 
#include 
vector signed int foo1 (vector signed int a, vector signed int c) {
  vector signed int ret = vec_vaddsws (a, c);
  union {vector unsigned short v; unsigned short s[8];} u;
  u.v = vec_mfvscr();
  printf("foo1: vscr == { %d,%d,%d,%d,%d,%d,%d,%d }\n", u.s[0], u.s[1],
u.s[2],
 u.s[3], u.s[4], u.s[5], u.s[6], u.s[7]);
  return a;
}

vector signed int foo2 (vector signed int a, vector signed int c) {
  vector signed int ret = vec_vaddsws (a, c);
  union {vector unsigned short v; unsigned short s[8];} u;
  u.v = vec_mfvscr();
  printf("foo2: vscr == { %d,%d,%d,%d,%d,%d,%d,%d }\n", u.s[0], u.s[1],
u.s[2],
 u.s[3], u.s[4], u.s[5], u.s[6], u.s[7]);
  return ret;
}

int main ()
{
  vector signed int a = {0x12345678, 0x22345678, 0x32345678, 0x42345678};
  vector signed int c = {0x12345678, 0x22345678, 0x32345678, 0x42345678};
  vector signed int b1 = foo1 (a, c);
  vector signed int b2 = foo2 (a, c);
  return b1[0]+b2[0];
}

The output is:

./a.out
foo1: vscr == { 0,0,0,0,0,0,0,0 }
foo2: vscr == { 1,0,0,0,0,0,0,0 }


So is this expected behavior?  Seems doesn't meet with Aldy's
description...  (foo1's temp is optimized away so no vaddsws produced)

" foo()
{
  vector int result;

  result = vec_adds(blah, blah);
  __check_for_saturation__
}

gcc, will optimize away vec_adds() because the result is a local
variable unused later.  then when we check the saturation bit in VSCR,
we get wrong results.

this patch explains to gcc all about VSCR, and adds it as a global
register as well."


>>>
>>> If one later checks for saturation (reads VSCR), one needs a
>>> corresponding SET of the value.  It's set in an architecture-specific
>>> manner that isn't described to GCC, but it's set, not just clobbered
>>> and in an undefined state.
>>
>> Well.  RTL clobber and set do exactly the same thing, except with
>> clobber it is not specified *what* value is set.  All bits are set, all
>> bits are defined.  There is no (direct) way in RTL to say
>> "undetermined".
>>
>> An RTL clobber would work just fine afaics?
> 
> I don't know about the original intention from Aldy, but if one were
> looking at an RTL dump and the code used the saturation bit from VSCR,
> it might be confusing to see a CLOBBER instead of a SET.  The SET
> documents that VSCR_REGNO is assigned a specific value; GCC doesn't
> know about the semantics, but it's not some undefined bit pattern.
> CLOBBER implies a trash value or a value that one will not query
> later, i.e., one would want to SET the register to a specific value
> before using it.
> 
>>
>>> The RTL does not describe that VSCR is set to the value 0.  The
>>> (const_int 0) is not the value set.  You can think of the (const_int
>>> 0) as a dummy RTL argument to the VSCR UNSPEC.  UNSPEC requires at
>>> least one argument and the pattern doesn't try to express the
>>> argument, so it uses a dummy RTL constant.
>>
>> Yup.  Traditionally (pc) was used for this.  Nowadays (const_int 0) is
>> not really more expensive anymore, and many people find it clearer (but
>> not in this case it seems :-) ).
>>
>>> It's part of a PARALLEL
>>> and the plus or minus already expresses the data dependency of the
>>> pattern on the input operands.
>>
>> But they do not describe any dependency on vscr, or output to it.  This
>> is the same problem we have with fpscr (most FP insns use some of its
>> fields, most set some, but there is no way to cleanly express that).
> 
> It describes that VSCR_REGNO is set, an output. It doesn't describe
> how it is set nor inform the compiler that the value depends on the
> input operands in some complicated way unknown to the compiler, but
> the compiler cannot do anything useful with the additional
> information.
> 
>>
>> Explicit clobbers like this help one side of the issue.  For vscr, other
>> than the sat bit there is only the nj bit, and we just ignore that :-)
>>
>>> This patch is okay.  Thanks for updating the machine description and
>>> for cleaning up the formatting.
>>
>> x2.  Thanks!
>>
>>
>> Segher

-- 
Thanks,
Xionghu


Re: [PATCH] rs6000: Replace UNSPECS with ss_plus/us_plus and ss_minus/us_minus

2021-12-20 Thread David Edelsohn via Gcc-patches
On Mon, Dec 20, 2021 at 6:55 PM Segher Boessenkool
 wrote:
>
> On Mon, Dec 20, 2021 at 11:45:45AM -0500, David Edelsohn wrote:
> > On Mon, Dec 20, 2021 at 3:24 AM Xionghu Luo  wrote:
> > > These four UNSPECS seems could be replaced with native RTL, and why
> > > "(set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))"
> > > in the RTL pattern, per ISA of VSCR bit 127(VECTOR Saturation, SAT):
> > >
> > >   This bit is sticky; that is, once set to 1 it
> > >   remains set to 1 until it is set to 0 by an
> > >   mtvscr instruction.
> > >
> > > The RTL pattern set it to 0 but final ASM doesn't present it?  And why
> > > not use Clobber VSCR_REGNO instead?
> >
> > The design came from the early implementation of Altivec:
> >
> > https://gcc.gnu.org/pipermail/gcc-patches/2002-May/077409.html
> >
> > If one later checks for saturation (reads VSCR), one needs a
> > corresponding SET of the value.  It's set in an architecture-specific
> > manner that isn't described to GCC, but it's set, not just clobbered
> > and in an undefined state.
>
> Well.  RTL clobber and set do exactly the same thing, except with
> clobber it is not specified *what* value is set.  All bits are set, all
> bits are defined.  There is no (direct) way in RTL to say
> "undetermined".
>
> An RTL clobber would work just fine afaics?

I don't know about the original intention from Aldy, but if one were
looking at an RTL dump and the code used the saturation bit from VSCR,
it might be confusing to see a CLOBBER instead of a SET.  The SET
documents that VSCR_REGNO is assigned a specific value; GCC doesn't
know about the semantics, but it's not some undefined bit pattern.
CLOBBER implies a trash value or a value that one will not query
later, i.e., one would want to SET the register to a specific value
before using it.

>
> > The RTL does not describe that VSCR is set to the value 0.  The
> > (const_int 0) is not the value set.  You can think of the (const_int
> > 0) as a dummy RTL argument to the VSCR UNSPEC.  UNSPEC requires at
> > least one argument and the pattern doesn't try to express the
> > argument, so it uses a dummy RTL constant.
>
> Yup.  Traditionally (pc) was used for this.  Nowadays (const_int 0) is
> not really more expensive anymore, and many people find it clearer (but
> not in this case it seems :-) ).
>
> > It's part of a PARALLEL
> > and the plus or minus already expresses the data dependency of the
> > pattern on the input operands.
>
> But they do not describe any dependency on vscr, or output to it.  This
> is the same problem we have with fpscr (most FP insns use some of its
> fields, most set some, but there is no way to cleanly express that).

It describes that VSCR_REGNO is set, an output. It doesn't describe
how it is set nor inform the compiler that the value depends on the
input operands in some complicated way unknown to the compiler, but
the compiler cannot do anything useful with the additional
information.

>
> Explicit clobbers like this help one side of the issue.  For vscr, other
> than the sat bit there is only the nj bit, and we just ignore that :-)
>
> > This patch is okay.  Thanks for updating the machine description and
> > for cleaning up the formatting.
>
> x2.  Thanks!
>
>
> Segher


Re: [PATCH] rs6000: Replace UNSPECS with ss_plus/us_plus and ss_minus/us_minus

2021-12-20 Thread Segher Boessenkool
On Mon, Dec 20, 2021 at 11:45:45AM -0500, David Edelsohn wrote:
> On Mon, Dec 20, 2021 at 3:24 AM Xionghu Luo  wrote:
> > These four UNSPECS seems could be replaced with native RTL, and why
> > "(set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))"
> > in the RTL pattern, per ISA of VSCR bit 127(VECTOR Saturation, SAT):
> >
> >   This bit is sticky; that is, once set to 1 it
> >   remains set to 1 until it is set to 0 by an
> >   mtvscr instruction.
> >
> > The RTL pattern set it to 0 but final ASM doesn't present it?  And why
> > not use Clobber VSCR_REGNO instead?
> 
> The design came from the early implementation of Altivec:
> 
> https://gcc.gnu.org/pipermail/gcc-patches/2002-May/077409.html
> 
> If one later checks for saturation (reads VSCR), one needs a
> corresponding SET of the value.  It's set in an architecture-specific
> manner that isn't described to GCC, but it's set, not just clobbered
> and in an undefined state.

Well.  RTL clobber and set do exactly the same thing, except with
clobber it is not specified *what* value is set.  All bits are set, all
bits are defined.  There is no (direct) way in RTL to say
"undetermined".

An RTL clobber would work just fine afaics?

> The RTL does not describe that VSCR is set to the value 0.  The
> (const_int 0) is not the value set.  You can think of the (const_int
> 0) as a dummy RTL argument to the VSCR UNSPEC.  UNSPEC requires at
> least one argument and the pattern doesn't try to express the
> argument, so it uses a dummy RTL constant.

Yup.  Traditionally (pc) was used for this.  Nowadays (const_int 0) is
not really more expensive anymore, and many people find it clearer (but
not in this case it seems :-) ).

> It's part of a PARALLEL
> and the plus or minus already expresses the data dependency of the
> pattern on the input operands.

But they do not describe any dependency on vscr, or output to it.  This
is the same problem we have with fpscr (most FP insns use some of its
fields, most set some, but there is no way to cleanly express that).

Explicit clobbers like this help one side of the issue.  For vscr, other
than the sat bit there is only the nj bit, and we just ignore that :-)

> This patch is okay.  Thanks for updating the machine description and
> for cleaning up the formatting.

x2.  Thanks!


Segher


Re: [PATCH, rs6000] Implement mffscrni pattern

2021-12-20 Thread Segher Boessenkool
Hi!

On Mon, Dec 20, 2021 at 01:55:51PM +0800, HAO CHEN GUI wrote:
>   * config/rs6000/rs6000-call.c
>   (rs6000_expand_set_fpscr_rn_builtin): Not copy argument to a reg if
>   it's a constant. The pattern for constant can be recognized now.

(Two spaces after full stop).

> +;; Return 1 if op is an unsigned 2-bit constant integer.
> +(define_predicate "u2bit_cint_operand"
> +  (and (match_code "const_int")
> +   (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 3")))

Simple project for anyone: use IN_RANGE more :-)

> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -177,6 +177,7 @@ (define_c_enum "unspecv"
> UNSPECV_MFFS  ; Move from FPSCR
> UNSPECV_MFFSL ; Move from FPSCR light instruction version
> UNSPECV_MFFSCRN   ; Move from FPSCR float rounding mode
> +   UNSPECV_MFFSCRNI  ; Move from FPSCR float rounding mode with imm

Why can this not just use the existing UNSPECV_MFFSCRN?  That way, an
mffsrcn can automatically morph into an mffscrni if the argument is a
constant integer, etc.

> @@ -6333,9 +6342,15 @@ (define_expand "rs6000_set_fpscr_rn"
>   new rounding mode bits from operands[0][62:63] into FPSCR[62:63].  */
>if (TARGET_P9_MISC)
>  {
> -  rtx src_df = force_reg (DImode, operands[0]);
> -  src_df = simplify_gen_subreg (DFmode, src_df, DImode, 0);
> -  emit_insn (gen_rs6000_mffscrn (tmp_df, src_df));
> +  if (CONST_INT_P (operands[0])
> +   && const_0_to_3_operand (operands[0], VOIDmode))

const_0_to_3_operand already checks that CONST_INT_P is true (so you
do not need to test the latter separately).

> @@ -6357,7 +6372,8 @@ (define_expand "rs6000_set_fpscr_rn"
>rtx tmp_di = gen_reg_rtx (DImode);
> 
>/* Extract new RN mode from operand.  */
> -  emit_insn (gen_anddi3 (tmp_rn, operands[0], GEN_INT (0x3)));
> +  rtx op0 = convert_to_mode (DImode, operands[0], false);
> +  emit_insn (gen_anddi3 (tmp_rn, op0, GEN_INT (0x3)));

Please write 3 as 3.  Not as 0x0003, not as 0x03, not as 0x3.  I realise
you just copied this, but :-)

> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/mffscrni_p9.c
> @@ -0,0 +1,9 @@
> +/* { dg-do compile { target { has_arch_pwr9 } } } */

Don't do that?  Instead, use -mdejagnu-cpu=power9.  This will give more
coverage, and also will make us need less future test maintenance (if on
Power28 we will generate different code for this, for example).

> +void __attribute__ ((noinline)) wrap_set_fpscr_rn (int val)
> +{
> +  __builtin_set_fpscr_rn (val);
> +}

Should be noipa, not just noinline?


Segher


Re: [PATCH v4] c-format: Add -Wformat-int-precision option [PR80060]

2021-12-20 Thread Daniil Stas via Gcc-patches
On Sat, 27 Nov 2021 22:18:23 +
Daniil Stas  wrote:

> This option is enabled by default when -Wformat option is enabled. A
> user can specify -Wno-format-int-precision to disable emitting
> warnings when passing an argument of an incompatible integer type to
> a 'd', 'i', 'b', 'B', 'o', 'u', 'x', or 'X' conversion specifier when
> it has the same precision as the expected type.
> 
> Signed-off-by: Daniil Stas 
> 
> gcc/c-family/ChangeLog:
> 
>   * c-format.c (check_format_types): Don't emit warnings when
>   passing an argument of an incompatible integer type to
>   a 'd', 'i', 'b', 'B', 'o', 'u', 'x', or 'X' conversion
>   specifier when it has the same precision as the expected type
>   if -Wno-format-int-precision option is specified.
>   * c.opt: Add -Wformat-int-precision option.
> 
> gcc/ChangeLog:
> 
>   * doc/invoke.texi: Add -Wformat-int-precision option
> description.
> 
> gcc/testsuite/ChangeLog:
> 
>   * c-c++-common/Wformat-int-precision-1.c: New test.
>   * c-c++-common/Wformat-int-precision-2.c: New test.
> ---
> Changes for v4:
>   - Added 'b' and 'B' format specifiers to the option descriptions.
> 
> Changes for v3:
>   - Added additional @code{} derictives to the documentation where
> needed.
>   - Changed tests to run on "! long_neq_int" target instead of "lp64".
>   - Added a test case to check that gcc still emits warnings for
> arguments with different precision even with
> -Wno-format-int-precision option enabled.
> 
> Changes for v2:
>   - Changed the option name to -Wformat-int-precision.
>   - Changed the option description as was suggested by Martin.
>   - Changed Wformat-int-precision-2.c to use dg-bogus instead of
> previous invalid syntax.
> 
>  gcc/c-family/c-format.c |  2 +-
>  gcc/c-family/c.opt  |  6 ++
>  gcc/doc/invoke.texi | 17
> - .../c-c++-common/Wformat-int-precision-1.c  |
> 7 +++ .../c-c++-common/Wformat-int-precision-2.c  |  8
>  5 files changed, 38 insertions(+), 2 deletions(-)
>  create mode 100644
> gcc/testsuite/c-c++-common/Wformat-int-precision-1.c create mode
> 100644 gcc/testsuite/c-c++-common/Wformat-int-precision-2.c
> 
> diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
> index e735e092043..c66787f931f 100644
> --- a/gcc/c-family/c-format.c
> +++ b/gcc/c-family/c-format.c
> @@ -4248,7 +4248,7 @@ check_format_types (const substring_loc
> _loc, && (!pedantic || i < 2)
> && char_type_flag)
>   continue;
> -  if (types->scalar_identity_flag
> +  if ((types->scalar_identity_flag || !warn_format_int_precision)
> && (TREE_CODE (cur_type) == TREE_CODE (wanted_type)
> || (INTEGRAL_TYPE_P (cur_type)
> && INTEGRAL_TYPE_P (wanted_type)))
> diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
> index 4b8a094b206..d7d952765c6 100644
> --- a/gcc/c-family/c.opt
> +++ b/gcc/c-family/c.opt
> @@ -684,6 +684,12 @@ C ObjC C++ LTO ObjC++ Warning
> Alias(Wformat-overflow=, 1, 0) IntegerRange(0, 2) Warn about function
> calls with format strings that write past the end of the destination
> region.  Same as -Wformat-overflow=1. 
> +Wformat-int-precision
> +C ObjC C++ ObjC++ Var(warn_format_int_precision) Warning
> LangEnabledBy(C ObjC C++ ObjC++,Wformat=,warn_format >= 1, 0) +Warn
> when passing an argument of an incompatible integer type to a 'd',
> 'i', +'b', 'B', 'o', 'u', 'x', or 'X' conversion specifier even when
> it has the same +precision as the expected type. +
>  Wformat-security
>  C ObjC C++ ObjC++ Var(warn_format_security) Warning LangEnabledBy(C
> ObjC C++ ObjC++,Wformat=, warn_format >= 2, 0) Warn about possible
> security problems with format functions. diff --git
> a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index
> 3bddfbaae6a..94a7ad96c50 100644 --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -351,7 +351,7 @@ Objective-C and Objective-C++ Dialects}.
>  -Werror  -Werror=*  -Wexpansion-to-defined  -Wfatal-errors @gol
>  -Wfloat-conversion  -Wfloat-equal  -Wformat  -Wformat=2 @gol
>  -Wno-format-contains-nul  -Wno-format-extra-args  @gol
> --Wformat-nonliteral  -Wformat-overflow=@var{n} @gol
> +-Wformat-nonliteral  -Wformat-overflow=@var{n}
> -Wformat-int-precision @gol -Wformat-security  -Wformat-signedness
> -Wformat-truncation=@var{n} @gol -Wformat-y2k  -Wframe-address @gol
>  -Wframe-larger-than=@var{byte-size}  -Wno-free-nonheap-object @gol
> @@ -6122,6 +6122,21 @@ If @option{-Wformat} is specified, also warn
> if the format string is not a string literal and so cannot be
> checked, unless the format function takes its format arguments as a
> @code{va_list}. 
> +@item -Wformat-int-precision
> +@opindex Wformat-int-precision
> +@opindex Wno-format-int-precision
> +Warn when passing an argument of an incompatible integer type to
> +a @samp{d}, @samp{i}, @samp{b}, @samp{B}, @samp{o}, @samp{u},
> @samp{x}, +or @samp{X} 

[PATCH v2] ix86: Don't use the 'm' constraint for x86_64_general_operand

2021-12-20 Thread H.J. Lu via Gcc-patches
On Mon, Dec 20, 2021 at 12:38 PM Jakub Jelinek  wrote:
>
> On Mon, Dec 20, 2021 at 11:44:08AM -0800, H.J. Lu wrote:
> > The problem is in
> >
> > (define_memory_constraint "TARGET_MEM_CONSTRAINT"
> >   "Matches any valid memory."
> >   (and (match_code "mem")
> >(match_test "memory_address_addr_space_p (GET_MODE (op), XEXP (op, 
> > 0),
> >  MEM_ADDR_SPACE (op))")))
> >
> > define_register_constraint allows LRA to convert the operand to the form
> > '(mem (reg X))', where X is a base register.  I am testing the v2 patch with
>
> If you mean replacing an immediate with a MEM containing that immediate,
> isn't that often the right thing though?
> I mean, if the register pressure is high and options are either spill some
> register, set it to immediate, use it in one instruction and then fill the
> spilled register (i.e. 2 memory loads), compared to a MEM use on the
> arithmetic instruction one MEM seems cheaper to me.  With -fPIC and the
> cst needing runtime relocation slightly less so of course.
>

We will check the performance impact on SPEC CPU 2017.
Here is the v2 patch.  Liwei,  can you help collect SPEC CPU 2017
impact of the enclosed patch?  Thanks.

> The code due to ivopts is trying to have something like
>   size_t a = (size_t) _list;
>   size_t b = 0xffa8 - a;
>   size_t c = x + b;
> and for that cst -  one needs actually 2 registers, one to hold the
> constant and one to hold the (%rip) based address.
> (insn 790 789 791 111 (set (reg:DI 292)
> (const_int -88 [0xffa8])) "dl-tunables.c":304:15 76 
> {*movdi_internal}
>  (nil))
> (insn 791 790 792 111 (set (reg:DI 293)
> (symbol_ref:DI ("tunable_list") [flags 0x2]   tunable_list>)) "dl-tunables.c":304:15 76 {*movdi_internal}
>  (nil))
> (insn 792 791 793 111 (parallel [
> (set (reg:DI 291)
> (minus:DI (reg:DI 292)
> (reg:DI 293)))
> (clobber (reg:CC 17 flags))
> ]) "dl-tunables.c":304:15 299 {*subdi_1}
>  (nil))
> (insn 793 792 794 111 (parallel [
> (set (reg:DI 294)
> (plus:DI (reg:DI 291)
> (reg:DI 198 [ ivtmp.176 ])))
> (clobber (reg:CC 17 flags))
> ]) "dl-tunables.c":304:15 226 {*adddi_1}
>  (nil))
> It would be smarter to rewrite the above into a lea 88+tunable_list(%rip), 
> %temp1
> and use a subtraction instead of addition in the last insn above, or of
> course in the particular case even consider the following 2 instructions
> that do:
> (insn 794 793 795 111 (set (reg:DI 296)
> (symbol_ref:DI ("tunable_list") [flags 0x2]   tunable_list>)) "dl-tunables.c":304:15 76 {*movdi_internal}
>  (nil))
> (insn 795 794 796 111 (parallel [
> (set (reg:DI 295 [ cur ])
> (plus:DI (reg:DI 294)
> (reg:DI 296)))
> (clobber (reg:CC 17 flags))
> ]) "dl-tunables.c":304:15 226 {*adddi_1}
>  (nil))
> and find out that _list - _list is 0 and we don't need it at
> all.  Guess we don't figure that out due to the cast of one of those
> addresses to size_t and the other one used in POINTER_PLUS_EXPR as normal
> pointer.
>
> Jakub
>


--
H.J.
From 45676709be2a39920e0e9326049676e6dfdd7885 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" 
Date: Sun, 19 Dec 2021 08:47:03 -0800
Subject: [PATCH v2] ix86: Don't use the 'm' constraint for
 x86_64_general_operand

The 'm' constraint is defined with define_memory_constraint which allows
LRA to convert the operand to the form '(mem (reg X))', where X is a
base register.  To prevent LRA from generating '(mem (reg X))' from a
register:

1. Add a 'BM' constraint which is similar to the 'm' constraint, but
is defined with define_constraint.
2. Add a 'm' mode attribute which is mapped to the 'm' constraint for
general_operand and the 'BM' constraint for x86_64_general_operand.
3. Replace the 'm' constraint on  with the ''
constraint.
4. Replace the 'm' constraint on x86_64_general_operand with the 'BM'
constraint.

gcc/

	PR target/103762
	* config/i386/constraints.md (BM): New constraint.
	* config/i386/i386.md (m): New mode attribute.
	Replace the 'm' constraint on  with the ''
	constraint.
	Replace the 'm' constraint on x86_64_general_operand with the
	'BM' constraint.

gcc/testsuite/

	* gcc.target/i386/pr103762-1a.c: New test.
	* gcc.target/i386/pr103762-1b.c: Likewise.
	* gcc.target/i386/pr103762-1c.c: Likewise.
---
 gcc/config/i386/constraints.md  |  10 +
 gcc/config/i386/i386.md |  67 +-
 gcc/testsuite/gcc.target/i386/pr103762-1a.c | 647 
 gcc/testsuite/gcc.target/i386/pr103762-1b.c |   7 +
 gcc/testsuite/gcc.target/i386/pr103762-1c.c |   7 +
 5 files changed, 706 insertions(+), 32 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103762-1a.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr103762-1b.c
 create mode 100644 

[PATCH] PR fortran/103777 - ICE in gfc_simplify_maskl, at fortran/simplify.c:4918

2021-12-20 Thread Harald Anlauf via Gcc-patches
Dear all,

we need to check the arguments of the elemental MASKL and MASKR
intrinsics also before simplifying.

Testcase by Gerhard.  The fix is almost obvious, but I'm happy to
get feedback in case there is something I overlooked.  (There is
already a check on scalar arguments to MASKL/MASKR, which however
misses the case of array arguments.)

Regtested on x86_64-pc-linux-gnu.  OK for mainline?

Thanks,
Harald

From b58a44bc861ee3d1e67e3b7c949a301b6290c05c Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Mon, 20 Dec 2021 22:59:53 +0100
Subject: [PATCH] Fortran: check arguments of MASKL/MASKR intrinsics before
 simplification

gcc/fortran/ChangeLog:

	PR fortran/103777
	* simplify.c (gfc_simplify_maskr): Check validity of argument 'I'
	before simplifying.
	(gfc_simplify_maskl): Likewise.

gcc/testsuite/ChangeLog:

	PR fortran/103777
	* gfortran.dg/masklr_3.f90: New test.
---
 gcc/fortran/simplify.c |  6 ++
 gcc/testsuite/gfortran.dg/masklr_3.f90 | 14 ++
 2 files changed, 20 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/masklr_3.f90

diff --git a/gcc/fortran/simplify.c b/gcc/fortran/simplify.c
index 90067b6bbe6..b6f636d4ff1 100644
--- a/gcc/fortran/simplify.c
+++ b/gcc/fortran/simplify.c
@@ -4878,6 +4878,9 @@ gfc_simplify_maskr (gfc_expr *i, gfc_expr *kind_arg)
   bool fail = gfc_extract_int (i, );
   gcc_assert (!fail);

+  if (!gfc_check_mask (i, kind_arg))
+return _bad_expr;
+
   result = gfc_get_constant_expr (BT_INTEGER, kind, >where);

   /* MASKR(n) = 2^n - 1 */
@@ -4909,6 +4912,9 @@ gfc_simplify_maskl (gfc_expr *i, gfc_expr *kind_arg)
   bool fail = gfc_extract_int (i, );
   gcc_assert (!fail);

+  if (!gfc_check_mask (i, kind_arg))
+return _bad_expr;
+
   result = gfc_get_constant_expr (BT_INTEGER, kind, >where);

   /* MASKL(n) = 2^bit_size - 2^(bit_size - n) */
diff --git a/gcc/testsuite/gfortran.dg/masklr_3.f90 b/gcc/testsuite/gfortran.dg/masklr_3.f90
new file mode 100644
index 000..eb689f0f408
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/masklr_3.f90
@@ -0,0 +1,14 @@
+! { dg-do compile }
+! PR fortran/103777 - ICE in gfc_simplify_maskl
+! Contributed by G.Steinmetz
+
+program p
+  print *, maskl([999])   ! { dg-error "must be less than or equal" }
+  print *, maskr([999])   ! { dg-error "must be less than or equal" }
+  print *, maskl([-999])  ! { dg-error "must be nonnegative" }
+  print *, maskr([-999])  ! { dg-error "must be nonnegative" }
+  print *, maskl([32],kind=4)
+  print *, maskl([33],kind=4) ! { dg-error "must be less than or equal" }
+  print *, maskl([64],kind=8)
+  print *, maskl([65],kind=8) ! { dg-error "must be less than or equal" }
+end
--
2.26.2



[PATCH] Register --sysroot in the driver switches table

2021-12-20 Thread Olivier Hainque via Gcc-patches
Hello,

This change adjusts the processing of --sysroot to save the option in the
internal "switches" array, which lets self-specs test for it and provide a
default value possibly dependent on environment variables, as in

  --with-specs=%{!-sysroot*:--sysroot=%:getenv("WIND_BASE" /target)}

This helps the use we have of self specs for VxWorks, and
was bootstrapped and regression tested on native 64bit linux.

Ok to commit ?

Thanks in advance,

With Kind Regards,

Olivier

>From 964829ee06ffe1bedcbab6a3b4c92c5d161aaaed Mon Sep 17 00:00:00 2001
From: Olivier Hainque 
Date: Mon, 20 Dec 2021 17:47:24 +
Subject: [PATCH] Register --sysroot in the driver switches table

This change adjusts the processing of --sysroot to save the option in the
internal "switches" array, which lets self-specs test for it and provide a
default value possibly dependent on environment variables, as in

  --with-specs=%{!-sysroot*:--sysroot=%:getenv("WIND_BASE" /target)}

2021-12-20  Olivier Hainque  

gcc/
	* gcc.c (driver_handle_option): do_save --sysroot.
---
 gcc/gcc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/gcc.c b/gcc/gcc.c
index b75b50b87b2..beb37fe980e 100644
--- a/gcc/gcc.c
+++ b/gcc/gcc.c
@@ -4488,7 +4488,9 @@ driver_handle_option (struct gcc_options *opts,
 case OPT__sysroot_:
   target_system_root = arg;
   target_system_root_changed = 1;
-  do_save = false;
+  /* Saving this option is useful to let self-specs decide to
+ provide a default one.  */
+  do_save = true;
   break;
 
 case OPT_time_:
-- 
2.25.1



[PATCH] PR fortran/103778 - [10/11/12 Regression] ICE: Invalid expression in gfc_element_size

2021-12-20 Thread Harald Anlauf via Gcc-patches
Dear all,

again found by Gerhard: using a BOZ literal constant in situations
where an interoperable object is expected can lead to an ICE.
But obviously a BOZ in not interoperable.

Obvious patch, regtested on x86_64-pc-linux-gnu.

Will commit within 48h unless there are objections or better
suggestions.

Thanks,
Harald

From 2e6c83fbddda3215faf111263ebfc754bc07096c Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Mon, 20 Dec 2021 22:12:33 +0100
Subject: [PATCH] Fortran: BOZ literal constants are not interoperable

gcc/fortran/ChangeLog:

	PR fortran/103778
	* check.c (is_c_interoperable): A BOZ literal constant is not
	interoperable.

gcc/testsuite/ChangeLog:

	PR fortran/103778
	* gfortran.dg/illegal_boz_arg_3.f90: New test.
---
 gcc/fortran/check.c | 6 ++
 gcc/testsuite/gfortran.dg/illegal_boz_arg_3.f90 | 7 +++
 2 files changed, 13 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/illegal_boz_arg_3.f90

diff --git a/gcc/fortran/check.c b/gcc/fortran/check.c
index 625473c90d1..b4db9337e9f 100644
--- a/gcc/fortran/check.c
+++ b/gcc/fortran/check.c
@@ -5185,6 +5185,12 @@ is_c_interoperable (gfc_expr *expr, const char **msg, bool c_loc, bool c_f_ptr)
   return false;
 }

+  if (expr->ts.type == BT_BOZ)
+{
+  *msg = "BOZ literal constant";
+  return false;
+}
+
   if (expr->ts.type == BT_CLASS)
 {
   *msg = "Expression is polymorphic";
diff --git a/gcc/testsuite/gfortran.dg/illegal_boz_arg_3.f90 b/gcc/testsuite/gfortran.dg/illegal_boz_arg_3.f90
new file mode 100644
index 000..59fefa90971
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/illegal_boz_arg_3.f90
@@ -0,0 +1,7 @@
+! { dg-do compile }
+! PR fortran/103778
+
+program p
+  use iso_c_binding, only : c_sizeof
+  integer, parameter :: a = c_sizeof(z'1') ! { dg-error "cannot appear" }
+end
--
2.26.2



[PATCH] PR fortran/103776 - ICE in gfc_compare_string, at fortran/arith.c:1118

2021-12-20 Thread Harald Anlauf via Gcc-patches
Dear all,

we need to check that expressions in CASE selectors are scalar,
and reject them early if they aren't.

Testcase by Gerhard.  The fix is obvious and sort of a follow-up
to the fix for PR103591.

Regtested on x86_64-pc-linux-gnu.

Will commit as obvious within 48h unless there are objections or
other comments.

Thanks,
Harald

From 70385de214fe4de289c56e7f4e06a4b252414379 Mon Sep 17 00:00:00 2001
From: Harald Anlauf 
Date: Mon, 20 Dec 2021 22:01:05 +0100
Subject: [PATCH] Fortran: CASE selector expressions must be scalar

gcc/fortran/ChangeLog:

	PR fortran/103776
	* match.c (match_case_selector): Reject expressions in CASE
	selector which are not scalar.

gcc/testsuite/ChangeLog:

	PR fortran/103776
	* gfortran.dg/select_10.f90: New test.
---
 gcc/fortran/match.c | 13 +
 gcc/testsuite/gfortran.dg/select_10.f90 | 25 +
 2 files changed, 38 insertions(+)
 create mode 100644 gcc/testsuite/gfortran.dg/select_10.f90

diff --git a/gcc/fortran/match.c b/gcc/fortran/match.c
index 52bc5af7542..617fb35c9cd 100644
--- a/gcc/fortran/match.c
+++ b/gcc/fortran/match.c
@@ -6088,6 +6088,19 @@ match_case_selector (gfc_case **cp)
 	}
 }

+  if (c->low && c->low->rank != 0)
+{
+  gfc_error ("Expression in CASE selector at %L must be scalar",
+		 >low->where);
+  goto cleanup;
+}
+  if (c->high && c->high->rank != 0)
+{
+  gfc_error ("Expression in CASE selector at %L must be scalar",
+		 >high->where);
+  goto cleanup;
+}
+
   *cp = c;
   return MATCH_YES;

diff --git a/gcc/testsuite/gfortran.dg/select_10.f90 b/gcc/testsuite/gfortran.dg/select_10.f90
new file mode 100644
index 000..2d9b0170ce9
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/select_10.f90
@@ -0,0 +1,25 @@
+! { dg-do compile }
+! PR fortran/103776 - ICE in gfc_compare_string
+! Contributed by G.Steinmetz
+
+program p
+  integer :: n
+  select case (n)
+  case ([1])! { dg-error "must be scalar" }
+  end select
+  select case (n)
+  case (:[2])   ! { dg-error "must be scalar" }
+  end select
+  select case (n)
+  case (['1'])  ! { dg-error "must be scalar" }
+  end select
+  select case (n)
+  case (['1']:2)! { dg-error "must be scalar" }
+  end select
+  select case (n)
+  case(['1']:['2']) ! { dg-error "must be scalar" }
+  end select
+  select case (n)
+  case(1:['2']) ! { dg-error "must be scalar" }
+  end select
+end
--
2.26.2



Re: [PATCH 1/2] libphobos: fix CET for non-glibc targets

2021-12-20 Thread ibuclaw--- via Gcc-patches
> On 20/12/2021 16:41 Alex Xu (Hello71)  wrote:
> 
>  
> Excerpts from ibuc...@gdcproject.org's message of December 20, 2021 8:56 am:
> >> On 20/12/2021 01:08 Alex Xu (Hello71) via Gcc-patches 
> >>  wrote:
> >> 
> >>  
> >> On musl, linking against libphobos fails because it requires ucontext
> >> but is not explicitly linked against it. This is caused by configure
> >> assuming that it is implemented in assembly, but it is actually not
> >> implemented. This silently works on other libcs because context API does
> >> not require an external library.
> > 
> > Thanks.
> > 
> > Looks reasonable to me, also for backporting to gcc-11, which also has the 
> > same CET support.
> > 
> > Iain.
> > 
> 
> Yes, we noticed this first on gcc 11. I tested that these patches fix 
> the issue on gcc 11, and since nothing seems to have changed, I think 
> the same problem exists and will be fixed by these patches on master.
> 

Do you need me to commit this?

Iain.


Re: [PATCH] c-family: Have -Wformat-diag accept "decl-specifier" [PR103758]

2021-12-20 Thread Jakub Jelinek via Gcc-patches
On Mon, Dec 20, 2021 at 03:21:04PM -0500, Jason Merrill via Gcc-patches wrote:
> > @@ -3215,6 +3215,14 @@ check_tokens (const token_t *tokens, unsigned ntoks,
> >   plural = "s";
> > }
> > + /* As an exception, don't warn about "decl-specifier*" since
> > +it's a C++ grammar production.  */
> > + {
> > +   const size_t l = strlen ("decl-specifier");
> > +   if (!strncmp (format_chars, "decl-specifier", l))
> > + return format_chars + l - 1;
> > + }
> 
> I'd prefer to avoid repeating the string literal, but OK.

We have the startswith inline function that allows to avoid the repetition.
It wouldn't work in the above case due to the return format_chars + l - 1,
but that in itself looks quite weird to me, I'd think better would be
continue; instead of the return ...;, i.e. whenever we see decl-specifier
in the text pretend we haven't seen decl.
So
  /* As an exception, don't warn about "decl-specifier*" since
 it's a C++ grammar production.  */
  if (startswith (format_chars, "decl-specifier"))
continue;
?
Or perhaps even optimize and don't compare uselessly everything with
"decl-specifier" and do it only for the "decl" badword if it matches,
so
  if (badwords[i].name[0] == 'd'
  && startswith (format_chars, "decl-specifier"))
continue;
?

Jakub



Re: [PATCH] ix86: Don't match the 'm' constraint on x86_64_general_operand

2021-12-20 Thread Jakub Jelinek via Gcc-patches
On Mon, Dec 20, 2021 at 11:44:08AM -0800, H.J. Lu wrote:
> The problem is in
> 
> (define_memory_constraint "TARGET_MEM_CONSTRAINT"
>   "Matches any valid memory."
>   (and (match_code "mem")
>(match_test "memory_address_addr_space_p (GET_MODE (op), XEXP (op, 0),
>  MEM_ADDR_SPACE (op))")))
> 
> define_register_constraint allows LRA to convert the operand to the form
> '(mem (reg X))', where X is a base register.  I am testing the v2 patch with

If you mean replacing an immediate with a MEM containing that immediate,
isn't that often the right thing though?
I mean, if the register pressure is high and options are either spill some
register, set it to immediate, use it in one instruction and then fill the
spilled register (i.e. 2 memory loads), compared to a MEM use on the
arithmetic instruction one MEM seems cheaper to me.  With -fPIC and the
cst needing runtime relocation slightly less so of course.

The code due to ivopts is trying to have something like
  size_t a = (size_t) _list;
  size_t b = 0xffa8 - a;
  size_t c = x + b;
and for that cst -  one needs actually 2 registers, one to hold the
constant and one to hold the (%rip) based address.
(insn 790 789 791 111 (set (reg:DI 292)
(const_int -88 [0xffa8])) "dl-tunables.c":304:15 76 
{*movdi_internal}
 (nil))
(insn 791 790 792 111 (set (reg:DI 293)
(symbol_ref:DI ("tunable_list") [flags 0x2]  )) "dl-tunables.c":304:15 76 {*movdi_internal}
 (nil))
(insn 792 791 793 111 (parallel [
(set (reg:DI 291)
(minus:DI (reg:DI 292)
(reg:DI 293)))
(clobber (reg:CC 17 flags))
]) "dl-tunables.c":304:15 299 {*subdi_1}
 (nil))
(insn 793 792 794 111 (parallel [
(set (reg:DI 294)
(plus:DI (reg:DI 291)
(reg:DI 198 [ ivtmp.176 ])))
(clobber (reg:CC 17 flags))
]) "dl-tunables.c":304:15 226 {*adddi_1}
 (nil))
It would be smarter to rewrite the above into a lea 88+tunable_list(%rip), 
%temp1
and use a subtraction instead of addition in the last insn above, or of
course in the particular case even consider the following 2 instructions
that do:
(insn 794 793 795 111 (set (reg:DI 296)
(symbol_ref:DI ("tunable_list") [flags 0x2]  )) "dl-tunables.c":304:15 76 {*movdi_internal}
 (nil))
(insn 795 794 796 111 (parallel [
(set (reg:DI 295 [ cur ])
(plus:DI (reg:DI 294)
(reg:DI 296)))
(clobber (reg:CC 17 flags))
]) "dl-tunables.c":304:15 226 {*adddi_1}
 (nil))
and find out that _list - _list is 0 and we don't need it at
all.  Guess we don't figure that out due to the cast of one of those
addresses to size_t and the other one used in POINTER_PLUS_EXPR as normal
pointer.

Jakub



Re: [PATCH] c-family: Have -Wformat-diag accept "decl-specifier" [PR103758]

2021-12-20 Thread Jason Merrill via Gcc-patches

On 12/17/21 17:59, Marek Polacek wrote:

I'm tired of seeing

cp/parser.c:15923:55: warning: misspelled term 'decl' in format; use 
'declaration' instead [-Wformat-diag]
cp/parser.c:15925:57: warning: misspelled term 'decl' in format; use 
'declaration' instead [-Wformat-diag]

every time I compile cp/parser.c, which happens...a lot.  I'd like my
compilation to be free of warnings, otherwise I'm going to miss some
important ones.

"decl-specifiers" is a C++ grammar term; it is not actual code, so
should not be wrapped with %< %>.  I hope we can accept it as an exception
in check_tokens.

It was surrounded by %< %> in cp_parser_decl_specifier_seq, so fix that.

In passing, fix a misspelling in missspellings.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

In fact, I think I'd like to backport to 11 too, so that eventually
even my system compiler stops warning about this.

PR c++/103758

gcc/c-family/ChangeLog:

* c-format.c (check_tokens): Accept "decl-specifier*".

gcc/cp/ChangeLog:

* parser.c (cp_parser_decl_specifier_seq): Replace %
with %qD.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-condition.C: Adjust dg-error.
---
  gcc/c-family/c-format.c  | 10 +-
  gcc/cp/parser.c  |  2 +-
  gcc/testsuite/g++.dg/cpp0x/constexpr-condition.C |  2 +-
  3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index 617fb5ea626..e1b1d6ba31e 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -3194,7 +3194,7 @@ check_tokens (const token_t *tokens, unsigned ntoks,
   wlen, format_chars);
else
  {
-  /* Diagnose some common missspellings.  */
+  /* Diagnose some common misspellings.  */
for (unsigned i = 0; i != sizeof badwords / sizeof *badwords; ++i)
{
  unsigned badwlen = strspn (badwords[i].name, " -");
@@ -3215,6 +3215,14 @@ check_tokens (const token_t *tokens, unsigned ntoks,
  plural = "s";
}
  
+	  /* As an exception, don't warn about "decl-specifier*" since

+it's a C++ grammar production.  */
+ {
+   const size_t l = strlen ("decl-specifier");
+   if (!strncmp (format_chars, "decl-specifier", l))
+ return format_chars + l - 1;
+ }


I'd prefer to avoid repeating the string literal, but OK.


  format_warning_substr (format_string_loc, format_string_cst,
 fmtchrpos, fmtchrpos + badwords[i].len,
 opt,
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 44eed7ea638..3b33ae0cc21 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -15820,7 +15820,7 @@ cp_parser_decl_specifier_seq (cp_parser* parser,
if (found_decl_spec
  && (flags & CP_PARSER_FLAGS_ONLY_TYPE_OR_CONSTEXPR)
  && token->keyword != RID_CONSTEXPR)
-   error ("% invalid in condition");
+   error ("%qD invalid in condition", ridpointers[token->keyword]);
  
if (found_decl_spec

  && (flags & CP_PARSER_FLAGS_ONLY_MUTABLE_OR_CONSTEXPR)
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-condition.C 
b/gcc/testsuite/g++.dg/cpp0x/constexpr-condition.C
index 733d494c4d7..e81acba68ae 100644
--- a/gcc/testsuite/g++.dg/cpp0x/constexpr-condition.C
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-condition.C
@@ -5,5 +5,5 @@ constexpr int something() { return 3; }
  
  int main() {

if (constexpr long v = something()) {}
-  if (static long v = something()) { } // { dg-error "'decl-specifier' 
invalid" }
+  if (static long v = something()) { } // { dg-error "'static' invalid" }
  }

base-commit: d7ca2a79b82c6500ead6ab983d14c609e2124eee




[PATCH] i386: Fix _pinsr and its splitters [PR103772]

2021-12-20 Thread Uros Bizjak via Gcc-patches
The clever trick to duplicate the value of the input operand into itself
proved not so clever after all.  The splitter should not clobber the input
operand in any case, since the register can hold the value outside the HImode
lowpart when accessed as subreg.  Use the standard earlyclobber approach
instead.

The testcase fails with avx2 ISA, but I was not able to create the testcase
that wouldn't require -mavx512fp16 compile flag.

2021-12-20  Uroš Bizjak  

gcc/ChangeLog:

PR target/103772
* config/i386/sse.md (_pinsr): Add
earlyclobber to (x,x,x,i) alternative.
(_pinsr peephole2): Remove.
(_pinsr splitter): Use output
operand as a temporary register.  Split after reload_completed.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Pushed to master.

Uros.
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5196149ee32..cb1c0b1edec 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17430,7 +17430,7 @@
 
 ;; sse4_1_pinsrd must come before sse2_loadld since it is preferred.
 (define_insn "_pinsr"
-  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v,x")
+  [(set (match_operand:PINSR_MODE 0 "register_operand" "=x,x,x,x,v,v,")
(vec_merge:PINSR_MODE
  (vec_duplicate:PINSR_MODE
(match_operand: 2 "nonimmediate_operand" 
"r,m,r,m,r,m,x"))
@@ -17499,25 +17499,6 @@
   (const_string "*")))])
 
 ;; For TARGET_AVX2, implement insert from XMM reg with PBROADCASTW + PBLENDW.
-;; First try to get a scratch register and go through it.  In case this fails,
-;; overwrite source reg with broadcasted value and blend from there.
-(define_peephole2
-  [(match_scratch:V8_128 4 "x")
-   (set (match_operand:V8_128 0 "sse_reg_operand")
-   (vec_merge:V8_128
- (vec_duplicate:V8_128
-   (match_operand: 2 "sse_reg_operand"))
- (match_operand:V8_128 1 "sse_reg_operand")
- (match_operand:SI 3 "const_int_operand")))]
-  "TARGET_AVX2
-   && INTVAL (operands[3]) > 1
-   && ((unsigned) exact_log2 (INTVAL (operands[3]))
-   < GET_MODE_NUNITS (mode))"
-  [(set (match_dup 4)
-   (vec_duplicate:V8_128 (match_dup 2)))
-   (set (match_dup 0)
-   (vec_merge:V8_128 (match_dup 4) (match_dup 1) (match_dup 3)))])
-
 (define_split
   [(set (match_operand:V8_128 0 "sse_reg_operand")
(vec_merge:V8_128
@@ -17525,18 +17506,14 @@
(match_operand: 2 "sse_reg_operand"))
  (match_operand:V8_128 1 "sse_reg_operand")
  (match_operand:SI 3 "const_int_operand")))]
-  "TARGET_AVX2 && epilogue_completed
+  "TARGET_AVX2 && reload_completed
&& INTVAL (operands[3]) > 1
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
< GET_MODE_NUNITS (mode))"
-  [(set (match_dup 4)
+  [(set (match_dup 0)
(vec_duplicate:V8_128 (match_dup 2)))
(set (match_dup 0)
-   (vec_merge:V8_128 (match_dup 4) (match_dup 1) (match_dup 3)))]
-{
-  operands[4] = lowpart_subreg (mode, operands[2],
-   mode);
-})
+   (vec_merge:V8_128 (match_dup 0) (match_dup 1) (match_dup 3)))])
 
 (define_expand "_vinsert_mask"
   [(match_operand:AVX512_VEC 0 "register_operand")


Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).

2021-12-20 Thread Jakub Jelinek via Gcc-patches
On Thu, Nov 18, 2021 at 07:30:36PM +, Hafiz Abid Qadeer wrote:
> +   if (gfc_match (" : ") != MATCH_YES)
> + {
> +   /* If no ":" then there is no allocator, we backtrack
> +  and read the variable list.  */
> +   gfc_free_expr (allocator);
> +   allocator = NULL;
> +   gfc_current_locus = old_loc;
> + }

Ok, no leak above.

> +
> +   gfc_omp_namelist **head = NULL;
> +   m = gfc_match_omp_variable_list ("", >lists[OMP_LIST_ALLOCATE],
> +false, NULL, );
> +
> +   if (m == MATCH_ERROR)
> + break;

But here it leaks.  Just call gfc_free_expr (allocator); before break.

> +
> +   gfc_omp_namelist *n;
> +   for (n = *head; n; n = n->next)
> + if (allocator)
> +   n->expr = gfc_copy_expr (allocator);
> + else
> +   n->expr = NULL;
> +   gfc_free_expr (allocator);
> +   continue;
> + }
> if ((mask & OMP_CLAUSE_AT)
> && (m = gfc_match_dupl_check (c->at == OMP_AT_UNSET, "at", true))
>!= MATCH_NO)

> +  if (omp_clauses->lists[OMP_LIST_ALLOCATE])
> +{
> +  for (n = omp_clauses->lists[OMP_LIST_ALLOCATE]; n; n = n->next)
> + if (n->expr && (n->expr->ts.type != BT_INTEGER
> + || n->expr->ts.kind != gfc_c_intptr_kind))
> +   {
> + gfc_error ("Expected integer expression of the "
> + "'omp_allocator_handle_kind' kind at %L", >expr->where);

Formatting, "' should be indented below "Expected

> + break;
> +   }
> +
> +  /* Check for 2 things here.
> + 1.  There is no duplication of variable in allocate clause.
> + 2.  Variable in allocate clause are also present in some
> +  privatization clase (non-composite case).  */
> +  for (n = omp_clauses->lists[OMP_LIST_ALLOCATE]; n; n = n->next)
> + n->sym->omp_allocate_clause = 0;
> +
> +  gfc_omp_namelist *prev = NULL;
> +  for (n = omp_clauses->lists[OMP_LIST_ALLOCATE]; n;)
> + {
> +   if (n->sym->omp_allocate_clause == 1)
> + {
> +   gfc_warning (0, "%qs appears more than once in % "
> +"clauses at %L" , n->sym->name, >where);
> +   /* We have already seen this variable so it is a duplicate.
> +  Remove it.  */
> +   if (prev != NULL && prev->next == n)
> + {
> +   prev->next = n->next;
> +   n->next = NULL;
> +   gfc_free_omp_namelist (n, 0);
> +   n = prev->next;
> + }
> +   continue;
> + }
> +   n->sym->omp_allocate_clause = 1;
> +   prev = n;
> +   n = n->next;
> + }
> +
> +  /* non-composite constructs.  */
> +  if (code && code->op < EXEC_OMP_DO_SIMD)
> + {
> +   for (list = 0; list < OMP_LIST_NUM; list++)
> + switch (list)
> + {
> +   case OMP_LIST_PRIVATE:
> +   case OMP_LIST_FIRSTPRIVATE:
> +   case OMP_LIST_LASTPRIVATE:
> +   case OMP_LIST_REDUCTION:
> +   case OMP_LIST_REDUCTION_INSCAN:
> +   case OMP_LIST_REDUCTION_TASK:
> +   case OMP_LIST_IN_REDUCTION:
> +   case OMP_LIST_TASK_REDUCTION:
> +   case OMP_LIST_LINEAR:
> + for (n = omp_clauses->lists[list]; n; n = n->next)
> +   n->sym->omp_allocate_clause = 0;
> + break;
> +   default:
> + break;
> + }
> +
> +   for (n = omp_clauses->lists[OMP_LIST_ALLOCATE]; n; n = n->next)
> + if (n->sym->omp_allocate_clause == 1)
> +   gfc_error ("%qs specified in 'allocate' clause at %L but not "
> +  "in an explicit privatization clause",
> +  n->sym->name, >where);
> + }
> +}

Do you really need a new omp_allocate_clause bit?  From what I can see,
other code uses n->sym->mark for such purposes (temporarily marking some
symbols).
Also, I think allocate clause like the privatization clauses should allow
common blocks and I see code that uses the marks uses something like:
  for (list = OMP_LIST_TO; list != OMP_LIST_NUM;
   list = (list == OMP_LIST_TO ? OMP_LIST_LINK : OMP_LIST_NUM))
for (n = c->lists[list]; n; n = n->next)
  if (n->sym)
n->sym->mark = 0;
  else if (n->u.common->head)
n->u.common->head->mark = 0;
So, a question is if the above won't just crash if I specify
  firstprivate(/foobar/) allocate(/foobar/)
etc.

> + case OMP_LIST_ALLOCATE:
> +   for (; n != NULL; n = n->next)
> + if (n->sym->attr.referenced || declare_simd)

!$omp declare simd doesn't allow allocate clause, so why the
above " || declare_simd"?

> +   {
> + tree t = gfc_trans_omp_variable (n->sym, declare_simd);

And not , false); above?

Jakub



Re: [PATCH] ix86: Don't match the 'm' constraint on x86_64_general_operand

2021-12-20 Thread H.J. Lu via Gcc-patches
On Mon, Dec 20, 2021 at 6:53 AM Jakub Jelinek  wrote:
>
> On Sun, Dec 19, 2021 at 12:06:30PM -0800, H.J. Lu via Gcc-patches wrote:
> > --- a/gcc/config/i386/predicates.md
> > +++ b/gcc/config/i386/predicates.md
> > @@ -1199,6 +1199,12 @@
> >(and (match_operand 0 "memory_operand")
> > (not (match_test "x86_extended_reg_mentioned_p (op)"
> >
> > +;; Return true if OP is a memory operand representable on ix86.
> > +(define_predicate "ix86_memory_operand"
> > +  (and (match_operand 0 "memory_operand")
> > +   (ior (match_test "mode == QImode || mode == HImode")
> > + (match_operand 0 "x86_64_general_operand"
>
> I must be missing something, but how can this work?
> x86_64_general_operand is:
> (define_predicate "x86_64_general_operand"
>   (if_then_else (match_test "TARGET_64BIT")
> (ior (match_operand 0 "nonimmediate_operand")
>  (match_operand 0 "x86_64_immediate_operand"))
> (match_operand 0 "general_operand")))
> and so for non-immediates like MEMs it is just a normal memory_operand.
>
> Jakub
>

The problem is in

(define_memory_constraint "TARGET_MEM_CONSTRAINT"
  "Matches any valid memory."
  (and (match_code "mem")
   (match_test "memory_address_addr_space_p (GET_MODE (op), XEXP (op, 0),
 MEM_ADDR_SPACE (op))")))

define_register_constraint allows LRA to convert the operand to the form
'(mem (reg X))', where X is a base register.  I am testing the v2 patch with

;; NB: Similar to 'm', but don't use define_memory_constraint on x86-64
;; to prevent LRA from converting the operand to the form '(mem (reg X))'
;; where X is a base register.
(define_constraint "BM"
  "@internal x86-64 memory operand."
  (and (match_code "mem")
   (match_test "memory_address_addr_space_p (GET_MODE (op), XEXP (op, 0),
 MEM_ADDR_SPACE (op))")))

;; Memory operand constraint for word modes.
(define_mode_attr m [(QI "m") (HI "m") (SI "BM") (DI "BM")])

-- 
H.J.


Re: [PATCH] Use enclosing object size if it's smaller than member [PR 101475]

2021-12-20 Thread Jeff Law via Gcc-patches




On 12/16/2021 12:56 PM, Martin Sebor via Gcc-patches wrote:

Enabling vectorization at -O2 caused quite a few tests for
warnings to start failing in GCC 12.  These tests were xfailed
and bugs were opened to track the problems until they can be
fully analyzed and ultimately fixed before GCC 12 is released.

I've now started going through these and the first such bug
I tackled is PR 102944.  As it turns out, the xfails there
are all due to a known limitation tracked in PR 101475: when
determining the size of a destination for A COMPONENT_REF,
unless asked for the size of the complete object,
compute_objsize() only considers the size of the referenced
member, even when the member is larger than the object would
allow.  This prevents warnings from diagnosing unvectorized
past-the-end accesses to objects in backing buffers (such as
in character arrays or allocated chunks of memory).

Many (though not all) accesses that are vectorized are diagnosed
because there the COMPONENT_REF is replaced by a MEM_REF.  But
because vectorization depends on target-specific things like
alignment requirements, what is and isn't diagnosed also tends
to be target-specific, making these tests quite brittle..

The attached patch corrects this oversight by using the complete
object's size instead of the member when the former is smaller.
Besides improving the out-of-bounds access detection it also
makes the tests behave more consistently across targets.

Tested on x86_64-linux and by building Glibc and verifying
that the change triggers no new warnings.
I must be missing something here.  How can the enclosing object be 
smaller than a member?


Jeff



Re: [PATCH] c++: Avoid narrowing in make_char_string_pack

2021-12-20 Thread Jason Merrill via Gcc-patches

On 12/17/21 17:58, Marek Polacek wrote:

This fixes

gcc/cp/parser.c:4618:41: warning: narrowing conversion of '(char)(*(str + 
((sizetype)i)))' from 'char' to 'unsigned char' [-Wnarrowing]
  4618 |   unsigned char s[3] = { '\'', str[i], '\'' };
   |~^

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


Hmm, odd that STRING_CST and cpp_string differ in the use of unsigned 
char.  The patch is OK>



gcc/cp/ChangeLog:

* parser.c (make_char_string_pack): Add a cast to const unsigned
char *.
---
  gcc/cp/parser.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 44eed7ea638..56232ab029f 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -4607,7 +4607,8 @@ make_char_string_pack (tree value)
  {
tree charvec;
tree argpack = make_node (NONTYPE_ARGUMENT_PACK);
-  const char *str = TREE_STRING_POINTER (value);
+  const unsigned char *str
+= (const unsigned char *) TREE_STRING_POINTER (value);
int i, len = TREE_STRING_LENGTH (value) - 1;
tree argvec = make_tree_vec (1);
  


base-commit: d7ca2a79b82c6500ead6ab983d14c609e2124eee




Re: [PATCH] c++: memfn lookup consistency in incomplete class context

2021-12-20 Thread Jason Merrill via Gcc-patches

On 12/20/21 12:46, Patrick Palka wrote:

When instantiating a call to a member function of a class template, we
repeat the member function lookup in order to obtain the corresponding
partially instantiated functions.  Within an incomplete-class context
however, we need to be more careful when repeating the lookup because we
don't want to introduce later-declared member functions that weren't
visible at template definition time.  We're currently not careful enough
in this respect, which causes us to reject memfn1.C below.

This patch fixes this issue by making tsubst_baselink filter out from
the instantiation-time lookup those member functions that were invisible
at template definition time.  This is really only necessary within an
incomplete-class context, so this patch adds a heuristic flag to BASELINK
to help us avoid needlessly performing this filtering step (which would
be a no-op) in complete-class contexts.

This is also necessary in order for the ahead-of-time pruning
implemented in r12-6075 to be effective for member functions within
class templates.

Bootstrapped and regtested on x86_64-pc-linux-gnu, does this look OK for
trunk?

gcc/cp/ChangeLog:

* call.c (build_new_method_call): Set
BASELINK_FUNCTIONS_MAYBE_INCOMPLETE_P on the pruned baselink.
* cp-tree.h (BASELINK_FUNCTIONS_MAYBE_INCOMPLETE_P): Define.
* pt.c (filter_memfn_lookup): New subroutine of tsubst_baselink.
(tsubst_baselink): Use filter_memfn_lookup on the new lookup
result when BASELINK_FUNCTIONS_MAYBE_INCOMPLETE_P is set on the
old baselink.  Remove redundant BASELINK_P check.
* search.c (build_baselink): Set
BASELINK_FUNCTIONS_MAYBE_INCOMPLETE_P appropriately.

gcc/testsuite/ChangeLog:

* g++.dg/lookup/memfn1.C: New test.
* g++.dg/template/non-dependent16b.C: New test.
---
  gcc/cp/call.c |  1 +
  gcc/cp/cp-tree.h  |  5 +
  gcc/cp/pt.c   | 91 ++-
  gcc/cp/search.c   |  4 +
  gcc/testsuite/g++.dg/lookup/memfn1.C  | 16 
  .../g++.dg/template/non-dependent16b.C| 37 
  6 files changed, 152 insertions(+), 2 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/lookup/memfn1.C
  create mode 100644 gcc/testsuite/g++.dg/template/non-dependent16b.C

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 1fbfc580a1e..bee367f57d7 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -11187,6 +11187,7 @@ build_new_method_call (tree instance, tree fns, vec **args,
}
  orig_fns = copy_node (orig_fns);
  BASELINK_FUNCTIONS (orig_fns) = fn;
+ BASELINK_FUNCTIONS_MAYBE_INCOMPLETE_P (orig_fns) = true;
}
  
  skip_prune:

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 8b5cfa230ba..4f175f491cb 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -464,6 +464,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX];
PACK_EXPANSION_SIZEOF_P (in *_PACK_EXPANSION)
OVL_USING_P (in OVERLOAD)
IMPLICIT_CONV_EXPR_NONTYPE_ARG (in IMPLICIT_CONV_EXPR)
+  BASELINK_FUNCTIONS_MAYBE_INCOMPLETE_P (in BASELINK)
 2: IDENTIFIER_KIND_BIT_2 (in IDENTIFIER_NODE)
ICS_THIS_FLAG (in _CONV)
DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (in VAR_DECL)
@@ -1060,6 +1061,10 @@ struct GTY(()) tree_template_decl {
  /* Nonzero if this baselink was from a qualified lookup.  */
  #define BASELINK_QUALIFIED_P(NODE) \
TREE_LANG_FLAG_0 (BASELINK_CHECK (NODE))
+/* Nonzero if the overload set for this baselink might be incomplete due
+   to the lookup being performed from an incomplete class context.  */
+#define BASELINK_FUNCTIONS_MAYBE_INCOMPLETE_P(NODE) \
+  TREE_LANG_FLAG_1 (BASELINK_CHECK (NODE))
  
  struct GTY(()) tree_baselink {

struct tree_common common;
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 4f0ae6d5851..7293f4e7b12 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -16221,6 +16221,81 @@ tsubst (tree t, tree args, tsubst_flags_t complain, 
tree in_decl)
  }
  }
  
+/* OLDFNS is a lookup set of member functions from some class template, and

+   NEWFNS is a lookup set of member functions from a specialization of that
+   class template.  Return the subset of NEWFNS which are specializations of
+   some function from OLDFNS.  */
+
+static tree
+filter_memfn_lookup (tree oldfns, tree newfns)
+{
+  /* Record all member functions from the old lookup set OLDFNS into
+ VISIBLE_SET.  */
+  hash_set visible_set;
+  for (tree fn : lkp_range (oldfns))
+{
+  if (TREE_CODE (fn) == USING_DECL)
+   {
+ /* FIXME: Punt on (dependent) USING_DECL for now; mapping
+a dependent USING_DECL to its instantiation seems
+tricky.  */


I wouldn't expect it to be that bad; the instantiation would be any 
functions from bases of the class where the USING_DECL (and the lookup) 
occurs.  But that can be a follow-up; the 

[committed] d: Merge upstream dmd ad8412530, druntime fd9a4544, phobos 495e835c2.

2021-12-20 Thread Iain Buclaw via Gcc-patches
Hi,

This patch merges the D front-end with upstream dmd ad8412530, and the
run-time libraries with upstream druntime fd9a4544 and phobos 495e835c2.

D front-end changes:

- Import dmd v2.098.1
- Remove calling of _d_delstruct from code generator.

Druntime changes:

- Import druntime v2.098.1

Phobos changes:

- Import phobos v2.098.1

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32, and
committed to mainline.

Regards,
Iain.

---
gcc/d/ChangeLog:

* dmd/MERGE: Merge upstream dmd ad8412530.
* expr.cc (ExprVisitor::visit (DeleteExp *)): Remove code generation
of _d_delstruct.
* runtime.def (DELSTRUCT): Remove.

libphobos/ChangeLog:

* libdruntime/MERGE: Merge upstream druntime fd9a4544.
* src/MERGE: Merge upstream phobos 495e835c2.
---
 gcc/d/dmd/MERGE   |   2 +-
 gcc/d/dmd/canthrow.d  |  16 ++
 gcc/d/dmd/dcast.d |  73 +++
 gcc/d/dmd/dinterpret.d|  41 
 gcc/d/dmd/dsymbol.d   |  16 +-
 gcc/d/dmd/dsymbolsem.d|  24 ++-
 gcc/d/dmd/expressionsem.d |  26 ++-
 gcc/d/dmd/id.d|   3 +
 gcc/d/dmd/initsem.d   | 106 ++-
 gcc/d/dmd/nogc.d  |  14 ++
 gcc/d/dmd/semantic3.d |   3 +-
 gcc/d/dmd/tokens.d|  26 +--
 gcc/d/dmd/tokens.h|  11 +-
 gcc/d/dmd/typesem.d   | 178 +-
 gcc/d/expr.cc |  12 +-
 gcc/d/runtime.def |   2 -
 gcc/testsuite/gdc.test/compilable/test22593.d |  13 ++
 .../gdc.test/fail_compilation/ice17074.d  |  12 +-
 .../gdc.test/fail_compilation/test22593.d |  23 +++
 libphobos/libdruntime/MERGE   |   2 +-
 libphobos/libdruntime/core/builtins.d |  48 -
 libphobos/libdruntime/core/lifetime.d |  20 +-
 libphobos/libdruntime/core/sys/linux/sched.d  |   3 +
 libphobos/libdruntime/object.d|  30 ++-
 libphobos/src/MERGE   |   2 +-
 libphobos/src/std/format/write.d  |  23 +++
 libphobos/src/std/range/interfaces.d  |   9 +
 libphobos/src/std/typecons.d  |   2 +-
 28 files changed, 468 insertions(+), 272 deletions(-)
 create mode 100644 gcc/testsuite/gdc.test/compilable/test22593.d
 create mode 100644 gcc/testsuite/gdc.test/fail_compilation/test22593.d

diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
index d7eff4ffd2f..b42576c2ce6 100644
--- a/gcc/d/dmd/MERGE
+++ b/gcc/d/dmd/MERGE
@@ -1,4 +1,4 @@
-93108bb9ea6216d67fa97bb4842fb59f26f6bfc7
+ad8412530e607ffebec36f2dbdff1a6f2798faf7
 
 The first line of this file holds the git revision number of the last
 merge done from the dlang/dmd repository.
diff --git a/gcc/d/dmd/canthrow.d b/gcc/d/dmd/canthrow.d
index b67a9d14dd4..b1877151c88 100644
--- a/gcc/d/dmd/canthrow.d
+++ b/gcc/d/dmd/canthrow.d
@@ -82,6 +82,22 @@ extern (C++) bool canThrow(Expression e, FuncDeclaration 
func, bool mustNotThrow
 
 if (global.errors && !ce.e1.type)
 return; // error recovery
+
+import dmd.id : Id;
+
+if (ce.f && ce.f.ident == Id._d_delstruct)
+{
+// Only check if the dtor throws.
+Type tb = (*ce.arguments)[0].type.toBasetype();
+auto ts = tb.nextOf().baseElemOf().isTypeStruct();
+if (ts)
+{
+auto sd = ts.sym;
+if (sd.dtor)
+checkFuncThrows(ce, sd.dtor);
+}
+}
+
 /* If calling a function or delegate that is typed as nothrow,
  * then this expression cannot throw.
  * Note that pure functions can throw.
diff --git a/gcc/d/dmd/dcast.d b/gcc/d/dmd/dcast.d
index 2e5a79d0feb..a572a1ff6e0 100644
--- a/gcc/d/dmd/dcast.d
+++ b/gcc/d/dmd/dcast.d
@@ -1565,9 +1565,9 @@ Expression castTo(Expression e, Scope* sc, Type t, Type 
att = null)
 result = e;
 return;
 }
-if (e.op == EXP.variable)
+if (auto ve = e.isVarExp())
 {
-VarDeclaration v = (cast(VarExp)e).var.isVarDeclaration();
+VarDeclaration v = ve.var.isVarDeclaration();
 if (v && v.storage_class & STC.manifest)
 {
 result = e.ctfeInterpret();
@@ -1852,8 +1852,8 @@ Expression castTo(Expression e, Scope* sc, Type t, Type 
att = null)
 override void visit(StructLiteralExp e)
 {
 visit(cast(Expression)e);
-if (result.op == EXP.structLiteral)
-(cast(StructLiteralExp)result).stype = t; // commit type
+if (auto sle = 

Re: [gcc r12-6020] Fixed typo

2021-12-20 Thread Rainer Orth
Hi Martin,

> On 12/16/21 19:43, Joseph Myers wrote:
>> On Thu, 16 Dec 2021, Martin Liška wrote:
>> 
>>> Hello.
>>>
>>> Oh, sorry, it was me and I forgot to send the patch to the mailing list.
>>> I've basically taken it as: https://github.com/gcc-mirror/gcc/pull/57.
>>>
>>> Should I revert the change?
>> Updating both config.sub and config.guess to the latest config.git
>> versions (2021-10-27 and 2021-11-30) is probably the right thing to do
>> now.
>
> All right, I've reverted the original patch and I'm going to do sync
> from upstream.

please make config.sub executable again.  This has been lost with the
update.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH] rs6000: Replace UNSPECS with ss_plus/us_plus and ss_minus/us_minus

2021-12-20 Thread David Edelsohn via Gcc-patches
On Mon, Dec 20, 2021 at 3:24 AM Xionghu Luo  wrote:
>
> These four UNSPECS seems could be replaced with native RTL, and why
> "(set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))"
> in the RTL pattern, per ISA of VSCR bit 127(VECTOR Saturation, SAT):
>
>   This bit is sticky; that is, once set to 1 it
>   remains set to 1 until it is set to 0 by an
>   mtvscr instruction.
>
> The RTL pattern set it to 0 but final ASM doesn't present it?  And why
> not use Clobber VSCR_REGNO instead?

The design came from the early implementation of Altivec:

https://gcc.gnu.org/pipermail/gcc-patches/2002-May/077409.html

If one later checks for saturation (reads VSCR), one needs a
corresponding SET of the value.  It's set in an architecture-specific
manner that isn't described to GCC, but it's set, not just clobbered
and in an undefined state.

The RTL does not describe that VSCR is set to the value 0.  The
(const_int 0) is not the value set.  You can think of the (const_int
0) as a dummy RTL argument to the VSCR UNSPEC.  UNSPEC requires at
least one argument and the pattern doesn't try to express the
argument, so it uses a dummy RTL constant.  It's part of a PARALLEL
and the plus or minus already expresses the data dependency of the
pattern on the input operands.

I'm unsure of the meaning of your question "final ASM doesn't present
it".  The operation on VSCR is implicit and not emitted as an
instruction.  It's in a PARALLEL, which means that the single Altivec
instruction has both effects.  Is that what you were asking?

> Tested pass on P10, OK for master?

This patch is okay.  Thanks for updating the machine description and
for cleaning up the formatting.

> Thanks.
>
> gcc/ChangeLog:
>
> * config/rs6000/altivec.md (altivec_vaddus): Replace
> UNSPEC_VADDU with us_plus.
> (altivec_vaddss): Replace UNSPEC_VADDS with ss_plus.
> (altivec_vsubus): Replace UNSPEC_VSUBU with us_minus.
> (altivec_vsubss): Replace UNSPEC_VSUBS with ss_minus.
> (altivec_abss_): Likewise.
> ---
>  gcc/config/rs6000/altivec.md | 29 ++---
>  1 file changed, 10 insertions(+), 19 deletions(-)
>
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index a057218aa28..b2909857c34 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -29,8 +29,6 @@ (define_c_enum "unspec"
> UNSPEC_VMHADDSHS
> UNSPEC_VMHRADDSHS
> UNSPEC_VADDCUW
> -   UNSPEC_VADDU
> -   UNSPEC_VADDS
> UNSPEC_VAVGU
> UNSPEC_VAVGS
> UNSPEC_VMULEUB
> @@ -61,8 +59,6 @@ (define_c_enum "unspec"
> UNSPEC_VSR
> UNSPEC_VSRO
> UNSPEC_VSUBCUW
> -   UNSPEC_VSUBU
> -   UNSPEC_VSUBS
> UNSPEC_VSUM4UBS
> UNSPEC_VSUM4S
> UNSPEC_VSUM2SWS
> @@ -517,9 +513,8 @@ (define_insn "altivec_vaddcuw"
>
>  (define_insn "altivec_vaddus"
>[(set (match_operand:VI 0 "register_operand" "=v")
> -(unspec:VI [(match_operand:VI 1 "register_operand" "v")
> -   (match_operand:VI 2 "register_operand" "v")]
> -  UNSPEC_VADDU))
> +(us_plus:VI (match_operand:VI 1 "register_operand" "v")
> +   (match_operand:VI 2 "register_operand" "v")))
> (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
>""
>"vaddus %0,%1,%2"
> @@ -527,9 +522,8 @@ (define_insn "altivec_vaddus"
>
>  (define_insn "altivec_vaddss"
>[(set (match_operand:VI 0 "register_operand" "=v")
> -(unspec:VI [(match_operand:VI 1 "register_operand" "v")
> -(match_operand:VI 2 "register_operand" "v")]
> -  UNSPEC_VADDS))
> +(ss_plus:VI (match_operand:VI 1 "register_operand" "v")
> +   (match_operand:VI 2 "register_operand" "v")))
> (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
>"VECTOR_UNIT_ALTIVEC_P (mode)"
>"vaddss %0,%1,%2"
> @@ -563,9 +557,8 @@ (define_insn "altivec_vsubcuw"
>
>  (define_insn "altivec_vsubus"
>[(set (match_operand:VI 0 "register_operand" "=v")
> -(unspec:VI [(match_operand:VI 1 "register_operand" "v")
> -(match_operand:VI 2 "register_operand" "v")]
> -  UNSPEC_VSUBU))
> +(us_minus:VI (match_operand:VI 1 "register_operand" "v")
> +(match_operand:VI 2 "register_operand" "v")))
> (set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
>"VECTOR_UNIT_ALTIVEC_P (mode)"
>"vsubus %0,%1,%2"
> @@ -573,9 +566,8 @@ (define_insn "altivec_vsubus"
>
>  (define_insn "altivec_vsubss"
>[(set (match_operand:VI 0 "register_operand" "=v")
> -(unspec:VI [(match_operand:VI 1 "register_operand" "v")
> -(match_operand:VI 2 "register_operand" "v")]
> -  UNSPEC_VSUBS))
> +(ss_minus:VI (match_operand:VI 1 "register_operand" "v")
> +(match_operand:VI 2 "register_operand" "v")))
> (set (reg:SI VSCR_REGNO) 

Re: [PATCH] Adjust VxWorks fixincludes hack for mkdir to work for C++

2021-12-20 Thread Olivier Hainque via Gcc-patches



> On 20 Dec 2021, at 17:31, Jeff Law  wrote:
> 
> Given how vxworks specific these fixinc bits are, I think they reasonably 
> fall under maintainership of vxworks bits.
> 
> So I think you can/should self-approve :-)

Thanks Jeff,

Rasmus has provided useful feedback on some of the
hunks and I'm still looking into some of the issues.

I'll proceed with those on which we're aligned, and
on the other ones when we have converged on a resolution.

Cheers,

Olivier



Re: [PATCH] Adjust VxWorks fixincludes hack for mkdir to work for C++

2021-12-20 Thread Jeff Law via Gcc-patches




On 12/17/2021 4:55 AM, Olivier Hainque via Gcc-patches wrote:

Hello,

The attached patch adjusts a very old fixincludes hack for
VxWorks, to expose a varargs function prototype for "mkdir"
instead of a varargs macro (!).

The function version is more amenable to calls from C++ like
std::mkdir(arg1, arg2).

This helps libstdc++ build failures for old versions
of VxWorks, 6.9 in particular for which we were able to get
a successful complete build after this and a couple of other
fixincludes changes which I'll send separately.

Also bootstrapped and regression tested ok for x86_64-linux,
just in case.

Ok to commit?

Thanks in advance,

With Kind Regards,

Olivier


2021-12-16  Olivier Hainque  

fixincludes/
* inclhack.def (vxworks_posix_mkdir): Refine to expose
a varargs interface.
* tests/base/sys/stat.h: Update expected results.
* fixinc.x: Regenerate.
Given how vxworks specific these fixinc bits are, I think they 
reasonably fall under maintainership of vxworks bits.


So I think you can/should self-approve :-)

jeff



Re: [PATCH] Leverage sysroot for VxWorks

2021-12-20 Thread Olivier Hainque via Gcc-patches
Hi Rasmus,

> On 20 Dec 2021, at 12:40, Rasmus Villemoes  wrote:
> 
> On 10/12/2021 19.24, Olivier Hainque wrote:
> 
>> For the toolchains we build, this is achieved with a few
>> configure options like:
>> 
>>   --with-sysroot
>>   --with-build-sysroot=${WIND_BASE}/target
> 
> So forward-porting our private patches up until
> 
> 7bf710b5116 - libstdc++: Add support for '?' in linker script globs
> 
> (i.e. the commit before this one) went without problems, with no changes
> required in our build scripts. Then when rebasing to this one, now known as
> 
> f3f923e5139 - Leverage sysroot for VxWorks
> 
> the build broke as expected because I'd been using somewhat different
> values of --with(-build)-sysroot. However, changing those configure
> options as indicated above again produced a working toolchain, so this
> is all good.

Great !

>>  --with-specs=%{!sysroot=*:--sysroot=%:getenv(WIND_BASE /target)}
> 
> However, I'm a little confused about the purpose of this one.

> First,
> shouldn't this be '%{!-sysroot=*}', i.e. with a leading dash, since
> the option is --sysroot ? But whether I use one or the other, it seems
> that the resulting compiler ignores a --sysroot argument; if I
> explicitly unexport WIND_BASE and manually add a --sysroot argument that
> should have the same effect as above, gcc fails with WIND_BASE not defined:

> The specs syntax doesn't explicitly list the negative form of %{S*:X},
> and I can't find any in-tree examples (though it's rather hard to grep
> for), so I don't even know if this is supposed to work.

I think it is, and it is used for example in:

  config/i386/i386.h:  {"cpu", 
"%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}" },  \

> It's not a big deal, we can just continue to define and export
> WIND_BASE, but it's probably best for long-term maintenance if our build
> scripts and configure options are aligned with what you do on your end,
> so I would just like to understand this fully.

Sure, appreciated.

The purpose was indeed to also prevent producing the default option
if one is there already, and you are right that the form above doesn't
work as expected on this account.

I had made a couple of basic tests and thought it did, but must have
made a mistake. Might have been skewed by an implicit conviction.

The reason for which it doesn't work is the do_save = false in
driver_handle_option for --sysroot:

   case OPT__sysroot_:
  target_system_root = arg;
  target_system_root_changed = 1;
  do_save = false;
  break;


Changing that has the intended effect on our use case,
with an additional dash indeed.

I'll test further and propose the change if it doesn't raise
unexpected problems. We could also even consider using DRIVER_SELF_SPECS
then.

Thanks for your feedback,

Olivier









RE: [3/3 PATCH][AArch32] use canonical ordering for complex mul, fma and fms

2021-12-20 Thread Tamar Christina via Gcc-patches
Updated version of patch following AArch64 review.

Bootstrapped Regtested on arm-none-linux-gnueabihf and no issues.

Ok for master? and backport along with the first patch?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/102819
PR tree-optimization/103169
* config/arm/vec-common.md (cml4): Use
canonical order.

--- inline copy of patch ---

diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 
e71d9b3811fde62159f5c21944fef9fe3f97b4bd..eab77ac8decce76d70f5b2594f4439e6ed363e6e
 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -265,18 +265,18 @@ (define_expand "arm_vcmla"
 ;; remainder.  Because of this, expand early.
 (define_expand "cml4"
   [(set (match_operand:VF 0 "register_operand")
-   (plus:VF (match_operand:VF 1 "register_operand")
-(unspec:VF [(match_operand:VF 2 "register_operand")
-(match_operand:VF 3 "register_operand")]
-   VCMLA_OP)))]
+   (plus:VF (unspec:VF [(match_operand:VF 1 "register_operand")
+(match_operand:VF 2 "register_operand")]
+   VCMLA_OP)
+(match_operand:VF 3 "register_operand")))]
   "(TARGET_COMPLEX || (TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT
  && ARM_HAVE__ARITH)) && !BYTES_BIG_ENDIAN"
 {
   rtx tmp = gen_reg_rtx (mode);
-  emit_insn (gen_arm_vcmla (tmp, operands[1],
-operands[3], operands[2]));
+  emit_insn (gen_arm_vcmla (tmp, operands[3],
+operands[2], operands[1]));
   emit_insn (gen_arm_vcmla (operands[0], tmp,
-operands[3], operands[2]));
+operands[2], operands[1]));
   DONE;
 })



rb15165.patch
Description: rb15165.patch


RE: [2/3 PATCH]AArch64 use canonical ordering for complex mul, fma and fms

2021-12-20 Thread Tamar Christina via Gcc-patches


> -Original Message-
> From: Richard Sandiford 
> Sent: Friday, December 17, 2021 4:49 PM
> To: Tamar Christina 
> Cc: gcc-patches@gcc.gnu.org; nd ; Richard Earnshaw
> ; Marcus Shawcroft
> ; Kyrylo Tkachov 
> Subject: Re: [2/3 PATCH]AArch64 use canonical ordering for complex mul,
> fma and fms
> 
> Richard Sandiford  writes:
> > Tamar Christina  writes:
> >> Hi All,
> >>
> >> After the first patch in the series this updates the optabs to expect
> >> the canonical sequence.
> >>
> >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >>
> >> Ok for master? and backport along with the first patch?
> >>
> >> Thanks,
> >> Tamar
> >>
> >> gcc/ChangeLog:
> >>
> >>PR tree-optimization/102819
> >>PR tree-optimization/103169
> >>* config/aarch64/aarch64-simd.md
> (cml4,
> >>cmul3): Use canonical order.
> >>* config/aarch64/aarch64-sve.md (cml4,
> >>cmul3): Likewise.
> >>
> >> --- inline copy of patch --
> >> diff --git a/gcc/config/aarch64/aarch64-simd.md
> >> b/gcc/config/aarch64/aarch64-simd.md
> >> index
> >>
> f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..875896ee71324712c8034eeff9
> c
> >> fb5649f9b0e73 100644
> >> --- a/gcc/config/aarch64/aarch64-simd.md
> >> +++ b/gcc/config/aarch64/aarch64-simd.md
> >> @@ -556,17 +556,17 @@ (define_insn
> "aarch64_fcmlaq_lane"
> >>  ;; remainder.  Because of this, expand early.
> >>  (define_expand "cml4"
> >>[(set (match_operand:VHSDF 0 "register_operand")
> >> -  (plus:VHSDF (match_operand:VHSDF 1 "register_operand")
> >> -  (unspec:VHSDF [(match_operand:VHSDF 2
> "register_operand")
> >> - (match_operand:VHSDF 3
> "register_operand")]
> >> - FCMLA_OP)))]
> >> +  (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1
> "register_operand")
> >> + (match_operand:VHSDF 2
> "register_operand")]
> >> + FCMLA_OP)
> >> +  (match_operand:VHSDF 3 "register_operand")))]
> >>"TARGET_COMPLEX && !BYTES_BIG_ENDIAN"
> >>  {
> >>rtx tmp = gen_reg_rtx (mode);
> >> -  emit_insn (gen_aarch64_fcmla (tmp, operands[1],
> >> -   operands[3], operands[2]));
> >> +  emit_insn (gen_aarch64_fcmla (tmp, operands[3],
> >> +   operands[1], operands[2]));
> >>emit_insn (gen_aarch64_fcmla (operands[0], tmp,
> >> -   operands[3], operands[2]));
> >> +   operands[1], operands[2]));
> >>DONE;
> >>  })
> >>
> >> @@ -583,9 +583,9 @@ (define_expand "cmul3"
> >>rtx tmp = force_reg (mode, CONST0_RTX (mode));
> >>rtx res1 = gen_reg_rtx (mode);
> >>emit_insn (gen_aarch64_fcmla (res1, tmp,
> >> -   operands[2], operands[1]));
> >> +   operands[1], operands[2]));
> >>emit_insn (gen_aarch64_fcmla (operands[0], res1,
> >> -   operands[2], operands[1]));
> >> +   operands[1], operands[2]));
> >
> > This doesn't look right.  Going from the documentation, patch 1 isn't
> > changing the operand order for CMUL: the conjugated operand (if there
> > is one) is still operand 2.  The FCMLA sequences use the opposite
> > order, where the conjugated operand (if there is one) is operand 1.
> > So I think
> 
> I meant “the first multiplication operand” rather than “operand 1” here.
> 
> > the reversal here is still needed.
> >
> > Same for the multiplication operands in CML* above.

I did actually change the order in patch 1, but didn't update the docs..
That was done because I followed the SLP order again, but now I've updated
them to do what the docs say.

Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.

Ok for master? and backport along with the first patch?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/102819
PR tree-optimization/103169
* config/aarch64/aarch64-simd.md (cml4): Use
canonical order.
* config/aarch64/aarch64-sve.md (cml4): Likewise.

--- inline copy of patch ---

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index 
f95a7e1d91c97c9e981d75e71f0b49c02ef748ba..9e41610fba85862ef7675bea1e5731b14cab59ce
 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -556,17 +556,17 @@ (define_insn "aarch64_fcmlaq_lane"
 ;; remainder.  Because of this, expand early.
 (define_expand "cml4"
   [(set (match_operand:VHSDF 0 "register_operand")
-   (plus:VHSDF (match_operand:VHSDF 1 "register_operand")
-   (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand")
-  (match_operand:VHSDF 3 "register_operand")]
-  FCMLA_OP)))]
+   (plus:VHSDF (unspec:VHSDF [(match_operand:VHSDF 1 

RE: [1/3 PATCH]middle-end vect: Simplify and extend the complex numbers validation routines.

2021-12-20 Thread Tamar Christina via Gcc-patches


> -Original Message-
> From: Richard Sandiford 
> Sent: Friday, December 17, 2021 4:19 PM
> To: Tamar Christina via Gcc-patches 
> Cc: Tamar Christina ; nd ;
> rguent...@suse.de
> Subject: Re: [1/3 PATCH]middle-end vect: Simplify and extend the complex
> numbers validation routines.
> 
> Just a comment on the documentation:
> 
> Tamar Christina via Gcc-patches  writes:
> > diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi index
> >
> 9ec051e94e10cca9eec2773e1b8c01b74b6ea4db..60dc5b3ea6087c2824ad1467
> bc66
> > e9cfebe9dcfc 100644
> > --- a/gcc/doc/md.texi
> > +++ b/gcc/doc/md.texi
> > @@ -6325,12 +6325,12 @@ Perform a vector multiply and accumulate that
> > is semantically the same as  a multiply and accumulate of complex numbers.
> >
> >  @smallexample
> > -  complex TYPE c[N];
> > -  complex TYPE a[N];
> > -  complex TYPE b[N];
> > +  complex TYPE op0[N];
> > +  complex TYPE op1[N];
> > +  complex TYPE op2[N];
> >for (int i = 0; i < N; i += 1)
> >  @{
> > -  c[i] += a[i] * b[i];
> > +  op2[i] += op1[i] * op2[i];
> >  @}
> 
> I think this should be:
> 
>   op0[i] = op1[i] * op2[i] + op3[i];
> 
> since operand 0 is the output and operand 3 is the accumulator input.
> 
> Same idea for the others.  For:
> 
> > @@ -6415,12 +6415,12 @@ Perform a vector multiply that is semantically
> > the same as multiply of  complex numbers.
> >
> >  @smallexample
> > -  complex TYPE c[N];
> > -  complex TYPE a[N];
> > -  complex TYPE b[N];
> > +  complex TYPE op0[N];
> > +  complex TYPE op1[N];
> > +  complex TYPE op2[N];
> >for (int i = 0; i < N; i += 1)
> >  @{
> > -  c[i] = a[i] * b[i];
> > +  op2[i] = op0[i] * op1[i];
> 
> …this I think it should be:
> 
>   op0[i] = op1[i] * op2[i];

Updated patch attached.

Bootstrapped Regtested on aarch64-none-linux-gnu,
x86_64-pc-linux-gnu and no regressions.

Ok for master? and backport to GCC 11 after some stew?

Thanks,
Tamar

gcc/ChangeLog:

PR tree-optimization/102819
PR tree-optimization/103169
* doc/md.texi: Update docs for cfms, cfma.
* tree-data-ref.h (same_data_refs): Accept optional offset.
* tree-vect-slp-patterns.c (is_linear_load_p): Fix issue with repeating
patterns.
(vect_normalize_conj_loc): Remove.
(is_eq_or_top): Change to take two nodes.
(enum _conj_status, compatible_complex_nodes_p,
vect_validate_multiplication): New.
(class complex_add_pattern, complex_add_pattern::matches,
complex_add_pattern::recognize, class complex_mul_pattern,
complex_mul_pattern::recognize, class complex_fms_pattern,
complex_fms_pattern::recognize, class complex_operations_pattern,
complex_operations_pattern::recognize, addsub_pattern::recognize): Pass
new cache.
(complex_fms_pattern::matches, complex_mul_pattern::matches): Pass new
cache and use new validation code.
* tree-vect-slp.c (vect_match_slp_patterns_2, vect_match_slp_patterns,
vect_analyze_slp): Pass along cache.
(compatible_calls_p): Expose.
* tree-vectorizer.h (compatible_calls_p, slp_node_hash,
slp_compat_nodes_map_t): New.
(class vect_pattern): Update signatures include new cache.

gcc/testsuite/ChangeLog:

PR tree-optimization/102819
PR tree-optimization/103169
* g++.dg/vect/pr99149.cc: xfail for now.
* gcc.dg/vect/complex/pr102819-1.c: New test.
* gcc.dg/vect/complex/pr102819-2.c: New test.
* gcc.dg/vect/complex/pr102819-3.c: New test.
* gcc.dg/vect/complex/pr102819-4.c: New test.
* gcc.dg/vect/complex/pr102819-5.c: New test.
* gcc.dg/vect/complex/pr102819-6.c: New test.
* gcc.dg/vect/complex/pr102819-7.c: New test.
* gcc.dg/vect/complex/pr102819-8.c: New test.
* gcc.dg/vect/complex/pr102819-9.c: New test.
* gcc.dg/vect/complex/pr103169.c: New test.

--- inline copy of patch ---

diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 
9ec051e94e10cca9eec2773e1b8c01b74b6ea4db..ad06b02d36876082afe4c3f3fb51887f7a522b23
 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -6325,12 +6325,13 @@ Perform a vector multiply and accumulate that is 
semantically the same as
 a multiply and accumulate of complex numbers.
 
 @smallexample
-  complex TYPE c[N];
-  complex TYPE a[N];
-  complex TYPE b[N];
+  complex TYPE op0[N];
+  complex TYPE op1[N];
+  complex TYPE op2[N];
+  complex TYPE op3[N];
   for (int i = 0; i < N; i += 1)
 @{
-  c[i] += a[i] * b[i];
+  op0[i] = op1[i] * op2[i] + op3[i];
 @}
 @end smallexample
 
@@ -6348,12 +6349,13 @@ the same as a multiply and accumulate of complex 
numbers where the second
 multiply arguments is conjugated.
 
 @smallexample
-  complex TYPE c[N];
-  complex TYPE a[N];
-  complex TYPE b[N];
+  complex TYPE op0[N];
+  complex TYPE op1[N];
+  complex TYPE op2[N];
+  complex TYPE op3[N];
   for (int i = 0; i < N; i += 1)
 @{
-  c[i] += 

Re: [PING] Fix size of static array in gcc.dg/vect/vect-simd-20.c

2021-12-20 Thread Olivier Hainque via Gcc-patches



> On 20 Dec 2021, at 16:32, Jeff Law  wrote:
> 
> 
> 
> On 12/17/2021 4:21 AM, Olivier Hainque via Gcc-patches wrote:
>> Hello,
>> 
>> Ping for https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583222.html
>> 
>> please ?
>> 
>> Thanks in advance!

> OK for the trunk.

>  Sorry it got lost.

Thanks Jeff, no pb at all. All busy :)




[PATCH] libgomp, OpenMP, nvptx: Low-latency memory allocator

2021-12-20 Thread Andrew Stubbs
This patch is submitted now for review and so I can commit a backport it 
to the OG11 branch, but isn't suitable for mainline until stage 1.


The patch implements support for omp_low_lat_mem_space and 
omp_low_lat_mem_alloc on NVPTX offload devices. The omp_pteam_mem_alloc, 
omp_cgroup_mem_alloc and omp_thread_mem_alloc allocators are also 
configured to use this space (this to match the current or intended 
behaviour in other toolchains).


The memory is drawn from the ".shared" space that is accessible only 
from within the team in which it is allocated, and which effectively 
ceases to exist when the kernel exits.  By default, 8 KiB of space is 
reserved for each team at launch time. This can be adjusted, at runtime, 
via a new environment variable "GOMP_NVPTX_LOWLAT_POOL". Reserving a 
larger amount may limit the number of teams that can be run in parallel 
(due to hardware limitations). Conversely, reducing the allocation may 
increase the number of teams that can be run in parallel. (I have not 
yet attempted to tune the default too precisely.) The actual maximum 
size will vary according to the available hardware and the number of 
variables that the compiler has placed in .shared space.


The allocator implementation is designed to add no extra space-overhead 
than omp_alloc already does (aside from rounding allocations up to a 
multiple of 8 bytes), thus the internal free and realloc must be told 
how big the original allocation was. The free algorithm maintains an 
in-order linked-list of free memory chunks. Memory is allocated on a 
first-fit basis.


If the allocation fails the NVPTX allocator returns NULL and omp_alloc 
handles the fall-back. Now that this is a thing that is likely to happen 
(low-latency memory is small) this patch also implements appropriate 
fall-back modes for the predefined allocators (fall-back for custom 
allocators already worked).


In order to support the %dynamic_smem_size PTX feature is is necessary 
to bump the minimum supported PTX version from 3.1 (~2013) to 4.1 (~2014).


OK for stage 1?

Andrewlibgomp, nvptx: low-latency memory allocator

This patch adds support for allocating low-latency ".shared" memory on
NVPTX GPU device, via the omp_low_lat_mem_space and omp_alloc.  The memory
can be allocated, reallocated, and freed using a basic but fast algorithm,
is thread safe and the size of the low-latency heap can be configured using
the GOMP_NVPTX_LOWLAT_POOL environment variable.

The use of the PTX dynamic_smem_size feature means that the minimum version
requirement is now bumped to 4.1 (still old at this point).

gcc/ChangeLog:

* config/nvptx/nvptx.c (nvptx_file_start): Bump minimum PTX version
to 4.1.

libgomp/ChangeLog:

* allocator.c (MEMSPACE_ALLOC): New macro.
(MEMSPACE_CALLOC): New macro.
(MEMSPACE_REALLOC): New macro.
(MEMSPACE_FREE): New macro.
(dynamic_smem_size): New constants.
(omp_alloc): Use MEMSPACE_ALLOC.
Implement fall-backs for predefined allocators.
(omp_free): Use MEMSPACE_FREE.
(omp_calloc): Use MEMSPACE_CALLOC.
Implement fall-backs for predefined allocators.
(omp_realloc): Use MEMSPACE_REALLOC.
Implement fall-backs for predefined allocators.
* config/nvptx/team.c (__nvptx_lowlat_heap_root): New variable.
(__nvptx_lowlat_pool): New asm varaible.
(gomp_nvptx_main): Initialize the low-latency heap.
* plugin/plugin-nvptx.c (lowlat_pool_size): New variable.
(GOMP_OFFLOAD_init_device): Read the GOMP_NVPTX_LOWLAT_POOL envvar.
(GOMP_OFFLOAD_run): Apply lowlat_pool_size.
* config/nvptx/allocator.c: New file.
* testsuite/libgomp.c/allocators-1.c: New test.
* testsuite/libgomp.c/allocators-2.c: New test.
* testsuite/libgomp.c/allocators-3.c: New test.
* testsuite/libgomp.c/allocators-4.c: New test.
* testsuite/libgomp.c/allocators-5.c: New test.
* testsuite/libgomp.c/allocators-6.c: New test.

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index ff44d9fdbef..9bc26d7de0c 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -5409,7 +5409,7 @@ nvptx_file_start (void)
   else if (TARGET_PTX_6_3)
 fputs ("\t.version\t6.3\n", asm_out_file);
   else
-fputs ("\t.version\t3.1\n", asm_out_file);
+fputs ("\t.version\t4.1\n", asm_out_file);
   if (TARGET_SM80)
 fputs ("\t.target\tsm_80\n", asm_out_file);
   else if (TARGET_SM75)
diff --git a/libgomp/allocator.c b/libgomp/allocator.c
index deebb6a79fa..b14f991c148 100644
--- a/libgomp/allocator.c
+++ b/libgomp/allocator.c
@@ -34,6 +34,38 @@
 
 #define omp_max_predefined_alloc omp_thread_mem_alloc
 
+/* These macros may be overridden in config//allocator.c.  */
+#ifndef MEMSPACE_ALLOC
+#define MEMSPACE_ALLOC(MEMSPACE, SIZE) \
+  ((void)MEMSPACE, malloc (SIZE))
+#endif
+#ifndef MEMSPACE_CALLOC
+#define MEMSPACE_CALLOC(MEMSPACE, SIZE) \
+  

Re: [PATCH, rs6000] Implement mffscrni pattern

2021-12-20 Thread David Edelsohn via Gcc-patches
On Mon, Dec 20, 2021 at 12:56 AM HAO CHEN GUI  wrote:
>
> Hi,
>   I modified the patch according to David and Segher's advice.
>
>   This patch defines a pattern for mffscrni. If the RN is a constant, it can 
> call
> gen_rs6000_mffscrni directly. The "rs6000-builtin-new.def" defines prototype 
> for builtin arguments.
> The pattern "rs6000_set_fpscr_rn" is then broken as the mode of its argument 
> is DI while its
> corresponding builtin has a const int argument. The patch also fixed it.
>
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. 
> Is this okay for trunk?
> Any recommendations? Thanks a lot.
>
> ChangeLog
> 2021-12-17 Haochen Gui 
>
> gcc/
> * config/rs6000/predicates.md (u2bit_cint_operand): Defined.
> * config/rs6000/rs6000-call.c
> (rs6000_expand_set_fpscr_rn_builtin): Not copy argument to a reg if
> it's a constant. The pattern for constant can be recognized now.
> * config/rs6000/rs6000.md (UNSPECV_MFFSCRNI): Defined.
> (rs6000_mffscrni): Defined.
> (rs6000_set_fpscr_rn): Change the type of operand[0] form DI to SI.
> Call gen_rs6000_mffscrni when operand[0] is a const int[0,3].
>
> gcc/testsuite/
> * gcc.target/powerpc/mffscrni_p9.c: New testcase for mffscrni.
> * gcc.target/powerpc/test_fpscr_rn_builtin.c: Modify the test cases to
> test mffscrn and mffscrni separately.

This revised patch is okay.

Thanks, David

>
>
> patch.diff
> diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
> index f216ffdf410..b10b4ce6065 100644
> --- a/gcc/config/rs6000/predicates.md
> +++ b/gcc/config/rs6000/predicates.md
> @@ -219,6 +219,11 @@ (define_predicate "u1bit_cint_operand"
>(and (match_code "const_int")
> (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 1")))
>
> +;; Return 1 if op is an unsigned 2-bit constant integer.
> +(define_predicate "u2bit_cint_operand"
> +  (and (match_code "const_int")
> +   (match_test "INTVAL (op) >= 0 && INTVAL (op) <= 3")))
> +
>  ;; Return 1 if op is a unsigned 3-bit constant integer.
>  (define_predicate "u3bit_cint_operand"
>(and (match_code "const_int")
> diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
> index d9736eaf21c..81261a0f24d 100644
> --- a/gcc/config/rs6000/rs6000-call.c
> +++ b/gcc/config/rs6000/rs6000-call.c
> @@ -9610,13 +9610,15 @@ rs6000_expand_set_fpscr_rn_builtin (enum insn_code 
> icode, tree exp)
>   compile time if the argument is a variable.  The least significant two
>   bits of the argument, regardless of type, are used to set the rounding
>   mode.  All other bits are ignored.  */
> -  if (CONST_INT_P (op0) && !const_0_to_3_operand(op0, VOIDmode))
> +  if (CONST_INT_P (op0))
>  {
> -  error ("Argument must be a value between 0 and 3.");
> -  return const0_rtx;
> +  if (!const_0_to_3_operand (op0, VOIDmode))
> +   {
> + error ("Argument must be a value between 0 and 3.");
> + return const0_rtx;
> +   }
>  }
> -
> -  if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
> +  else if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
>  op0 = copy_to_mode_reg (mode0, op0);
>
>pat = GEN_FCN (icode) (op0);
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index 6bec2bddbde..b18746af7ea 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -177,6 +177,7 @@ (define_c_enum "unspecv"
> UNSPECV_MFFS; Move from FPSCR
> UNSPECV_MFFSL   ; Move from FPSCR light instruction version
> UNSPECV_MFFSCRN ; Move from FPSCR float rounding mode
> +   UNSPECV_MFFSCRNI; Move from FPSCR float rounding mode with imm
> UNSPECV_MFFSCDRN; Move from FPSCR decimal float rounding mode
> UNSPECV_MTFSF   ; Move to FPSCR Fields 8 to 15
> UNSPECV_MTFSF_HI; Move to FPSCR Fields 0 to 7
> @@ -6315,6 +6316,14 @@ (define_insn "rs6000_mffscrn"
> "mffscrn %0,%1"
>[(set_attr "type" "fp")])
>
> +(define_insn "rs6000_mffscrni"
> +  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
> +   (unspec_volatile:DF [(match_operand:SI 1 "u2bit_cint_operand" "n")]
> +   UNSPECV_MFFSCRNI))]
> +   "TARGET_P9_MISC"
> +   "mffscrni %0,%1"
> +  [(set_attr "type" "fp")])
> +
>  (define_insn "rs6000_mffscdrn"
>[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
> (unspec_volatile:DF [(const_int 0)] UNSPECV_MFFSCDRN))
> @@ -6324,7 +6333,7 @@ (define_insn "rs6000_mffscdrn"
>[(set_attr "type" "fp")])
>
>  (define_expand "rs6000_set_fpscr_rn"
> - [(match_operand:DI 0 "reg_or_cint_operand")]
> + [(match_operand:SI 0 "reg_or_cint_operand")]
>"TARGET_HARD_FLOAT"
>  {
>rtx tmp_df = gen_reg_rtx (DFmode);
> @@ -6333,9 +6342,15 @@ (define_expand "rs6000_set_fpscr_rn"
>   new rounding mode bits from operands[0][62:63] 

Re: [PATCH 1/2] libphobos: fix CET for non-glibc targets

2021-12-20 Thread Alex Xu (Hello71) via Gcc-patches
Excerpts from ibuc...@gdcproject.org's message of December 20, 2021 8:56 am:
>> On 20/12/2021 01:08 Alex Xu (Hello71) via Gcc-patches 
>>  wrote:
>> 
>>  
>> On musl, linking against libphobos fails because it requires ucontext
>> but is not explicitly linked against it. This is caused by configure
>> assuming that it is implemented in assembly, but it is actually not
>> implemented. This silently works on other libcs because context API does
>> not require an external library.
> 
> Thanks.
> 
> Looks reasonable to me, also for backporting to gcc-11, which also has the 
> same CET support.
> 
> Iain.
> 

Yes, we noticed this first on gcc 11. I tested that these patches fix 
the issue on gcc 11, and since nothing seems to have changed, I think 
the same problem exists and will be fixed by these patches on master.

Cheers,
Alex.


Re: [PING] Fix size of static array in gcc.dg/vect/vect-simd-20.c

2021-12-20 Thread Jeff Law via Gcc-patches




On 12/17/2021 4:21 AM, Olivier Hainque via Gcc-patches wrote:

Hello,

Ping for https://gcc.gnu.org/pipermail/gcc-patches/2021-November/583222.html

please ?

Thanks in advance!

OK for the trunk.  Sorry it got lost.

jeff



Re: [PATCH] c-family: Have -Wformat-diag accept "decl-specifier" [PR103758]

2021-12-20 Thread Jeff Law via Gcc-patches




On 12/17/2021 3:59 PM, Marek Polacek via Gcc-patches wrote:

I'm tired of seeing

cp/parser.c:15923:55: warning: misspelled term 'decl' in format; use 
'declaration' instead [-Wformat-diag]
cp/parser.c:15925:57: warning: misspelled term 'decl' in format; use 
'declaration' instead [-Wformat-diag]

every time I compile cp/parser.c, which happens...a lot.  I'd like my
compilation to be free of warnings, otherwise I'm going to miss some
important ones.

"decl-specifiers" is a C++ grammar term; it is not actual code, so
should not be wrapped with %< %>.  I hope we can accept it as an exception
in check_tokens.

It was surrounded by %< %> in cp_parser_decl_specifier_seq, so fix that.

In passing, fix a misspelling in missspellings.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

In fact, I think I'd like to backport to 11 too, so that eventually
even my system compiler stops warning about this.

PR c++/103758

gcc/c-family/ChangeLog:

* c-format.c (check_tokens): Accept "decl-specifier*".

gcc/cp/ChangeLog:

* parser.c (cp_parser_decl_specifier_seq): Replace %
with %qD.

gcc/testsuite/ChangeLog:

* g++.dg/cpp0x/constexpr-condition.C: Adjust dg-error.
OK.  I know, cp/, but it seems trivial enough that I don't mind ACKing 
this one.



jeff



Re: [PATCH] config.gcc: Obsolete m32c-rtems target

2021-12-20 Thread Jeff Law via Gcc-patches




On 12/18/2021 11:01 AM, Joel Sherrill wrote:

On Sat, Dec 18, 2021 at 10:13 AM Joel Sherrill  wrote:



On Fri, Dec 17, 2021, 9:57 PM Jeff Law  wrote:



On 12/17/2021 9:10 AM, Joel Sherrill wrote:

---
   gcc/config.gcc | 1 +
   1 file changed, 1 insertion(+)

diff --git a/gcc/config.gcc b/gcc/config.gcc
index c8824367b13..fe93a72a16c 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -252,6 +252,7 @@ case ${target} in
| cr16-*-* \
| hppa[12]*-*-hpux10*  \
| hppa[12]*-*-hpux11*  \
+ | m32c-*-rtems* \
)
   if test "x$enable_obsolete" != xyes; then
 echo "*** Configuration ${target} is obsolete." >&2

OK.  Given that last time I tried, I couldn't even get m32c to build
newlib, I'm not terribly surprised you're deprecating it from rtems.


We removed it over a while back. The last release branch with it was using gcc 
7 or 8.

We had no indication there were any users and it led to removing some 
alternative implementations optimised for 16 bit CPUs.


I would support deprecation of m32c-*.


No complaint here.

Can I commit the gcc and wwwdocs patch for m32c-rtems and let Jeff or
someone follow up completely eliminating m32c?

Sure.
jeff



[PATCH] OpenMP front-end: allow requires dynamic_allocators

2021-12-20 Thread Andrew Stubbs

Hi all,

This patch removes the "sorry" message for the OpenMP "requires 
dynamic_allocators" feature in C, C++ and Fortran.


The clause is supposed to state that the user code will not work without 
the omp_alloc/omp_free and omp_init_allocator/omp_destroy_allocator and 
these things *are* present, so there should be no problem allowing it.


I can't see any reason for our implementation to *do* anything with this 
statement -- it's fine for the allocators to work the same with or 
without it.


I think this patch ought to be fine for GCC 12, but if not it can wait 
until stage 1 opens. I shall backport this to OG11 shortly.


OK?

AndrewOpenMP: allow requires dynamic_allocators

There's no need to reject the dynamic_allocators requires directive because
we actually do support the feature, and it doesn't have to actually "do"
anything.

gcc/c/ChangeLog:

* c-parser.c (c_parser_omp_requires): Don't "sorry" dynamic_allocators.

gcc/cp/ChangeLog:

* parser.c (cp_parser_omp_requires): Don't "sorry" dynamic_allocators.

gcc/fortran/ChangeLog:

* openmp.c (gfc_match_omp_requires): Don't "sorry" dynamic_allocators.

gcc/testsuite/ChangeLog:

* gfortran.dg/gomp/requires-8.f90: Reinstate dynamic allocators
requirement.

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index d7e5f051ac0..808a73f1038 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -22581,7 +22581,7 @@ c_parser_omp_requires (c_parser *parser)
  c_parser_skip_to_pragma_eol (parser, false);
  return;
}
- if (p)
+ if (p && this_req != OMP_REQUIRES_DYNAMIC_ALLOCATORS)
sorry_at (cloc, "%qs clause on % directive not "
"supported yet", p);
  if (p)
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index c2564e51e41..d55c9675c4e 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -46421,7 +46421,7 @@ cp_parser_omp_requires (cp_parser *parser, cp_token 
*pragma_tok)
  cp_parser_skip_to_pragma_eol (parser, pragma_tok);
  return false;
}
- if (p)
+ if (p && this_req != OMP_REQUIRES_DYNAMIC_ALLOCATORS)
sorry_at (cloc, "%qs clause on % directive not "
"supported yet", p);
  if (p)
diff --git a/gcc/fortran/openmp.c b/gcc/fortran/openmp.c
index 2036bc1349f..f47a44f7539 100644
--- a/gcc/fortran/openmp.c
+++ b/gcc/fortran/openmp.c
@@ -5373,7 +5373,8 @@ gfc_match_omp_requires (void)
   else
goto error;
 
-  if (requires_clause & ~OMP_REQ_ATOMIC_MEM_ORDER_MASK)
+  if (requires_clause & ~(OMP_REQ_ATOMIC_MEM_ORDER_MASK
+ | OMP_REQ_DYNAMIC_ALLOCATORS))
gfc_error_now ("Sorry, %qs clause at %L on REQUIRES directive is not "
   "yet supported", clause, _loc);
   if (!gfc_omp_requires_add_clause (requires_clause, clause, _loc, 
NULL))
diff --git a/gcc/testsuite/gfortran.dg/gomp/requires-8.f90 
b/gcc/testsuite/gfortran.dg/gomp/requires-8.f90
index 3c32ae9860e..eadfcaf8609 100644
--- a/gcc/testsuite/gfortran.dg/gomp/requires-8.f90
+++ b/gcc/testsuite/gfortran.dg/gomp/requires-8.f90
@@ -4,7 +4,7 @@ contains
  subroutine foo
   interface
subroutine bar2
- !$!omp requires dynamic_allocators
+ !$omp requires dynamic_allocators
end subroutine
   end interface
   !$omp target


Re: [PATCH] ix86: Don't match the 'm' constraint on x86_64_general_operand

2021-12-20 Thread Jakub Jelinek via Gcc-patches
On Sun, Dec 19, 2021 at 12:06:30PM -0800, H.J. Lu via Gcc-patches wrote:
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -1199,6 +1199,12 @@
>(and (match_operand 0 "memory_operand")
> (not (match_test "x86_extended_reg_mentioned_p (op)"
>  
> +;; Return true if OP is a memory operand representable on ix86.
> +(define_predicate "ix86_memory_operand"
> +  (and (match_operand 0 "memory_operand")
> +   (ior (match_test "mode == QImode || mode == HImode")
> + (match_operand 0 "x86_64_general_operand"

I must be missing something, but how can this work?
x86_64_general_operand is:
(define_predicate "x86_64_general_operand"
  (if_then_else (match_test "TARGET_64BIT")
(ior (match_operand 0 "nonimmediate_operand")
 (match_operand 0 "x86_64_immediate_operand"))
(match_operand 0 "general_operand")))
and so for non-immediates like MEMs it is just a normal memory_operand.

Jakub



Re: [PATCH v3 06/12] LoongArch Port: Builtin macros.

2021-12-20 Thread Xi Ruoyao via Gcc-patches
On Mon, 2021-12-20 at 15:35 +0800, Xi Ruoyao via Gcc-patches wrote:
> Hi,
> 
> I've bootstraped the patch with my GNU-stack fix
> (https://github.com/loongson/gcc/pull/62) and --enable-werror-always. 
> Bootstrap succeeded, but with some warnings:
> 
> ../../gcc/config/loongarch/loongarch.md:3205:1: warning: operand 0 missing 
> mode?
> ../../gcc/config/loongarch/loongarch.md:3282:1: warning: operand 1 missing 
> mode?
> ../../gcc/config/loongarch/loongarch.md:3329:1: warning: operand 1 missing 
> mode?
> ../../gcc/config/loongarch/loongarch.md:3392:1: warning: operand 0 missing 
> mode?
> ../../gcc/config/loongarch/loongarch.md:3470:1: warning: operand 1 missing 
> mode?
> ../../gcc/config/loongarch/loongarch.md:3520:1: warning: operand 1 missing 
> mode?
> 
> A similar issue exists for MIPS (PR101593) and I've not figured out how
> to silence these warnings.
> 
> I'm trying to run GCC regression test on LoongArch, but all my previous
> tries couldn't finish because a kernel panic was triggered.  The kernel
> was built from source pulled from
> https://github.com/loongson/linux/tree/loongarch-next.
> 
> I'm not sure if this is a bug in Huacai's patches, or a bug in mainline
> kernel, or even a hardware issue.  If you have some idea about the panic
> please inform me (this is off-topic though).

Well, I replaced the memory module and the system seems running stable...

Test summary is attached.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University
Native configuration is loongarch64-unknown-linux-gnu

=== g++ tests ===


Running target unix
FAIL: tmpdir-g++.dg-struct-layout-1/t033 cp_compat_x_tst.o-cp_compat_y_tst.o 
execute 
FAIL: c-c++-common/spec-barrier-1.c  -std=gnu++98 (test for excess errors)
FAIL: c-c++-common/spec-barrier-1.c  -std=gnu++14 (test for excess errors)
FAIL: c-c++-common/spec-barrier-1.c  -std=gnu++17 (test for excess errors)
FAIL: c-c++-common/spec-barrier-1.c  -std=gnu++2a (test for excess errors)
FAIL: g++.dg/tree-ssa/pr90883.C   scan-tree-dump dse1 "Deleted redundant store: 
.*.a = {}"
FAIL: g++.dg/warn/Waddress-5.C  -std=gnu++98 (test for excess errors)
FAIL: g++.dg/warn/Waddress-5.C  -std=gnu++14 (test for excess errors)
FAIL: g++.dg/warn/Waddress-5.C  -std=gnu++17 (test for excess errors)
FAIL: g++.dg/warn/Waddress-5.C  -std=gnu++2a (test for excess errors)
FAIL: g++.dg/lto/pr64076 cp_lto_pr64076_0.o-cp_lto_pr64076_1.o link,  -O0 -flto 
-shared -fPIC 
FAIL: g++.dg/modules/bad-mapper-3.C -std=c++17  at line 3 (test for errors, 
line )
FAIL: g++.dg/modules/bad-mapper-3.C -std=c++17 (test for excess errors)
FAIL: g++.dg/modules/bad-mapper-3.C -std=c++2a  at line 3 (test for errors, 
line )
FAIL: g++.dg/modules/bad-mapper-3.C -std=c++2a (test for excess errors)
FAIL: g++.dg/modules/bad-mapper-3.C -std=c++2b  at line 3 (test for errors, 
line )
FAIL: g++.dg/modules/bad-mapper-3.C -std=c++2b (test for excess errors)
FAIL: g++.dg/modules/xtreme-header-3_a.H -std=c++17 (internal compiler error: 
tree check: expected var_decl or function_decl or field_decl or type_decl or 
concept_decl or template_decl, have namespace_decl in get_merge_kind, at 
cp/module.cc:10074)
FAIL: g++.dg/modules/xtreme-header-3_a.H -std=c++17 (test for excess errors)
FAIL: g++.dg/modules/xtreme-header-3_a.H module-cmi  
(gcm.cache/\$srcdir/g++.dg/modules/xtreme-header-3_a.H.gcm)
FAIL: g++.dg/modules/xtreme-header-3_a.H -std=c++2a (internal compiler error: 
tree check: expected var_decl or function_decl or field_decl or type_decl or 
concept_decl or template_decl, have namespace_decl in get_merge_kind, at 
cp/module.cc:10074)
FAIL: g++.dg/modules/xtreme-header-3_a.H -std=c++2a (test for excess errors)
FAIL: g++.dg/modules/xtreme-header-3_a.H module-cmi  
(gcm.cache/\$srcdir/g++.dg/modules/xtreme-header-3_a.H.gcm)
FAIL: g++.dg/modules/xtreme-header-3_a.H -std=c++2b (internal compiler error: 
tree check: expected var_decl or function_decl or field_decl or type_decl or 
concept_decl or template_decl, have namespace_decl in get_merge_kind, at 
cp/module.cc:10074)
FAIL: g++.dg/modules/xtreme-header-3_a.H -std=c++2b (test for excess errors)
FAIL: g++.dg/modules/xtreme-header-3_a.H module-cmi  
(gcm.cache/\$srcdir/g++.dg/modules/xtreme-header-3_a.H.gcm)
FAIL: g++.dg/modules/xtreme-header-3_b.C -std=c++17 (test for excess errors)
FAIL: g++.dg/modules/xtreme-header-3_b.C -std=c++2a (test for excess errors)
FAIL: g++.dg/modules/xtreme-header-3_b.C -std=c++2b (test for excess errors)
FAIL: g++.dg/modules/xtreme-header-3_c.C -std=c++17 (test for excess errors)
FAIL: g++.dg/modules/xtreme-header-3_c.C -std=c++2a (test for excess errors)
FAIL: g++.dg/modules/xtreme-header-3_c.C -std=c++2b (test for excess errors)
FAIL: g++.dg/modules/xtreme-header-5_a.H -std=c++2a (internal compiler error: 
tree check: expected var_decl or function_decl or field_decl or type_decl or 
concept_decl or template_decl, have namespace_decl in get_merge_kind, at 
cp/module.cc:10074)
FAIL: 

Re: [PATCH 1/2] libphobos: fix CET for non-glibc targets

2021-12-20 Thread ibuclaw--- via Gcc-patches
> On 20/12/2021 01:08 Alex Xu (Hello71) via Gcc-patches 
>  wrote:
> 
>  
> On musl, linking against libphobos fails because it requires ucontext
> but is not explicitly linked against it. This is caused by configure
> assuming that it is implemented in assembly, but it is actually not
> implemented. This silently works on other libcs because context API does
> not require an external library.

Thanks.

Looks reasonable to me, also for backporting to gcc-11, which also has the same 
CET support.

Iain.


Re: [GCC-WWWDocs v1] htdocs/gcc-12/changes.html: Obsolete m32c-*-rtems*

2021-12-20 Thread Gerald Pfeifer
On Fri, 17 Dec 2021, Joel Sherrill wrote:
> +  
> +  
> +The m32c*-*-rtems* configuration has been obsoleted and will
> +be removed in a future release.

Aye. Thank you!

Gerald


Re: [PATCH] config.gcc: Obsolete m32c-rtems target

2021-12-20 Thread Gerald Pfeifer
On Sat, 18 Dec 2021, Joel Sherrill wrote:
> Can I commit the gcc and wwwdocs patch for m32c-rtems and let Jeff or
> someone follow up completely eliminating m32c?

Sure from my side (wwwdocs, and you probably could declare the removal
as "obvious" and/or approved by Jeff).

Gerald


Re: [PATCH] Fix alignment of stack slots for overaligned types [PR103500]

2021-12-20 Thread Richard Sandiford via Gcc-patches
Alex Coplan via Gcc-patches  writes:
> Hi,
>
> This fixes PR103500 i.e. ensuring that stack slots for
> passed-by-reference overaligned types are appropriately aligned. For the
> testcase:
>
> typedef struct __attribute__((aligned(32))) {
>   long x,y;
> } S;
> S x;
> void f(S);
> void g(void) { f(x); }
>
> on AArch64, we currently generate (at -O2):
>
> g:
> adrpx1, .LANCHOR0
> add x1, x1, :lo12:.LANCHOR0
> stp x29, x30, [sp, -48]!
> mov x29, sp
> ldp q0, q1, [x1]
> add x0, sp, 16
> stp q0, q1, [sp, 16]
> bl  f
> ldp x29, x30, [sp], 48
> ret
>
> so the stack slot for the passed-by-reference copy of the structure is
> at sp + 16, and the sp is only guaranteed to be 16-byte aligned, so the
> structure is only 16-byte aligned. The PCS requires the structure to be
> 32-byte aligned. After this patch, we generate:
>
> g:
> adrpx1, .LANCHOR0
> add x1, x1, :lo12:.LANCHOR0
> stp x29, x30, [sp, -64]!
> mov x29, sp
> add x0, sp, 47
> ldp q0, q1, [x1]
> and x0, x0, -32
> stp q0, q1, [x0]
> bl  f
> ldp x29, x30, [sp], 64
> ret
>
> i.e. we ensure 32-byte alignment for the struct.
>
> The approach taken here is similar to that in
> function.c:assign_parm_setup_block where it handles the case for
> DECL_ALIGN (parm) > MAX_SUPPORTED_STACK_ALIGNMENT. This in turn is
> similar to the approach taken in cfgexpand.c:expand_stack_vars (where
> the function calls get_dynamic_stack_size) which is the code that
> handles the alignment for overaligned structures as addressable local
> variables (see the related case discussed in the PR).

A difference with the latter is that cfgexpand (AFAICT) always
honours the DECL/TYPE_ALIGN, with LOCAL_DECL_ALIGNMENT supposedly
only increasing the alignment for efficiency reasons (rather than
decreasing it).

So…

> This patch also updates the aapcs64 test mentioned in the PR to avoid
> the frontend folding away the alignment check. I've confirmed that the
> execution test actually fails on aarch64-linux-gnu prior to the patch
> being applied and passes afterwards.
>
> Bootstrapped and regtested on aarch64-linux-gnu, x86_64-linux-gnu, and
> arm-linux-gnueabihf: no regressions.
>
> I'd appreciate any feedback. Is it OK for trunk?
>
> Thanks,
> Alex
>
> gcc/ChangeLog:
>
>   PR middle-end/103500
>   * function.c (get_stack_local_alignment): Align BLKmode overaligned
>   types to the alignment required by the type.
>   (assign_stack_temp_for_type): Handle BLKmode overaligned stack
>   slots by allocating a larger-than-necessary buffer and aligning
>   the address within appropriately.
>
> gcc/testsuite/ChangeLog:
>
>   PR middle-end/103500
>   * gcc.target/aarch64/aapcs64/rec_align-8.c (test_pass_by_ref):
>   Prevent the frontend from folding our alignment check away by
>   using snprintf to store the pointer into a string and recovering
>   it with sscanf.
>
> diff --git a/gcc/function.c b/gcc/function.c
> index 61b3bd036b8..5ed722ab959 100644
> --- a/gcc/function.c
> +++ b/gcc/function.c
> @@ -278,7 +278,9 @@ get_stack_local_alignment (tree type, machine_mode mode)
>unsigned int alignment;
>  
>if (mode == BLKmode)
> -alignment = BIGGEST_ALIGNMENT;
> +alignment = (type && TYPE_ALIGN (type) > MAX_SUPPORTED_STACK_ALIGNMENT)
> +  ? TYPE_ALIGN (type)
> +  : BIGGEST_ALIGNMENT;

…I'm not sure about this calculation.  Why do we only honour TYPE_ALIGN
if it's greater than MAX_SUPPORTED_STACK_ALIGNMENT, and fall all the
way back to BIGGEST_ALIGNMENT otherwise?  It looks like on nvptx
this would have the effect of honouring (say) 2048-byte alignment,
but not 32-byte alignment (which falls between BIGGEST_ALIGNMENT
and MAX_SUPPORTED_STACK_ALIGNMENT).

Also, what about the non-BLKmode case?  A type's mode cannot be more
aligned than the type, but a type is allowed to be more aligned than
its mode.  E.g.:

typedef unsigned int V __attribute__((vector_size(32)));
typedef struct __attribute__((aligned(32))) { V x; } S;
S x;
void f(S);
void g(void) { f(x); }

doesn't seem to be handled by the patch.  It's possible to create
variations on this on aarch64 up to 2048 bits using SVE and
-msve-vector-bits=N.

Perhaps we should treat the old alignment calculation as a minimum
and take the max of that and TYPE_ALIGN, where TYPE_ALIGN is given.

>else
>  alignment = GET_MODE_ALIGNMENT (mode);
>  
> @@ -872,21 +874,35 @@ assign_stack_temp_for_type (machine_mode mode, 
> poly_int64 size, tree type)
>  
>p = ggc_alloc ();
>  
> -  /* We are passing an explicit alignment request to assign_stack_local.
> -  One side effect of that is assign_stack_local will not round SIZE
> -  to ensure the frame offset remains suitably aligned.
> -
> -  So for requests which depended on the rounding 

Re: [PATCH take #2] x86_64: Improve code expanded for highpart multiplications.

2021-12-20 Thread Uros Bizjak via Gcc-patches
On Mon, Dec 20, 2021 at 12:26 PM Roger Sayle  wrote:
>
>
> Hi Uros,
> Many thanks for the review.  Here's a revised patch incorporating your
> suggestion to use a single define_insn with a mode iterator instead of two
> new near identical define_insns for SImode and DImode.  I initially tried
> SWI48, but a testsuite failure of pr82418.c revealed that it's more
> efficient on TARGET_64BIT to use a full width multiplication than a
> SImode highpart multiplication, i.e. we only want ?mulsi3_highpart on
> !TARGET_64BIT, so this revised patch uses a DWIH mode iterator.
>
> This patch has been tested on x86_64-pc-linux-gnu by make bootstrap and
> make -k check (with and without RUNTESTFLAGS="--target_board='unix{-m32}'")
> with no new failures.  Ok for mainline?
>
>
> 2021-12-20  Roger Sayle  
> Uroš Bizjak  
>
> gcc/ChangeLog
> * config/i386/i386.md (any_mul_highpart): New code iterator.
> (sgnprefix, s): Add attribute support for [su]mul_highpart.
> (mul3_highpart): Delete expander.
> (mul3_highpart, mulsi32_highpart_zext):
> New define_insn patterns.
> (define_peephole2): Tweak the register allocation for the above
> instructions after reload.
>
> gcc/testsuite/ChangeLog
> * gcc.target/i386/smuldi3_highpart.c: New test case.

OK.

Thanks,
Uros.

>
> Thanks again,
> Roger
> --
>
> > -Original Message-
> > From: Uros Bizjak 
> > Sent: 13 December 2021 11:53
> > To: Roger Sayle 
> > Cc: GCC Patches 
> > Subject: Re: [PATCH] x86_64: Improve code expanded for highpart
> > multiplications.
> >
> > On Fri, Dec 10, 2021 at 12:58 PM Roger Sayle 
> > wrote:
> > >
> > >
> > > While working on a middle-end patch to more aggressively use highpart
> > > multiplications on targets that support them, I noticed that the RTL
> > > expanded by the x86 backend interacts poorly with register allocation
> > > leading to suboptimal code.
> > >
> > > For the testcase,
> > > typedef int __attribute ((mode(TI))) ti_t; long foo(long x) {
> > >   return ((ti_t)x * 19065) >> 64;
> > > }
> > >
> > > we'd like to avoid:
> > > foo:movq%rdi, %rax
> > > movl$19065, %edx
> > > imulq   %rdx
> > > movq%rdx, %rax
> > > ret
> > >
> > > and would prefer:
> > > foo:movl$19065, %eax
> > > imulq   %rdi
> > > movq%rdx, %rax
> > > ret
> > >
> > > This patch provides a pair of peephole2 transformations to tweak the
> > > spills generated by reload, and at the same time replaces the current
> > > define_expand with define_insn patterns using the new [su]mul_highpart
> > > RTX codes.  I've left the old-style patterns in the machine
> > > description for the time being, but plan to remove these once my
> > > planned middle-end improvements make them obsolete.
> > >
> > > This patch has been tested on x86_64-pc-linux-gnu (both with and
> > > without the middle-end changes) by make bootstrap and make -k check
> > > with no new failures.  The new test case, which currently passes,
> > > ensures that the code we generate isn't adversely affected by changes 
> > > outside
> > the backend.
> > > Ok for mainline?
> > >
> > >
> > > 2021-12-10  Roger Sayle  
> > >
> > > gcc/ChangeLog
> > > * config/i386/i386.md (any_mul_highpart): New code iterator.
> > > (sgnprefix, s): Add attribute support for [su]mul_highpart.
> > > (mul3_highpart): Delete expander.
> > > (muldi3_highpart, mulsi3_highpart, 
> > > mulsi32_highpart_zext):
> > > New define_insn patterns.
> > > (define_peephole2): Tweak the register allocation for the above
> > > instructions after reload.
> > >
> > > gcc/testsuite/ChangeLog
> > > * gcc.target/i386/smuldi3_highpart.c: New test case.
> >
> > Can you please merge muldi3_highpart and *mulsi3_highpart using
> > SWI48 mode iterator?
> >
> > Uros.
> >
> > >
> > >
> > > Thanks in advance,
> > > Roger
> > > --
> > >


Re: [PATCH v3 03/12] LoongArch Port: Machine Decsription files.

2021-12-20 Thread Xi Ruoyao via Gcc-patches
On Fri, 2021-12-10 at 15:43 +0800, Chenghua Xu wrote:
> +
> +INT_MODE (OI, 32);
> +
> +/* Keep the OI modes from confusing the compiler into thinking
> +   that these modes could actually be used for computation.  They are
> +   only holders for vectors during data movement.  */
> +#define MAX_BITSIZE_MODE_ANY_INT (128)

This can't prevent the compiler from using OImode for calculation.  And
to make things even worse, in wide-int.h we have:

> #define WIDE_INT_MAX_ELTS \
>   ((MAX_BITSIZE_MODE_ANY_INT + HOST_BITS_PER_WIDE_INT) / 
> HOST_BITS_PER_WIDE_INT)

/* ... */

> class GTY(()) wide_int_storage
> {
> private:
>   HOST_WIDE_INT val[WIDE_INT_MAX_ELTS];

When the compiler uses OImode, this array will eventually be accessed
out-of-bound, causing ICE on fp-uint64-convert-double-{1,2}.c.

All other ports don't define it (and let genmode to determine the value
automatically).  I drafted https://github.com/loongson/gcc/pull/64 to
remove the MAX_BITSIZE_MODE_ANY_INT definition.  Not sure if it's the
"proper" fix though.
-- 
Xi Ruoyao 
School of Aerospace Science and Technology, Xidian University


Re: [patch, Fortran] Make REAL(KIND=16) detection more robust

2021-12-20 Thread Thomas Koenig via Gcc-patches



Hi FX,


I’m not opposed, but I’d really like to get this into the current branch. It is 
a much less invasive change than the power-ieee128 work. The only case I 
changed is the case where there is a kind 16, and there is a long double, but 
the long double is neither kind 10 neither kind 16. I don’t think there is such 
a platform currently (otherwise it wouldn’t have worked).


I did a bootstrap and test on POWER, and it did not
cause any regressions.

So, OK for trunk.  Thanks for the patch!

Regards

Thomas


Re: [PATCH] libiberty rust-demangle, ignore .suffix

2021-12-20 Thread Eduard-Mihai Burtescu
Apologies for the delay, the email fell through the cracks somehow.

The updated patch looks like it would work alright, only needs a couple tests, 
e.g.:
https://github.com/rust-lang/rustc-demangle/blob/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96/src/lib.rs#L413-L422
https://github.com/rust-lang/rustc-demangle/blob/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96/src/v0.rs#L1442-L1444

Thanks,
- Eddy B.

On Tue, Dec 7, 2021, at 21:16, Mark Wielaard wrote:
> Hi Eddy,
>
> On Fri, 2021-12-03 at 01:14 +0200, Eduard-Mihai Burtescu wrote:
>> On Fri, Dec 3, 2021, at 00:07, Mark Wielaard wrote:
>> > On Thu, Dec 02, 2021 at 07:35:17PM +0200, Eduard-Mihai Burtescu
>> > wrote:
>> > > That also means that for consistency, suffixes like these should
>> > > be
>> > > handled uniformly for both v0 and legacy (as rustc-demangle
>> > > does),
>> > > since LLVM doesn't distinguish.
>> > 
>> > The problem with the legacy mangling is that dot '.' is a valid
>> > character. That is why the patch only handles the v0 mangling case
>> > (where dot '.' isn't valid).
>> 
>> Thought so, that's an annoying complication - however, see later down
>> why that's still not a blocker to the way rustc-demangle handles it.
>> 
>> > > You may even be able to get Clang to generate C++ mangled symbols
>> > > with ".llvm." suffixes, with enough application of LTO.  This is
>> > > not
>> > > unlike GCC ".clone" suffixes, AFAIK.  Sadly I don't think there's
>> > > a
>> > > way to handle both as "outside the symbol", without hardcoding
>> > > ".llvm." in the implementation.
>> > 
>> > We could use the scheme used by c++ where the .suffix is added as "
>> > [clone .suffix]", it even handles multiple dots, where something
>> > like
>> > _Z3fooi.part.9.165493.constprop.775.31805
>> > demangles to
>> > foo(int) [clone .part.9.165493] [clone .constprop.775.31805]
>> > 
>> > I just don't think that is very useful and a little confusing.
>> 
>> Calling it "clone" is a bit weird, but I just checked what rustc-
>> demangle
>> does for printing suffixes back out and it's not great either:
>> - ".llvm." (and everything after it) is completely removed
>> - any left-over suffixes (after demangling), if they start with ".",
>> are
>>   not considered errors, but printed out verbatim after the
>> demangling
>> 
>> > > I don't recall the libiberty demangling API having any provisions
>> > > for the demangler deciding that a mangled symbol "stops early",
>> > > which would maybe allow for a more general solution.
>> > 
>> > No, there indeed is no interface. We might introduce a new option
>> > flag
>> > for treating '.' as end of symbol. But do we really need that
>> > flexibility?
>> 
>> That's not what I meant - a v0 or legacy symbol is self-terminating
>> in
>> its parsing (or at the very least there are not dots allowed outside
>> of a length-prefixed identifier), so that when you see the start of
>> a valid mangled symbol, you can always find its end in the string,
>> even when that end is half-way through (and is followed by suffixes
>> or any other unrelated noise).
>> 
>> What I was imagining is a way to return to the caller the number of
>> chars from the start of the original string, that were demangled,
>> letting the caller do something else with the rest of that string.
>> (see below for how rustc-demangle already does something similar)
>>
>> > > Despite all that, if it helps in practice, I would still not mind
>> > > this patch landing in its current form, I just wanted to share my
>> > > perspective on the larger issue.
>> > 
>> > Thanks for that. Do you happen to know what other rust demanglers
>> > do?
>> 
>> rustc-demangle's internal API returns a pair of the demangler and the
>> "leftover" parts of the original string, after the end of the symbol.
>> You can see here how that suffix is further checked, and kept:
>> https://github.com/rust-lang/rustc-demangle/blob/2811a1ad6f7c8bead2ef3671e4fdc10de1553e96/src/lib.rs#L108-L138
>
> Yes, returning a struct that returns the style detected, the demangled
> string and the left over chars makes sense. But we don't have an
> interface like that at the moment, and I am not sure we (currently)
> have users who want this.
>
>> As mentioned above, ".llvm." is handled differently, just above the snippet
>> linked - perhaps it was deemed too common to let it pollute the output.
>
> But that also makes it a slightly odd interface. I would imagine that
> people would be interested in the .llvm. part. Now that just gets
> dropped.
>
> Since we don't have an interface to return the suffix (and I find the
> choice of dropping .llvm. but not other suffixes odd), I think we
> should just simply always drop the .suffix. I understand now how to do
> that for legacy symbols too, thanks for the hints.
>
> See the attached update to the patch. What do you think?
>
> Thanks,
>
> Mark
>
> Attachments:
> * 0001-libiberty-rust-demangle-ignore-.suffix.patch


Re: Patch ping related to OpenMP

2021-12-20 Thread Tobias Burnus

Thanks for a DWARF-related patch review (+ fix by yourself). Otherwise,
still pending are the following OpenMP patches.

The first one is a revised patch following the review comment and
affects Fortran only. The second one is also a rather small &
post-review revised patch.

On 06.12.21 15:56, Tobias Burnus wrote:

First, thanks for the four reviews. Secondly, I missed one patch –
hence, reposted with all three pending patches:

* Re: [PATCH] [gfortran] Add support for allocate clause (OpenMP 5.0).
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/584894.html

and:

On 01.12.21 17:34, Tobias Burnus wrote:

* [PATCH] C, C++, Fortran, OpenMP: Add 'has_device_addr' clause to
'target' construct
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/585361.html

* [PATCH 00/16] OpenMP: lvalues in "map" clauses and struct handling
rework
https://gcc.gnu.org/pipermail/gcc-patches/2021-November/585439.html

Tobias
-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 
München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas 
Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht 
München, HRB 106955


Re: [PATCH] Leverage sysroot for VxWorks

2021-12-20 Thread Rasmus Villemoes via Gcc-patches
On 10/12/2021 19.24, Olivier Hainque wrote:

> For the toolchains we build, this is achieved with a few
> configure options like:
> 
>   --with-sysroot
>   --with-build-sysroot=${WIND_BASE}/target

So forward-porting our private patches up until

7bf710b5116 - libstdc++: Add support for '?' in linker script globs

(i.e. the commit before this one) went without problems, with no changes
required in our build scripts. Then when rebasing to this one, now known as

f3f923e5139 - Leverage sysroot for VxWorks

the build broke as expected because I'd been using somewhat different
values of --with(-build)-sysroot. However, changing those configure
options as indicated above again produced a working toolchain, so this
is all good.

>   --with-specs=%{!sysroot=*:--sysroot=%:getenv(WIND_BASE /target)}

However, I'm a little confused about the purpose of this one. First,
shouldn't this be '%{!-sysroot=*}', i.e. with a leading dash, since
the option is --sysroot ? But whether I use one or the other, it seems
that the resulting compiler ignores a --sysroot argument; if I
explicitly unexport WIND_BASE and manually add a --sysroot argument that
should have the same effect as above, gcc fails with WIND_BASE not defined:

$ echo $WIND_BASE
/usr/powerpc-wrs-vxworks/wind_base
$ export -n WIND_BASE
$ powerpc-wrs-vxworks-gcc --sysroot=${WIND_BASE}/target -v -E -  < /dev/null
Using built-in specs.
powerpc-wrs-vxworks-gcc: fatal error: environment variable 'WIND_BASE'
not defined
compilation terminated.

The specs syntax doesn't explicitly list the negative form of %{S*:X},
and I can't find any in-tree examples (though it's rather hard to grep
for), so I don't even know if this is supposed to work.

It's not a big deal, we can just continue to define and export
WIND_BASE, but it's probably best for long-term maintenance if our build
scripts and configure options are aligned with what you do on your end,
so I would just like to understand this fully.

Rasmus


[PATCH][pushed] jit: Fix -Wodr warning

2021-12-20 Thread Martin Liška

Pushed as obvious.

Martin

gcc/jit/libgccjit.c:3957:8: warning: type 'struct version_info' violates the 
C++ One Definition Rule [-Wodr]

../../gcc/jit/libgccjit.c:3957:8: warning: type 'struct version_info' violates 
the C++ One Definition Rule [-Wodr]
 3957 | struct version_info

../../gcc/tree-ssa-loop-ivopts.c:181: note: a different type is defined in 
another translation unit
  181 | struct version_info

gcc/jit/ChangeLog:

* libgccjit.c (struct version_info): Rename to jit_version_info.
(struct jit_version_info): Likewise.
(gcc_jit_version_major): Likewise.
(gcc_jit_version_minor): Likewise.
(gcc_jit_version_patchlevel): Likewise.
---
 gcc/jit/libgccjit.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/jit/libgccjit.c b/gcc/jit/libgccjit.c
index 5cb27a20d41..3d2d838dc4d 100644
--- a/gcc/jit/libgccjit.c
+++ b/gcc/jit/libgccjit.c
@@ -3954,11 +3954,11 @@ gcc_jit_context_new_rvalue_from_vector (gcc_jit_context 
*ctxt,
 
 static pthread_mutex_t version_mutex = PTHREAD_MUTEX_INITIALIZER;
 
-struct version_info

+struct jit_version_info
 {
   /* Default constructor.  Populate via parse_basever,
  guarded by version_mutex.  */
-  version_info ()
+  jit_version_info ()
   {
 pthread_mutex_lock (_mutex);
 parse_basever (, , );
@@ -3974,21 +3974,21 @@ struct version_info
 extern int
 gcc_jit_version_major (void)
 {
-  version_info vi;
+  jit_version_info vi;
   return vi.major;
 }
 
 extern int

 gcc_jit_version_minor (void)
 {
-  version_info vi;
+  jit_version_info vi;
   return vi.minor;
 }
 
 extern int

 gcc_jit_version_patchlevel (void)
 {
-  version_info vi;
+  jit_version_info vi;
   return vi.patchlevel;
 }
 
--

2.34.1



[PATCH][pushed] opts: Support -Oz in -Ox option hints.

2021-12-20 Thread Martin Liška

Pushed as obvious after testing.

Cheers,
Martin

gcc/ChangeLog:

* opts.c (default_options_optimization): Support -Oz in -Ox option 
hints.
---
 gcc/opts.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/opts.c b/gcc/opts.c
index f45ecc56726..cdd6463e49b 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -723,7 +723,7 @@ default_options_optimization (struct gcc_options *opts,
  const int optimize_val = integral_argument (opt->arg);
  if (optimize_val == -1)
error_at (loc, "argument to %<-O%> should be a non-negative "
-  "integer, %, % or %");
+  "integer, %, %, % or %");
  else
{
  opts->x_optimize = optimize_val;
--
2.34.1



[PATCH take #2] x86_64: Improve code expanded for highpart multiplications.

2021-12-20 Thread Roger Sayle

Hi Uros,
Many thanks for the review.  Here's a revised patch incorporating your
suggestion to use a single define_insn with a mode iterator instead of two
new near identical define_insns for SImode and DImode.  I initially tried
SWI48, but a testsuite failure of pr82418.c revealed that it's more
efficient on TARGET_64BIT to use a full width multiplication than a
SImode highpart multiplication, i.e. we only want ?mulsi3_highpart on
!TARGET_64BIT, so this revised patch uses a DWIH mode iterator.

This patch has been tested on x86_64-pc-linux-gnu by make bootstrap and
make -k check (with and without RUNTESTFLAGS="--target_board='unix{-m32}'")
with no new failures.  Ok for mainline?


2021-12-20  Roger Sayle  
Uroš Bizjak  

gcc/ChangeLog
* config/i386/i386.md (any_mul_highpart): New code iterator.
(sgnprefix, s): Add attribute support for [su]mul_highpart.
(mul3_highpart): Delete expander.
(mul3_highpart, mulsi32_highpart_zext):
New define_insn patterns.
(define_peephole2): Tweak the register allocation for the above
instructions after reload.

gcc/testsuite/ChangeLog
* gcc.target/i386/smuldi3_highpart.c: New test case.

Thanks again,
Roger
--

> -Original Message-
> From: Uros Bizjak 
> Sent: 13 December 2021 11:53
> To: Roger Sayle 
> Cc: GCC Patches 
> Subject: Re: [PATCH] x86_64: Improve code expanded for highpart
> multiplications.
> 
> On Fri, Dec 10, 2021 at 12:58 PM Roger Sayle 
> wrote:
> >
> >
> > While working on a middle-end patch to more aggressively use highpart
> > multiplications on targets that support them, I noticed that the RTL
> > expanded by the x86 backend interacts poorly with register allocation
> > leading to suboptimal code.
> >
> > For the testcase,
> > typedef int __attribute ((mode(TI))) ti_t; long foo(long x) {
> >   return ((ti_t)x * 19065) >> 64;
> > }
> >
> > we'd like to avoid:
> > foo:movq%rdi, %rax
> > movl$19065, %edx
> > imulq   %rdx
> > movq%rdx, %rax
> > ret
> >
> > and would prefer:
> > foo:movl$19065, %eax
> > imulq   %rdi
> > movq%rdx, %rax
> > ret
> >
> > This patch provides a pair of peephole2 transformations to tweak the
> > spills generated by reload, and at the same time replaces the current
> > define_expand with define_insn patterns using the new [su]mul_highpart
> > RTX codes.  I've left the old-style patterns in the machine
> > description for the time being, but plan to remove these once my
> > planned middle-end improvements make them obsolete.
> >
> > This patch has been tested on x86_64-pc-linux-gnu (both with and
> > without the middle-end changes) by make bootstrap and make -k check
> > with no new failures.  The new test case, which currently passes,
> > ensures that the code we generate isn't adversely affected by changes 
> > outside
> the backend.
> > Ok for mainline?
> >
> >
> > 2021-12-10  Roger Sayle  
> >
> > gcc/ChangeLog
> > * config/i386/i386.md (any_mul_highpart): New code iterator.
> > (sgnprefix, s): Add attribute support for [su]mul_highpart.
> > (mul3_highpart): Delete expander.
> > (muldi3_highpart, mulsi3_highpart, mulsi32_highpart_zext):
> > New define_insn patterns.
> > (define_peephole2): Tweak the register allocation for the above
> > instructions after reload.
> >
> > gcc/testsuite/ChangeLog
> > * gcc.target/i386/smuldi3_highpart.c: New test case.
> 
> Can you please merge muldi3_highpart and *mulsi3_highpart using
> SWI48 mode iterator?
> 
> Uros.
> 
> >
> >
> > Thanks in advance,
> > Roger
> > --
> >
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 4e9fae8..00361f4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -992,11 +992,16 @@
 ;; Mapping of extend operators
 (define_code_iterator any_extend [sign_extend zero_extend])
 
+;; Mapping of highpart multiply operators
+(define_code_iterator any_mul_highpart [smul_highpart umul_highpart])
+
 ;; Prefix for insn menmonic.
 (define_code_attr sgnprefix [(sign_extend "i") (zero_extend "")
+(smul_highpart "i") (umul_highpart "")
 (div "i") (udiv "")])
 ;; Prefix for define_insn
-(define_code_attr s [(sign_extend "s") (zero_extend "u")])
+(define_code_attr s [(sign_extend "s") (zero_extend "u")
+(smul_highpart "s") (umul_highpart "u")])
 (define_code_attr u [(sign_extend "") (zero_extend "u")
 (div "") (udiv "u")])
 (define_code_attr u_bool [(sign_extend "false") (zero_extend "true")
@@ -8426,20 +8431,45 @@
(set_attr "bdver1_decode" "direct")
(set_attr "mode" "QI")])
 
-(define_expand "mul3_highpart"
-  [(parallel [(set (match_operand:DWIH 0 "register_operand")
-  (truncate:DWIH
-(lshiftrt:
-  (mult:
-(any_extend:
- 

[PING][PATCH, v2, 1/1, AARCH64][PR102768] aarch64: Add compiler support for Shadow Call Stack

2021-12-20 Thread Dan Li via Gcc-patches

Gentile ping for this :), thanks.
Link: https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586204.html


Shadow Call Stack can be used to protect the return address of a
function at runtime, and clang already supports this feature[1].

To enable SCS in user mode, in addition to compiler, other support
is also required (as discussed in [2]). This patch only adds basic
support for SCS from the compiler side, and provides convenience
for users to enable SCS.

For linux kernel, only the support of the compiler is required.

[1] https://clang.llvm.org/docs/ShadowCallStack.html
[2] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102768


Signed-off-by: Dan Li 

gcc/c-family/ChangeLog:

* c-attribs.c (handle_no_sanitize_shadow_call_stack_attribute):
New.

gcc/ChangeLog:

* config/aarch64/aarch64-protos.h (aarch64_shadow_call_stack_enabled):
New decl.
* config/aarch64/aarch64.c (aarch64_shadow_call_stack_enabled):
New.
(aarch64_expand_prologue):  Push x30 onto SCS before it's
pushed onto stack.
(aarch64_expand_epilogue):  Pop x30 frome SCS.
* config/aarch64/aarch64.h (TARGET_SUPPORT_SHADOW_CALL_STACK):
New macro.
(TARGET_CHECK_SCS_RESERVED_REGISTER):   Likewise.
* config/aarch64/aarch64.md (scs_push): New template.
(scs_pop):  Likewise.
* defaults.h (TARGET_SUPPORT_SHADOW_CALL_STACK):New macro.
* doc/extend.texi:  Document -fsanitize=shadow-call-stack.
* doc/invoke.texi:  Document attribute.
* flag-types.h (enum sanitize_code):Add
SANITIZE_SHADOW_CALL_STACK.
* opts-global.c (handle_common_deferred_options):   Add SCS
compile option check.
* opts.c (finish_options):  Likewise.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/shadow_call_stack_1.c: New test.
* gcc.target/aarch64/shadow_call_stack_2.c: New test.
* gcc.target/aarch64/shadow_call_stack_3.c: New test.
* gcc.target/aarch64/shadow_call_stack_4.c: New test.
---
 gcc/c-family/c-attribs.c  | 21 +
 gcc/config/aarch64/aarch64-protos.h   |  1 +
 gcc/config/aarch64/aarch64.c  | 27 +++
 gcc/config/aarch64/aarch64.h  | 11 +
 gcc/config/aarch64/aarch64.md | 18 
 gcc/defaults.h|  4 ++
 gcc/doc/extend.texi   |  7 +++
 gcc/doc/invoke.texi   | 29 
 gcc/flag-types.h  |  2 +
 gcc/opts-global.c |  6 +++
 gcc/opts.c| 12 +
 .../gcc.target/aarch64/shadow_call_stack_1.c  |  6 +++
 .../gcc.target/aarch64/shadow_call_stack_2.c  |  6 +++
 .../gcc.target/aarch64/shadow_call_stack_3.c  | 45 +++
 .../gcc.target/aarch64/shadow_call_stack_4.c  | 18 
 15 files changed, 213 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/shadow_call_stack_1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/shadow_call_stack_2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/shadow_call_stack_3.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/shadow_call_stack_4.c

diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index 007b928c54b..9b3a35c06bf 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -56,6 +56,8 @@ static tree handle_cold_attribute (tree *, tree, tree, int, 
bool *);
 static tree handle_no_sanitize_attribute (tree *, tree, tree, int, bool *);
 static tree handle_no_sanitize_address_attribute (tree *, tree, tree,
  int, bool *);
+static tree handle_no_sanitize_shadow_call_stack_attribute (tree *, tree,
+ tree, int, bool *);
 static tree handle_no_sanitize_thread_attribute (tree *, tree, tree,
 int, bool *);
 static tree handle_no_address_safety_analysis_attribute (tree *, tree, tree,
@@ -454,6 +456,10 @@ const struct attribute_spec c_common_attribute_table[] =
  handle_no_sanitize_attribute, NULL },
   { "no_sanitize_address",0, 0, true, false, false, false,
  handle_no_sanitize_address_attribute, NULL },
+  { "no_sanitize_shadow_call_stack",
+ 0, 0, true, false, false, false,
+ handle_no_sanitize_shadow_call_stack_attribute,
+ NULL },
   { "no_sanitize_thread", 0, 0, true, false, false, false,
  handle_no_sanitize_thread_attribute, NULL },
   { "no_sanitize_undefined",  0, 0, true, false, false, false,
@@ -1175,6 +1181,21 @@ handle_no_sanitize_address_attribute (tree *node, tree 
name, tree, int,
   return NULL_TREE;
 }
 
+/* Handle a 

[PATCH] rs6000: Replace UNSPECS with ss_plus/us_plus and ss_minus/us_minus

2021-12-20 Thread Xionghu Luo via Gcc-patches
These four UNSPECS seems could be replaced with native RTL, and why
"(set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))"
in the RTL pattern, per ISA of VSCR bit 127(VECTOR Saturation, SAT):

  This bit is sticky; that is, once set to 1 it
  remains set to 1 until it is set to 0 by an
  mtvscr instruction.

The RTL pattern set it to 0 but final ASM doesn't present it?  And why
not use Clobber VSCR_REGNO instead?  Tested pass on P10, OK for master?
Thanks.

gcc/ChangeLog:

* config/rs6000/altivec.md (altivec_vaddus): Replace
UNSPEC_VADDU with us_plus.
(altivec_vaddss): Replace UNSPEC_VADDS with ss_plus.
(altivec_vsubus): Replace UNSPEC_VSUBU with us_minus.
(altivec_vsubss): Replace UNSPEC_VSUBS with ss_minus.
(altivec_abss_): Likewise.
---
 gcc/config/rs6000/altivec.md | 29 ++---
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index a057218aa28..b2909857c34 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -29,8 +29,6 @@ (define_c_enum "unspec"
UNSPEC_VMHADDSHS
UNSPEC_VMHRADDSHS
UNSPEC_VADDCUW
-   UNSPEC_VADDU
-   UNSPEC_VADDS
UNSPEC_VAVGU
UNSPEC_VAVGS
UNSPEC_VMULEUB
@@ -61,8 +59,6 @@ (define_c_enum "unspec"
UNSPEC_VSR
UNSPEC_VSRO
UNSPEC_VSUBCUW
-   UNSPEC_VSUBU
-   UNSPEC_VSUBS
UNSPEC_VSUM4UBS
UNSPEC_VSUM4S
UNSPEC_VSUM2SWS
@@ -517,9 +513,8 @@ (define_insn "altivec_vaddcuw"
 
 (define_insn "altivec_vaddus"
   [(set (match_operand:VI 0 "register_operand" "=v")
-(unspec:VI [(match_operand:VI 1 "register_operand" "v")
-   (match_operand:VI 2 "register_operand" "v")]
-  UNSPEC_VADDU))
+(us_plus:VI (match_operand:VI 1 "register_operand" "v")
+   (match_operand:VI 2 "register_operand" "v")))
(set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
   ""
   "vaddus %0,%1,%2"
@@ -527,9 +522,8 @@ (define_insn "altivec_vaddus"
 
 (define_insn "altivec_vaddss"
   [(set (match_operand:VI 0 "register_operand" "=v")
-(unspec:VI [(match_operand:VI 1 "register_operand" "v")
-(match_operand:VI 2 "register_operand" "v")]
-  UNSPEC_VADDS))
+(ss_plus:VI (match_operand:VI 1 "register_operand" "v")
+   (match_operand:VI 2 "register_operand" "v")))
(set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
   "VECTOR_UNIT_ALTIVEC_P (mode)"
   "vaddss %0,%1,%2"
@@ -563,9 +557,8 @@ (define_insn "altivec_vsubcuw"
 
 (define_insn "altivec_vsubus"
   [(set (match_operand:VI 0 "register_operand" "=v")
-(unspec:VI [(match_operand:VI 1 "register_operand" "v")
-(match_operand:VI 2 "register_operand" "v")]
-  UNSPEC_VSUBU))
+(us_minus:VI (match_operand:VI 1 "register_operand" "v")
+(match_operand:VI 2 "register_operand" "v")))
(set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
   "VECTOR_UNIT_ALTIVEC_P (mode)"
   "vsubus %0,%1,%2"
@@ -573,9 +566,8 @@ (define_insn "altivec_vsubus"
 
 (define_insn "altivec_vsubss"
   [(set (match_operand:VI 0 "register_operand" "=v")
-(unspec:VI [(match_operand:VI 1 "register_operand" "v")
-(match_operand:VI 2 "register_operand" "v")]
-  UNSPEC_VSUBS))
+(ss_minus:VI (match_operand:VI 1 "register_operand" "v")
+(match_operand:VI 2 "register_operand" "v")))
(set (reg:SI VSCR_REGNO) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
   "VECTOR_UNIT_ALTIVEC_P (mode)"
   "vsubss %0,%1,%2"
@@ -3480,9 +3472,8 @@ (define_expand "altivec_absv4sf2"
 (define_expand "altivec_abss_"
   [(set (match_dup 2) (vec_duplicate:VI (const_int 0)))
(parallel [(set (match_dup 3)
-  (unspec:VI [(match_dup 2)
-  (match_operand:VI 1 "register_operand" "v")]
- UNSPEC_VSUBS))
+  (ss_minus:VI (match_dup 2)
+   (match_operand:VI 1 "register_operand" "v")))
  (set (reg:SI VSCR_REGNO)
   (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))])
(set (match_operand:VI 0 "register_operand" "=v")
-- 
2.27.0



Re: [PATCH] x86_64: Ignore zero width bitfields in ABI and issue -Wpsabi warning about C zero width bitfield ABI changes [PR102024]

2021-12-20 Thread Uros Bizjak via Gcc-patches
On Wed, Dec 15, 2021 at 3:50 PM Jakub Jelinek  wrote:
>
> On Mon, Nov 29, 2021 at 05:25:30AM -0700, H.J. Lu wrote:
> > > I'd like to ping this patch, but perhaps first it would be nice to discuss
> > > it in the x86-64 psABI group.
> > > The current psABI doesn't seem to mention zero sized bitfields at all
> > > explicitly, so perhaps theoretically they should be treated as INTEGER 
> > > class,
> > > but if they are at positions multiple of 64 bits, then it is unclear into
> > > which eightbyte they should be considered, whether the previous one if any
> > > or the next one if any.  I guess similar problem is for zero sized
> > > structures, but those should according to algorithm have NO_CLASS and so 
> > > it
> > > doesn't really make a difference.  And, no compiler I'm aware of treats
> > > the zero sized bitfields at 64 bit boundaries as INTEGER class, LLVM/ICC 
> > > are
> > > ignoring such bitfields everywhere, GCC ignores them at those boundaries
> > > (and used to ignore them in C++ everywhere).  I guess my preferred 
> > > solution
> > > would be to say explicitly that zero sized bitfields are NO_CLASS.
> > > I'm not a member of the google x86-64 psABI group, can somebody please 
> > > raise
> > > it there?
> >
> > https://groups.google.com/g/x86-64-abi/c/OYeWs14WHQ4
>
> Thanks.
> I see nobody commented on Micha's post there.
>
> Here is a patch that implements it in GCC, i.e. C++ doesn't change ABI (at 
> least
> not from the past few releases) and C does for GCC:
>
> 2021-12-15  Jakub Jelinek  
>
> PR target/102024
> * config/i386/i386.c (classify_argument): Add zero_width_bitfields
> argument, when seeing DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD bitfields,
> always ignore them, when seeing other zero sized bitfields, either
> set zero_width_bitfields to 1 and ignore it or if equal to 2 process
> it.  Pass it to recursive calls.  Add wrapper
> with old arguments and diagnose ABI differences for C structures
> with zero width bitfields.  Formatting fixes.
>
> * gcc.target/i386/pr102024.c: New test.
> * g++.target/i386/pr102024.C: New test.

Please get a signoff on the ABI change (perhaps HJ can help here),
I'll approve the implementation after that.

Uros.

>
> --- gcc/config/i386/i386.c.jj   2021-12-10 17:00:06.024369219 +0100
> +++ gcc/config/i386/i386.c  2021-12-15 15:04:49.245148023 +0100
> @@ -2065,7 +2065,8 @@ merge_classes (enum x86_64_reg_class cla
>
>  static int
>  classify_argument (machine_mode mode, const_tree type,
> -  enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
> +  enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset,
> +  int _width_bitfields)
>  {
>HOST_WIDE_INT bytes
>  = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE 
> (mode);
> @@ -2123,6 +2124,16 @@ classify_argument (machine_mode mode, co
>  misaligned integers.  */
>   if (DECL_BIT_FIELD (field))
> {
> + if (integer_zerop (DECL_SIZE (field)))
> +   {
> + if (DECL_FIELD_CXX_ZERO_WIDTH_BIT_FIELD (field))
> +   continue;
> + if (zero_width_bitfields != 2)
> +   {
> + zero_width_bitfields = 1;
> + continue;
> +   }
> +   }
>   for (i = (int_bit_position (field)
> + (bit_offset % 64)) / 8 / 8;
>i < ((int_bit_position (field) + (bit_offset % 64))
> @@ -2160,7 +2171,8 @@ classify_argument (machine_mode mode, co
>   num = classify_argument (TYPE_MODE (type), type,
>subclasses,
>(int_bit_position (field)
> -   + bit_offset) % 512);
> +   + bit_offset) % 512,
> +  zero_width_bitfields);
>   if (!num)
> return 0;
>   pos = (int_bit_position (field)
> @@ -2178,7 +2190,8 @@ classify_argument (machine_mode mode, co
>   {
> int num;
> num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
> -TREE_TYPE (type), subclasses, 
> bit_offset);
> +TREE_TYPE (type), subclasses, bit_offset,
> +zero_width_bitfields);
> if (!num)
>   return 0;
>
> @@ -2211,7 +2224,7 @@ classify_argument (machine_mode mode, co
>
>   num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
>TREE_TYPE 

Re: [PATCH] ix86: Don't match the 'm' constraint on x86_64_general_operand

2021-12-20 Thread Uros Bizjak via Gcc-patches
On Sun, Dec 19, 2021 at 9:06 PM H.J. Lu  wrote:
>
> x86_64_general_operand is different from general_operand for 64-bit
> target.  To avoid LRA selecting a memory operand which doesn't satisfy
> x86_64_general_operand for 64-bit target:
>
> 1. Add a 'BM' constraint which is similar to the 'm' constraint, but
> checks x86_64_general_operand for integers > 16 bits.
> 2. Replace the 'm' constraint on x86_64_general_operand with the 'BM'
> constraint.
>
> gcc/
>
> PR target/103762
> * config/i386/constraints.md (BM): New constraint.
> * config/i386/i386.md: Replace the 'm' constraint on
> x86_64_general_operand with the 'BM' constraint.
> * config/i386/predicates.md (ix86_memory_operand): New predicate.
>
> gcc/testsuite/
>
> * gcc.target/i386/pr103762-1a.c: New test.
> * gcc.target/i386/pr103762-1b.c: Likewise.
> * gcc.target/i386/pr103762-1c.c: Likewise.
> ---
>  gcc/config/i386/constraints.md  |   5 +
>  gcc/config/i386/i386.md |  64 +-
>  gcc/config/i386/predicates.md   |   6 +
>  gcc/testsuite/gcc.target/i386/pr103762-1a.c | 647 
>  gcc/testsuite/gcc.target/i386/pr103762-1b.c |   7 +
>  gcc/testsuite/gcc.target/i386/pr103762-1c.c |   7 +
>  6 files changed, 704 insertions(+), 32 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103762-1a.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103762-1b.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr103762-1c.c
>
> diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
> index 6b291a02d88..6677285e518 100644
> --- a/gcc/config/i386/constraints.md
> +++ b/gcc/config/i386/constraints.md
> @@ -168,6 +168,7 @@
>  ;;  z  Constant call address operand.
>  ;;  C  Integer SSE constant with all bits set operand.
>  ;;  F  Floating-point SSE constant with all bits set operand.
> +;;  M  ix86 memory operand.
>
>  (define_constraint "Bf"
>"@internal Flags register operand."
> @@ -232,6 +233,10 @@
>(and (match_test "TARGET_SSE")
> (match_operand 0 "float_vector_all_ones_operand")))
>
> +(define_constraint "BM"
> +  "@internal ix86 memory operand."
> +  (match_operand 0 "ix86_memory_operand"))
> +
>  ;; Integer constant constraints.
>  (define_constraint "Wb"
>"Integer constant in the range 0 @dots{} 7, for 8-bit shifts."
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index d25453fe574..4f30819a1d9 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -1385,7 +1385,7 @@
>  (define_insn "*cmp_1"
>[(set (reg FLAGS_REG)
> (compare (match_operand:SWI 0 "nonimmediate_operand" "m,")
> -(match_operand:SWI 1 "" ",m")))]
> +(match_operand:SWI 1 "" ",BM")))]
>"ix86_match_ccmode (insn, CCmode)"
>"cmp{}\t{%1, %0|%0, %1}"
>[(set_attr "type" "icmp")
> @@ -1395,7 +1395,7 @@
>[(set (reg FLAGS_REG)
> (compare
>   (minus:SWI (match_operand:SWI 0 "nonimmediate_operand" "m,")
> -(match_operand:SWI 1 "" ",m"))
> +(match_operand:SWI 1 "" ",BM"))
>   (const_int 0)))]
>"ix86_match_ccmode (insn, CCGOCmode)"
>"cmp{}\t{%1, %0|%0, %1}"
> @@ -5653,7 +5653,7 @@
>[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,r")
> (plus:SWI48
>   (match_operand:SWI48 1 "nonimmediate_operand" "%0,0,r,r")
> - (match_operand:SWI48 2 "x86_64_general_operand" "re,m,0,le")))
> + (match_operand:SWI48 2 "x86_64_general_operand" "re,BM,0,le")))
> (clobber (reg:CC FLAGS_REG))]
>"ix86_binary_operator_ok (PLUS, mode, operands)"
>  {
> @@ -5709,7 +5709,7 @@
>[(set (match_operand:DI 0 "register_operand" "=r,r,r")
> (zero_extend:DI
>   (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r,r")
> -  (match_operand:SI 2 "x86_64_general_operand" "rme,0,le"
> +  (match_operand:SI 2 "x86_64_general_operand" 
> "rBMe,0,le"
> (clobber (reg:CC FLAGS_REG))]
>"TARGET_64BIT && ix86_binary_operator_ok (PLUS, SImode, operands)"
>  {
> @@ -5967,7 +5967,7 @@
> (compare
>   (plus:SWI
> (match_operand:SWI 1 "nonimmediate_operand" "%0,0,")
> -   (match_operand:SWI 2 "" ",m,0"))
> +   (match_operand:SWI 2 "" ",BM,0"))
>   (const_int 0)))
> (set (match_operand:SWI 0 "nonimmediate_operand" "=m,,")
> (plus:SWI (match_dup 1) (match_dup 2)))]
> @@ -6012,7 +6012,7 @@
>[(set (reg FLAGS_REG)
> (compare
>   (plus:SI (match_operand:SI 1 "nonimmediate_operand" "%0,r")
> -  (match_operand:SI 2 "x86_64_general_operand" "rme,0"))
> +  (match_operand:SI 2 "x86_64_general_operand" "rBMe,0"))
>   (const_int 0)))
> (set (match_operand:DI 0 "register_operand" "=r,r")
> (zero_extend:DI (plus:SI (match_dup 1) (match_dup 2]
> @@ -6097,7