Ping^1 [PATCH, rs6000] punish reload of lfiwzx when loading an int variable [PR102169, PR102146]

2021-10-10 Thread HAO CHEN GUI via Gcc-patches

Hi,

    Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/580479.html

Thanks


On 29/9/2021 下午 4:32, HAO CHEN GUI wrote:

Hi,

  The patch punishes reload of alternative pair of "d, Z" for movsi_internal1. 
The reload occurs if 'Z' doesn't match and generates an additional insn. So the memory 
reload should be punished.

  Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
okay for trunk? Any recommendations? Thanks a lot.


ChangeLog

2021-09-29 Haochen Gui 

gcc/
    * gcc/config/rs6000/rs6000.md (movsi_internal1): disparages
    slightly the alternative 'Z' of "lfiwzx" when reload is needed.

patch.diff

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 6bec2bddbde..c961f2df4a7 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7341,7 +7341,7 @@ (define_insn "*movsi_internal1"
   r,  *h, *h")
    (match_operand:SI 1 "input_operand"
  "r,  U,
-  m,  Z,  Z,
+  m,  ^Z, Z,
   r,  d,  v,
   I,  L,  eI, n,
   wa, O,  wM, wB,



Ping^1 [PATCH, rs6000] optimization for vec_reve builtin [PR100868]

2021-10-10 Thread HAO CHEN GUI via Gcc-patches

Hi,

 Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579038.html

Thanks

On 8/9/2021 下午 2:42, HAO CHEN GUI wrote:

Hi,

  The patch optimized for vec_reve builtin on rs6000. For V2DI and V2DF, it is 
implemented by xxswapd on all targets. For V16QI, V8HI, V4SI and V4SF, it is 
implemented by quadword byte reverse plus halfword/word byte reverse when 
p9_vector is defined.

  Bootstrapped and tested on powerpc64le-linux with no regressions. Is this 
okay for trunk? Any recommendations? Thanks a lot.


ChangeLog

2021-09-08 Haochen Gui 

gcc/
    * config/rs6000/altivec.md (altivec_vreve2 for VEC_K):
    Use xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw
    for v4si or v4sf when p9_vector is defined.
    (altivec_vreve2 for VEC_64): Defined. Implemented by
    xxswapd.

gcc/testsuite/
    * gcc.target/powerpc/vec_reve_1.c: New test.
    * gcc.target/powerpc/vec_reve_2.c: Likewise.


patch.diff

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1351dafbc41..a1698ce85c0 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -4049,13 +4049,43 @@ (define_expand "altivec_negv4sf2"
   DONE;
 })

-;; Vector reverse elements
+;; Vector reverse elements for V16QI V8HI V4SI V4SF
 (define_expand "altivec_vreve2"
-  [(set (match_operand:VEC_A 0 "register_operand" "=v")
-   (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")]
+  [(set (match_operand:VEC_K 0 "register_operand" "=v")
+   (unspec:VEC_K [(match_operand:VEC_K 1 "register_operand" "v")]
  UNSPEC_VREVEV))]
   "TARGET_ALTIVEC"
 {
+  if (TARGET_P9_VECTOR)
+    {
+  if (mode == V16QImode)
+   emit_insn (gen_p9_xxbrq_v16qi (operands[0], operands[1]));
+  else if (mode == V8HImode)
+   {
+ rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
+    mode, 0);
+ rtx temp = gen_reg_rtx (V1TImode);
+ emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
+ rtx subreg2 = simplify_gen_subreg (mode, temp,
+    V1TImode, 0);
+ emit_insn (gen_p9_xxbrh_v8hi (operands[0], subreg2));
+   }
+  else /* V4SI and V4SF.  */
+   {
+ rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
+    mode, 0);
+ rtx temp = gen_reg_rtx (V1TImode);
+ emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
+ rtx subreg2 = simplify_gen_subreg (mode, temp,
+    V1TImode, 0);
+ if (mode == V4SImode)
+   emit_insn (gen_p9_xxbrw_v4si (operands[0], subreg2));
+ else
+   emit_insn (gen_p9_xxbrw_v4sf (operands[0], subreg2));
+   }
+  DONE;
+    }
+
   int i, j, size, num_elements;
   rtvec v = rtvec_alloc (16);
   rtx mask = gen_reg_rtx (V16QImode);
@@ -4074,6 +4104,17 @@ (define_expand "altivec_vreve2"
   DONE;
 })

+;; Vector reverse elements for V2DI V2DF
+(define_expand "altivec_vreve2"
+  [(set (match_operand:VEC_64 0 "register_operand" "=v")
+   (unspec:VEC_64 [(match_operand:VEC_64 1 "register_operand" "v")]
+ UNSPEC_VREVEV))]
+  "TARGET_ALTIVEC"
+{
+  emit_insn (gen_xxswapd_ (operands[0], operands[1]));
+  DONE;
+})
+
 ;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL,
 ;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell.
 (define_insn "altivec_lvlx"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c 
b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
new file mode 100644
index 000..83a9206758b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O2 -maltivec" } */
+
+#include 
+
+vector double foo1 (vector double a)
+{
+   return vec_reve (a);
+}
+
+vector long long foo2 (vector long long a)
+{
+   return vec_reve (a);
+}
+
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c 
b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
new file mode 100644
index 000..b6dd33d6d79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
@@ -0,0 +1,28 @@
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -maltivec" } */
+
+#include 
+
+vector int foo1 (vector int a)
+{
+   return vec_reve (a);
+}
+
+vector float foo2 (vector float a)
+{
+   return vec_reve (a);
+}
+
+vector short foo3 (vector short a)
+{
+   return vec_reve (a);
+}
+
+vector char foo4 (vector char a)
+{
+   return vec_reve (a);
+}
+
+/* { dg-final { scan-assembler-times {\mxxbrq\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxbrw\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxbrh\M} 1 } } */



PING^3 [PATCH, rs6000] Disable gimple fold for float or double vec_minmax when fast-math is not set

2021-10-10 Thread HAO CHEN GUI via Gcc-patches

Hi,

     Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-August/578162.html

Thanks


On 22/9/2021 下午 2:52, HAO CHEN GUI wrote:

Hi,

 Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-August/578162.html

Thanks


On 6/9/2021 下午 2:01, HAO CHEN GUI wrote:

Hi,

 Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-August/578162.html

Thanks

On 26/8/2021 上午 9:19, HAO CHEN GUI wrote:

Hi Bill,

   Thanks for your comments.

Hi Segher,

   Here is the ChangeLog and patch diff. Thanks.

2021-08-25 Haochen Gui 

gcc/
    * config/rs6000/rs6000-call.c (rs6000_gimple_fold_builtin):
    Modify the VSX_BUILTIN_XVMINDP, ALTIVEC_BUILTIN_VMINFP,
    VSX_BUILTIN_XVMAXDP, ALTIVEC_BUILTIN_VMAXFP expansions.

gcc/testsuite/
    * gcc.target/powerpc/vec-minmax-1.c: New test.
    * gcc.target/powerpc/vec-minmax-2.c: Likewise.

diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index b4e13af4dc6..90527734ceb 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -12159,6 +12159,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   return true;
 /* flavors of vec_min.  */
 case VSX_BUILTIN_XVMINDP:
+    case ALTIVEC_BUILTIN_VMINFP:
+  if (!flag_finite_math_only || flag_signed_zeros)
+    return false;
+  /* Fall through to MIN_EXPR.  */
+  gcc_fallthrough ();
 case P8V_BUILTIN_VMINSD:
 case P8V_BUILTIN_VMINUD:
 case ALTIVEC_BUILTIN_VMINSB:
@@ -12167,7 +12172,6 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case ALTIVEC_BUILTIN_VMINUB:
 case ALTIVEC_BUILTIN_VMINUH:
 case ALTIVEC_BUILTIN_VMINUW:
-    case ALTIVEC_BUILTIN_VMINFP:
   arg0 = gimple_call_arg (stmt, 0);
   arg1 = gimple_call_arg (stmt, 1);
   lhs = gimple_call_lhs (stmt);
@@ -12177,6 +12181,11 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
   return true;
 /* flavors of vec_max.  */
 case VSX_BUILTIN_XVMAXDP:
+    case ALTIVEC_BUILTIN_VMAXFP:
+  if (!flag_finite_math_only || flag_signed_zeros)
+    return false;
+  /* Fall through to MAX_EXPR.  */
+  gcc_fallthrough ();
 case P8V_BUILTIN_VMAXSD:
 case P8V_BUILTIN_VMAXUD:
 case ALTIVEC_BUILTIN_VMAXSB:
@@ -12185,7 +12194,6 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 case ALTIVEC_BUILTIN_VMAXUB:
 case ALTIVEC_BUILTIN_VMAXUH:
 case ALTIVEC_BUILTIN_VMAXUW:
-    case ALTIVEC_BUILTIN_VMAXFP:
   arg0 = gimple_call_arg (stmt, 0);
   arg1 = gimple_call_arg (stmt, 1);
   lhs = gimple_call_lhs (stmt);
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c 
b/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c
new file mode 100644
index 000..547798fd65c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-minmax-1.c
@@ -0,0 +1,53 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
+/* { dg-final { scan-assembler-times {\mxvmaxdp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmaxsp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvmindp\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mxvminsp\M} 1 } } */
+
+/* This test verifies that float or double vec_min/max are bound to
+   xv[min|max][d|s]p instructions when fast-math is not set. */
+
+
+#include 
+
+#ifdef _BIG_ENDIAN
+   const int PREF_D = 0;
+#else
+   const int PREF_D = 1;
+#endif
+
+double vmaxd (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_max (va, vb), PREF_D);
+}
+
+double vmind (double a, double b)
+{
+  vector double va = vec_promote (a, PREF_D);
+  vector double vb = vec_promote (b, PREF_D);
+  return vec_extract (vec_min (va, vb), PREF_D);
+}
+
+#ifdef _BIG_ENDIAN
+   const int PREF_F = 0;
+#else
+   const int PREF_F = 3;
+#endif
+
+float vmaxf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_max (va, vb), PREF_F);
+}
+
+float vminf (float a, float b)
+{
+  vector float va = vec_promote (a, PREF_F);
+  vector float vb = vec_promote (b, PREF_F);
+  return vec_extract (vec_min (va, vb), PREF_F);
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c 
b/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c
new file mode 100644
index 000..4c6f4365830
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec-minmax-2.c
@@ -0,0 +1,51 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -ffast-math" } */
+/* { dg-final { scan-assembler-times {\mxsmaxcdp\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxsmincdp\M} 2 } } */
+
+/* This test verifies that float or double vec_min/max can be converted
+   to scalar comparison when fast-math is set. 

[PATCH] Adjust testcase for O2 vectorization enabling.

2021-10-10 Thread liuhongt via Gcc-patches
gcc/testsuite/ChangeLog:

PR middle-end/102669
* gnat.dg/unroll1.adb: Add -fno-tree-vectorize.
---
 gcc/testsuite/gnat.dg/unroll1.adb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gnat.dg/unroll1.adb 
b/gcc/testsuite/gnat.dg/unroll1.adb
index 34d8a8f3f38..8b732dd8f44 100644
--- a/gcc/testsuite/gnat.dg/unroll1.adb
+++ b/gcc/testsuite/gnat.dg/unroll1.adb
@@ -1,5 +1,5 @@
 -- { dg-do compile }
--- { dg-options "-O2 -funroll-all-loops -fdump-rtl-loop2_unroll-details 
-fdump-tree-cunrolli-details" }
+-- { dg-options "-O2 -funroll-all-loops -fno-tree-vectorize 
-fdump-rtl-loop2_unroll-details -fdump-tree-cunrolli-details" }
 
 package body Unroll1 is
 
-- 
2.18.1



Re: [PATCH] include/longlong.h: Remove incorrect lvalue to rvalue conversion from asm output constraints

2021-10-10 Thread Florian Weimer
* Fangrui Song:

> An output constraint takes a lvalue. While GCC happily strips the
> incorrect lvalue to rvalue conversion, Clang rejects the code by default:
>
> error: invalid use of a cast in a inline asm context requiring an lvalue: 
> remove the cast or build with -fheinous-gnu-extensions
>
> The file appears to share the same origin with gmplib longlong.h but
> they differ much now (gmplib version is much longer).
>
> I don't have write access to the git repo.
> ---
>  include/longlong.h | 186 ++---
>  1 file changed, 93 insertions(+), 93 deletions(-)
>
> diff --git a/include/longlong.h b/include/longlong.h
> index c3e92e54ecc..0a21a441d2d 100644
> --- a/include/longlong.h
> +++ b/include/longlong.h
> @@ -194,8 +194,8 @@ extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, 
> UDItype);
>  #if defined (__arc__) && W_TYPE_SIZE == 32
>  #define add_ss(sh, sl, ah, al, bh, bl) \
>__asm__ ("add.f%1, %4, %5\n\tadc   %0, %2, %3" \
> -: "=r" ((USItype) (sh)), \
> -  "=" ((USItype) (sl)) \
> +: "=r" (sh), \
> +  "=" (sl) \
>  : "%r" ((USItype) (ah)), \
>"rICal" ((USItype) (bh)),  \
>"%r" ((USItype) (al)), \

This seems to alter the meanining of existing programs if sh and sl do
not have the expected type.

I think you need to add a compound expression and temporaries of type
USItype if you want to avoid the cast.


Re: [PATCH v3] [PR/target 100316] Allow constant address for __builtin___clear_cache.

2021-10-10 Thread Kito Cheng
Hi Richard:

Test with x86 and rl78, both are rejected by the front-end if modes
are different from Pmode/ptr_mode,
so I'm gonna commit this change :)

Testcase for x86 / x86_64:
```
void test(void)
{
 int __seg_fs *f = (int __seg_fs *)16;
 int __seg_fs *g = (int __seg_fs *)32;
 __builtin___clear_cache (f, g); // error: passing argument 1 of
‘__builtin___clear_cache’ from pointer to non-enclosed address space
}
```

Testcase for rl78 (Pmode == HImode):
```
void test(void)
{
 int __near *f = (int __near *)16;  // mode == HImode, same as Pmode
 int __near *g = (int __near *)32;
 __builtin___clear_cache (f, g);  // OK to compile
}

void test2(void)
{
 int __far *f = (int __far *)16; // mode == SImode
 int __far *g = (int __far *)32;
 __builtin___clear_cache (f, g);  // error: passing argument 1 of
‘__builtin___clear_cache’ from pointer to non-enclosed address space
}
```

On Fri, Oct 8, 2021 at 2:47 PM Richard Biener
 wrote:
>
> On Fri, Oct 8, 2021 at 4:40 AM Kito Cheng  wrote:
> >
> > __builtin___clear_cache was able to accept constant address for the
> > argument, but it seems no longer accept recently, and it even not
> > accept constant address which is hold in variable when optimization is
> > enable:
> >
> > ```
> > void foo3(){
> >   void *yy = (void*)0x1000;
> >   __builtin___clear_cache(yy, yy);
> > }
> > ```
> >
> > So this patch make BEGIN and END accept VOIDmode, like cselib_lookup_mem 
> > did per
> > Jim Wilson's suggestion.
> >
> > ```
> > static cselib_val *
> > cselib_lookup_mem (rtx x, int create)
> > {
> >   ...
> >   addr_mode = GET_MODE (XEXP (x, 0));
> >   if (addr_mode == VOIDmode)
> > addr_mode = Pmode;
> > ```
> >
> > Changes v2 -> v3:
> > - Use gcc_assert rather than error, maybe_emit_call_builtin___clear_cache is
> > internal use only, and we already checked the type in other place.
> >
> > Changes v1 -> v2:
> > - Check is CONST_INT intead of cehck mode, no new testcase, since
> >   constant value with other type like CONST_DOUBLE will catched by
> >   front-end.
> > e.g.
> > Code:
> > ```c
> > void foo(){
> >   __builtin___clear_cache(1.11, 0);
> > }
> > ```
> > Error message:
> > ```
> > clearcache-double.c: In function 'foo':
> > clearcache-double.c:2:27: error: incompatible type for argument 1 of 
> > '__builtin___clear_cache'
> > 2 |   __builtin___clear_cache(1.11, 0);
> >   |   ^~~~
> >   |   |
> >   |   double
> > clearcache-double.c:2:27: note: expected 'void *' but argument is of type 
> > 'double'
> > ```
> >
> > gcc/ChangeLog:
> >
> > PR target/100316
> > * builtins.c (maybe_emit_call_builtin___clear_cache): Allow
> > CONST_INT for BEGIN and END, and use gcc_assert rather than
> > error.
> >
> > gcc/testsuite/ChangeLog:
> >
> > PR target/100316
> > * gcc.c-torture/compile/pr100316.c: New.
> > ---
> >  gcc/builtins.c | 10 --
> >  gcc/testsuite/gcc.c-torture/compile/pr100316.c | 18 ++
> >  2 files changed, 22 insertions(+), 6 deletions(-)
> >  create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr100316.c
> >
> > diff --git a/gcc/builtins.c b/gcc/builtins.c
> > index 3e57eb03af0..80a1bb191c6 100644
> > --- a/gcc/builtins.c
> > +++ b/gcc/builtins.c
> > @@ -5163,12 +5163,10 @@ default_emit_call_builtin___clear_cache (rtx begin, 
> > rtx end)
> >  void
> >  maybe_emit_call_builtin___clear_cache (rtx begin, rtx end)
> >  {
> > -  if ((GET_MODE (begin) != ptr_mode && GET_MODE (begin) != Pmode)
> > -  || (GET_MODE (end) != ptr_mode && GET_MODE (end) != Pmode))
> > -{
> > -  error ("both arguments to %<__builtin___clear_cache%> must be 
> > pointers");
> > -  return;
> > -}
> > +  gcc_assert ((GET_MODE (begin) == ptr_mode || GET_MODE (begin) == Pmode
> > +  || CONST_INT_P (begin))
> > + && (GET_MODE (end) == ptr_mode || GET_MODE (end) == Pmode
> > + || CONST_INT_P (end)));
>
> OK I guess.
>
> I'm not 100% sure we might not ICE here when using
> __builtin_clear_cache on a pointer
> with some other than the default address-space which might have a mode
> that's not
> ptr_mode or Pmode?
>
> Thanks,
> Richard.
>
> >if (targetm.have_clear_cache ())
> >  {
> > diff --git a/gcc/testsuite/gcc.c-torture/compile/pr100316.c 
> > b/gcc/testsuite/gcc.c-torture/compile/pr100316.c
> > new file mode 100644
> > index 000..38eca86f49f
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.c-torture/compile/pr100316.c
> > @@ -0,0 +1,18 @@
> > +void foo(){
> > +  __builtin___clear_cache(0, 0);
> > +}
> > +
> > +void foo1(){
> > +  __builtin___clear_cache((void*)0, (void*)0);
> > +}
> > +
> > +void foo2(){
> > +  void *yy = 0;
> > +  __builtin___clear_cache(yy, yy);
> > +}
> > +
> > +void foo3(){
> > +  void *yy = (void*)0x1000;
> > +  __builtin___clear_cache(yy, yy);
> > +}
> > +
> > --
> > 2.33.0
> >


[PATCH] rs6000/test: Adjust some cases due to O2 vect [PR102658]

2021-10-10 Thread Kewen.Lin via Gcc-patches
Hi,

As PR102658 shows, commit r12-4240 enables vectorization at O2,
some cases need to be adjusted accordingly for rs6000 port.

- For target specific test cases, this adds -fno-tree-vectorize
to retain original test points, otherwise vectorization can
make some expected scalar instructions gone or generate some
unexpected instructions for vector construction.

- For generic test cases, it follows the existing suggested
practice with necessary target/xfail selector.

Tested with expected results on powerpc64le-linux-gnu and
powerpc64-linux-gnu.

Is it ok for trunk?

BR,
Kewen
-
gcc/testsuite/ChangeLog:

PR testsuite/102658
* c-c++-common/Wstringop-overflow-2.c: Adjust for rs6000 port.
* g++.dg/warn/Wuninitialized-13.C: Likewise.
* gcc.dg/Warray-parameter-3.c: Likewise.
* gcc.dg/Wstringop-overflow-21.c: Likewise.
* gcc.dg/Wstringop-overflow-68.c: Likewise.
* gcc.dg/Wstringop-overflow-76.c: Likewise.
* gcc.target/powerpc/dform-1.c: Adjust as vectorization enabled at O2.
* gcc.target/powerpc/dform-2.c: Likewise.
* gcc.target/powerpc/pr80510-2.c: Likewise.

---

diff --git a/gcc/testsuite/c-c++-common/Wstringop-overflow-2.c 
b/gcc/testsuite/c-c++-common/Wstringop-overflow-2.c
index 7d29b5f48c7..5d83caddc4e 100644
--- a/gcc/testsuite/c-c++-common/Wstringop-overflow-2.c
+++ b/gcc/testsuite/c-c++-common/Wstringop-overflow-2.c
@@ -221,10 +221,10 @@ void ga1_1 (void)
   a1_1.a[1] = 1;// { dg-warning "\\\[-Wstringop-overflow" }
   a1_1.a[2] = 2;// { dg-warning "\\\[-Wstringop-overflow" }

-  struct A1 a = { 0, { 1 } };   // { dg-warning "\\\[-Wstringop-overflow" "" { 
target { i?86-*-* x86_64-*-* } } }
+  struct A1 a = { 0, { 1 } };   // { dg-warning "\\\[-Wstringop-overflow" "" { 
target { i?86-*-* x86_64-*-* powerpc*-*-* } } }
   a.a[0] = 0;
-  a.a[1] = 1;   // { dg-warning "\\\[-Wstringop-overflow" "" { 
xfail { i?86-*-* x86_64-*-* } } }
-  a.a[2] = 2;   // { dg-warning "\\\[-Wstringop-overflow" "" { 
xfail { i?86-*-* x86_64-*-* } } }
+  a.a[1] = 1;   // { dg-warning "\\\[-Wstringop-overflow" "" { 
xfail { i?86-*-* x86_64-*-* powerpc*-*-* } } }
+  a.a[2] = 2;   // { dg-warning "\\\[-Wstringop-overflow" "" { 
xfail { i?86-*-* x86_64-*-* powerpc*-*-* } } }
   sink ();
 }

@@ -320,10 +320,10 @@ void ga1i_1 (void)
   a1i_1.a[1] = 1;   // { dg-warning "\\\[-Wstringop-overflow" }
   a1i_1.a[2] = 2;   // { dg-warning "\\\[-Wstringop-overflow" }

-  struct A1 a = { 0, { 1 } };   // { dg-warning "\\\[-Wstringop-overflow" "" { 
target { i?86-*-* x86_64-*-* } } }
+  struct A1 a = { 0, { 1 } };   // { dg-warning "\\\[-Wstringop-overflow" "" { 
target { i?86-*-* x86_64-*-* powerpc*-*-* } } }
   a.a[0] = 1;
-  a.a[1] = 2;   // { dg-warning "\\\[-Wstringop-overflow" "" { 
xfail { i?86-*-* x86_64-*-* } } }
-  a.a[2] = 3;   // { dg-warning "\\\[-Wstringop-overflow" "" { 
xfail { i?86-*-* x86_64-*-* } } }
+  a.a[1] = 2;   // { dg-warning "\\\[-Wstringop-overflow" "" { 
xfail { i?86-*-* x86_64-*-* powerpc*-*-* } } }
+  a.a[2] = 3;   // { dg-warning "\\\[-Wstringop-overflow" "" { 
xfail { i?86-*-* x86_64-*-* powerpc*-*-* } } }
   sink ();
 }

diff --git a/gcc/testsuite/g++.dg/warn/Wuninitialized-13.C 
b/gcc/testsuite/g++.dg/warn/Wuninitialized-13.C
index 210e74c3c3b..4ad897a6486 100644
--- a/gcc/testsuite/g++.dg/warn/Wuninitialized-13.C
+++ b/gcc/testsuite/g++.dg/warn/Wuninitialized-13.C
@@ -5,7 +5,7 @@
 struct shared_count {
   shared_count () { }
   shared_count (shared_count )
-: pi (r.pi) { } // { dg-warning "\\\[-Wuninitialized" "" { xfail { 
i?86-*-* x86_64-*-* } } }
+: pi (r.pi) { } // { dg-warning "\\\[-Wuninitialized" "" { xfail { 
i?86-*-* x86_64-*-* powerpc*-*-* } } }
   int pi;
 };

diff --git a/gcc/testsuite/gcc.dg/Warray-parameter-3.c 
b/gcc/testsuite/gcc.dg/Warray-parameter-3.c
index e8a269c85c6..f7404be8742 100644
--- a/gcc/testsuite/gcc.dg/Warray-parameter-3.c
+++ b/gcc/testsuite/gcc.dg/Warray-parameter-3.c
@@ -77,7 +77,7 @@ gia3 (int a[3])
 __attribute__ ((noipa)) void
 gcas3 (char a[static 3])
 {
-  a[0] = 0; a[1] = 1; a[2] = 2; // { dg-warning "\\\[-Wstringop-overflow" "" { 
target { i?86-*-* x86_64-*-* } } }
+  a[0] = 0; a[1] = 1; a[2] = 2; // { dg-warning "\\\[-Wstringop-overflow" "" { 
target { i?86-*-* x86_64-*-* powerpc*-*-* } } }
   a[3] = 3;   // { dg-warning "\\\[-Warray-bounds" }
 }

diff --git a/gcc/testsuite/gcc.dg/Wstringop-overflow-21.c 
b/gcc/testsuite/gcc.dg/Wstringop-overflow-21.c
index d88bde9c740..2db6a52b22b 100644
--- a/gcc/testsuite/gcc.dg/Wstringop-overflow-21.c
+++ b/gcc/testsuite/gcc.dg/Wstringop-overflow-21.c
@@ -23,10 +23,10 @@ void test_store_zero_length (int i)
 {
   char a[3];
   struct S0 *p = (struct S0*)a;
-  p->a = 0; // { dg-warning "\\\[-Wstringop-overflow" 

Re: [PATCH] Adjust testcase for O2 vectorization enabling

2021-10-10 Thread Hongtao Liu via Gcc-patches
On Mon, Oct 11, 2021 at 10:23 AM Kewen.Lin via Gcc-patches
 wrote:
>
> Hi Hongtao,
>
> on 2021/10/11 上午10:10, liuhongt via Gcc-patches wrote:
> > libgomp/ChangeLog:
> >
> >   * testsuite/libgomp.graphite/force-parallel-8.c: Add 
> > -fno-tree-vectorize.
> > ---
> >  libgomp/testsuite/libgomp.graphite/force-parallel-8.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/libgomp/testsuite/libgomp.graphite/force-parallel-8.c 
> > b/libgomp/testsuite/libgomp.graphite/force-parallel-8.c
> > index f9e07039172..0f3d138cebd 100644
> > --- a/libgomp/testsuite/libgomp.graphite/force-parallel-8.c
> > +++ b/libgomp/testsuite/libgomp.graphite/force-parallel-8.c
> > @@ -1,4 +1,4 @@
> > -/* { dg-additional-options "-fdisable-tree-thread1 
> > -fdisable-tree-vrp-thread1" } */
> > +/* { dg-additional-options "-fdisable-tree-thread1 
> > -fdisable-tree-vrp-thread1 -fno-tree-vectorize" } */
> >
> >  #define N 1500
> >
> >
>
> Thanks for doing this!
>
> This issue was observed in rs6000 specific PR102658 as well.
>
> I've looked into it a bit, it's caused by the "conditional store replacement" 
> which
> is originally disabled without vectorization as below code.
>
>   /* If either vectorization or if-conversion is disabled then do
>  not sink any stores.  */
>   if (param_max_stores_to_sink == 0
>   || (!flag_tree_loop_vectorize && !flag_tree_slp_vectorize)
>   || !flag_tree_loop_if_convert)
> return false;
>
> The new change makes the innermost loop look like
>
> for (int c1 = 0; c1 <= 1499; c1 += 1) {
>   if (c1 <= 500) {
>  S_10(c0, c1);
>   } else {
>   S_9(c0, c1);
>   }
>   S_11(c0, c1);
> }
>
> and can not be splitted as:
>
> for (int c1 = 0; c1 <= 500; c1 += 1)
>   S_10(c0, c1);
>
> for (int c1 = 501; c1 <= 1499; c1 += 1)
>   S_9(c0, c1);
>
> So instead of disabling vectorization, could we just disable this cs 
> replacement
> with parameter "--param max-stores-to-sink=0"?
>
> I tested this proposal on ppc64le, it should work as well.
>
> What do you think of it?
I can confirm it also works for x86, I'll adjust the patch.
Thank you for the analysis.
>
> BR,
> Kewen



-- 
BR,
Hongtao


Re: [PATCH] Adjust testcase for O2 vectorization enabling

2021-10-10 Thread Kewen.Lin via Gcc-patches
Hi Hongtao,

on 2021/10/11 上午10:10, liuhongt via Gcc-patches wrote:
> libgomp/ChangeLog:
> 
>   * testsuite/libgomp.graphite/force-parallel-8.c: Add 
> -fno-tree-vectorize.
> ---
>  libgomp/testsuite/libgomp.graphite/force-parallel-8.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/libgomp/testsuite/libgomp.graphite/force-parallel-8.c 
> b/libgomp/testsuite/libgomp.graphite/force-parallel-8.c
> index f9e07039172..0f3d138cebd 100644
> --- a/libgomp/testsuite/libgomp.graphite/force-parallel-8.c
> +++ b/libgomp/testsuite/libgomp.graphite/force-parallel-8.c
> @@ -1,4 +1,4 @@
> -/* { dg-additional-options "-fdisable-tree-thread1 
> -fdisable-tree-vrp-thread1" } */
> +/* { dg-additional-options "-fdisable-tree-thread1 
> -fdisable-tree-vrp-thread1 -fno-tree-vectorize" } */
>  
>  #define N 1500
>  
> 

Thanks for doing this!

This issue was observed in rs6000 specific PR102658 as well.

I've looked into it a bit, it's caused by the "conditional store replacement" 
which
is originally disabled without vectorization as below code.

  /* If either vectorization or if-conversion is disabled then do
 not sink any stores.  */
  if (param_max_stores_to_sink == 0
  || (!flag_tree_loop_vectorize && !flag_tree_slp_vectorize)
  || !flag_tree_loop_if_convert)
return false;

The new change makes the innermost loop look like

for (int c1 = 0; c1 <= 1499; c1 += 1) {
  if (c1 <= 500) {
 S_10(c0, c1);
  } else {
  S_9(c0, c1);
  }
  S_11(c0, c1);
} 

and can not be splitted as:

for (int c1 = 0; c1 <= 500; c1 += 1)
  S_10(c0, c1);

for (int c1 = 501; c1 <= 1499; c1 += 1)
  S_9(c0, c1);

So instead of disabling vectorization, could we just disable this cs replacement
with parameter "--param max-stores-to-sink=0"?

I tested this proposal on ppc64le, it should work as well.

What do you think of it?

BR,
Kewen


[PATCH] Adjust testcase for O2 vectorization enabling

2021-10-10 Thread liuhongt via Gcc-patches
libgomp/ChangeLog:

* testsuite/libgomp.graphite/force-parallel-8.c: Add 
-fno-tree-vectorize.
---
 libgomp/testsuite/libgomp.graphite/force-parallel-8.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libgomp/testsuite/libgomp.graphite/force-parallel-8.c 
b/libgomp/testsuite/libgomp.graphite/force-parallel-8.c
index f9e07039172..0f3d138cebd 100644
--- a/libgomp/testsuite/libgomp.graphite/force-parallel-8.c
+++ b/libgomp/testsuite/libgomp.graphite/force-parallel-8.c
@@ -1,4 +1,4 @@
-/* { dg-additional-options "-fdisable-tree-thread1 -fdisable-tree-vrp-thread1" 
} */
+/* { dg-additional-options "-fdisable-tree-thread1 -fdisable-tree-vrp-thread1 
-fno-tree-vectorize" } */
 
 #define N 1500
 
-- 
2.18.1



[PATCH] [GCC 10 branch] tree-optimization: [PR102622]: wrong code due to signed one bit integer and "a?-1:0"

2021-10-10 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

So here is the GCC 10 branch version which fixes the wrong code.
The problem is we create a negation of an one bit signed integer type
which is undefined if the value was -1.
This is not needed for GCC 11 branch since the case is handled differently
there and has been fixed there (and the trunk has now been fixed too).
So for one bit types, there is no reason to create the negation so just
setting neg to false for them, just works.

OK? Bootstrapped and tested on x86_64-linux-gnu.

PR tree-optimization/102622

gcc/ChangeLog:

* tree-ssa-phiopt.c (conditional_replacement): Set neg
to false for one bit signed types.

gcc/testsuite/ChangeLog:

* gcc.c-torture/execute/bitfld-10.c: New test.
---
 gcc/testsuite/gcc.c-torture/execute/bitfld-10.c | 24 
 gcc/tree-ssa-phiopt.c   |  5 -
 2 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/execute/bitfld-10.c

diff --git a/gcc/testsuite/gcc.c-torture/execute/bitfld-10.c 
b/gcc/testsuite/gcc.c-torture/execute/bitfld-10.c
new file mode 100644
index 000..bdbf573
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/bitfld-10.c
@@ -0,0 +1,24 @@
+/* PR tree-optimization/102622 */
+/* Wrong code introduced due to phi-opt
+   introducing undefined signed interger overflow
+   with one bit signed integer negation. */
+
+struct f{signed t:1;};
+int g(struct f *a, int t) __attribute__((noipa));
+int g(struct f *a, int t)
+{
+if (t)
+  a->t = -1;
+else
+  a->t = 0;
+int t1 = a->t;
+if (t1) return 1;
+return t1;
+}
+
+int main(void)
+{
+struct f a;
+if (!g(, 1))  __builtin_abort();
+return 0;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index 9ed26a3..a6c197d 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -770,9 +770,12 @@ conditional_replacement (basic_block cond_bb, basic_block 
middle_bb,
   if ((integer_zerop (arg0) && integer_onep (arg1))
   || (integer_zerop (arg1) && integer_onep (arg0)))
 neg = false;
+  /* For signed one bit types, the negation is not needed and
+ should be avoided and is the same as 1 case for non-signed
+ one bit types.  */
   else if ((integer_zerop (arg0) && integer_all_onesp (arg1))
   || (integer_zerop (arg1) && integer_all_onesp (arg0)))
-neg = true;
+neg = TYPE_PRECISION (TREE_TYPE (arg0)) != 1;
   else
 return false;
 
-- 
1.8.3.1



[PATCH v2] c-format: Add -Wformat-int-precision option [PR80060]

2021-10-10 Thread Daniil Stas via Gcc-patches
This option is enabled by default when -Wformat option is enabled. A
user can specify -Wno-format-int-precision to disable emitting
warnings when passing an argument of an incompatible integer type to
a 'd', 'i', 'o', 'u', 'x', or 'X' conversion specifier when it has
the same precision as the expected type.

Signed-off-by: Daniil Stas 

gcc/c-family/ChangeLog:

* c-format.c (check_format_types): Don't emit warnings when
passing an argument of an incompatible integer type to
a 'd', 'i', 'o', 'u', 'x', or 'X' conversion specifier when it has
the same precision as the expected type if
-Wno-format-int-precision option is specified.
* c.opt: Add -Wformat-int-precision option.

gcc/ChangeLog:

* doc/invoke.texi: Add -Wformat-int-precision option description.

gcc/testsuite/ChangeLog:

* c-c++-common/Wformat-int-precision-1.c: New test.
* c-c++-common/Wformat-int-precision-2.c: New test.
---
This is an update of patch "c-format: Add -Wformat-same-precision option 
[PR80060]".
The changes comparing to the first patch version:

- changed the option name to -Wformat-int-precision
- changed the option description as was suggested by Martin
- changed Wformat-int-precision-2.c to used dg-bogus instead of previous invalid
syntax

I also tried to combine the tests into one file with #pragma GCC diagnostic,
but looks like it's not possible. I want to test that when passing just -Wformat
option everything works as before my patch by default. And then in another test
case to check that passing -Wno-format-int-precision disables the warning. But
looks like in GCC you can't toggle the warnings such as
-Wno-format-int-precision individually but only can disable the general
-Wformat option that will disable all the formatting warnings together, which
is not the proper test.

 gcc/c-family/c-format.c |  2 +-
 gcc/c-family/c.opt  |  6 ++
 gcc/doc/invoke.texi | 17 -
 .../c-c++-common/Wformat-int-precision-1.c  |  7 +++
 .../c-c++-common/Wformat-int-precision-2.c  |  7 +++
 5 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/Wformat-int-precision-1.c
 create mode 100644 gcc/testsuite/c-c++-common/Wformat-int-precision-2.c

diff --git a/gcc/c-family/c-format.c b/gcc/c-family/c-format.c
index ca66c81f716..dd4436929f8 100644
--- a/gcc/c-family/c-format.c
+++ b/gcc/c-family/c-format.c
@@ -4243,7 +4243,7 @@ check_format_types (const substring_loc _loc,
  && (!pedantic || i < 2)
  && char_type_flag)
continue;
-  if (types->scalar_identity_flag
+  if ((types->scalar_identity_flag || !warn_format_int_precision)
  && (TREE_CODE (cur_type) == TREE_CODE (wanted_type)
  || (INTEGRAL_TYPE_P (cur_type)
  && INTEGRAL_TYPE_P (wanted_type)))
diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 06457ac739e..f5b4af3f3f6 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -660,6 +660,12 @@ C ObjC C++ LTO ObjC++ Warning Alias(Wformat-overflow=, 1, 
0) IntegerRange(0, 2)
 Warn about function calls with format strings that write past the end
 of the destination region.  Same as -Wformat-overflow=1.
 
+Wformat-int-precision
+C ObjC C++ ObjC++ Var(warn_format_int_precision) Warning LangEnabledBy(C ObjC 
C++ ObjC++,Wformat=,warn_format >= 1, 0)
+Warn when passing an argument of an incompatible integer type to a 'd', 'i',
+'o', 'u', 'x', or 'X' conversion specifier even when it has the same precision
+as the expected type.
+
 Wformat-security
 C ObjC C++ ObjC++ Var(warn_format_security) Warning LangEnabledBy(C ObjC C++ 
ObjC++,Wformat=, warn_format >= 2, 0)
 Warn about possible security problems with format functions.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 8b3ebcfbc4f..05dec6ba832 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -348,7 +348,7 @@ Objective-C and Objective-C++ Dialects}.
 -Werror  -Werror=*  -Wexpansion-to-defined  -Wfatal-errors @gol
 -Wfloat-conversion  -Wfloat-equal  -Wformat  -Wformat=2 @gol
 -Wno-format-contains-nul  -Wno-format-extra-args  @gol
--Wformat-nonliteral  -Wformat-overflow=@var{n} @gol
+-Wformat-nonliteral  -Wformat-overflow=@var{n} -Wformat-int-precision @gol
 -Wformat-security  -Wformat-signedness  -Wformat-truncation=@var{n} @gol
 -Wformat-y2k  -Wframe-address @gol
 -Wframe-larger-than=@var{byte-size}  -Wno-free-nonheap-object @gol
@@ -6056,6 +6056,21 @@ If @option{-Wformat} is specified, also warn if the 
format string is not a
 string literal and so cannot be checked, unless the format function
 takes its format arguments as a @code{va_list}.
 
+@item -Wformat-int-precision
+@opindex Wformat-int-precision
+@opindex Wno-format-int-precision
+Warn when passing an argument of an incompatible integer type to
+a @samp{d}, @samp{i}, @samp{o}, @samp{u}, @samp{x}, or @samp{X} 

[PATCH] include/longlong.h: Remove incorrect lvalue to rvalue conversion from asm output constraints

2021-10-10 Thread Fangrui Song
An output constraint takes a lvalue. While GCC happily strips the
incorrect lvalue to rvalue conversion, Clang rejects the code by default:

error: invalid use of a cast in a inline asm context requiring an lvalue: 
remove the cast or build with -fheinous-gnu-extensions

The file appears to share the same origin with gmplib longlong.h but
they differ much now (gmplib version is much longer).

I don't have write access to the git repo.
---
 include/longlong.h | 186 ++---
 1 file changed, 93 insertions(+), 93 deletions(-)

diff --git a/include/longlong.h b/include/longlong.h
index c3e92e54ecc..0a21a441d2d 100644
--- a/include/longlong.h
+++ b/include/longlong.h
@@ -194,8 +194,8 @@ extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, 
UDItype);
 #if defined (__arc__) && W_TYPE_SIZE == 32
 #define add_ss(sh, sl, ah, al, bh, bl) \
   __asm__ ("add.f  %1, %4, %5\n\tadc   %0, %2, %3" \
-  : "=r" ((USItype) (sh)), \
-"=" ((USItype) (sl)) \
+  : "=r" (sh), \
+"=" (sl) \
   : "%r" ((USItype) (ah)), \
 "rICal" ((USItype) (bh)),  \
 "%r" ((USItype) (al)), \
@@ -203,8 +203,8 @@ extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, 
UDItype);
   : "cc")
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("sub.f  %1, %4, %5\n\tsbc   %0, %2, %3" \
-  : "=r" ((USItype) (sh)), \
-"=" ((USItype) (sl)) \
+  : "=r" (sh), \
+"=" (sl) \
   : "r" ((USItype) (ah)),  \
 "rICal" ((USItype) (bh)),  \
 "r" ((USItype) (al)),  \
@@ -230,16 +230,16 @@ extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, 
UDItype);
  && W_TYPE_SIZE == 32
 #define add_ss(sh, sl, ah, al, bh, bl) \
   __asm__ ("adds   %1, %4, %5\n\tadc   %0, %2, %3" \
-  : "=r" ((USItype) (sh)), \
-"=" ((USItype) (sl)) \
+  : "=r" (sh), \
+"=" (sl) \
   : "%r" ((USItype) (ah)), \
 "rI" ((USItype) (bh)), \
 "%r" ((USItype) (al)), \
 "rI" ((USItype) (bl)) __CLOBBER_CC)
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subs   %1, %4, %5\n\tsbc   %0, %2, %3" \
-  : "=r" ((USItype) (sh)), \
-"=" ((USItype) (sl)) \
+  : "=r" (sh), \
+"=" (sl) \
   : "r" ((USItype) (ah)),  \
 "rI" ((USItype) (bh)), \
 "r" ((USItype) (al)),  \
@@ -262,8 +262,8 @@ extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, 
UDItype);
   "addcs   %0, %0, #65536\n"   \
   "adds%1, %1, %3, lsl #16\n"  \
   "adc %0, %0, %3, lsr #16"\
-  : "=" ((USItype) (xh)),\
-"=r" ((USItype) (xl)), \
+  : "=" (xh),\
+"=r" (xl), \
 "=" (__t0), "=" (__t1), "=r" (__t2)\
   : "r" ((USItype) (a)),   \
 "r" ((USItype) (b)) __CLOBBER_CC );\
@@ -348,16 +348,16 @@ extern UDItype __umulsidi3 (USItype, USItype);
 #if defined (__hppa) && W_TYPE_SIZE == 32
 #define add_ss(sh, sl, ah, al, bh, bl) \
   __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \
-  : "=r" ((USItype) (sh)), \
-"=" ((USItype) (sl)) \
+  : "=r" (sh), \
+"=" (sl) \
  

Re: [Patch] Fortran: Various CLASS + assumed-rank fixed [PR102541]

2021-10-10 Thread Harald Anlauf via Gcc-patches

Hi Tobias,

just some random remarks from initially browsing your patch.

- leftover from debugging?

diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c
index 1c24556c299..8a82e55d1f9 100644
--- a/gcc/fortran/trans-expr.c
+++ b/gcc/fortran/trans-expr.c
@@ -1,3 +1,4 @@
+#pragma GCC optimize(0)

- code that could be shortened/made slightly more readable:

@@ -2723,7 +2728,13 @@ gfc_variable_attr (gfc_expr *expr, gfc_typespec *ts)
else
  {
codimension = comp->attr.codimension;
-   pointer = comp->attr.pointer;
+   if (expr->ts.type == BT_CLASS
+   && comp->name[0] == '_' && comp->name[1] == 'd'
+   && comp->name[2] == 'a' && comp->name[3] == 't'
+   && comp->name[4] == 'a' && comp->name[5] == '\0')

Is there a reason to not use strcmp (comp->name, "_data") == 0?

Cheers,
Harald

Am 01.10.21 um 02:43 schrieb Tobias Burnus:

Hi all,

this patch fixes a bunch of issues with CLASS.

  * * *

Side remark: I disliked the way CLASS is represented when it was
introduced;
when writing the testcase for this PR and kept fixing the testcase fallout,
I started to hate it!
I am sure that there are more issues – but I tried hard not too look closer
at surrounding code to avoid hitting more issues.
(If you look for a project, I think if you put attributes on separate
lines,
like a separate "POINTER :: var" line, you have a high chance to hit the
error.)

  * * *

What I found rather puzzling is that the 'optional' argument could be
either
on sym->attr.optional or on CLASS_DATA (sym)->attr.optional. I think one
occurs for 'foo2' and the other for 'foo4' - not that I understand why it
differs.

I think it is otherwise straight forward. Regarding the original issue:

In order to detect an assumed-size argument to an assumed-rank array,
the last dimension has 'ubound = -1' to indicate an assume-size array;
for those size(x, dim=rank(x)-1) == -1 and size(x) < 0

However, when the dummy argument (and hence: actual argument) is either
a pointer or an allocatable, the bound is passed as is (in particular,
"-1" is a valid ubound and size(x) >= 0). – However, if the actual
argument is unallocated/not associated, rank(var) still is supposed to
work - hence, it has to be set.

The last two items did work before - but not for CLASS -> CLASS.
Additionally,
the ubound = -1 had an issue for CLASS -> TYPE as the code assumed that
expr->ref is the whole array ("var(full-array-ref)") but for CLASS the
expr->ref is a component and only expr->ref->next is the array ref.
("var%_data(full-array-ref)").

OK for mainline?

Tobias

-
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201,
80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer:
Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München;
Registergericht München, HRB 106955




PING^1 [PATCH v2 0/4] libffi: Sync with upstream

2021-10-10 Thread H.J. Lu via Gcc-patches
On Thu, Sep 2, 2021 at 8:50 AM H.J. Lu  wrote:
>
> Change in the v2 patch:
>
> 1. Disable static trampolines by default.
>
>
> GCC maintained a copy of libffi snapshot from 2009 and cherry-picked fixes
> from upstream over the last 10+ years.  In the meantime, libffi upstream
> has been changed significantly with new features, bug fixes and new target
> support.  Here is a set of patches to sync with libffi 3.4.2 release and
> make it easier to sync with libffi upstream:
>
> 1. Document how to sync with upstream.
> 2. Add scripts to help sync with upstream.
> 3. Sync with libffi 3.4.2. This patch is quite big.  It is availale at
>
> https://gitlab.com/x86-gcc/gcc/-/commit/15e80c879c571f79a0e57702848a9df5fba5be2f
> 4. Integrate libffi build and testsuite with GCC.
>
> H.J. Lu (4):
>   libffi: Add HOWTO_MERGE, autogen.sh and merge.sh
>   libffi: Sync with libffi 3.4.2
>   libffi: Integrate build with GCC
>   libffi: Integrate testsuite with GCC testsuite
>
>  libffi/.gitattributes |4 +
>  libffi/ChangeLog.libffi   | 7743 -
>  libffi/HOWTO_MERGE|   13 +
>  libffi/LICENSE|2 +-
>  libffi/LICENSE-BUILDTOOLS |  353 +
>  libffi/MERGE  |4 +
>  libffi/Makefile.am|  135 +-
>  libffi/Makefile.in|  219 +-
>  libffi/README |  450 -
>  libffi/README.md  |  495 ++
>  libffi/acinclude.m4   |   38 +-
>  libffi/autogen.sh |   11 +
>  libffi/configure  |  487 +-
>  libffi/configure.ac   |   91 +-
>  libffi/configure.host |   97 +-
>  libffi/doc/Makefile.am|3 +
>  libffi/doc/libffi.texi|  382 +-
>  libffi/doc/version.texi   |8 +-
>  libffi/fficonfig.h.in |   21 +-
>  libffi/generate-darwin-source-and-headers.py  |  143 +-
>  libffi/include/Makefile.am|2 +-
>  libffi/include/Makefile.in|3 +-
>  libffi/include/ffi.h.in   |  213 +-
>  libffi/include/ffi_cfi.h  |   21 +
>  libffi/include/ffi_common.h   |   50 +-
>  libffi/include/tramp.h|   45 +
>  libffi/libffi.map.in  |   24 +-
>  libffi/libffi.pc.in   |2 +-
>  libffi/libffi.xcodeproj/project.pbxproj   |  530 +-
>  libffi/libtool-version|   25 +-
>  libffi/man/Makefile.in|1 +
>  libffi/mdate-sh   |   39 +-
>  libffi/merge.sh   |   51 +
>  libffi/msvcc.sh   |  134 +-
>  libffi/src/aarch64/ffi.c  |  536 +-
>  libffi/src/aarch64/ffitarget.h|   35 +-
>  libffi/src/aarch64/internal.h |   33 +
>  libffi/src/aarch64/sysv.S |  189 +-
>  libffi/src/aarch64/win64_armasm.S |  506 ++
>  libffi/src/alpha/ffi.c|6 +-
>  libffi/src/arc/ffi.c  |6 +-
>  libffi/src/arm/ffi.c  |  380 +-
>  libffi/src/arm/ffitarget.h|   24 +-
>  libffi/src/arm/internal.h |   10 +
>  libffi/src/arm/sysv.S |  304 +-
>  libffi/src/arm/sysv_msvc_arm32.S  |  311 +
>  libffi/src/closures.c |  489 +-
>  libffi/src/cris/ffi.c |4 +-
>  libffi/src/csky/ffi.c |  395 +
>  libffi/src/csky/ffitarget.h   |   63 +
>  libffi/src/csky/sysv.S|  371 +
>  libffi/src/dlmalloc.c |7 +-
>  libffi/src/frv/ffi.c  |4 +-
>  libffi/src/ia64/ffi.c |   30 +-
>  libffi/src/ia64/ffitarget.h   |3 +-
>  libffi/src/ia64/unix.S|9 +-
>  libffi/src/java_raw_api.c |6 +-
>  libffi/src/kvx/asm.h  |5 +
>  libffi/src/kvx/ffi.c  |  273 +
>  libffi/src/kvx/ffitarget.h|   75 +
>  libffi/src/kvx/sysv.S |  127 +
>  libffi/src/m32r/ffi.c |2 +-
>  libffi/src/m68k/ffi.c |4 +-
>  libffi/src/m68k/sysv.S|   29 +-
>  libffi/src/m88k/ffi.c |8 +-
>  libffi/src/metag/ffi.c|   14 +-
>  libffi/src/microblaze/ffi.c   |   10 +-
>  libffi/src/mips/ffi.c |  146 +-
>  

[PATCH v4] Improve integer bit test on __atomic_fetch_[or|and]_* returns

2021-10-10 Thread H.J. Lu via Gcc-patches
Changes in v4:

1. Bypass redundant check when inputs have been transformed to the
equivalent canonical form with valid bit operation.

Changes in v3:

1.  Check invalid bit operation.

commit adedd5c173388ae505470df152b9cb3947339566
Author: Jakub Jelinek 
Date:   Tue May 3 13:37:25 2016 +0200

re PR target/49244 (__sync or __atomic builtins will not emit 'lock 
bts/btr/btc')

optimized bit test on __atomic_fetch_or_* and __atomic_fetch_and_* returns
with lock bts/btr/btc by turning

  mask_2 = 1 << cnt_1;
  _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
  _5 = _4 & mask_2;

into

  _4 = ATOMIC_BIT_TEST_AND_SET (ptr_6, cnt_1, 0, _3);
  _5 = _4;

and

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
  _3 = _2 & mask_6;
  _4 = _3 != 0;

into

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _11 = .ATOMIC_BIT_TEST_AND_RESET (v_8(D), bit_5(D), 1, 0);
  _4 = _11 != 0;

But it failed to optimize many equivalent, but slighly different cases:

1.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _4 = (_Bool) _1;
2.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _4 = (_Bool) _1;
3.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _7 = ~_1;
  _5 = (_Bool) _7;
4.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _7 = ~_1;
  _5 = (_Bool) _7;
5.
  _1 = __atomic_fetch_or_4 (ptr_6, 1, _3);
  _2 = (int) _1;
  _7 = ~_2;
  _5 = (_Bool) _7;
6.
  _1 = __atomic_fetch_and_4 (ptr_6, ~1, _3);
  _2 = (int) _1;
  _7 = ~_2;
  _5 = (_Bool) _7;
7.
  _1 = _atomic_fetch_or_4 (ptr_6, mask, _3);
  _2 = (int) _1;
  _5 = _2 & mask;
8.
  _1 = __atomic_fetch_or_4 (ptr_6, 0x8000, _3);
  _5 = (signed int) _1;
  _4 = _5 < 0;
9.
  _1 = __atomic_fetch_and_4 (ptr_6, 0x7fff, _3);
  _5 = (signed int) _1;
  _4 = _5 < 0;
10.
  _1 = 1 << bit_4(D);
  mask_5 = (unsigned int) _1;
  _2 = __atomic_fetch_or_4 (v_7(D), mask_5, 0);
  _3 = _2 & mask_5;
11.
  mask_7 = 1 << bit_6(D);
  _1 = ~mask_7;
  _2 = (unsigned int) _1;
  _3 = __atomic_fetch_and_4 (v_9(D), _2, 0);
  _4 = (int) _3;
  _5 = _4 & mask_7;

We make

  mask_2 = 1 << cnt_1;
  _4 = __atomic_fetch_or_* (ptr_6, mask_2, _3);
  _5 = _4 & mask_2;

and

  mask_6 = 1 << bit_5(D);
  _1 = ~mask_6;
  _2 = __atomic_fetch_and_4 (v_8(D), _1, 0);
  _3 = _2 & mask_6;
  _4 = _3 != 0;

the canonical forms for this optimization and transform cases 1-9 to the
equivalent canonical form.  For cases 10 and 11, we simply remove the cast
before __atomic_fetch_or_4/__atomic_fetch_and_4 with

  _1 = 1 << bit_4(D);
  _2 = __atomic_fetch_or_4 (v_7(D), _1, 0);
  _3 = _2 & _1;

and

  mask_7 = 1 << bit_6(D);
  _1 = ~mask_7;
  _3 = __atomic_fetch_and_4 (v_9(D), _1, 0);
  _6 = _3 & mask_7;
  _5 = (int) _6;

gcc/

PR middle-end/102566
* tree-ssa-ccp.c (convert_atomic_bit_not): New function.
(optimize_atomic_bit_test_and): Transform equivalent, but slighly
different cases to their canonical forms.

gcc/testsuite/

PR middle-end/102566
* g++.target/i386/pr102566-1.C: New test.
* g++.target/i386/pr102566-2.C: Likewise.
* g++.target/i386/pr102566-3.C: Likewise.
* g++.target/i386/pr102566-4.C: Likewise.
* g++.target/i386/pr102566-5a.C: Likewise.
* g++.target/i386/pr102566-5b.C: Likewise.
* g++.target/i386/pr102566-6a.C: Likewise.
* g++.target/i386/pr102566-6b.C: Likewise.
* gcc.target/i386/pr102566-1a.c: Likewise.
* gcc.target/i386/pr102566-1b.c: Likewise.
* gcc.target/i386/pr102566-2.c: Likewise.
* gcc.target/i386/pr102566-3a.c: Likewise.
* gcc.target/i386/pr102566-3b.c: Likewise.
* gcc.target/i386/pr102566-4.c: Likewise.
* gcc.target/i386/pr102566-5.c: Likewise.
* gcc.target/i386/pr102566-6.c: Likewise.
* gcc.target/i386/pr102566-7.c: Likewise.
* gcc.target/i386/pr102566-8a.c: Likewise.
* gcc.target/i386/pr102566-8b.c: Likewise.
* gcc.target/i386/pr102566-9a.c: Likewise.
* gcc.target/i386/pr102566-9b.c: Likewise.
* gcc.target/i386/pr102566-10a.c: Likewise.
* gcc.target/i386/pr102566-10b.c: Likewise.
* gcc.target/i386/pr102566-11.c: Likewise.
* gcc.target/i386/pr102566-12.c: Likewise.
---
 gcc/testsuite/g++.target/i386/pr102566-1.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-2.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-3.C   |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-4.C   |  29 ++
 gcc/testsuite/g++.target/i386/pr102566-5a.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-5b.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-6a.C  |  31 ++
 gcc/testsuite/g++.target/i386/pr102566-6b.C  |  31 ++
 gcc/testsuite/gcc.target/i386/pr102566-10a.c |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-10b.c |  15 +
 gcc/testsuite/gcc.target/i386/pr102566-11.c  |  28 ++
 gcc/testsuite/gcc.target/i386/pr102566-12.c  |  28 ++
 gcc/testsuite/gcc.target/i386/pr102566-1a.c  | 188 +++
 gcc/testsuite/gcc.target/i386/pr102566-1b.c  | 107 
 

Re: [PATCH] c++: error message for dependent template members [PR70417]

2021-10-10 Thread Anthony Sharp via Gcc-patches
Hi Jason,

Hope you are well. Thanks for the feedback.


> I like adding the configurability, but I think let's keep committing as
> the default behavior.  And adding the parameter to the rollback function
> seems unnecessary.  For the behavior argument, let's use an enum to be
> clearer.
>

Sure, all done. The declaration of the enum could have gone in many places
it seems but I put it in cp-tree.h. Hard to tell exactly where in the file
it needed to go but I took an educated guess and put it in what I think is
the parser.c bit.

If the next token is >, we're already at the end of the template
> argument list.  If not, then something has gone wrong, so give an error
> and skip ahead.
>

I guess it just seemed odd that with a name like
cp_parser_skip_to_end_of_template_parameter_list, the calling function
would expect that it should already be at the end of a template parameter
list. Maybe if it was called
cp_parser_check_reached_end_of_template_parameter_list... but like you
suggested that's been refactored now anyways.

Let's use cp_parser_nth_token_starts_template_argument_list_p.
>

Good point. Surprised that didn't trigger a failed test, but maybe there
just isn't one.

Without the <> they may be template names, but they aren't template-ids.
>

Very true, updated the comment to hopefully make more sense.

I don't think << is ever treated as two <; that only applies to >>.
>

Good to know, I've taken that comment out.

I think we don't want to return early here, we want to go through the
> same ( check that we do for regular names.
>

I have changed it to do that, but could something like "operator- <" ever
be intended as something other than the start of a template-id?

The || case is already handled by the test at the top of the function,
> right?
>

I thought this too originally but I don't think it is. The first check is
just a performance booster, and only returns if it sees a name without a <
after it. If it doesn't see a name, that's fine, because it might instead
be a keyword like operator or template, so we continue. We then checked for
the template and keyword operators and returned appropriately if we saw one
(although, now, because of the change above, we continue if we see the
operator keyword). But at this point we haven't checked what comes after
the operator keyword. It could be gibberish. So at that point we ensure
that we see a name and a < at that point. I don't think this check can go
any earlier, since that's the earliest we can safely ensure we see both a
name and a <, given the possibility of finding keywords initially. Although
that whole part had to be refactored so it might be clearer now anyways.

> +  /* Consume the name and the "<" because
> +  cp_parser_skip_to_end_of_template_parameter_list requires it.  */
> +  cp_lexer_consume_token (parser->lexer);
> +  cp_lexer_consume_token (parser->lexer);
> +
> +  /* Skip to the end.  If it fails, it wasn't a template.  */
> +  if (!cp_parser_skip_to_end_of_template_parameter_list (parser, true))
> +return -1;

How about factoring this call and the previous line into a

cp_parser_skip_template_argument_list (parser)

?

> -  /* Are we ready, yet?  If not, issue error message.  */
> -  if (cp_parser_require (parser, CPP_GREATER, RT_GREATER))
> -return;

We could also factor this check into a separate function, leaving only
the actual walking that's shared between the two callers.

Yeah I think that would make things a lot clearer. I think I've done what
you've suggested - I've removed the awkward call to cp_parser_require and
put it (along with the call to
cp_parser_skip_to_end_of_template_parameter_list) in its own new function
called cp_parser_ensure_reached_end_of_template_parameter_list.

I've then made another function called
cp_parser_skip_entire_template_parameter_list that consumes the "<" and
then skips to the end by calling the other function.

I'm not sure how you'd see ';' after a class template-id, but you could
> see it for a variable template.  Along with many other possible tokens.
>   I guess any operator-or-punctuator after the > would suggest a
> template-id.
>

At least one of us knows what they're talking about. Updated the comment.

You're building a cp_expr that immediately converts back to a tree; just
> pass next_token->u.value to constructor_name_p.
>

Oops... fixed.

This should mention the possible false positive and how to silence it:
> If you get actually meant to write two comparisons in t.m(0), you can
> make that clear by writing (t.m(0).
>

Done.

Hmm, that's a problem.  Can you avoid it by checking declarator_p?
>

We actually already check declarator_p in cp_parser_id_expression in that
case. The reason it throws a warning is because typename14.C is
intentionally malformed; in the eyes of the compiler it's written like an
expression because it's missing the return type (although, even adding a
return type would not make it valid). I'm not sure there's any worthwhile
way around this 

Re: [PATCH] tree-optimization: [PR102622]: wrong code due to signed one bit integer and "a?-1:0"

2021-10-10 Thread Richard Biener via Gcc-patches
On October 10, 2021 7:42:19 AM GMT+02:00, apinski--- via Gcc-patches 
 wrote:
>From: Andrew Pinski 
>
>So it turns out this is kinda of a latent bug but not really latent.
>In GCC 9 and 10, phi-opt would transform a?-1:0 (even for signed 1-bit integer)
>to -(type)a but the type is an one bit integer which means the negation is
>undefined. GCC 11 fixed the problem by checking for a?pow2cst:0 transformation
>before a?-1:0 transformation.
>
>When I added the transformations to match.pd, I had swapped the order not 
>paying
>attention and I didn't expect anything of it. Because there was no testcase 
>failing
>due to this.
>Anyways this fixes the problem on the trunk by swapping the order in match.pd 
>and
>adding a comment of why the order is this way.
>
>I will try to come up with a patch for GCC 9 and 10 series later on which fixes
>the problem there too.
>
>Note I didn't include the original testcase which requires the vectorizer and 
>AVX-512f
>as I can't figure out the right dg options to restrict it to avx-512f but I 
>did come up
>with a testcase which shows the problem and even more shows the problem with 
>the 9/10
>series as mentioned.
>
>OK? Bootstrapped and tested on x86_64-linux-gnu.

OK. 

Richard. 


>   PR tree-optimization/102622
>
>gcc/ChangeLog:
>
>   * match.pd: Swap the order of a?pow2cst:0 and a?-1:0 transformations.
>   Swap the order of a?0:pow2cst and a?0:-1 transformations.
>
>gcc/testsuite/ChangeLog:
>
>   * gcc.c-torture/execute/bitfld-10.c: New test.
>---
> gcc/match.pd  | 26 ---
> .../gcc.c-torture/execute/bitfld-10.c | 24 +
> 2 files changed, 41 insertions(+), 9 deletions(-)
> create mode 100644 gcc/testsuite/gcc.c-torture/execute/bitfld-10.c
>
>diff --git a/gcc/match.pd b/gcc/match.pd
>index 9d7c1ac637f..c153e9a6e98 100644
>--- a/gcc/match.pd
>+++ b/gcc/match.pd
>@@ -3949,15 +3949,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> /* a ? 1 : 0 -> a if 0 and 1 are integral types. */
> (if (integer_onep (@1))
>  (convert (convert:boolean_type_node @0)))
>-/* a ? -1 : 0 -> -a. */
>-(if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1))
>- (negate (convert (convert:boolean_type_node @0
> /* a ? powerof2cst : 0 -> a << (log2(powerof2cst)) */
> (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@1))
>  (with {
>tree shift = build_int_cst (integer_type_node, tree_log2 (@1));
>   }
>-  (lshift (convert (convert:boolean_type_node @0)) { shift; })
>+  (lshift (convert (convert:boolean_type_node @0)) { shift; })))
>+/* a ? -1 : 0 -> -a.  No need to check the TYPE_PRECISION not being 1
>+   here as the powerof2cst case above will handle that case correctly.  */
>+(if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@1))
>+ (negate (convert (convert:boolean_type_node @0))
>   (if (integer_zerop (@1))
>(with {
>   tree booltrue = constant_boolean_node (true, boolean_type_node);
>@@ -3966,16 +3967,23 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  /* a ? 0 : 1 -> !a. */
>  (if (integer_onep (@2))
>   (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } )))
>- /* a ? -1 : 0 -> -(!a). */
>- (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2))
>-  (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } 
>
>  /* a ? powerof2cst : 0 -> (!a) << (log2(powerof2cst)) */
>- (if (INTEGRAL_TYPE_P (type) &&  integer_pow2p (@2))
>+ (if (INTEGRAL_TYPE_P (type) &&  integer_pow2p (@2)
>+ && TYPE_PRECISION (type) != 1)
>   (with {
>   tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
>}
>(lshift (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } 
> ))
>-{ shift; }
>+{ shift; })))
>+ /* a ? -1 : 0 -> -(!a).  No need to check the TYPE_PRECISION not being 1
>+   here as the powerof2cst case above will handle that case correctly.  */
>+ (if (INTEGRAL_TYPE_P (type) && integer_all_onesp (@2))
>+  (negate (convert (bit_xor (convert:boolean_type_node @0) { booltrue; } 
>
>+)
>+   )
>+  )
>+ )
>+)
> #endif
> 
> /* Simplification moved from fold_cond_expr_with_comparison.  It may also
>diff --git a/gcc/testsuite/gcc.c-torture/execute/bitfld-10.c 
>b/gcc/testsuite/gcc.c-torture/execute/bitfld-10.c
>new file mode 100644
>index 000..bdbf5733ce7
>--- /dev/null
>+++ b/gcc/testsuite/gcc.c-torture/execute/bitfld-10.c
>@@ -0,0 +1,24 @@
>+/* PR tree-optimization/102622 */
>+/* Wrong code introduced due to phi-opt
>+   introducing undefined signed interger overflow
>+   with one bit signed integer negation. */
>+
>+struct f{signed t:1;};
>+int g(struct f *a, int t) __attribute__((noipa));
>+int g(struct f *a, int t)
>+{
>+if (t)
>+  a->t = -1;
>+else
>+  a->t = 0;
>+int t1 = a->t;
>+if (t1) return 1;
>+return t1;
>+}
>+
>+int main(void)
>+{
>+struct f a;
>+