[PATCH] rs6000: Remove stale rs6000_global_entry_point_needed_p

2022-08-08 Thread Kewen.Lin via Gcc-patches
Hi,

r10-631 renamed rs6000_global_entry_point_needed_p to
rs6000_global_entry_point_prologue_needed_p.  This patch
removes the stale function declaration.

Bootstrapped and regtested on powerpc64-linux-gnu P8 and
powerpc64le-linux-gnu P9 and P10.

I'll push this soon.

BR,
Kewen
-
gcc/ChangeLog:

* config/rs6000/rs6000-internal.h (rs6000_global_entry_point_needed_p):
Remove function declaration.
---
 gcc/config/rs6000/rs6000-internal.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/gcc/config/rs6000/rs6000-internal.h b/gcc/config/rs6000/rs6000-internal.h
index d80c04b5ae5..da809d1ac8b 100644
--- a/gcc/config/rs6000/rs6000-internal.h
+++ b/gcc/config/rs6000/rs6000-internal.h
@@ -82,7 +82,6 @@ extern const char *rs6000_machine;
from rs6000-logue.cc  */

 extern int uses_TOC (void);
-extern bool rs6000_global_entry_point_needed_p (void);
 extern void rs6000_output_function_prologue (FILE *file);
 extern void rs6000_output_function_epilogue (FILE *file);
 extern bool rs6000_function_ok_for_sibcall (tree decl, tree exp);
--
2.27.0


[PATCH] rs6000: Simplify some code with rs6000_builtin_is_supported

2022-08-08 Thread Kewen.Lin via Gcc-patches
Hi,

In function rs6000_init_builtins, there is an oversight: in one
target debugging hunk guarded by TARGET_DEBUG_BUILTIN we failed
to handle the enum bif_enable value ENB_CELL.  It would be easy
to fix by adding another if case, but for long-term
maintainability this patch instead uses the existing function
rs6000_builtin_is_supported, which centralizes the conditions
for the different enum bif_enable values; if some condition
needs to change later, we only have to update that one function.
This also simplifies a similar usage in function
rs6000_expand_builtin.
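
For reference, the centralized helper boils down to something like the
following sketch.  This is a simplified illustration reconstructed from the
conditions removed below, not the exact body of rs6000_builtin_is_supported
in rs6000-builtin.cc.

/* Sketch only: one switch maps each bif_enable value to the target
   condition that must hold for builtins in that class.  */
static bool
builtin_is_supported_sketch (bif_enable e)
{
  switch (e)
    {
    case ENB_ALWAYS:  return true;
    case ENB_P5:      return TARGET_POPCNTB;
    case ENB_P6:      return TARGET_CMPB;
    case ENB_P6_64:   return TARGET_CMPB && TARGET_POWERPC64;
    case ENB_ALTIVEC: return TARGET_ALTIVEC;
    case ENB_CELL:    return TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL;
    case ENB_VSX:     return TARGET_VSX;
    case ENB_P10:     return TARGET_POWER10;
    /* ... the remaining bif_enable values follow the same pattern ...  */
    default:          return false;
    }
}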

Bootstrapped and regtested on powerpc64-linux-gnu P7 & P8,
and powerpc64le-linux-gnu P9 & P10.

I'll push this soon if no objections.

BR,
Kewen
-

gcc/ChangeLog:

* config/rs6000/rs6000-builtin.cc (rs6000_init_builtins): Fix the
oversight on ENB_CELL by simplifying with rs6000_builtin_is_supported.
(rs6000_expand_builtin): Simplify with rs6000_builtin_is_supported.
---
 gcc/config/rs6000/rs6000-builtin.cc | 65 ++---
 1 file changed, 4 insertions(+), 61 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 2819773d9f9..12afa86854c 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -830,44 +830,8 @@ rs6000_init_builtins (void)
   fprintf (stderr, "\nAutogenerated built-in functions:\n\n");
   for (int i = 1; i < (int) RS6000_BIF_MAX; i++)
{
- bif_enable e = rs6000_builtin_info[i].enable;
- if (e == ENB_P5 && !TARGET_POPCNTB)
-   continue;
- if (e == ENB_P6 && !TARGET_CMPB)
-   continue;
- if (e == ENB_P6_64 && !(TARGET_CMPB && TARGET_POWERPC64))
-   continue;
- if (e == ENB_ALTIVEC && !TARGET_ALTIVEC)
-   continue;
- if (e == ENB_VSX && !TARGET_VSX)
-   continue;
- if (e == ENB_P7 && !TARGET_POPCNTD)
-   continue;
- if (e == ENB_P7_64 && !(TARGET_POPCNTD && TARGET_POWERPC64))
-   continue;
- if (e == ENB_P8 && !TARGET_DIRECT_MOVE)
-   continue;
- if (e == ENB_P8V && !TARGET_P8_VECTOR)
-   continue;
- if (e == ENB_P9 && !TARGET_MODULO)
-   continue;
- if (e == ENB_P9_64 && !(TARGET_MODULO && TARGET_POWERPC64))
-   continue;
- if (e == ENB_P9V && !TARGET_P9_VECTOR)
-   continue;
- if (e == ENB_IEEE128_HW && !TARGET_FLOAT128_HW)
-   continue;
- if (e == ENB_DFP && !TARGET_DFP)
-   continue;
- if (e == ENB_CRYPTO && !TARGET_CRYPTO)
-   continue;
- if (e == ENB_HTM && !TARGET_HTM)
-   continue;
- if (e == ENB_P10 && !TARGET_POWER10)
-   continue;
- if (e == ENB_P10_64 && !(TARGET_POWER10 && TARGET_POWERPC64))
-   continue;
- if (e == ENB_MMA && !TARGET_MMA)
+ enum rs6000_gen_builtins fn_code = (enum rs6000_gen_builtins) i;
+ if (!rs6000_builtin_is_supported (fn_code))
continue;
  tree fntype = rs6000_builtin_info[i].fntype;
  tree t = TREE_TYPE (fntype);
@@ -3370,29 +3334,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */,
  but check for actual availability now, during expand time.  For
  invalid builtins, generate a normal call.  */
   bifdata *bifaddr = &rs6000_builtin_info[uns_fcode];
-  bif_enable e = bifaddr->enable;
-
-  if (!(e == ENB_ALWAYS
-   || (e == ENB_P5 && TARGET_POPCNTB)
-   || (e == ENB_P6 && TARGET_CMPB)
-   || (e == ENB_P6_64 && TARGET_CMPB && TARGET_POWERPC64)
-   || (e == ENB_ALTIVEC && TARGET_ALTIVEC)
-   || (e == ENB_CELL && TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL)
-   || (e == ENB_VSX && TARGET_VSX)
-   || (e == ENB_P7 && TARGET_POPCNTD)
-   || (e == ENB_P7_64 && TARGET_POPCNTD && TARGET_POWERPC64)
-   || (e == ENB_P8 && TARGET_DIRECT_MOVE)
-   || (e == ENB_P8V && TARGET_P8_VECTOR)
-   || (e == ENB_P9 && TARGET_MODULO)
-   || (e == ENB_P9_64 && TARGET_MODULO && TARGET_POWERPC64)
-   || (e == ENB_P9V && TARGET_P9_VECTOR)
-   || (e == ENB_IEEE128_HW && TARGET_FLOAT128_HW)
-   || (e == ENB_DFP && TARGET_DFP)
-   || (e == ENB_CRYPTO && TARGET_CRYPTO)
-   || (e == ENB_HTM && TARGET_HTM)
-   || (e == ENB_P10 && TARGET_POWER10)
-   || (e == ENB_P10_64 && TARGET_POWER10 && TARGET_POWERPC64)
-   || (e == ENB_MMA && TARGET_MMA)))
+
+  if (!rs6000_builtin_is_supported (fcode))
 {
   rs6000_invalid_builtin (fcode);
   return expand_call (exp, target, ignore);
--
2.27.0


[PATCH] rs6000: Rework ELFv2 support for -fpatchable-function-entry* [PR99888]

2022-08-08 Thread Kewen.Lin via Gcc-patches
Hi,

As PR99888 and its related PRs show, the current support for
-fpatchable-function-entry on powerpc ELFv2 doesn't work
well for functions that have a global entry point.  For example,
with the command line option -fpatchable-function-entry=3,2,
we get the assembly below without this patch:

  .LPFE1:
  nop
  nop
  .type   foo, @function
  foo:
  nop
  .LFB0:
  .cfi_startproc
  .LCF0:
  0:  addis 2,12,.TOC.-.LCF0@ha
  addi 2,2,.TOC.-.LCF0@l
  .localentry foo,.-foo

This assembly is unexpected, since the patched NOPs have no effect
when the function is entered through its local entry point.

This patch updates the patching so that the NOPs are emitted before
and after the local entry point; the result looks like:

  .type   foo, @function
  foo:
  .LFB0:
  .cfi_startproc
  .LCF0:
  0:  addis 2,12,.TOC.-.LCF0@ha
  addi 2,2,.TOC.-.LCF0@l
  nop
  nop
  .localentry foo,.-foo
  nop

Bootstrapped and regtested on powerpc64-linux-gnu P7 & P8,
and powerpc64le-linux-gnu P9 & P10.

Is it ok for trunk?

BR,
Kewen
-

PR target/99888
PR target/105649

gcc/ChangeLog:

* config/rs6000/rs6000-internal.h
(rs6000_print_patchable_function_entry): New function declaration.
* config/rs6000/rs6000-logue.cc (rs6000_output_function_prologue):
Support patchable-function-entry by emitting NOPs before and after
local entry for the function that needs global entry.
* config/rs6000/rs6000.cc (rs6000_print_patchable_function_entry): Skip
the function that needs global entry till global entry has been
emitted.
* config/rs6000/rs6000.h (struct machine_function): New bool member
global_entry_emitted.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr99888-1.c: New test.
* gcc.target/powerpc/pr99888-2.c: New test.
* gcc.target/powerpc/pr99888-3.c: New test.
* gcc.target/powerpc/pr99888-4.c: New test.
* gcc.target/powerpc/pr99888-5.c: New test.
* gcc.target/powerpc/pr99888-6.c: New test.
* c-c++-common/patchable_function_entry-default.c: Adjust for
powerpc_elfv2 to avoid compilation error.
---
 gcc/config/rs6000/rs6000-internal.h   |  5 ++
 gcc/config/rs6000/rs6000-logue.cc | 29 
 gcc/config/rs6000/rs6000.cc   | 10 +++-
 gcc/config/rs6000/rs6000.h|  4 ++
 .../patchable_function_entry-default.c|  1 +
 gcc/testsuite/gcc.target/powerpc/pr99888-1.c  | 47 +++
 gcc/testsuite/gcc.target/powerpc/pr99888-2.c  | 47 +++
 gcc/testsuite/gcc.target/powerpc/pr99888-3.c  | 13 +
 gcc/testsuite/gcc.target/powerpc/pr99888-4.c  | 13 +
 gcc/testsuite/gcc.target/powerpc/pr99888-5.c  | 13 +
 gcc/testsuite/gcc.target/powerpc/pr99888-6.c  | 14 ++
 11 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-3.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-4.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-5.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr99888-6.c

diff --git a/gcc/config/rs6000/rs6000-internal.h b/gcc/config/rs6000/rs6000-internal.h
index b9e82c0468d..da809d1ac8b 100644
--- a/gcc/config/rs6000/rs6000-internal.h
+++ b/gcc/config/rs6000/rs6000-internal.h
@@ -182,10 +182,15 @@ extern tree rs6000_fold_builtin (tree fndecl ATTRIBUTE_UNUSED,
 tree *args ATTRIBUTE_UNUSED,
 bool ignore ATTRIBUTE_UNUSED);

+extern void rs6000_print_patchable_function_entry (FILE *,
+  unsigned HOST_WIDE_INT,
+  bool);
+
 extern bool rs6000_passes_float;
 extern bool rs6000_passes_long_double;
 extern bool rs6000_passes_vector;
 extern bool rs6000_returns_struct;
 extern bool cpu_builtin_p;

 #endif
diff --git a/gcc/config/rs6000/rs6000-logue.cc b/gcc/config/rs6000/rs6000-logue.cc
index 59fe1c8cb8b..ca0a6c1c64a 100644
--- a/gcc/config/rs6000/rs6000-logue.cc
+++ b/gcc/config/rs6000/rs6000-logue.cc
@@ -4013,11 +4013,40 @@ rs6000_output_function_prologue (FILE *file)
  fprintf (file, "\tadd 2,2,12\n");
}

+  unsigned short patch_area_size = crtl->patch_area_size;
+  unsigned short patch_area_entry = crtl->patch_area_entry;
+  /* Need to emit the patching area.  */
+  if (patch_area_size > 0)
+   {
+ cfun->machine->global_entry_emitted = true;
+ /* As ELFv2 ABI shows, the allowable bytes past the global entry
+point are 0, 4, 8, 16, 32 and 64.  Considering there are two
+non-prefixed instructions for global entry (8 bytes), the count
+for patchable NOPs before local entry would be 2, 6 and 14.  */
+  

Re: [PATCH v2, rs6000] Add multiply-add expand pattern [PR103109]

2022-08-08 Thread Kewen.Lin via Gcc-patches
Hi Haochen,

Thanks for the patch.

on 2022/8/8 14:04, HAO CHEN GUI wrote:
> Hi,
>   This patch adds an expand and several insns for multiply-add with three
> 64bit operands.
> 
>   Compared with last version, the main changes are:
> 1 The "maddld" pattern is reused for the low-part generation.
> 2 A runnable testcase replaces the original compiling case.
> 3 Fixes indention problems.
> 
>   Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
> Is this okay for trunk? Any recommendations? Thanks a lot.
> 
> ChangeLog
> 2022-08-08  Haochen Gui  
> 
> gcc/
>   PR target/103109
>   * config/rs6000/rs6000.md (maddditi4): New pattern for multiply-add.
>   (madddi4_highpart): New.
>   (madddi4_highpart_le): New.
> 
> gcc/testsuite/
>   PR target/103109
>   * gcc.target/powerpc/pr103109.c: New.
> 
> 
> 
> patch.diff
> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
> index c55ee7e171a..4c58023490a 100644
> --- a/gcc/config/rs6000/rs6000.md
> +++ b/gcc/config/rs6000/rs6000.md
> @@ -3217,7 +3217,7 @@ (define_expand "mul<mode>3"
>DONE;
>  })
> 
> -(define_insn "*maddld<mode>4"
> +(define_insn "maddld<mode>4"
>[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
>   (plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
>   (match_operand:GPR 2 "gpc_reg_operand" "r"))
> @@ -3226,6 +3226,52 @@ (define_insn "*maddld4"
>"maddld %0,%1,%2,%3"
>[(set_attr "type" "mul")])
> 
> +(define_expand "maddditi4"
> +  [(set (match_operand:TI 0 "gpc_reg_operand")
> + (plus:TI
> +   (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand"))
> +(any_extend:TI (match_operand:DI 2 "gpc_reg_operand")))
> +   (any_extend:TI (match_operand:DI 3 "gpc_reg_operand"))))]
> +  "TARGET_MADDLD && TARGET_POWERPC64"
> +{
> +  rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 
> 0);
> +  rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 
> 8);
> +
> +  emit_insn (gen_maddlddi4 (op0_lo, operands[1], operands[2], operands[3]));
> +
> +  if (BYTES_BIG_ENDIAN)
> +emit_insn (gen_madddi4_highpart (op0_hi, operands[1], operands[2],
> + operands[3]));
> +  else
> +emit_insn (gen_madddi4_highpart_le (op0_hi, operands[1], operands[2],
> +operands[3]));
> +  DONE;
> +})
> +
> +(define_insn "madddi4_highpart"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> + (subreg:DI
> +   (plus:TI
> + (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
> +  (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
> + (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
> +  0))]
> +  "TARGET_MADDLD && BYTES_BIG_ENDIAN && TARGET_POWERPC64"
> +  "maddhd %0,%1,%2,%3"
> +  [(set_attr "type" "mul")])
> +
> +(define_insn "madddi4_highpart_le"
> +  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
> + (subreg:DI
> +   (plus:TI
> + (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
> +  (any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
> + (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
> +  8))]
> +  "TARGET_MADDLD && !BYTES_BIG_ENDIAN && TARGET_POWERPC64"
> +  "maddhd %0,%1,%2,%3"
> +  [(set_attr "type" "mul")])
> +
>  (define_insn "udiv<mode>3"
>[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
>  (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
> diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c 
> b/gcc/testsuite/gcc.target/powerpc/pr103109.c
> new file mode 100644
> index 000..969b9751b21
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
> @@ -0,0 +1,110 @@
> +/* { dg-do run { target { has_arch_ppc64 } } } */
> +/* { dg-options "-O2 -mdejagnu-cpu=power9 -save-temps" } */
> +/* { dg-require-effective-target int128 } */
> +/* { dg-require-effective-target p9modulo_hw } */
> +/* { dg-final { scan-assembler-times {\mmaddld\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mmaddhd\M} 1 } } */
> +/* { dg-final { scan-assembler-times {\mmaddhdu\M} 1 } } */
> +

Maybe it's good to split this case into two, one for compiling and the other
for running.  Since the generated asm is a test point here, a separate
compile-only case keeps that part of the test coverage on hosts which are
unable to run this case.  You can move the functions multiply_add and
multiply_addu into one common header file, then include it in both source
files, roughly along the lines of the sketch below.
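
(File names, options and the helper's body below are only illustrative, not
taken from the patch; the dg- directives mirror the ones already used in
pr103109.c.)

/* pr103109.h: helpers shared by both tests (multiply_addu would live here
   too in the real split).  */
__attribute__ ((noinline)) __int128
multiply_add (long a, long b, long c)
{
  return (__int128) a * b + c;
}

/* pr103109-compile.c: compile-only test, keeps the asm coverage everywhere.  */
/* { dg-do compile { target { has_arch_ppc64 } } } */
/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
/* { dg-require-effective-target int128 } */
/* { dg-final { scan-assembler {\mmaddld\M} } } */
/* { dg-final { scan-assembler {\mmaddhd\M} } } */
#include "pr103109.h"

/* pr103109-run.c: execution test, only where the insns can actually run.  */
/* { dg-do run { target { has_arch_ppc64 } } } */
/* { dg-options "-O2 -mdejagnu-cpu=power9" } */
/* { dg-require-effective-target int128 } */
/* { dg-require-effective-target p9modulo_hw } */
#include "pr103109.h"

int
main (void)
{
  return multiply_add (2, 3, 4) == 10 ? 0 : 1;
}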

> +union U {
> +  __int128 i128;
> +  struct {
> +long l1;
> +long l2;
> +  } s;
> +};
> +
> +__int128
> +create_i128 (long most_sig, long least_sig)
> +{
> +  union U u;
> +
> +#if __LITTLE_ENDIAN__
> +  u.s.l1 = least_sig;
> +  u.s.l2 = most_sig;
> +#else
> +  u.s.l1 = most_sig;
> +  u.s.l2 = least_sig;
> +#endif
> +  return u.i128;
> +}
> +
> +
> +#define DEBUG 0
> +

Re: [PATCH] rs6000: Fix incorrect RTL for Power LE when removing the UNSPECS [PR106069]

2022-08-08 Thread Kewen.Lin via Gcc-patches
Hi Xionghu,

Thanks for the fix.

on 2022/8/8 11:42, Xionghu Luo wrote:
> The native RTL expression for vec_mrghw should be the same for BE and LE,
> as it is register- and endian-independent.  So both BE and LE need to
> generate exactly the same RTL with index [0 4 1 5] when expanding vec_mrghw
> with vec_select and vec_concat.
> 
> (set (reg:V4SI 141) (vec_select:V4SI (vec_concat:V8SI
>  (subreg:V4SI (reg:V16QI 139) 0)
>  (subreg:V4SI (reg:V16QI 140) 0))
>  [const_int 0 4 1 5]))
> 
> Then combine pass could do the nested vec_select optimization
> in simplify-rtx.c:simplify_binary_operation_1 also on both BE and LE:
> 
> 21: r150:V4SI=vec_select(vec_concat(r141:V4SI,r146:V4SI),parallel [0 4 1 5])
> 24: {r151:SI=vec_select(r150:V4SI,parallel [const_int 3]);}
> 
> =>
> 
> 21: r150:V4SI=vec_select(vec_concat(r141:V4SI,r146:V4SI),parallel)
> 24: {r151:SI=vec_select(r146:V4SI,parallel [const_int 1]);}
> 
> The endianness check is needed only once, at final ASM generation.
> The resulting ASM is better because the nested vec_select is simplified to a
> simple scalar load.
> 
> Regression tested pass for Power8{LE,BE}{32,64} and Power{9,10}LE{32,64}

Sorry, no -m32 for LE testing.  I noticed the attachment in that PR didn't
include the test case (though the changelog has it), so I re-tested it
again, nothing changed.  :)

> Linux(Thanks to Kewen), OK for master?  Or should we revert r12-4496 to
> restore to the UNSPEC implementation?
> 

I have some concerns about the changed "altivec_*_direct" patterns.  IMHO the
suffix "_direct" normally indicates that the define_insn maps directly to the
corresponding hw insn.  With this change, altivec_vmrghb_direct, for example,
can be mapped to either vmrghb or vmrglb, which looks misleading.  Maybe we
can add corresponding _direct_le and _direct_be versions, both mapped to the
same insn but with different RTL patterns.  Looking forward to Segher's and
David's suggestions.

> gcc/ChangeLog:
>   PR target/106069
>   * config/rs6000/altivec.md (altivec_vmrghb): Emit same native
>   RTL for BE and LE.
>   (altivec_vmrghh): Likewise.
>   (altivec_vmrghw): Likewise.
>   (*altivec_vmrghsf): Adjust.
>   (altivec_vmrglb): Likewise.
>   (altivec_vmrglh): Likewise.
>   (altivec_vmrglw): Likewise.
>   (*altivec_vmrglsf): Adjust.
>   (altivec_vmrghb_direct): Emit different ASM for BE and LE.
>   (altivec_vmrghh_direct): Likewise.
>   (altivec_vmrghw_direct_<mode>): Likewise.
>   (altivec_vmrglb_direct): Likewise.
>   (altivec_vmrglh_direct): Likewise.
>   (altivec_vmrglw_direct_<mode>): Likewise.
>   (vec_widen_smult_hi_v16qi): Adjust.
>   (vec_widen_smult_lo_v16qi): Adjust.
>   (vec_widen_umult_hi_v16qi): Adjust.
>   (vec_widen_umult_lo_v16qi): Adjust.
>   (vec_widen_smult_hi_v8hi): Adjust.
>   (vec_widen_smult_lo_v8hi): Adjust.
>   (vec_widen_umult_hi_v8hi): Adjust.
>   (vec_widen_umult_lo_v8hi): Adjust.
>   * config/rs6000/rs6000.cc (altivec_expand_vec_perm_const): Emit same
>   native RTL for BE and LE.
>   * config/rs6000/vsx.md (vsx_xxmrghw_<mode>): Likewise.
>   (vsx_xxmrglw_<mode>): Likewise.
> 
> gcc/testsuite/ChangeLog:
>   PR target/106069
>   * gcc.target/powerpc/pr106069.C: New test.
> 
> Signed-off-by: Xionghu Luo 
> ---
>  gcc/config/rs6000/altivec.md| 122 
>  gcc/config/rs6000/rs6000.cc |  36 +++---
>  gcc/config/rs6000/vsx.md|  16 +--
>  gcc/testsuite/gcc.target/powerpc/pr106069.C | 118 +++
>  4 files changed, 209 insertions(+), 83 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr106069.C
> 
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 2c4940f2e21..8d9c0109559 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -1144,11 +1144,7 @@ (define_expand "altivec_vmrghb"
> (use (match_operand:V16QI 2 "register_operand"))]
>"TARGET_ALTIVEC"
>  {
> -  rtx (*fun) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN ? gen_altivec_vmrghb_direct
> - : gen_altivec_vmrglb_direct;
> -  if (!BYTES_BIG_ENDIAN)
> -std::swap (operands[1], operands[2]);
> -  emit_insn (fun (operands[0], operands[1], operands[2]));
> +  emit_insn (gen_altivec_vmrghb_direct (operands[0], operands[1], 
> operands[2]));
>DONE;
>  })
>  
> @@ -1167,7 +1163,12 @@ (define_insn "altivec_vmrghb_direct"
>(const_int 6) (const_int 22)
>(const_int 7) (const_int 23)])))]
>"TARGET_ALTIVEC"
> -  "vmrghb %0,%1,%2"
> +  {
> + if (BYTES_BIG_ENDIAN)
> +  return "vmrghb %0,%1,%2";
> +else
> +  return "vmrglb %0,%2,%1";
> + }
>[(set_attr "type" "vecperm")])
>  
>  (define_expand "altivec_vmrghh"
> @@ -1176,11 +1177,7 @@ (define_expand "altivec_vmrghh"
> (use (match_operand:V8HI 2 "register_operand"))]
>"TARGET_ALTIVEC"
>  {
> -  

Re: [PATCH][_GLIBCXX_DEBUG] Refine singular iterator state

2022-08-08 Thread Jonathan Wakely via Gcc-patches
On Mon, 8 Aug 2022, 19:15 François Dumont via Libstdc++, <
libstd...@gcc.gnu.org> wrote:

> On 08/08/22 15:19, Jonathan Wakely wrote:
> > On Mon, 8 Aug 2022 at 06:07, François Dumont via Libstdc++
> >  wrote:
> >> Another version of this patch with just a new test case showing what
> >> wrong code was unnoticed previously by the _GLIBCXX_DEBUG mode.
> >>
> >> On 04/08/22 22:56, François Dumont wrote:
> >>> This an old patch I had prepared a long time ago, I don't think I ever
> >>> submitted it.
> >>>
> >>>  libstdc++: [_GLIBCXX_DEBUG] Do not consider detached iterators as
> >>> value-initialized
> >>>
> >>>  An attach iterator has its _M_version set to something != 0. This
> >>> value shall be preserved
> >>>  when detaching it so that the iterator does not look like a value
> >>> initialized one.
> >>>
> >>>  libstdc++-v3/ChangeLog:
> >>>
> >>>  * include/debug/formatter.h (__singular_value_init): New
> >>> _Iterator_state enum entry.
> >>>  (_Parameter<>(const _Safe_iterator<>&, const char*,
> >>> _Is_iterator)): Check if iterator
> >>>  parameter is value-initialized.
> >>>  (_Parameter<>(const _Safe_local_iterator<>&, const char*,
> >>> _Is_iterator)): Likewise.
> >>>  * include/debug/safe_iterator.h
> >>> (_Safe_iterator<>::_M_value_initialized()): New. Adapt
> >>>  checks.
> >>>  * include/debug/safe_local_iterator.h
> >>> (_Safe_local_iterator<>::_M_value_initialized()): New.
> >>>  Adapt checks.
> >>>  * src/c++11/debug.cc (_Safe_iterator_base::_M_reset): Do
> >>> not reset _M_version.
> >>>  (print_field(PrintContext&, const _Parameter&, const
> >>> char*)): Adapt state_names.
> >>>  * testsuite/23_containers/deque/debug/iterator1_neg.cc:
> >>> New test.
> >>>  * testsuite/23_containers/deque/debug/iterator2_neg.cc:
> >>> New test.
> >>>  *
> >>> testsuite/23_containers/forward_list/debug/iterator1_neg.cc: New test.
> >>>  *
> >>> testsuite/23_containers/forward_list/debug/iterator2_neg.cc: New test.
> >>>
> >>> Tested under Linux x86_64 _GLIBCXX_DEBUG mode.
> >>>
> >>> Ok to commit ?
> >>>
> >>> François
> >
> >> diff --git a/libstdc++-v3/src/c++11/debug.cc
> b/libstdc++-v3/src/c++11/debug.cc
> >> index 4706defedf1..cf8e6f48081 100644
> >> --- a/libstdc++-v3/src/c++11/debug.cc
> >> +++ b/libstdc++-v3/src/c++11/debug.cc
> >> @@ -426,7 +426,8 @@ namespace __gnu_debug
> >>_M_reset() throw ()
> >>{
> >>  __atomic_store_n(&_M_sequence, (_Safe_sequence_base*)0,
> __ATOMIC_RELEASE);
> >> -_M_version = 0;
> >> +// Detach iterator shall not look like a value-initialized one.
> >> +// _M_version = 0;
> >>  _M_prior = 0;
> >>  _M_next = 0;
> >>}
> > I think this would be clearer as "Do not reset version, so that a
> > detached iterator does not look like a value-initialized one."
> >
> >> +// { dg-do run { xfail *-*-* } }
> >> +// { dg-require-debug-mode "" }
> >> +
> >> +#include <deque>
> >> +
> >> +#include <testsuite_hooks.h>
> >> +
> >> +void test01()
> >> +{
> >> +  typedef typename std::deque<int>::iterator It;
> >> +  std::deque<int> dq;
> >> +  dq.push_back(1);
> >> +
> >> +  It it = It();
> >> +  VERIFY( dq.begin() != it );
> > Is there any reason to use VERIFY here?
> Only make sure the compiler do not optimize this check away.
>

It can't do that. If the debug check results in output to the terminal and
aborting the program then that's an observable side effect that cannot be
optimised away.

If the compiler was somehow smart enough to know that the comparison is
undefined then we wouldn't need debug mode.


>
> > We're expecting the comparison to abort in the debug mode checks,
> > right? Which would happen if we just do:
> >
> > (void) dq.begin() == it;
>
> I guess this (void) cast is doing the same so adopted.
>

No, the cast silences a warning.



> > Using VERIFY just makes it look like we're expecting the test to be
> > XFAIL because the assertion will fail, but that's not what is being
> > tested.
> >
> > OK for trunk with those changes, thanks.
> >
> Updated committed patch attached.
>

Thanks.


>


[COMMITTED] PR tree-optimization/106556 - Evaluate condition arguments with the correct type.

2022-08-08 Thread Andrew MacLeod via Gcc-patches
When evaluating a COND_EXPR, we need to evaluate both operands.  With the
recent changes to floating point, we missed that we were accidentally
using the LHS range type for the operands.  That was fine when
everything was an irange, but not so any more.

This patch simply uses the right range type for the temporary ranges.

Bootstrapped on x86_64-pc-linux-gnu with no regressions.  Pushed.

Andrew
commit 6d57764711d3b4441d4d99876d9d3370bdec6284
Author: Andrew MacLeod 
Date:   Mon Aug 8 15:13:51 2022 -0400

Evaluate condition arguments with the correct type.

Processing of a cond_expr requires that a range of the correct type for the
operands of the cond_expr is passed in.

PR tree-optimization/106556
gcc/
* gimple-range-gori.cc (gori_compute::condexpr_adjust): Use the
	type of the cond_expr operands being evaluated.

gcc/testsuite/
* gfortran.dg/pr106556.f90: New.

diff --git a/gcc/gimple-range-gori.cc b/gcc/gimple-range-gori.cc
index a43e44c841e..8879e44cba1 100644
--- a/gcc/gimple-range-gori.cc
+++ b/gcc/gimple-range-gori.cc
@@ -1398,16 +1398,17 @@ gori_compute::condexpr_adjust (vrange &r1, vrange &r2, gimple *, tree cond,
 }
 
// Now solve for SSA1 or SSA2 if they are in the dependency chain.
-  Value_Range tmp (type);
if (ssa1 && in_chain_p (ssa1, cond_name))
 {
-  if (compute_operand_range (tmp, def_stmt, cond_true, ssa1, src))
-	r1.intersect (tmp);
+  Value_Range tmp1 (TREE_TYPE (ssa1));
+  if (compute_operand_range (tmp1, def_stmt, cond_true, ssa1, src))
+	r1.intersect (tmp1);
 }
   if (ssa2 && in_chain_p (ssa2, cond_name))
 {
-  if (compute_operand_range (tmp, def_stmt, cond_false, ssa2, src))
-	r2.intersect (tmp);
+  Value_Range tmp2 (TREE_TYPE (ssa2));
+  if (compute_operand_range (tmp2, def_stmt, cond_false, ssa2, src))
+	r2.intersect (tmp2);
 }
   if (idx)
 {
diff --git a/gcc/testsuite/gfortran.dg/pr106556.f90 b/gcc/testsuite/gfortran.dg/pr106556.f90
new file mode 100644
index 000..01b89a8eee2
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr106556.f90
@@ -0,0 +1,10 @@
+! { dg-do compile }
+! { dg-options "-O1 -fnon-call-exceptions -ftree-loop-if-convert" }
+
+
+program p
+  real :: a(2)
+
+  a(:) = 1.0
+  if (minloc (a, dim = 1).ne.1) STOP 1
+end


[PATCH v2] c++: Extend -Wredundant-move for const-qual objects [PR90428]

2022-08-08 Thread Marek Polacek via Gcc-patches
On Sat, Aug 06, 2022 at 03:58:13PM -0800, Jason Merrill wrote:
> On 8/6/22 11:13, Marek Polacek wrote:
> > In this PR, Jon suggested extending the -Wredundant-move warning
> > to warn when the user is moving a const object as in:
> > 
> >struct T { };
> > 
> >T f(const T& t)
> >{
> >  return std::move(t);
> >}
> > 
> > where the std::move is redundant, because T does not have
> > a T(const T&&) constructor (which is very unlikely).  Even with
> > the std::move, T(T&&) would not be used because it would mean
> > losing the const.  Instead, T(const T&) will be called.
> > 
> > I had to restructure the function a bit, but it's better now.  This patch
> > depends on my other recent patches to maybe_warn_pessimizing_move.
> > 
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > PR c++/90428
> > 
> > gcc/cp/ChangeLog:
> > 
> > * typeck.cc (maybe_warn_pessimizing_move): Extend the
> > -Wredundant-move warning to warn about std::move on a
> > const-qualified object.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp0x/Wredundant-move1.C: Adjust dg-warning.
> > * g++.dg/cpp0x/Wredundant-move9.C: Likewise.
> > * g++.dg/cpp0x/Wredundant-move10.C: New test.
> > ---
> >   gcc/cp/typeck.cc  | 157 +++---
> >   gcc/testsuite/g++.dg/cpp0x/Wredundant-move1.C |   3 +-
> >   .../g++.dg/cpp0x/Wredundant-move10.C  |  61 +++
> >   gcc/testsuite/g++.dg/cpp0x/Wredundant-move9.C |   3 +-
> >   4 files changed, 162 insertions(+), 62 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/Wredundant-move10.C
> > 
> > diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
> > index 70a5efc45de..802bc9c43fb 100644
> > --- a/gcc/cp/typeck.cc
> > +++ b/gcc/cp/typeck.cc
> > @@ -10411,72 +10411,109 @@ maybe_warn_pessimizing_move (tree expr, tree 
> > type, bool return_p)
> > return;
> >   }
> > -  /* We're looking for *std::move<T&> ((T &) &arg).  */
> > -  if (REFERENCE_REF_P (expr)
> > -  && TREE_CODE (TREE_OPERAND (expr, 0)) == CALL_EXPR)
> > -{
> > -  tree fn = TREE_OPERAND (expr, 0);
> > -  if (is_std_move_p (fn))
> > -   {
> > - tree arg = CALL_EXPR_ARG (fn, 0);
> > - tree moved;
> > - if (TREE_CODE (arg) != NOP_EXPR)
> > -   return;
> > - arg = TREE_OPERAND (arg, 0);
> > - if (TREE_CODE (arg) != ADDR_EXPR)
> > -   return;
> > - arg = TREE_OPERAND (arg, 0);
> > - arg = convert_from_reference (arg);
> > - if (can_do_rvo_p (arg, type))
> > -   {
> > - auto_diagnostic_group d;
> > - if (!warning_suppressed_p (expr, OPT_Wpessimizing_move)
> > - && warning_at (loc, OPT_Wpessimizing_move,
> > -"moving a temporary object prevents copy "
> > -"elision"))
> > -   inform (loc, "remove % call");
> > -   }
> > - /* The rest of the warnings is only relevant for when we are
> > -returning from a function.  */
> > - else if (!return_p)
> > -   return;
> > - /* Warn if we could do copy elision were it not for the move.  */
> > - else if (can_do_nrvo_p (arg, type))
> > +  /* First, check if this is a call to std::move.  */
> > +  if (!REFERENCE_REF_P (expr)
> > +  || TREE_CODE (TREE_OPERAND (expr, 0)) != CALL_EXPR)
> > +return;
> > +  tree fn = TREE_OPERAND (expr, 0);
> > +  if (!is_std_move_p (fn))
> > +return;
> > +  tree arg = CALL_EXPR_ARG (fn, 0);
> > +  if (TREE_CODE (arg) != NOP_EXPR)
> > +return;
> > +  /* If we're looking at *std::move<T&> ((T &) &arg), do the pessimizing N/RVO
> > + and implicitly-movable warnings.  */
> > +  if (TREE_CODE (TREE_OPERAND (arg, 0)) == ADDR_EXPR)
> > +{
> > +  arg = TREE_OPERAND (arg, 0);
> > +  arg = TREE_OPERAND (arg, 0);
> > +  arg = convert_from_reference (arg);
> > +  if (can_do_rvo_p (arg, type))
> 
> Incidentally, this function should probably have a different name if we're
> checking it in non-return situations.

I've renamed it to can_elide_copy_prvalue_p in this patch.
 
> > +   {
> > + auto_diagnostic_group d;
> > + if (!warning_suppressed_p (expr, OPT_Wpessimizing_move)
> > + && warning_at (loc, OPT_Wpessimizing_move,
> > +"moving a temporary object prevents copy elision"))
> > +   inform (loc, "remove % call");
> > +   }
> > +  /* The rest of the warnings is only relevant for when we are 
> > returning
> > +from a function.  */
> > +  if (!return_p)
> > +   return;
> > +
> > +  tree moved;
> > +  /* Warn if we could do copy elision were it not for the move.  */
> > +  if (can_do_nrvo_p (arg, type))
> > +   {
> > + auto_diagnostic_group d;
> > + if (!warning_suppressed_p (expr, OPT_Wpessimizing_move)
> > + && warning_at (loc, OPT_Wpessimizing_move,
> > +"moving a local object in a return statement "
> > +"prevents copy 

Re: [PATCH v4 2/2] preprocessor/106426: Treat u8 character literals as unsigned in char8_t modes.

2022-08-08 Thread Joseph Myers
On Mon, 8 Aug 2022, Tom Honermann via Gcc-patches wrote:

> On 8/2/22 6:14 PM, Joseph Myers wrote:
> > On Tue, 2 Aug 2022, Tom Honermann via Gcc-patches wrote:
> > 
> > > This patch corrects handling of UTF-8 character literals in preprocessing
> > > directives so that they are treated as unsigned types in char8_t enabled
> > > C++ modes (C++17 with -fchar8_t or C++20 without -fno-char8_t).
> > > Previously,
> > > UTF-8 character literals were always treated as having the same type as
> > > ordinary character literals (signed or unsigned dependent on target or use
> > > of the -fsigned-char or -funsigned char options).
> > OK in the absence of C++ maintainer objections within 72 hours.  (This is
> > the case where, when I added support for such literals for C (commit
> > 7c5890cc0a0ecea0e88cc39e9fba6385fb579e61), I raised the question of
> > whether they should be unsigned in the preprocessor for C++ as well.)
> 
> Joseph, would you be so kind as to commit this patch series for me? I don't
> have commit access. Thank you in advance!

Done.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH v2] c++: Tweak for -Wpessimizing-move in templates [PR89780]

2022-08-08 Thread Marek Polacek via Gcc-patches
On Sat, Aug 06, 2022 at 04:02:13PM -0700, Jason Merrill wrote:
> On 8/4/22 11:46, Marek Polacek wrote:
> > In my previous patches I've been extending our std::move warnings,
> > but this tweak actually dials it down a little bit.  As reported in
> > bug 89780, it's questionable to warn about expressions in templates
> > that were type-dependent, but aren't anymore because we're instantiating
> > the template.  As in,
> > 
> >template 
> >Dest withMove() {
> >  T x;
> >  return std::move(x);
> >}
> > 
> >template Dest withMove(); // #1
> >template Dest withMove(); // #2
> > 
> > Saying that the std::move is pessimizing for #1 is not incorrect, but
> > it's not useful, because removing the std::move would then pessimize #2.
> > So the user can't really win.  At the same time, disabling the warning
> > just because we're in a template would be going too far, I still want to
> > warn for
> > 
> >template 
> >Dest withMove() {
> >  Dest x;
> >  return std::move(x);
> >}
> > 
> > because the std::move therein will be pessimizing for any instantiation.
> > 
> > So I'm using the suppress_warning machinery to that effect.
> > Problem: I had to add a new group to nowarn_spec_t, otherwise
> > suppressing the -Wpessimizing-move warning would disable a whole bunch
> > of other warnings, which we really don't want.
> > 
> > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > 
> > PR c++/89780
> > 
> > gcc/cp/ChangeLog:
> > 
> > * pt.cc (tsubst_copy_and_build) : Maybe suppress
> > -Wpessimizing-move.
> > * typeck.cc (maybe_warn_pessimizing_move): Don't issue warnings
> > if they are suppressed.
> > (check_return_expr): Disable -Wpessimizing-move when returning
> > a dependent expression.
> > 
> > gcc/ChangeLog:
> > 
> > * diagnostic-spec.cc (nowarn_spec_t::nowarn_spec_t): Handle
> > OPT_Wpessimizing_move and OPT_Wredundant_move.
> > * diagnostic-spec.h (nowarn_spec_t): Add NW_REDUNDANT enumerator.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > * g++.dg/cpp0x/Wpessimizing-move3.C: Remove dg-warning.
> > * g++.dg/cpp0x/Wpessimizing-move7.C: Likewise.
> > * g++.dg/cpp0x/Wredundant-move2.C: Likewise.
> > * g++.dg/cpp0x/Wpessimizing-move9.C: New test.
> > ---
> >   gcc/cp/pt.cc  |  3 +
> >   gcc/cp/typeck.cc  | 20 +++--
> >   gcc/diagnostic-spec.cc|  7 +-
> >   gcc/diagnostic-spec.h |  4 +-
> >   .../g++.dg/cpp0x/Wpessimizing-move3.C |  2 +-
> >   .../g++.dg/cpp0x/Wpessimizing-move7.C |  2 +-
> >   .../g++.dg/cpp0x/Wpessimizing-move9.C | 89 +++
> >   gcc/testsuite/g++.dg/cpp0x/Wredundant-move2.C |  4 +-
> >   8 files changed, 119 insertions(+), 12 deletions(-)
> >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move9.C
> > 
> > diff --git a/gcc/cp/pt.cc b/gcc/cp/pt.cc
> > index 6c581fe0fb7..fe7e809fc2d 100644
> > --- a/gcc/cp/pt.cc
> > +++ b/gcc/cp/pt.cc
> > @@ -21215,6 +21215,9 @@ tsubst_copy_and_build (tree t,
> >   CALL_EXPR_ORDERED_ARGS (call) = ord;
> >   CALL_EXPR_REVERSE_ARGS (call) = rev;
> > }
> > +   if (warning_suppressed_p (t, OPT_Wpessimizing_move))
> > + /* This also suppresses -Wredundant-move.  */
> > + suppress_warning (ret, OPT_Wpessimizing_move);
> >   }
> > RETURN (ret);
> > diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
> > index 2650beb780e..70a5efc45de 100644
> > --- a/gcc/cp/typeck.cc
> > +++ b/gcc/cp/typeck.cc
> > @@ -10430,9 +10430,10 @@ maybe_warn_pessimizing_move (tree expr, tree type, 
> > bool return_p)
> >   if (can_do_rvo_p (arg, type))
> > {
> >   auto_diagnostic_group d;
> > - if (warning_at (loc, OPT_Wpessimizing_move,
> > - "moving a temporary object prevents copy "
> > - "elision"))
> > + if (!warning_suppressed_p (expr, OPT_Wpessimizing_move)
> 
> I don't think we ever want to suppress this warning; moving it to a
> different warning flag (as I suggested on the other patch) would accomplish
> that.

Agreed.  I just removed the warning_suppressed_p check, though; a new flag
would need another NW_ group etc.
 
> > + && warning_at (loc, OPT_Wpessimizing_move,
> > +"moving a temporary object prevents copy "
> > +"elision"))
> > inform (loc, "remove % call");
> > }
> >   /* The rest of the warnings is only relevant for when we are
> > @@ -10443,14 +10444,16 @@ maybe_warn_pessimizing_move (tree expr, tree 
> > type, bool return_p)
> >   else if (can_do_nrvo_p (arg, type))
> > {
> >   auto_diagnostic_group d;
> > - if (warning_at (loc, OPT_Wpessimizing_move,
> > - "moving a local object in a return statement "
> > -

Re: [PATCH] [PR83782] i386 PIE: avoid @GOTOFF for ifuncs and their aliases

2022-08-08 Thread Alexandre Oliva via Gcc-patches
On Aug  1, 2022, "H.J. Lu"  wrote:

> On Thu, Jul 28, 2022 at 9:31 AM H.J. Lu  wrote:

>> > You may also need to do something like this bit for mvc10.c on ia32 PIE.
>> > Because the ifunc is called through an alias, AFAICT we don't even
>> > notice that the call target is (an alias to) an ifunc.  GCC's
>> > gotoff_operand predicate accepts it, but binutils (the linker, IIRC)
>> > then rejects that reference, because the named symbol is an alias to an
>> > ifunc.
>> 
>> Yes, this change is needed.

> I think this fix should be applied to default_binds_local_p_3:

I was concerned that other tests might require such alias pointer
chasing, and figured we might be better off propagating the flag setting
to aliases.


[PR83782] ifunc: back-propagate ifunc_resolver to aliases

gcc.target/i386/mvc10.c fails with -fPIE on ia32 because we omit the
@PLT mark when calling an alias to an indirect function.  Such aliases
aren't marked as ifunc_resolvers in the cgraph, so the test that would
have forced the PLT call fails.
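
The shape of the failing situation is roughly the following (an illustrative
reduction, not the actual mvc10.c, where the dispatcher ifunc and the alias
are generated by the target_clones machinery):

/* foo is an ifunc; bar is a plain alias to it.  On ia32 PIE, the call
   through bar must still go via the PLT, but bar's symtab node was not
   marked ifunc_resolver, so the @PLT marking was omitted.  */
static int impl (void) { return 1; }

static int (*resolve_foo (void)) (void) { return impl; }

int foo (void) __attribute__ ((ifunc ("resolve_foo")));
int bar (void) __attribute__ ((alias ("foo")));

int
call_via_alias (void)
{
  return bar ();
}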

I've arranged for ifunc_resolver to be back-propagated to aliases, and
relaxed the test that required the ifunc attribute to be attached directly
to the decl, rather than taken from an aliased decl, when the
ifunc_resolver bit is set.

Regstrapped on x86_64-linux-gnu, also tested mvc10.c with -m32 -fPIE.
Ok to install?


for  gcc/ChangeLog

PR target/83782
* cgraph.h (symtab_node::set_ifunc_resolver): New, overloaded.
Back-propagate flag to aliases.
* cgraph.cc (cgraph_node::create): Use set_ifunc_resolver.
(cgraph_node::create_alias): Likewise.
* lto-cgraph.cc (input_node): Likewise.
* multiple_target.cc (create_dispatcher_calls): Propagate to
aliases when redirecting them.
* symtab.cc (symtab_node::verify_base): Accept ifunc_resolver
set in an alias to another ifunc_resolver node.
(symtab_node::resolve_alias): Propagate ifunc_resolver from
resolved target to alias.
* varasm.cc (do_assemble_alias): Checking for the attribute.
---
 gcc/cgraph.cc  |4 ++--
 gcc/cgraph.h   |   13 +
 gcc/lto-cgraph.cc  |2 +-
 gcc/multiple_target.cc |2 ++
 gcc/symtab.cc  |   15 ++-
 gcc/varasm.cc  |5 -
 6 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/gcc/cgraph.cc b/gcc/cgraph.cc
index 8d6ed38efa25d..699f2c20defa4 100644
--- a/gcc/cgraph.cc
+++ b/gcc/cgraph.cc
@@ -518,7 +518,7 @@ cgraph_node::create (tree decl)
 }
 
   if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (decl)))
-node->ifunc_resolver = true;
+node->set_ifunc_resolver ();
 
   node->register_symbol ();
   maybe_record_nested_function (node);
@@ -576,7 +576,7 @@ cgraph_node::create_alias (tree alias, tree target)
   if (lookup_attribute ("weakref", DECL_ATTRIBUTES (alias)) != NULL)
 alias_node->transparent_alias = alias_node->weakref = true;
   if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (alias)))
-alias_node->ifunc_resolver = true;
+alias_node->set_ifunc_resolver ();
   return alias_node;
 }
 
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 4be67e3cea906..9468b8a4e3662 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -467,6 +467,19 @@ public:
 return decl->decl_with_vis.symtab_node;
   }
 
+  /* Worker for the nonstatic set_ifunc_resolver, to back-propagate
+     ifunc_resolver in the alias chain.  */
+  static bool set_ifunc_resolver (symtab_node *n, void * = NULL)
+  {
+n->ifunc_resolver = true;
+return false;
+  }
+
+  /* Set the ifunc_resolver bit in this node and in any aliases thereof.  */
+  void set_ifunc_resolver () {
+call_for_symbol_and_aliases (set_ifunc_resolver, NULL, true);
+  }
+
   /* Try to find a symtab node for declaration DECL and if it does not
  exist or if it corresponds to an inline clone, create a new one.  */
   static inline symtab_node * get_create (tree node);
diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc
index 6d9c36ea8b67f..3d8ade1f9f042 100644
--- a/gcc/lto-cgraph.cc
+++ b/gcc/lto-cgraph.cc
@@ -1280,7 +1280,7 @@ input_node (struct lto_file_decl_data *file_data,
   node = symtab->create_empty ();
   node->decl = fn_decl;
   if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (fn_decl)))
-   node->ifunc_resolver = 1;
+   node->set_ifunc_resolver ();
   node->register_symbol ();
 }
 
diff --git a/gcc/multiple_target.cc b/gcc/multiple_target.cc
index 3e2d26882c8e9..97801a4cc9ea9 100644
--- a/gcc/multiple_target.cc
+++ b/gcc/multiple_target.cc
@@ -160,6 +160,8 @@ create_dispatcher_calls (struct cgraph_node *node)
  source->create_reference (inode, IPA_REF_ALIAS);
  if (inode->get_comdat_group ())
source->add_to_same_comdat_group (inode);
+ if (!source->ifunc_resolver)
+   source->set_ifunc_resolver ();
}
  else
gcc_unreachable ();
diff --git a/gcc/symtab.cc 

[PATCH v2] c++: Extend -Wpessimizing-move for class prvalues [PR106276]

2022-08-08 Thread Marek Polacek via Gcc-patches
On Sat, Aug 06, 2022 at 04:07:54PM -0700, Jason Merrill wrote:
> On 8/6/22 15:49, Jason Merrill wrote:
> > On 7/27/22 17:14, Marek Polacek wrote:
> > > We already have a warning that warns about pessimizing std::move
> > > in a return statement, when it prevents the NRVO:
> > > 
> > >    T fn()
> > >    {
> > >  T t;
> > >  return std::move (t); // warning \o/
> > >    }
> > > 
> > > However, the warning doesn't warn when what we are returning is a class
> > > prvalue, that is, when std::move prevents the RVO:
> > > 
> > >    T fn()
> > >    {
> > >  T t;
> > >  return std::move (T{}); // no warning :-(
> > >    }
> > > 
> > > This came up recently in GCC:
> > > .
> > > 
> > > This patch fixes that.  I would like to extend the warning further, so
> > > that it warns in more contexts, e.g.:
> > > 
> > >    T t = std::move(T());
> > > 
> > > or
> > > 
> > >    void foo (T);
> > >    foo (std::move(T()));
> > > 
> > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > 
> > > PR c++/106276
> > > 
> > > gcc/cp/ChangeLog:
> > > 
> > > * typeck.cc (can_do_rvo_p): New.
> > > (maybe_warn_pessimizing_move): Warn when moving a temporary object
> > > in a return statement prevents copy elision.
> > > 
> > > gcc/testsuite/ChangeLog:
> > > 
> > > * g++.dg/cpp0x/Wpessimizing-move7.C: New test.
> > > ---
> > >   gcc/cp/typeck.cc  | 31 -
> > >   .../g++.dg/cpp0x/Wpessimizing-move7.C | 63 +++
> > >   2 files changed, 91 insertions(+), 3 deletions(-)
> > >   create mode 100644 gcc/testsuite/g++.dg/cpp0x/Wpessimizing-move7.C
> > > 
> > > diff --git a/gcc/cp/typeck.cc b/gcc/cp/typeck.cc
> > > index 6e4f23af982..9500c4e2fe8 100644
> > > --- a/gcc/cp/typeck.cc
> > > +++ b/gcc/cp/typeck.cc
> > > @@ -10287,12 +10287,29 @@ can_do_nrvo_p (tree retval, tree functype)
> > >     /* The cv-unqualified type of the returned value must be the
> > >    same as the cv-unqualified return type of the
> > >    function.  */
> > > -  && same_type_p ((TYPE_MAIN_VARIANT (TREE_TYPE (retval))),
> > > -  (TYPE_MAIN_VARIANT (functype)))
> > > +  && same_type_p (TYPE_MAIN_VARIANT (TREE_TYPE (retval)),
> > > +  TYPE_MAIN_VARIANT (functype))
> > >     /* And the returned value must be non-volatile.  */
> > >     && !TYPE_VOLATILE (TREE_TYPE (retval)));
> > >   }
> > > +/* Like can_do_nrvo_p, but we check if we're trying to move a class
> > > +   prvalue.  */
> > > +
> > > +static bool
> > > +can_do_rvo_p (tree retval, tree functype)
> > > +{
> > > +  if (functype == error_mark_node)
> > > +    return false;
> > > +  if (retval)
> > > +    STRIP_ANY_LOCATION_WRAPPER (retval);
> > > +  return (retval != NULL_TREE
> > > +  && TREE_CODE (retval) == TARGET_EXPR
> > 
> > Maybe use !glvalue_p instead of specifically checking for TARGET_EXPR? I
> > don't feel strongly about this.

OK, this version uses !glvalue_p.

> > > +  && same_type_p (TYPE_MAIN_VARIANT (TREE_TYPE (retval)),
> > > +  TYPE_MAIN_VARIANT (functype))
> > > +  && !TYPE_VOLATILE (TREE_TYPE (retval)));
> > > +}
> > > +
> > >   /* If we should treat RETVAL, an expression being returned, as if
> > > it were
> > >  designated by an rvalue, returns it adjusted accordingly;
> > > otherwise, returns
> > >  NULL_TREE.  See [class.copy.elision].  RETURN_P is true if this
> > > is a return
> > > @@ -10401,12 +10418,20 @@ maybe_warn_pessimizing_move (tree retval,
> > > tree functype)
> > >     "prevents copy elision"))
> > >   inform (loc, "remove % call");
> > >   }
> > > +  else if (can_do_rvo_p (arg, functype))
> > > +    {
> > > +  auto_diagnostic_group d;
> > > +  if (warning_at (loc, OPT_Wpessimizing_move,
> > > +  "moving a temporary object in a return statement "
> > > +  "prevents copy elision"))
> > 
> > It doesn't just prevent copy elision, it produces a dangling reference.
> >  This is a special case of the warning we talked about passing a
> > temporary to a function that returns a reference argument unchanged, and
> > should probably use a different warning flag.
> 
> Wait, no, I'm confused, the temporary does live long enough to get copied.
> 
> I still don't think we want to suppress this warning in the other patch.

Yeah, that makes sense.  I'll change it there.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
We already have a warning that warns about pessimizing std::move
in a return statement, when it prevents the NRVO:

  T fn()
  {
T t;
return std::move (t); // warning \o/
  }

However, the warning doesn't warn when what we are returning is a class
prvalue, that is, when std::move prevents the RVO:

  T fn()
  {
T t;
return std::move (T{}); // no warning :-(
  }

This came up recently in GCC:

[PATCH v2] c-family: Honor -Wno-init-self for cv-qual vars [PR102633]

2022-08-08 Thread Marek Polacek via Gcc-patches
On Sat, Aug 06, 2022 at 03:29:05PM -0700, Jason Merrill wrote:
> On 7/26/22 14:31, Marek Polacek wrote:
> > On Tue, Jul 26, 2022 at 04:24:18PM -0400, Jason Merrill wrote:
> > > On 7/26/22 15:03, Marek Polacek wrote:
> > > > Since r11-5188-g32934a4f45a721, we drop qualifiers during l-to-r
> > > > conversion by creating a NOP_EXPR.  For e.g.
> > > > 
> > > > const int i = i;
> > > > 
> > > > that means that the DECL_INITIAL is '(int) i' and not 'i' anymore.
> > > > Consequently, we don't suppress_warning here:
> > > > 
> > > > 711 case DECL_EXPR:
> > > > 715   if (VAR_P (DECL_EXPR_DECL (*expr_p))
> > > > 716   && !DECL_EXTERNAL (DECL_EXPR_DECL (*expr_p))
> > > > 717   && !TREE_STATIC (DECL_EXPR_DECL (*expr_p))
> > > > 718   && (DECL_INITIAL (DECL_EXPR_DECL (*expr_p)) == 
> > > > DECL_EXPR_DECL (*expr_p))
> > > > 719   && !warn_init_self)
> > > > 720 suppress_warning (DECL_EXPR_DECL (*expr_p), OPT_Winit_self);
> > > > 
> > > > because of the check on line 718 -- (int) i is not i.  So -Wno-init-self
> > > > doesn't disable the warning as it's supposed to.
> > > > 
> > > > The following patch fixes it...except it doesn't, for volatile variables
> > > > in C++.  The problem is that for
> > > > 
> > > > volatile int k = k;
> > > > 
> > > > we see that the initializer has TREE_SIDE_EFFECTS, so we perform dynamic
> > > > initialization.  So there's no DECL_INITIAL and the suppress_warning
> > > > call above is never done.  I suppose we could amend 
> > > > get_no_uninit_warning
> > > > to return true for volatile-qualified expressions.  I mean, can we ever
> > > > say for a fact that a volatile variable is uninitialized?
> > > > 
> > > > Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?
> > > > 
> > > > PR middle-end/102633
> > > > 
> > > > gcc/c-family/ChangeLog:
> > > > 
> > > > * c-gimplify.cc (c_gimplify_expr): Strip NOPs of DECL_INITIAL.
> > > 
> > > I wonder if we want to handle this i = i case earlier, like in 
> > > finish_decl.
> > 
> > I could, something like
> > 
> > @@ -5381,7 +5381,14 @@ finish_decl (tree decl, location_t init_loc, tree 
> > init,
> >   init = NULL_TREE;
> > 
> > if (init)
> > -store_init_value (init_loc, decl, init, origtype);
> > +{
> > +  /* In the self-init case, undo the artificial NOP_EXPR we may have
> > +added in convert_lvalue_to_rvalue so that c_gimplify_expr/DECL_EXPR
> > +can perform suppress_warning.  */
> > +  if (TREE_CODE (init) == NOP_EXPR && TREE_OPERAND (init, 0) == decl)
> > +   init = TREE_OPERAND (init, 0);
> > +  store_init_value (init_loc, decl, init, origtype);
> > +}
> > 
> > but then I'd have to do the same thing in cp_finish_decl because
> > decay_conversion also adds a NOP_EXPR for cv-qualified non-class prvalues.
> > Is that what we want?  To me that seems less clean than having 
> > c_gimplify_expr
> > see through NOP_EXPRs.
> 
> I was thinking of checking the form of the initializer before
> decay_conversion or anything else messes with it, and calling
> suppress_warning at that point instead of in c_gimplify_expr.

Aaah, okay.  Here's a patch that does it.  In the C FE it has to
happen really early.  Now both front ends behave the same wrt volatiles!

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?

-- >8 --
Since r11-5188-g32934a4f45a721, we drop qualifiers during l-to-r
conversion by creating a NOP_EXPR.  For e.g.

  const int i = i;

that means that the DECL_INITIAL is '(int) i' and not 'i' anymore.
Consequently, we don't suppress_warning here:

711 case DECL_EXPR:
715   if (VAR_P (DECL_EXPR_DECL (*expr_p))
716   && !DECL_EXTERNAL (DECL_EXPR_DECL (*expr_p))
717   && !TREE_STATIC (DECL_EXPR_DECL (*expr_p))
718   && (DECL_INITIAL (DECL_EXPR_DECL (*expr_p)) == DECL_EXPR_DECL 
(*expr_p))
719   && !warn_init_self)
720 suppress_warning (DECL_EXPR_DECL (*expr_p), OPT_Winit_self);

because of the check on line 718 -- (int) i is not i.  So -Wno-init-self
doesn't disable the warning as it's supposed to.

The following patch fixes it by moving the suppress_warning call from
c_gimplify_expr to the front ends, at points where we haven't created
the NOP_EXPR yet.
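
The user-visible effect can be illustrated with something along these lines
(a sketch in the spirit of the new c-c++-common tests, not their exact
contents):

/* Compile with -Wall -Wno-init-self: none of these self-initializations
   should be diagnosed.  Before the fix the const (and, in C++, volatile)
   cases were still warned about, because DECL_INITIAL was the "(int) i"
   NOP_EXPR rather than "i" itself.  */
void
test (void)
{
  int i = i;
  const int j = j;
  volatile int k = k;
  (void) i;
  (void) j;
  (void) k;
}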

PR middle-end/102633

gcc/c-family/ChangeLog:

* c-gimplify.cc (c_gimplify_expr) : Don't call
suppress_warning here.

gcc/c/ChangeLog:

* c-parser.cc (c_parser_initializer): Add new tree parameter.  Use it.
Call suppress_warning.
(c_parser_declaration_or_fndef): Pass d down to c_parser_initializer.
(c_parser_omp_declare_reduction): Pass omp_priv down to
c_parser_initializer.

gcc/cp/ChangeLog:

* decl.cc (cp_finish_decl): Call suppress_warning.

gcc/testsuite/ChangeLog:

* c-c++-common/Winit-self1.c: New test.
* c-c++-common/Winit-self2.c: New test.
---
 gcc/c-family/c-gimplify.cc   | 12 -
 gcc/c/c-parser.cc  

[committed] d: Fix ICE in in add_stack_var, at cfgexpand.cc:476 (PR106555)

2022-08-08 Thread Iain Buclaw via Gcc-patches
Hi,

This patch fixes the ICE reported in PR d/106555.

The type that triggers the ICE never got completed by the semantic
analysis pass.  Checking its size forces that to happen, or issues a
compile-time error.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32,
committed to mainline, and backported to the releases/gcc-12 branch.

Regards,
Iain.

---
PR d/106555

gcc/d/ChangeLog:

* d-target.cc (Target::isReturnOnStack): Check for return type size.

gcc/testsuite/ChangeLog:

* gdc.dg/imports/pr106555.d: New test.
* gdc.dg/pr106555.d: New test.
---
 gcc/d/d-target.cc   |  2 ++
 gcc/testsuite/gdc.dg/imports/pr106555.d | 10 ++
 gcc/testsuite/gdc.dg/pr106555.d |  4 
 3 files changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/gdc.dg/imports/pr106555.d
 create mode 100644 gcc/testsuite/gdc.dg/pr106555.d

diff --git a/gcc/d/d-target.cc b/gcc/d/d-target.cc
index 610be74ad48..d4350e593e4 100644
--- a/gcc/d/d-target.cc
+++ b/gcc/d/d-target.cc
@@ -464,6 +464,8 @@ Target::isReturnOnStack (TypeFunction *tf, bool)
 return false;
 
   Type *tn = tf->next->toBasetype ();
+  if (tn->size () == SIZE_INVALID)
+return false;
 
   return (tn->ty == TY::Tstruct || tn->ty == TY::Tsarray);
 }
diff --git a/gcc/testsuite/gdc.dg/imports/pr106555.d b/gcc/testsuite/gdc.dg/imports/pr106555.d
new file mode 100644
index 000..0d3ab6bb747
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/imports/pr106555.d
@@ -0,0 +1,10 @@
+module imports.pr106555;
+struct S106555
+{
+int[] f106555;
+int max106555;
+this(int)
+{
+f106555.length = max106555;
+}
+}
diff --git a/gcc/testsuite/gdc.dg/pr106555.d b/gcc/testsuite/gdc.dg/pr106555.d
new file mode 100644
index 000..7b40f3c097b
--- /dev/null
+++ b/gcc/testsuite/gdc.dg/pr106555.d
@@ -0,0 +1,4 @@
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106555
+// { dg-do compile }
+// { dg-additional-options "-O2" }
+// { dg-additional-sources "imports/pr106555.d" }
-- 
2.34.1



Re: [PATCH][_GLIBCXX_DEBUG] Refine singular iterator state

2022-08-08 Thread François Dumont via Gcc-patches

On 08/08/22 15:19, Jonathan Wakely wrote:

On Mon, 8 Aug 2022 at 06:07, François Dumont via Libstdc++
 wrote:

Another version of this patch with just a new test case showing what
wrong code was unnoticed previously by the _GLIBCXX_DEBUG mode.

On 04/08/22 22:56, François Dumont wrote:

This an old patch I had prepared a long time ago, I don't think I ever
submitted it.

 libstdc++: [_GLIBCXX_DEBUG] Do not consider detached iterators as
value-initialized

 An attach iterator has its _M_version set to something != 0. This
value shall be preserved
 when detaching it so that the iterator does not look like a value
initialized one.

 libstdc++-v3/ChangeLog:

 * include/debug/formatter.h (__singular_value_init): New
_Iterator_state enum entry.
 (_Parameter<>(const _Safe_iterator<>&, const char*,
_Is_iterator)): Check if iterator
 parameter is value-initialized.
 (_Parameter<>(const _Safe_local_iterator<>&, const char*,
_Is_iterator)): Likewise.
 * include/debug/safe_iterator.h
(_Safe_iterator<>::_M_value_initialized()): New. Adapt
 checks.
 * include/debug/safe_local_iterator.h
(_Safe_local_iterator<>::_M_value_initialized()): New.
 Adapt checks.
 * src/c++11/debug.cc (_Safe_iterator_base::_M_reset): Do
not reset _M_version.
 (print_field(PrintContext&, const _Parameter&, const
char*)): Adapt state_names.
 * testsuite/23_containers/deque/debug/iterator1_neg.cc:
New test.
 * testsuite/23_containers/deque/debug/iterator2_neg.cc:
New test.
 *
testsuite/23_containers/forward_list/debug/iterator1_neg.cc: New test.
 *
testsuite/23_containers/forward_list/debug/iterator2_neg.cc: New test.

Tested under Linux x86_64 _GLIBCXX_DEBUG mode.

Ok to commit ?

François



diff --git a/libstdc++-v3/src/c++11/debug.cc b/libstdc++-v3/src/c++11/debug.cc
index 4706defedf1..cf8e6f48081 100644
--- a/libstdc++-v3/src/c++11/debug.cc
+++ b/libstdc++-v3/src/c++11/debug.cc
@@ -426,7 +426,8 @@ namespace __gnu_debug
   _M_reset() throw ()
   {
 __atomic_store_n(&_M_sequence, (_Safe_sequence_base*)0, __ATOMIC_RELEASE);
-_M_version = 0;
+// Detach iterator shall not look like a value-initialized one.
+// _M_version = 0;
 _M_prior = 0;
 _M_next = 0;
   }

I think this would be clearer as "Do not reset version, so that a
detached iterator does not look like a value-initialized one."


+// { dg-do run { xfail *-*-* } }
+// { dg-require-debug-mode "" }
+
+#include <deque>
+
+#include <testsuite_hooks.h>
+
+void test01()
+{
+  typedef typename std::deque<int>::iterator It;
+  std::deque<int> dq;
+  dq.push_back(1);
+
+  It it = It();
+  VERIFY( dq.begin() != it );

Is there any reason to use VERIFY here?

Only make sure the compiler do not optimize this check away.


We're expecting the comparison to abort in the debug mode checks,
right? Which would happen if we just do:

(void) dq.begin() == it;


I guess this (void) cast is doing the same so adopted.


Using VERIFY just makes it look like we're expecting the test to be
XFAIL because the assertion will fail, but that's not what is being
tested.

OK for trunk with those changes, thanks.


Updated committed patch attached.

diff --git a/libstdc++-v3/include/debug/formatter.h b/libstdc++-v3/include/debug/formatter.h
index 80e8ba46d1e..748d4fbfea4 100644
--- a/libstdc++-v3/include/debug/formatter.h
+++ b/libstdc++-v3/include/debug/formatter.h
@@ -185,6 +185,7 @@ namespace __gnu_debug
   __rbegin,		// dereferenceable, and at the reverse-beginning
   __rmiddle,	// reverse-dereferenceable, not at the reverse-beginning
   __rend,		// reverse-past-the-end
+  __singular_value_init,	// singular, value initialized
   __last_state
 };
 
@@ -280,7 +281,12 @@ namespace __gnu_debug
 	  _M_variant._M_iterator._M_seq_type = _GLIBCXX_TYPEID(_Sequence);
 
 	  if (__it._M_singular())
-	_M_variant._M_iterator._M_state = __singular;
+	{
+	  if (__it._M_value_initialized())
+		_M_variant._M_iterator._M_state = __singular_value_init;
+	  else
+		_M_variant._M_iterator._M_state = __singular;
+	}
 	  else
 	{
 	  if (__it._M_is_before_begin())
@@ -308,7 +314,12 @@ namespace __gnu_debug
 	  _M_variant._M_iterator._M_seq_type = _GLIBCXX_TYPEID(_Sequence);
 
 	  if (__it._M_singular())
-	_M_variant._M_iterator._M_state = __singular;
+	{
+	  if (__it._M_value_initialized())
+		_M_variant._M_iterator._M_state = __singular_value_init;
+	  else
+		_M_variant._M_iterator._M_state = __singular;
+	}
 	  else
 	{
 	  if (__it._M_is_end())
diff --git a/libstdc++-v3/include/debug/safe_iterator.h b/libstdc++-v3/include/debug/safe_iterator.h
index d613933e236..33f7a86478a 100644
--- a/libstdc++-v3/include/debug/safe_iterator.h
+++ b/libstdc++-v3/include/debug/safe_iterator.h
@@ -41,8 +41,8 @@
 
 #define _GLIBCXX_DEBUG_VERIFY_OPERANDS(_Lhs, _Rhs, _BadMsgId, _DiffMsgId) \
   

Re: [PATCH] Add warning options -W[no-]compare-distinct-pointer-types

2022-08-08 Thread Joseph Myers
On Fri, 5 Aug 2022, Jose E. Marchesi via Gcc-patches wrote:

> +Wcompare-distinct-pointer-types
> +C C++ Var(warn_compare_distinct_pointer_types) Warning Init(1)
> +Warn if pointers of distinct types are compared without a cast.

There's no implementation for C++ in this patch, so the option shouldn't 
be supported for C++ in c.opt.  However, C options are normally supported 
for Objective-C; unless you have a specific reason why Objective-C support 
for this option would be a bad idea, "C ObjC" would be appropriate for the 
languages.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH] Teach vectorizer to deal with bitfield accesses (was: [RFC] Teach vectorizer to deal with bitfield reads)

2022-08-08 Thread Andre Vieira (lists) via Gcc-patches

Hi,

So I've changed the approach from the RFC as suggested, moving the 
bitfield lowering to the if-convert pass.


So to reiterate, ifcvt will lower COMPONENT_REFs of DECL_BIT_FIELD 
fields to either BIT_FIELD_REF if they are reads or BIT_INSERT_EXPR if 
they are writes, using loads and writes of 'representatives' that are 
big enough to contain the bitfield value.


In vect_recog I added two patterns to replace these BIT_FIELD_REF and 
BIT_INSERT_EXPR with shifts and masks as appropriate.
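
As a rough C-level illustration of what the lowering amounts to (not the
literal GIMPLE the pass emits, and assuming the usual little-endian layout
plus GCC's implementation-defined signed conversion/right-shift behaviour),
reading the 31-bit field from the first test below boils down to loading the
32-bit representative and sign-extending its low 31 bits with a shift pair:

struct s31 { int i : 31; };   /* same shape as struct s in the tests */

static inline int
lowered_read (const struct s31 *ptr)
{
  unsigned int rep;
  /* Load of the representative containing the bitfield.  */
  __builtin_memcpy (&rep, ptr, sizeof rep);
  /* BIT_FIELD_REF turned into a shift pair that sign-extends bits 0..30.  */
  return (int) (rep << 1) >> 1;
}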


I'd like to see whether it is possible to remove the 'load' part of a 
BIT_INSERT_EXPR when the representative write doesn't change any relevant 
bits.  For example:


struct s{
int dont_care;
char a : 3;
};

s.a = ;

This should not require a load & write cycle; in fact it wouldn't require 
any masking either.  Though to achieve this we'd need to make sure the 
representative doesn't overlap with any other field.  Any suggestions on 
how to do this would be great, though I don't think we need to wait for 
it, as that's merely a nice-to-have optimization, I guess.
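
A minimal C-level sketch of the store I have in mind (illustrative only: it
assumes 'a' lives in the byte directly after 'dont_care' and that no other
field shares that byte):

struct s { int dont_care; char a : 3; };

void
store_a_no_rmw (struct s *p, int val)
{
  /* The byte holding 'a' contains no other field, so no prior load and no
     masking of neighbouring bits is needed (the offset is an assumption).  */
  ((unsigned char *) p)[sizeof (int)] = (unsigned char) (val & 7);
}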


I am not sure where I should 'document' this change of behavior to 
ifcvt, and/or whether we should change the name of the pass, since it's 
doing more than if-conversion now.


Bootstrapped and regression tested this patch on aarch64-none-linux-gnu.

gcc/ChangeLog:
2022-08-08  Andre Vieira  

    * tree-if-conv.cc (includes): Add expr.h and langhooks.h to 
list of includes.

    (need_to_lower_bitfields): New static bool.
    (need_to_ifcvt): Likewise.
    (version_loop_for_if_conversion): Adapt to work for bitfield 
lowering-only path.

    (bitfield_data_t): New typedef.
    (get_bitfield_data): New function.
    (lower_bitfield): New function.
    (bitfields_to_lower_p): New function.
    (tree_if_conversion): Change to lower-bitfields too.
    * tree-vect-data-refs.cc (vect_find_stmt_data_reference): 
Modify dump message to be more accurate.

    * tree-vect-patterns.cc (includes): Add gimplify-me.h include.
    (vect_recog_bitfield_ref_pattern): New function.
    (vect_recog_bit_insert_pattern): New function.
    (vect_vect_recog_func_ptrs): Add two new patterns.

gcc/testsuite/ChangeLog:
2022-08-08  Andre Vieira  

    * gcc.dg/vect/vect-bitfield-read-1.c: New test.
    * gcc.dg/vect/vect-bitfield-read-2.c: New test.
    * gcc.dg/vect/vect-bitfield-read-3.c: New test.
    * gcc.dg/vect/vect-bitfield-read-4.c: New test.
    * gcc.dg/vect/vect-bitfield-write-1.c: New test.
    * gcc.dg/vect/vect-bitfield-write-2.c: New test.
    * gcc.dg/vect/vect-bitfield-write-3.c: New test.

Kind regards,
Andrediff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c 
b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
new file mode 100644
index 
..01cf34fb44484ca926ca5de99eef76dd99b69e92
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-1.c
@@ -0,0 +1,40 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s { int i : 31; };
+
+#define ELT0 {0}
+#define ELT1 {1}
+#define ELT2 {2}
+#define ELT3 {3}
+#define N 32
+#define RES 48
+struct s A[N]
+  = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+  ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+  ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+  ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+int res = 0;
+for (int i = 0; i < n; ++i)
+  res += ptr[i].i;
+return res;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  if (f(&A[0], N) != RES)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c 
b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
new file mode 100644
index 
..1a4a1579c1478b9407ad21b19e8fbdca9f674b42
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-bitfield-read-2.c
@@ -0,0 +1,43 @@
+/* { dg-require-effective-target vect_int } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+extern void abort(void);
+
+struct s {
+unsigned i : 31;
+char a : 4;
+};
+
+#define N 32
+#define ELT0 {0x7FFFUL, 0}
+#define ELT1 {0x7FFFUL, 1}
+#define ELT2 {0x7FFFUL, 2}
+#define ELT3 {0x7FFFUL, 3}
+#define RES 48
+struct s A[N]
+  = { ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+  ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+  ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3,
+  ELT0, ELT1, ELT2, ELT3, ELT0, ELT1, ELT2, ELT3};
+
+int __attribute__ ((noipa))
+f(struct s *ptr, unsigned n) {
+int res = 0;
+for (int i = 0; i < n; ++i)
+  res += ptr[i].a;
+return res;
+}
+
+int main (void)
+{
+  check_vect ();
+
+  if (f(&A[0], N) != RES)
+    abort ();
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 

Re: [PATCH v4 2/2] preprocessor/106426: Treat u8 character literals as unsigned in char8_t modes.

2022-08-08 Thread Tom Honermann via Gcc-patches

On 8/2/22 6:14 PM, Joseph Myers wrote:

On Tue, 2 Aug 2022, Tom Honermann via Gcc-patches wrote:


This patch corrects handling of UTF-8 character literals in preprocessing
directives so that they are treated as unsigned types in char8_t enabled
C++ modes (C++17 with -fchar8_t or C++20 without -fno-char8_t). Previously,
UTF-8 character literals were always treated as having the same type as
ordinary character literals (signed or unsigned dependent on target or use
of the -fsigned-char or -funsigned char options).

OK in the absence of C++ maintainer objections within 72 hours.  (This is
the case where, when I added support for such literals for C (commit
7c5890cc0a0ecea0e88cc39e9fba6385fb579e61), I raised the question of
whether they should be unsigned in the preprocessor for C++ as well.)


Joseph, would you be so kind as to commit this patch series for me? I 
don't have commit access. Thank you in advance!


Tom.



Re: [PATCH] Add _GLIBCXX_DEBUG backtrace generation

2022-08-08 Thread Jonathan Wakely via Gcc-patches
On Wed, 13 Jul 2022 at 18:28, François Dumont via Libstdc++
 wrote:
>
> libstdc++: [_GLIBCXX_DEBUG] Add backtrace generation on demand
>
>Add _GLIBCXX_DEBUG_BACKTRACE macro to activate backtrace generation
> on _GLIBCXX_DEBUG assertions. Prerequisite is to have configure the lib
> with:
>
>--enable-libstdcxx-backtrace=yes
>
>libstdc++-v3/ChangeLog:
>
>* include/debug/formatter.h
>[_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_state): Declare.
>[_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_create_state): Declare.
>[_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_full_callback): Define.
>[_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_error_callback): Define.
>[_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_full_func): Define.
>[_GLIBCXX_HAVE_STACKTRACE](__glibcxx_backtrace_full): Declare.
>[_GLIBCXX_HAVE_STACKTRACE](_Error_formatter::_M_backtrace_state): New.
>[_GLIBCXX_HAVE_STACKTRACE](_Error_formatter::_M_backtrace_full): New.
>* src/c++11/debug.cc (pretty_print): Rename into...
>(print_function): ...that.

This does more than just rename it, what are the other changes for?


>[_GLIBCXX_HAVE_STACKTRACE](print_backtrace): New.
>(_Error_formatter::_M_error()): Adapt.
>* src/libbacktrace/Makefile.am: Add backtrace.c.
>* src/libbacktrace/Makefile.in: Regenerate.
>* src/libbacktrace/backtrace-rename.h (backtrace_full): New.
>* testsuite/23_containers/vector/debug/assign4_neg.cc: Add backtrace
>  generation.
>* doc/xml/manual/debug_mode.xml: Document _GLIBCXX_DEBUG_BACKTRACE.
>* doc/xml/manual/using.xml: Likewise.
>
> Tested under Linux x86_64 normal and _GLIBCXX_DEBUG modes.
>
> Ok to commit ?


>--- a/libstdc++-v3/testsuite/23_containers/vector/debug/assign4_neg.cc
>+++ b/libstdc++-v3/testsuite/23_containers/vector/debug/assign4_neg.cc
>@@ -16,6 +16,7 @@
> // .
> //
> // { dg-do run { xfail *-*-* } }
>+// { dg-options "-D_GLIBCXX_DEBUG_BACKTRACE -lstdc++_libbacktrace" }
>
> #include 
> #include 

This will fail to link if the static lib isn't available.



Re: [PATCH][_GLIBCXX_DEBUG] Refine singular iterator state

2022-08-08 Thread Jonathan Wakely via Gcc-patches
On Mon, 8 Aug 2022 at 06:07, François Dumont via Libstdc++
 wrote:
>
> Another version of this patch with just a new test case showing what
> wrong code was unnoticed previously by the _GLIBCXX_DEBUG mode.
>
> On 04/08/22 22:56, François Dumont wrote:
> > This an old patch I had prepared a long time ago, I don't think I ever
> > submitted it.
> >
> > libstdc++: [_GLIBCXX_DEBUG] Do not consider detached iterators as
> > value-initialized
> >
> > An attach iterator has its _M_version set to something != 0. This
> > value shall be preserved
> > when detaching it so that the iterator does not look like a value
> > initialized one.
> >
> > libstdc++-v3/ChangeLog:
> >
> > * include/debug/formatter.h (__singular_value_init): New
> > _Iterator_state enum entry.
> > (_Parameter<>(const _Safe_iterator<>&, const char*,
> > _Is_iterator)): Check if iterator
> > parameter is value-initialized.
> > (_Parameter<>(const _Safe_local_iterator<>&, const char*,
> > _Is_iterator)): Likewise.
> > * include/debug/safe_iterator.h
> > (_Safe_iterator<>::_M_value_initialized()): New. Adapt
> > checks.
> > * include/debug/safe_local_iterator.h
> > (_Safe_local_iterator<>::_M_value_initialized()): New.
> > Adapt checks.
> > * src/c++11/debug.cc (_Safe_iterator_base::_M_reset): Do
> > not reset _M_version.
> > (print_field(PrintContext&, const _Parameter&, const
> > char*)): Adapt state_names.
> > * testsuite/23_containers/deque/debug/iterator1_neg.cc:
> > New test.
> > * testsuite/23_containers/deque/debug/iterator2_neg.cc:
> > New test.
> > *
> > testsuite/23_containers/forward_list/debug/iterator1_neg.cc: New test.
> > *
> > testsuite/23_containers/forward_list/debug/iterator2_neg.cc: New test.
> >
> > Tested under Linux x86_64 _GLIBCXX_DEBUG mode.
> >
> > Ok to commit ?
> >
> > François


>diff --git a/libstdc++-v3/src/c++11/debug.cc b/libstdc++-v3/src/c++11/debug.cc
>index 4706defedf1..cf8e6f48081 100644
>--- a/libstdc++-v3/src/c++11/debug.cc
>+++ b/libstdc++-v3/src/c++11/debug.cc
>@@ -426,7 +426,8 @@ namespace __gnu_debug
>   _M_reset() throw ()
>   {
> __atomic_store_n(&_M_sequence, (_Safe_sequence_base*)0, __ATOMIC_RELEASE);
>-_M_version = 0;
>+// Detach iterator shall not look like a value-initialized one.
>+// _M_version = 0;
> _M_prior = 0;
> _M_next = 0;
>   }

I think this would be clearer as "Do not reset version, so that a
detached iterator does not look like a value-initialized one."

>+// { dg-do run { xfail *-*-* } }
>+// { dg-require-debug-mode "" }
>+
>+#include <deque>
>+
>+#include <testsuite_hooks.h>
>+
>+void test01()
>+{
>+  typedef typename std::deque<int>::iterator It;
>+  std::deque<int> dq;
>+  dq.push_back(1);
>+
>+  It it = It();
>+  VERIFY( dq.begin() != it );

Is there any reason to use VERIFY here?

We're expecting the comparison to abort in the debug mode checks,
right? Which would happen if we just do:

(void) dq.begin() == it;

Using VERIFY just makes it look like we're expecting the test to be
XFAIL because the assertion will fail, but that's not what is being
tested.

OK for trunk with those changes, thanks.



RE: [PATCH 2/2][AArch32] Fix 128-bit sequential consistency atomic operations.

2022-08-08 Thread Kyrylo Tkachov via Gcc-patches


> -Original Message-
> From: Tamar Christina 
> Sent: Wednesday, June 8, 2022 3:50 PM
> To: gcc-patches@gcc.gnu.org
> Cc: nd ; Ramana Radhakrishnan
> ; Richard Earnshaw
> ; ni...@redhat.com; Kyrylo Tkachov
> 
> Subject: [PATCH 2/2][AArch32] Fix 128-bit sequential consistency atomic
> operations.
> 
> Hi All,
> 
> Similar to AArch64 the Arm implementation of 128-bit atomics is broken.
> 
> For 128-bit atomics we rely on pthread barriers to correctly guard the address
> in the pointer to get correct memory ordering.  However for 128-bit atomics
> the
> address under the lock is different from the original pointer.
> 
> This means that one of the values under the atomic operation is not
> protected
> properly and so we fail when the user has requested sequential
> consistency as there's no barrier to enforce this requirement.
> 
> As such users have resorted to adding an
> 
> #ifdef GCC
> 
> #endif
> 
> around the use of these atomics.
> 
> This corrects the issue by issuing a barrier only when __ATOMIC_SEQ_CST
> was
> requested.  I have hand verified that the barriers are inserted
> for atomic seq cst.
> 
> 
> Bootstrapped Regtested on arm-none-linux-gnueabihf and no issues.
> 
> Ok for master? and for backporting to GCC 12, 11 and 10?

Ok, with backports after a couple weeks on master.
Thanks,
Kyrill

> 
> Thanks,
> Tamar
> 
> libatomic/ChangeLog:
> 
>   PR target/102218
>   * config/arm/host-config.h (pre_seq_barrier, post_seq_barrier,
>   pre_post_seq_barrier): Require barrier on __ATOMIC_SEQ_CST.
> 
> --- inline copy of patch --
> diff --git a/libatomic/config/arm/host-config.h b/libatomic/config/arm/host-
> config.h
> index
> bbf4a3f84c3f3ae21fb2162aab68bdedf3fbdcb4..ef16fad2a35ec9055e918849e
> 69a1a0e23b62838 100644
> --- a/libatomic/config/arm/host-config.h
> +++ b/libatomic/config/arm/host-config.h
> @@ -1,4 +1,23 @@
>  /* Avoiding the DMB (or kernel helper) can be a good thing.  */
>  #define WANT_SPECIALCASE_RELAXED
> 
> +/* Glibc, at least, uses acq_rel in its pthread mutex
> +   implementation.  If the user is asking for seq_cst,
> +   this is insufficient.  */
> +
> +static inline void __attribute__((always_inline, artificial))
> +pre_seq_barrier(int model)
> +{
> +  if (model == __ATOMIC_SEQ_CST)
> +__atomic_thread_fence (__ATOMIC_SEQ_CST);
> +}
> +
> +static inline void __attribute__((always_inline, artificial))
> +post_seq_barrier(int model)
> +{
> +  pre_seq_barrier(model);
> +}
> +
> +#define pre_post_seq_barrier 1
> +
>  #include_next <host-config.h>
> 
> 
> 
> 
> --


RE: [PATCH 1/2]AArch64 Fix 128-bit sequential consistency atomic operations.

2022-08-08 Thread Kyrylo Tkachov via Gcc-patches


> -Original Message-
> From: Tamar Christina 
> Sent: Monday, August 8, 2022 10:28 AM
> To: Kyrylo Tkachov ; gcc-patches@gcc.gnu.org
> Cc: nd ; Richard Earnshaw ;
> Marcus Shawcroft ; Richard Sandiford
> 
> Subject: RE: [PATCH 1/2]AArch64 Fix 128-bit sequential consistency atomic
> operations.
> 
> 
> > -Original Message-
> > From: Kyrylo Tkachov 
> > Sent: Tuesday, July 12, 2022 2:46 PM
> > To: Tamar Christina ; gcc-
> patc...@gcc.gnu.org
> > Cc: nd ; Richard Earnshaw ;
> > Marcus Shawcroft ; Richard Sandiford
> > 
> > Subject: RE: [PATCH 1/2]AArch64 Fix 128-bit sequential consistency atomic
> > operations.
> >
> > Hi Tamar,
> >
> > Let me be the latest to offer my apologies for the slow review.
> >
> > > -Original Message-
> > > From: Tamar Christina 
> > > Sent: Wednesday, June 8, 2022 3:49 PM
> > > To: gcc-patches@gcc.gnu.org
> > > Cc: nd ; Richard Earnshaw
> ;
> > > Marcus Shawcroft ; Kyrylo Tkachov
> > > ; Richard Sandiford
> > > 
> > > Subject: [PATCH 1/2]AArch64 Fix 128-bit sequential consistency atomic
> > > operations.
> > >
> > > Hi All,
> > >
> > > The AArch64 implementation of 128-bit atomics is broken.
> > >
> > > For 128-bit atomics we rely on pthread barriers to correctly guard the
> > > address in the pointer to get correct memory ordering.  However for
> > > 128-bit atomics the address under the lock is different from the
> > > original pointer.
> > >
> > > This means that one of the values under the atomic operation is not
> > > protected properly and so we fail when the user has requested
> > > sequential consistency as there's no barrier to enforce this
> > > requirement.
> > >
> > > As such users have resorted to adding an
> > >
> > > #ifdef GCC
> > > 
> > > #endif
> > >
> > > around the use of these atomics.
> > >
> > > This corrects the issue by issuing a barrier only when
> > > __ATOMIC_SEQ_CST was requested.  To remedy this performance hit I
> > > think we should revisit using a similar approach to out-line-atomics
> > > for the 128-bit atomics.
> > >
> > > Note that I believe I need the empty file due to the include_next
> > > chain but I am not entirely sure.  I have hand verified that the
> > > barriers are inserted for atomic seq cst.
> > >
> > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> > >
> > > Ok for master? and for backporting to GCC 12, 11 and 10?
> >
> > I'll admit I'm not too familiar with the mechanics of libatomic but...
> >
> > >
> > > Thanks,
> > > Tamar
> > >
> > > libatomic/ChangeLog:
> > >
> > >   PR target/102218
> > >   * config/aarch64/aarch64-config.h: New file.
> > >   * config/aarch64/host-config.h: New file.
> > >
> > > --- inline copy of patch --
> > > diff --git a/libatomic/config/aarch64/aarch64-config.h
> > > b/libatomic/config/aarch64/aarch64-config.h
> > > new file mode 100644
> > > index
> > >
> ..d3474fa8ff80cb0c3ddbf8c4
> > > 8acd931d2339d33d
> > > --- /dev/null
> > > +++ b/libatomic/config/aarch64/aarch64-config.h
> > > @@ -0,0 +1,23 @@
> > > +/* Copyright (C) 2022 Free Software Foundation, Inc.
> > > +
> > > +   This file is part of the GNU Atomic Library (libatomic).
> > > +
> > > +   Libatomic is free software; you can redistribute it and/or modify it
> > > +   under the terms of the GNU General Public License as published by
> > > +   the Free Software Foundation; either version 3 of the License, or
> > > +   (at your option) any later version.
> > > +
> > > +   Libatomic is distributed in the hope that it will be useful, but
> > > + WITHOUT
> > > ANY
> > > +   WARRANTY; without even the implied warranty of MERCHANTABILITY
> or
> > > FITNESS
> > > +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> > > +   more details.
> > > +
> > > +   Under Section 7 of GPL version 3, you are granted additional
> > > +   permissions described in the GCC Runtime Library Exception, version
> > > +   3.1, as published by the Free Software Foundation.
> > > +
> > > +   You should have received a copy of the GNU General Public License
> and
> > > +   a copy of the GCC Runtime Library Exception along with this program;
> > > +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not,
> see
> > > +   .  */
> > > +
> > > diff --git a/libatomic/config/aarch64/host-config.h
> > > b/libatomic/config/aarch64/host-config.h
> > > new file mode 100644
> > > index
> > >
> ..f445a47d25ef5cc51cd21670
> > > 69500245d07bf1bc
> > > --- /dev/null
> > > +++ b/libatomic/config/aarch64/host-config.h
> > > @@ -0,0 +1,46 @@
> > > +/* Copyright (C) 2022 Free Software Foundation, Inc.
> > > +
> > > +   This file is part of the GNU Atomic Library (libatomic).
> > > +
> > > +   Libatomic is free software; you can redistribute it and/or modify it
> > > +   under the terms of the GNU General Public License as published by
> > > +   the Free Software Foundation; either version 3 of the License, 

Re: [PATCH 4/4][RFC] VLA Constructor

2022-08-08 Thread Richard Biener via Gcc-patches
On Fri, Aug 5, 2022 at 2:59 PM Andre Vieira (lists) via Gcc-patches
 wrote:
>
> This isn't really a 'PATCH' yet, it's something I was working on but had
> to put on hold. Feel free to re-use any bits or trash all of it if you'd
> like.

@@ -10264,6 +10264,44 @@ expand_expr_real_2 (sepops ops, rtx target,
machine_mode tmode,

 case VEC_PERM_EXPR:
   {
+   if (TREE_CODE (treeop2) == VECTOR_CST
+   && targetm.vectorize.vla_constructor)
+ {
+   tree ctor0, ctor1;
+   if (TREE_CODE (treeop0) == SSA_NAME
+   && is_gimple_assign (SSA_NAME_DEF_STMT (treeop0)))
+ ctor0 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (treeop0));
+   else
+ ctor0 = treeop0;
+   if (TREE_CODE (treeop1) == SSA_NAME
+   && is_gimple_assign (SSA_NAME_DEF_STMT (treeop1)))
+ ctor1 = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (treeop1));

just to say - you can't lookup things like this, you have to go through the TER
machinery, otherwise the expansions for the CTOR elements might be
clobbered already.  That means to be fully effective doing this during RTL
expansion is likely limited.


Re: [PATCH] PR tree-optimization/64992: (B << 2) != 0 is B when B is Boolean.

2022-08-08 Thread Richard Biener via Gcc-patches
On Mon, Aug 8, 2022 at 11:06 AM Roger Sayle  wrote:
>
>
> This patch resolves both PR tree-optimization/64992 and PR
> tree-optimization/98956, which are missed-optimization enhancement
> requests, for which Andrew Pinski already has a proposed solution
> (related to a fix for PR tree-optimization/98954).  Yesterday,
> I proposed an alternate improved patch for PR98954, which although
> superior in most respects, alas didn't address this case [which
> doesn't include a BIT_AND_EXPR], hence this follow-up fix.
>
> For many functions, F(B), of a (zero-one) Boolean value B, the
> expression F(B) != 0 can often be simplified to just B.  Hence
> "(B * 5) != 0" is B, "-B != 0" is B, "bswap(B) != 0" is B,
> "(B >>r 3) != 0" is B.  These are all currently optimized by GCC,
> with the strange exception of left shifts by a constant (possibly
> due to the undefined/implementation defined behaviour when the
> shift constant is larger than the first operand's precision).
> This patch adds support for this particular case, when the shift
> constant is valid.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32},
> with no new failures.  Ok for mainline?

+/* (X << C) != 0 can be simplified to X, when X is zero_one_valued_p.  */
+(simplify
+  (ne (lshift zero_one_valued_p@0 INTEGER_CST@1) integer_zerop@2)
+  (if (tree_fits_shwi_p (@1)
+   && tree_to_shwi (@1) > 0
+   && tree_to_shwi (@1) < TYPE_PRECISION (TREE_TYPE (@0)))
+(convert @0)))

while we deliberately do not fold int << 34 since the result is undefined
there is IMHO no reason to not fold the above for any (even non-constant)
shift value.  We have guards with TYPE_OVERFLOW_SANITIZED in
some cases but I think that's not appropriate here, there's one
flag_sanitize check, maybe there's a special bit for SHIFT overflow we
can use.  Why is (X << 0) != 0 exempt in the condition?

>
> 2022-08-08  Roger Sayle  
>
> gcc/ChangeLog
> PR tree-optimization/64992
> PR tree-optimization/98956
> * match.pd (ne (lshift @0 @1) 0): Simplify (X << C) != 0 to X
> when X is zero_one_valued_p and the shift constant C is valid.
> (eq (lshift @0 @1) 0): Likewise, simplify (X << C) == 0 to !X
> when X is zero_one_valued_p and the shift constant C is valid.
>
> gcc/testsuite/ChangeLog
> PR tree-optimization/64992
> * gcc.dg/pr64992.c: New test case.
>
>
> Thanks in advance,
> Roger
> --
>


Re: [PATCH] PR tree-optimization/71343: Optimize (X<

2022-08-08 Thread Richard Biener via Gcc-patches
On Mon, Aug 8, 2022 at 10:07 AM Roger Sayle  wrote:
>
>
> This patch resolves PR tree-optimization/71343, a missed-optimization
> enhancement request where GCC fails to see that (a<<2)+(b<<2) == a*4+b*4.
> This requires two related (sets of) optimizations to be added to match.pd.
>
> The first is that (X<<C) op (Y<<C) can be simplified to (X op Y)<<C
> for many binary operators, including AND, IOR, XOR, and (if overflow
> isn't an issue) PLUS and MINUS.  Likewise, the right shifts (both logical
> and arithmetic) and bit-wise logical operators can be simplified in a
> similar fashion.  These all reduce the number of GIMPLE binary operations
> from 3 to 2, by combining/eliminating a shift operation.
>
> The second optimization reflects that the middle-end doesn't impose a
> canonical form on multiplications by powers of two, vs. left shifts,
> instead leaving these operations as specified by the programmer
> unless there's a good reason to change them.  Hence, GIMPLE code may
> contain the expressions "X * 8" and "X << 3" even though these represent
> the same value/computation.  The tweak to match.pd is that comparison
> operations whose operands are equivalent non-canonical expressions can
> be taught their equivalence.  Hence "(X * 8) == (X << 3)" will always
> evaluate to true, and "(X<<2) > 4*X" will always evaluate to false.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32},
> with no new failures.  Ok for mainline?

+/* Shifts by constants distribute over several binary operations,
+   hence (X << C) + (Y << C) can be simplified to (X + Y) << C.  */
+(for op (plus minus)
+  (simplify
+(op (lshift:s @0 INTEGER_CST@1) (lshift:s @2 INTEGER_CST@1))
+(if (INTEGRAL_TYPE_P (type)
+&& TYPE_OVERFLOW_WRAPS (type)
+&& !TYPE_SATURATING (type)
+&& tree_fits_shwi_p (@1)
+&& tree_to_shwi (@1) > 0
+&& tree_to_shwi (@1) < TYPE_PRECISION (type))

I do wonder why we need to restrict this to shifts by constants?
Any out-of-bound shift was already there, no?

+/* Some tree expressions are intentionally non-canonical.
+   We handle the comparison of the equivalent forms here.  */
+(for cmp (eq le ge)
+  (simplify
+(cmp:c (lshift @0 INTEGER_CST@1) (mult @0 integer_pow2p@2))
+(if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+&& tree_fits_shwi_p (@1)
+&& tree_to_shwi (@1) > 0
+&& tree_to_shwi (@1) < TYPE_PRECISION  (TREE_TYPE (@0))
+&& wi::to_wide (@1) == wi::exact_log2 (wi::to_wide (@2)))
+  { constant_boolean_node (true, type); })))
+
+(for cmp (ne lt gt)
+  (simplify
+(cmp:c (lshift @0 INTEGER_CST@1) (mult @0 integer_pow2p@2))
+(if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+&& tree_fits_shwi_p (@1)
+&& tree_to_shwi (@1) > 0
+&& tree_to_shwi (@1) < TYPE_PRECISION  (TREE_TYPE (@0))
+&& wi::to_wide (@1) == wi::exact_log2 (wi::to_wide (@2)))
+  { constant_boolean_node (false, type); })))

hmm.  I wonder if it makes more sense to handle this in value-numbering.
tree-ssa-sccvn.cc:visit_nary_op handles some cases that are not
exactly canonicalization issues but the shift vs mult could be handled
there by just performing the alternate lookup.  That would also enable
CSE and by means of that of course the comparisons you do above.

Thanks,
Richard.

>
> 2022-08-08  Roger Sayle  
>
> gcc/ChangeLog
> PR tree-optimization/71343
> * match.pd (op (lshift @0 @1) (lshift @2 @1)): Optimize the
> expression (X< (op (rshift @0 @1) (rshift @2 @1)): Likwise, simplify (X>>C)^(Y>>C)
> to (X^Y)>>C for binary logical operators, AND, IOR and XOR.
> (cmp:c (lshift @0) (mult @1)): Optimize comparisons between
> shifts by integer constants and multiplications by powers of 2.
>
> gcc/testsuite/ChangeLog
> PR tree-optimization/71343
> * gcc.dg/pr71343-1.c: New test case.
> * gcc.dg/pr71343-2.c: Likewise.
>
>
> Thanks in advance,
> Roger
> --
>


Re: [PATCH] middle-end: Optimize ((X >> C1) & C2) != C3 for more cases.

2022-08-08 Thread Richard Biener via Gcc-patches
On Sun, Aug 7, 2022 at 9:08 PM Roger Sayle  wrote:
>
>
> Following my middle-end patch for PR tree-optimization/94026, I'd promised
> Jeff Law that I'd clean up the dead-code in fold-const.cc now that these
> optimizations are handled in match.pd.  Alas, I discovered things aren't
> quite that simple, as the transformations I'd added avoided cases where
> C2 overlapped with the new bits introduced by the shift, but the original
> code handled any value of C2 provided that it had a single-bit set (under
> the condition that C3 was always zero).
>
> This patch upgrades the transformations supported by match.pd to cover
> any values of C2 and C3, provided that C1 is a valid bit shift constant,
> for all three shift types (logical right, arithmetic right and left).
> This then makes the code in fold-const.cc fully redundant, and adds
> support for some new (corner) cases not previously handled.  If the
> constant C1 is valid for the type's precision, the shift is now always
> eliminated (with C2 and C3 possibly updated to test the sign bit).
>
> Interestingly, the fold-const.cc code that I'm now deleting was originally
> added by me back in 2006 to resolve PR middle-end/21137.  I've confirmed
> that those testcase(s) remain resolved with this patch (and I'll close
> 21137 in Bugzilla).  This patch also implements most (but not all) of the
> examples mentioned in PR tree-optimization/98954, for which I have some
> follow-up patches.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32},
> with no new failures. Ok for mainline?

+ (with { wide_int smask = wi::arshift (sb, c1); }
+   (if ((c2 & smask) == 0)
+ (cmp (bit_and @0 { wide_int_to_tree (t0, c2 << c1); })
+  { wide_int_to_tree (t0, c3 << c1); })
+ (if ((c3 & smask) == 0)
+   (cmp (bit_and @0 { wide_int_to_tree (t0, (c2 << c1) | sb); })
+{ wide_int_to_tree (t0, c3 << c1); })
+   (if ((c2 & smask) != (c3 & smask))

you can use

   (switch
(if ((c2 & smask) == 0)
 (...)
(if ((c3 & smask) == 0)
 (..)
(if ((c2 & smask) != (c3 & smask))
 (..)))

to make this better readable (switch is basically an if else-if
else-if ... clause).

OK with that change.

Thanks,
Richard.

>
> 2022-08-07  Roger Sayle  
>
> gcc/ChangeLog
> PR middle-end/21137
> PR tree-optimization/98954
> * fold-const.cc (fold_binary_loc): Remove optimizations to
> optimize ((X >> C1) & C2) ==/!= 0.
> * match.pd (cmp (bit_and (lshift @0 @1) @2) @3): Remove wi::ctz
> check, and handle all values of INTEGER_CSTs @2 and @3.
> (cmp (bit_and (rshift @0 @1) @2) @3): Likewise, remove wi::clz
> checks, and handle all values of INTEGER_CSTs @2 and @3.
>
> gcc/testsuite/ChangeLog
> PR middle-end/21137
> PR tree-optimization/98954
> * gcc.dg/fold-eqandshift-4.c: New test case.
>
>
> Thanks in advance,
> Roger
> --
>


Re: [PATCH 10/12 V2] arm: Implement cortex-M return signing address codegen

2022-08-08 Thread Andrea Corallo via Gcc-patches
Richard Earnshaw  writes:

[...]

> +(define_insn "pac_nop"
> +  [(set (reg:SI IP_REGNUM)
> + (unspec:SI [(reg:SI SP_REGNUM) (reg:SI LR_REGNUM)]
> +   UNSPEC_PAC_NOP))]
> +  "TARGET_THUMB2"
> +  "pac\t%|ip, %|lr, %|sp"
> +  [(set_attr "length" "2")])
>
> This pattern is missing a type.

Which type do you think is missing?

> The length is also incorrect as the
> instruction is 32-bits (4 bytes).

Ack.

> Similarly for the other
> instructions below.  Also, you need to mark them as incompatible with
> conditional execution (they're constrained-unpredictable in IT
> blocks).

I guess this would translate into setting it with '(set_attr "predicable" "no")'

But isn't this already the default?

Thanks

  Andrea


RE: [PATCH 1/2]AArch64 Fix 128-bit sequential consistency atomic operations.

2022-08-08 Thread Tamar Christina via Gcc-patches

> -Original Message-
> From: Kyrylo Tkachov 
> Sent: Tuesday, July 12, 2022 2:46 PM
> To: Tamar Christina ; gcc-patches@gcc.gnu.org
> Cc: nd ; Richard Earnshaw ;
> Marcus Shawcroft ; Richard Sandiford
> 
> Subject: RE: [PATCH 1/2]AArch64 Fix 128-bit sequential consistency atomic
> operations.
> 
> Hi Tamar,
> 
> Let me be the latest to offer my apologies for the slow review.
> 
> > -Original Message-
> > From: Tamar Christina 
> > Sent: Wednesday, June 8, 2022 3:49 PM
> > To: gcc-patches@gcc.gnu.org
> > Cc: nd ; Richard Earnshaw ;
> > Marcus Shawcroft ; Kyrylo Tkachov
> > ; Richard Sandiford
> > 
> > Subject: [PATCH 1/2]AArch64 Fix 128-bit sequential consistency atomic
> > operations.
> >
> > Hi All,
> >
> > The AArch64 implementation of 128-bit atomics is broken.
> >
> > For 128-bit atomics we rely on pthread barriers to correctly guard the
> > address in the pointer to get correct memory ordering.  However for
> > 128-bit atomics the address under the lock is different from the
> > original pointer.
> >
> > This means that one of the values under the atomic operation is not
> > protected properly and so we fail when the user has requested
> > sequential consistency as there's no barrier to enforce this
> > requirement.
> >
> > As such users have resorted to adding an
> >
> > #ifdef GCC
> > 
> > #endif
> >
> > around the use of these atomics.
> >
> > This corrects the issue by issuing a barrier only when
> > __ATOMIC_SEQ_CST was requested.  To remedy this performance hit I
> > think we should revisit using a similar approach to out-line-atomics
> > for the 128-bit atomics.
> >
> > Note that I believe I need the empty file due to the include_next
> > chain but I am not entirely sure.  I have hand verified that the
> > barriers are inserted for atomic seq cst.
> >
> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
> >
> > Ok for master? and for backporting to GCC 12, 11 and 10?
> 
> I'll admit I'm not too familiar with the mechanics of libatomic but...
> 
> >
> > Thanks,
> > Tamar
> >
> > libatomic/ChangeLog:
> >
> > PR target/102218
> > * config/aarch64/aarch64-config.h: New file.
> > * config/aarch64/host-config.h: New file.
> >
> > --- inline copy of patch --
> > diff --git a/libatomic/config/aarch64/aarch64-config.h
> > b/libatomic/config/aarch64/aarch64-config.h
> > new file mode 100644
> > index
> > ..d3474fa8ff80cb0c3ddbf8c4
> > 8acd931d2339d33d
> > --- /dev/null
> > +++ b/libatomic/config/aarch64/aarch64-config.h
> > @@ -0,0 +1,23 @@
> > +/* Copyright (C) 2022 Free Software Foundation, Inc.
> > +
> > +   This file is part of the GNU Atomic Library (libatomic).
> > +
> > +   Libatomic is free software; you can redistribute it and/or modify it
> > +   under the terms of the GNU General Public License as published by
> > +   the Free Software Foundation; either version 3 of the License, or
> > +   (at your option) any later version.
> > +
> > +   Libatomic is distributed in the hope that it will be useful, but
> > + WITHOUT
> > ANY
> > +   WARRANTY; without even the implied warranty of MERCHANTABILITY or
> > FITNESS
> > +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> > +   more details.
> > +
> > +   Under Section 7 of GPL version 3, you are granted additional
> > +   permissions described in the GCC Runtime Library Exception, version
> > +   3.1, as published by the Free Software Foundation.
> > +
> > +   You should have received a copy of the GNU General Public License and
> > +   a copy of the GCC Runtime Library Exception along with this program;
> > +   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
> > +   .  */
> > +
> > diff --git a/libatomic/config/aarch64/host-config.h
> > b/libatomic/config/aarch64/host-config.h
> > new file mode 100644
> > index
> > ..f445a47d25ef5cc51cd21670
> > 69500245d07bf1bc
> > --- /dev/null
> > +++ b/libatomic/config/aarch64/host-config.h
> > @@ -0,0 +1,46 @@
> > +/* Copyright (C) 2022 Free Software Foundation, Inc.
> > +
> > +   This file is part of the GNU Atomic Library (libatomic).
> > +
> > +   Libatomic is free software; you can redistribute it and/or modify it
> > +   under the terms of the GNU General Public License as published by
> > +   the Free Software Foundation; either version 3 of the License, or
> > +   (at your option) any later version.
> > +
> > +   Libatomic is distributed in the hope that it will be useful, but
> > + WITHOUT
> > ANY
> > +   WARRANTY; without even the implied warranty of MERCHANTABILITY or
> > FITNESS
> > +   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> > +   more details.
> > +
> > +   Under Section 7 of GPL version 3, you are granted additional
> > +   permissions described in the GCC Runtime Library Exception, version
> > +   3.1, as published by the Free Software Foundation.
> > +
> > +   You 

[PATCH] lto/106540 - fix LTO tree input wrt dwarf2out_register_external_die

2022-08-08 Thread Richard Biener via Gcc-patches
I've revisited the earlier two workarounds for dwarf2out_register_external_die
getting duplicate entries.  It turns out that r11-525-g03d90a20a1afcb
added dref_queue pruning to lto_input_tree but decl reading uses that
to stream in DECL_INITIAL even when in the middle of SCC streaming.
When that SCC then gets thrown away we can end up with debug nodes
registered which isn't supposed to happen.  The following adjusts
the DECL_INITIAL streaming to go the in-SCC way, using lto_input_tree_1,
since no SCCs are expected at this point, just refs.

LTO bootstrapped and tested on x86_64-unknown-linux-gnu, pushed.

PR lto/106540
PR lto/106334
* dwarf2out.cc (dwarf2out_register_external_die): Restore
original assert.
* lto-streamer-in.cc (lto_read_tree_1): Use lto_input_tree_1
to input DECL_INITIAL, avoiding to commit drefs.
---
 gcc/dwarf2out.cc   | 7 +--
 gcc/lto-streamer-in.cc | 7 +--
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index cfea9cf6451..e3920c898f5 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -6069,12 +6069,7 @@ dwarf2out_register_external_die (tree decl, const char 
*sym,
 
   if (!external_die_map)
 external_die_map = hash_map::create_ggc (1000);
-  /* When we do tree merging during WPA or with -flto-partition=none we
- can end up re-using GC memory as there's currently no way to unregister
- external DIEs.  Ideally we'd register them only after merging finished
- but allowing override here is easiest.  See PR106334.  */
-  gcc_checking_assert (!(in_lto_p && !flag_wpa)
-  || !external_die_map->get (decl));
+  gcc_checking_assert (!external_die_map->get (decl));
   sym_off_pair p = { IDENTIFIER_POINTER (get_identifier (sym)), off };
   external_die_map->put (decl, p);
 }
diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index fe5a4e7fe1d..a7dad70363f 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1699,11 +1699,14 @@ lto_read_tree_1 (class lto_input_block *ib, class 
data_in *data_in, tree expr)
   /* Read all the pointer fields in EXPR.  */
   streamer_read_tree_body (ib, data_in, expr);
 
-  /* Read any LTO-specific data not read by the tree streamer.  */
+  /* Read any LTO-specific data not read by the tree streamer.  Do not use
+ stream_read_tree here since that flushes the dref_queue in mids of
+ SCC reading.  */
   if (DECL_P (expr)
   && TREE_CODE (expr) != FUNCTION_DECL
   && TREE_CODE (expr) != TRANSLATION_UNIT_DECL)
-DECL_INITIAL (expr) = stream_read_tree (ib, data_in);
+DECL_INITIAL (expr)
+  = lto_input_tree_1 (ib, data_in, streamer_read_record_start (ib), 0);
 
   /* Stream references to early generated DIEs.  Keep in sync with the
  trees handled in dwarf2out_register_external_die.  */
-- 
2.35.3


[PATCH] PR tree-optimization/64992: (B << 2) != 0 is B when B is Boolean.

2022-08-08 Thread Roger Sayle

This patch resolves both PR tree-optimization/64992 and PR
tree-optimization/98956, which are missed-optimization enhancement
requests, for which Andrew Pinski already has a proposed solution
(related to a fix for PR tree-optimization/98954).  Yesterday,
I proposed an alternate improved patch for PR98954, which although
superior in most respects, alas didn't address this case [which
doesn't include a BIT_AND_EXPR], hence this follow-up fix.

For many functions, F(B), of a (zero-one) Boolean value B, the
expression F(B) != 0 can often be simplified to just B.  Hence
"(B * 5) != 0" is B, "-B != 0" is B, "bswap(B) != 0" is B,
"(B >>r 3) != 0" is B.  These are all currently optimized by GCC,
with the strange exception of left shifts by a constant (possibly
due to the undefined/implementation defined behaviour when the
shift constant is larger than the first operand's precision).
This patch adds support for this particular case, when the shift
constant is valid.
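
For illustration only (these functions are not part of the patch or the
new test):

_Bool f1 (_Bool b) { return (b * 5)  != 0; }  /* already folded to b */
_Bool f2 (_Bool b) { return -b       != 0; }  /* already folded to b */
_Bool f3 (_Bool b) { return (b << 2) != 0; }  /* folded to b with this patch */
_Bool f4 (_Bool b) { return (b << 2) == 0; }  /* folded to b == 0 with this patch */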

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2022-08-08  Roger Sayle  

gcc/ChangeLog
PR tree-optimization/64992
PR tree-optimization/98956
* match.pd (ne (lshift @0 @1) 0): Simplify (X << C) != 0 to X
when X is zero_one_valued_p and the shift constant C is valid.
(eq (lshift @0 @1) 0): Likewise, simplify (X << C) == 0 to !X
when X is zero_one_valued_p and the shift constant C is valid.

gcc/testsuite/ChangeLog
PR tree-optimization/64992
* gcc.dg/pr64992.c: New test case.


Thanks in advance,
Roger
--

diff --git a/gcc/match.pd b/gcc/match.pd
index f82f94a..ef6d8e2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1900,6 +1900,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (@0)))
   (mult (convert @1) (convert @2
 
+/* (X << C) != 0 can be simplified to X, when X is zero_one_valued_p.  */
+(simplify
+  (ne (lshift zero_one_valued_p@0 INTEGER_CST@1) integer_zerop@2)
+  (if (tree_fits_shwi_p (@1)
+   && tree_to_shwi (@1) > 0
+   && tree_to_shwi (@1) < TYPE_PRECISION (TREE_TYPE (@0)))
+(convert @0)))
+
+/* (X << C) == 0 can be simplified to X == 0, when X is zero_one_valued_p.  */
+(simplify
+  (eq (lshift zero_one_valued_p@0 INTEGER_CST@1) integer_zerop@2)
+  (if (tree_fits_shwi_p (@1)
+   && tree_to_shwi (@1) > 0
+   && tree_to_shwi (@1) < TYPE_PRECISION (TREE_TYPE (@0)))
+(eq @0 @2)))
+
 /* Convert ~ (-A) to A - 1.  */
 (simplify
  (bit_not (convert? (negate @0)))
diff --git a/gcc/testsuite/gcc.dg/pr64992.c b/gcc/testsuite/gcc.dg/pr64992.c
new file mode 100644
index 000..43fbcf7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr64992.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+_Bool foo(_Bool x) { return (x << 2) != 0; }
+_Bool bar(_Bool x) { return (x << 2) == 0; }
+
+/* { dg-final { scan-tree-dump-not " << " "optimized" } } */


Re: ICE after folding svld1rq to vec_perm_expr duing forwprop

2022-08-08 Thread Richard Biener via Gcc-patches
On Mon, Aug 1, 2022 at 5:17 AM Prathamesh Kulkarni
 wrote:
>
> On Thu, 21 Jul 2022 at 12:21, Richard Biener  
> wrote:
> >
> > On Wed, Jul 20, 2022 at 5:36 PM Prathamesh Kulkarni
> >  wrote:
> > >
> > > On Mon, 18 Jul 2022 at 11:57, Richard Biener  
> > > wrote:
> > > >
> > > > On Fri, Jul 15, 2022 at 3:49 PM Prathamesh Kulkarni
> > > >  wrote:
> > > > >
> > > > > On Thu, 14 Jul 2022 at 17:22, Richard Sandiford
> > > > >  wrote:
> > > > > >
> > > > > > Richard Biener  writes:
> > > > > > > On Thu, Jul 14, 2022 at 9:55 AM Prathamesh Kulkarni
> > > > > > >  wrote:
> > > > > > >>
> > > > > > >> On Wed, 13 Jul 2022 at 12:22, Richard Biener 
> > > > > > >>  wrote:
> > > > > > >> >
> > > > > > >> > On Tue, Jul 12, 2022 at 9:12 PM Prathamesh Kulkarni via 
> > > > > > >> > Gcc-patches
> > > > > > >> >  wrote:
> > > > > > >> > >
> > > > > > >> > > Hi Richard,
> > > > > > >> > > For the following test:
> > > > > > >> > >
> > > > > > >> > > svint32_t f2(int a, int b, int c, int d)
> > > > > > >> > > {
> > > > > > >> > >   int32x4_t v = (int32x4_t) {a, b, c, d};
> > > > > > >> > >   return svld1rq_s32 (svptrue_b8 (), &v[0]);
> > > > > > >> > > }
> > > > > > >> > >
> > > > > > >> > > The compiler emits following ICE with -O3 -mcpu=generic+sve:
> > > > > > >> > > foo.c: In function ‘f2’:
> > > > > > >> > > foo.c:4:11: error: non-trivial conversion in 
> > > > > > >> > > ‘view_convert_expr’
> > > > > > >> > > 4 | svint32_t f2(int a, int b, int c, int d)
> > > > > > >> > >   |   ^~
> > > > > > >> > > svint32_t
> > > > > > >> > > __Int32x4_t
> > > > > > >> > > _7 = VIEW_CONVERT_EXPR<__Int32x4_t>(_8);
> > > > > > >> > > during GIMPLE pass: forwprop
> > > > > > >> > > dump file: foo.c.109t.forwprop2
> > > > > > >> > > foo.c:4:11: internal compiler error: verify_gimple failed
> > > > > > >> > > 0xfda04a verify_gimple_in_cfg(function*, bool)
> > > > > > >> > > ../../gcc/gcc/tree-cfg.cc:5568
> > > > > > >> > > 0xe9371f execute_function_todo
> > > > > > >> > > ../../gcc/gcc/passes.cc:2091
> > > > > > >> > > 0xe93ccb execute_todo
> > > > > > >> > > ../../gcc/gcc/passes.cc:2145
> > > > > > >> > >
> > > > > > >> > > This happens because, after folding svld1rq_s32 to 
> > > > > > >> > > vec_perm_expr, we have:
> > > > > > >> > >   int32x4_t v;
> > > > > > >> > >   __Int32x4_t _1;
> > > > > > >> > >   svint32_t _9;
> > > > > > >> > >   vector(4) int _11;
> > > > > > >> > >
> > > > > > >> > >:
> > > > > > >> > >   _1 = {a_3(D), b_4(D), c_5(D), d_6(D)};
> > > > > > >> > >   v_12 = _1;
> > > > > > >> > >   _11 = v_12;
> > > > > > >> > >   _9 = VEC_PERM_EXPR <_11, _11, { 0, 1, 2, 3, ... }>;
> > > > > > >> > >   return _9;
> > > > > > >> > >
> > > > > > >> > > During forwprop, simplify_permutation simplifies 
> > > > > > >> > > vec_perm_expr to
> > > > > > >> > > view_convert_expr,
> > > > > > >> > > and the end result becomes:
> > > > > > >> > >   svint32_t _7;
> > > > > > >> > >   __Int32x4_t _8;
> > > > > > >> > >
> > > > > > >> > > ;;   basic block 2, loop depth 0
> > > > > > >> > > ;;pred:   ENTRY
> > > > > > >> > >   _8 = {a_2(D), b_3(D), c_4(D), d_5(D)};
> > > > > > >> > >   _7 = VIEW_CONVERT_EXPR<__Int32x4_t>(_8);
> > > > > > >> > >   return _7;
> > > > > > >> > > ;;succ:   EXIT
> > > > > > >> > >
> > > > > > >> > > which causes the error duing verify_gimple since 
> > > > > > >> > > VIEW_CONVERT_EXPR
> > > > > > >> > > has incompatible types (svint32_t, int32x4_t).
> > > > > > >> > >
> > > > > > >> > > The attached patch disables simplification of VEC_PERM_EXPR
> > > > > > >> > > in simplify_permutation, if lhs and rhs have non compatible 
> > > > > > >> > > types,
> > > > > > >> > > which resolves ICE, but am not sure if it's the correct 
> > > > > > >> > > approach ?
> > > > > > >> >
> > > > > > >> > It for sure papers over the issue.  I think the error happens 
> > > > > > >> > earlier,
> > > > > > >> > the V_C_E should have been built with the type of the 
> > > > > > >> > VEC_PERM_EXPR
> > > > > > >> > which is the type of the LHS.  But then you probably run into 
> > > > > > >> > the
> > > > > > >> > different sizes ICE (VLA vs constant size).  I think for this 
> > > > > > >> > case you
> > > > > > >> > want a BIT_FIELD_REF instead of a VIEW_CONVERT_EXPR,
> > > > > > >> > selecting the "low" part of the VLA vector.
> > > > > > >> Hi Richard,
> > > > > > >> Sorry I don't quite follow. In this case, we use VEC_PERM_EXPR to
> > > > > > >> represent dup operation
> > > > > > >> from fixed width to VLA vector. I am not sure how folding it to
> > > > > > >> BIT_FIELD_REF will work.
> > > > > > >> Could you please elaborate ?
> > > > > > >>
> > > > > > >> Also, the issue doesn't seem restricted to this case.
> > > > > > >> The following test case also ICE's during forwprop:
> > > > > > >> svint32_t foo()
> > > > > > >> {
> > > > > > >>   int32x4_t v = (int32x4_t) {1, 2, 3, 4};
> > > > > > >>   svint32_t v2 = svld1rq_s32 (svptrue_b8 (), &v[0]);
> > > > > > >>   return v2;
> 

[PATCH] PR tree-optimization/71343: Optimize (X<

2022-08-08 Thread Roger Sayle

This patch resolves PR tree-optimization/71343, a missed-optimization
enhancement request where GCC fails to see that (a<<2)+(b<<2) == a*4+b*4.
This requires two related (sets of) optimizations to be added to match.pd.

The first is that (X<<C) op (Y<<C) can be simplified to (X op Y)<<C,
for many binary operators, including AND, IOR, XOR, and (if overflow
isn't an issue) PLUS and MINUS.  Likewise, the right shifts (both logical
and arithmetic) and bit-wise logical operators can be simplified in a
similar fashion.  These all reduce the number of GIMPLE binary operations
from 3 to 2, by combining/eliminating a shift operation.

The second optimization reflects that the middle-end doesn't impose a
canonical form on multiplications by powers of two, vs. left shifts,
instead leaving these operations as specified by the programmer
unless there's a good reason to change them.  Hence, GIMPLE code may
contain the expressions "X * 8" and "X << 3" even though these represent
the same value/computation.  The tweak to match.pd is that comparison
operations whose operands are equivalent non-canonical expressions can
be taught their equivalence.  Hence "(X * 8) == (X << 3)" will always
evaluate to true, and "(X<<2) > 4*X" will always evaluate to false.
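
For illustration only (these functions are not the new tests themselves):

unsigned int add_shifts (unsigned int a, unsigned int b)
{
  return (a << 2) + (b << 2);   /* now simplified to (a + b) << 2 */
}

int always_true (int x)
{
  return (x * 8) == (x << 3);   /* now folded to 1 */
}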

This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
and make -k check, both with and without --target_board=unix{-m32},
with no new failures.  Ok for mainline?


2022-08-08  Roger Sayle  

gcc/ChangeLog
PR tree-optimization/71343
* match.pd (op (lshift @0 @1) (lshift @2 @1)): Optimize the
expression (X<<C) op (Y<<C) to (X op Y)<<C.
(op (rshift @0 @1) (rshift @2 @1)): Likewise, simplify (X>>C)^(Y>>C)
to (X^Y)>>C for binary logical operators, AND, IOR and XOR.
(cmp:c (lshift @0) (mult @1)): Optimize comparisons between
shifts by integer constants and multiplications by powers of 2.

gcc/testsuite/ChangeLog
PR tree-optimization/71343
* gcc.dg/pr71343-1.c: New test case.
* gcc.dg/pr71343-2.c: Likewise.


Thanks in advance,
Roger
--

diff --git a/gcc/match.pd b/gcc/match.pd
index f82f94a..d6b9cfb 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -982,6 +982,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& tree_nop_conversion_p (type, TREE_TYPE (@1)))
(lshift @0 @2)))
 
+/* Shifts by constants distribute over several binary operations,
+   hence (X << C) + (Y << C) can be simplified to (X + Y) << C.  */
+(for op (plus minus)
+  (simplify
+(op (lshift:s @0 INTEGER_CST@1) (lshift:s @2 INTEGER_CST@1))
+(if (INTEGRAL_TYPE_P (type)
+&& TYPE_OVERFLOW_WRAPS (type)
+&& !TYPE_SATURATING (type)
+&& tree_fits_shwi_p (@1)
+&& tree_to_shwi (@1) > 0
+&& tree_to_shwi (@1) < TYPE_PRECISION (type))
+  (lshift (op @0 @2) @1
+
+(for op (bit_and bit_ior bit_xor)
+  (simplify
+(op (lshift:s @0 INTEGER_CST@1) (lshift:s @2 INTEGER_CST@1))
+(if (INTEGRAL_TYPE_P (type)
+&& tree_fits_shwi_p (@1)
+&& tree_to_shwi (@1) > 0
+&& tree_to_shwi (@1) < TYPE_PRECISION (type))
+  (lshift (op @0 @2) @1)))
+  (simplify
+(op (rshift:s @0 INTEGER_CST@1) (rshift:s @2 INTEGER_CST@1))
+(if (INTEGRAL_TYPE_P (type)
+&& tree_fits_shwi_p (@1)
+&& tree_to_shwi (@1) > 0
+&& tree_to_shwi (@1) < TYPE_PRECISION (type))
+  (rshift (op @0 @2) @1
+
 /* Fold (1 << (C - x)) where C = precision(type) - 1
into ((1 << C) >> x). */
 (simplify
@@ -2241,6 +2270,28 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (if (TREE_INT_CST_LOW (@1) & 1)
{ constant_boolean_node (cmp == NE_EXPR, type); })))
 
+/* Some tree expressions are intentionally non-canonical.
+   We handle the comparison of the equivalent forms here.  */
+(for cmp (eq le ge)
+  (simplify
+(cmp:c (lshift @0 INTEGER_CST@1) (mult @0 integer_pow2p@2))
+(if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+&& tree_fits_shwi_p (@1)
+&& tree_to_shwi (@1) > 0
+&& tree_to_shwi (@1) < TYPE_PRECISION  (TREE_TYPE (@0))
+&& wi::to_wide (@1) == wi::exact_log2 (wi::to_wide (@2)))
+  { constant_boolean_node (true, type); })))
+
+(for cmp (ne lt gt)
+  (simplify
+(cmp:c (lshift @0 INTEGER_CST@1) (mult @0 integer_pow2p@2))
+(if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+&& tree_fits_shwi_p (@1)
+&& tree_to_shwi (@1) > 0
+&& tree_to_shwi (@1) < TYPE_PRECISION  (TREE_TYPE (@0))
+&& wi::to_wide (@1) == wi::exact_log2 (wi::to_wide (@2)))
+  { constant_boolean_node (false, type); })))
+
 /* Arguments on which one can call get_nonzero_bits to get the bits
possibly set.  */
 (match with_possible_nonzero_bits
diff --git a/gcc/testsuite/gcc.dg/pr71343-1.c b/gcc/testsuite/gcc.dg/pr71343-1.c
new file mode 100644
index 000..146f5fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr71343-1.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+unsigned int foo_plus(unsigned int a, unsigned int b)
+{
+  return (a << 2) + (b << 2);
+}
+
+unsigned int foo_and(unsigned int a, unsigned int b)
+{
+  return (a << 2) & (b << 2);
+}
+
+unsigned int foo_ior(unsigned int a, unsigned int b)
+{
+  return (a << 2) | (b << 2);
+}
+
+unsigned int foo_xor(unsigned int a, unsigned int b)
+{
+  return (a << 2) ^ (b << 2);
+}
+
+unsigned int bar_and(unsigned int a, unsigned int b)
+{
+  return (a >> 2) & (b >> 2);
+}
+
+unsigned int bar_ior(unsigned int a, unsigned int b)
+{
+  return (a >> 2) | (b >> 2);
+}
+
+unsigned int bar_xor(unsigned int a, unsigned int b)
+{
+  return (a >> 2) ^ (b >> 2);
+}
+
+int baz_and(int a, int b)
+{
+  return (a >> 2) & (b >> 2);
+}
+
+int baz_ior(int a, int b)
+{
+  return (a >> 2) | (b >> 2);
+}
+
+int baz_xor(int a, int b)
+{
+  return (a >> 2) ^ (b >> 2);
+}
+
+/* { dg-final { scan-tree-dump-times " << " 4 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " >> " 6 "optimized" } } */
+
diff --git a/gcc/testsuite/gcc.dg/pr71343-2.c b/gcc/testsuite/gcc.dg/pr71343-2.c
new file mode 100644
index 

Re: [PATCH] Fix middle-end/103645: empty struct store not removed when using compound literal

2022-08-08 Thread Richard Biener via Gcc-patches
On Mon, Aug 8, 2022 at 5:38 AM apinski--- via Gcc-patches
 wrote:
>
> From: Andrew Pinski 
>
> For compound literals empty struct stores are not removed as they go down a
> different path of the gimplifier; trying to optimize the init constructor.
> This fixes the problem by not adding the gimple assignment at the end
> of gimplify_init_constructor if it was an empty type.
>
> Note this updates gcc.dg/pr87052.c where we had:
> const char d[0] = { };
> And was expecting a store to d but after this, there is no store
> as the decl's type is zero in size.
>
> OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

OK.

> gcc/ChangeLog:
>
> PR middle-end/103645
> * gimplify.c (gimplify_init_constructor): Don't build/add
> gimple assignment of an empty type.
>
> testsuite/ChangeLog:
> * gcc.dg/pr87052.c: Update d var to expect nothing.
> ---
>  gcc/gimplify.cc| 7 +--
>  gcc/testsuite/gcc.dg/pr87052.c | 6 +++---
>  2 files changed, 8 insertions(+), 5 deletions(-)
>
> diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
> index 2ac7ca0855e..f0fbdb48012 100644
> --- a/gcc/gimplify.cc
> +++ b/gcc/gimplify.cc
> @@ -5488,8 +5488,11 @@ gimplify_init_constructor (tree *expr_p, gimple_seq 
> *pre_p, gimple_seq *post_p,
>if (ret == GS_ERROR)
>  return GS_ERROR;
>/* If we have gimplified both sides of the initializer but have
> - not emitted an assignment, do so now.  */
> -  if (*expr_p)
> + not emitted an assignment, do so now.   */
> +  if (*expr_p
> +  /* If the type is an empty type, we don't need to emit the
> +assignment. */
> +  && !is_empty_type (TREE_TYPE (TREE_OPERAND (*expr_p, 0
>  {
>tree lhs = TREE_OPERAND (*expr_p, 0);
>tree rhs = TREE_OPERAND (*expr_p, 1);
> diff --git a/gcc/testsuite/gcc.dg/pr87052.c b/gcc/testsuite/gcc.dg/pr87052.c
> index 18e092c4674..796fe6440c1 100644
> --- a/gcc/testsuite/gcc.dg/pr87052.c
> +++ b/gcc/testsuite/gcc.dg/pr87052.c
> @@ -23,8 +23,7 @@ void test (void)
>
>const char d[0] = { };
>
> -  /* Expect the following:
> - d = ""; */
> +  /* Expect nothing.  */
>
>const char e[0] = "";
>
> @@ -36,6 +35,7 @@ void test (void)
>  /* { dg-final { scan-tree-dump-times "a = \"x00ab\";" 1 "gimple" } }
> { dg-final { scan-tree-dump-times "b = \"ax00bc\";"  1 "gimple" } }
> { dg-final { scan-tree-dump-times "c = \"\";"  1 "gimple" } }
> -   { dg-final { scan-tree-dump-times "d = { *};"  1 "gimple" } }
> +   { dg-final { scan-tree-dump-times "d = "  1 "gimple" } }
> +   { dg-final { scan-tree-dump-times "d = {CLOBBER\\(eol\\)}"  1 "gimple" } }
> { dg-final { scan-tree-dump-times "e = "  1 "gimple" } }
> { dg-final { scan-tree-dump-times "e = {CLOBBER\\(eol\\)}"  1 "gimple" } 
> }  */
> --
> 2.27.0
>


Re: [x86 PATCH] Move V1TI shift/rotate lowering from expand to pre-reload split.

2022-08-08 Thread Uros Bizjak via Gcc-patches
On Fri, Aug 5, 2022 at 8:36 PM Roger Sayle  wrote:
>
>
> This patch moves the lowering of 128-bit V1TImode shifts and rotations by
> constant bit counts to sequences of SSE operations from the RTL expansion
> pass to the pre-reload split pass.  Postponing this splitting of shifts
> and rotates enables (will enable) the TImode equivalents of these
> operations/
> instructions to be considered as candidates by the (TImode) STV pass.
> Technically, this patch changes the existing expanders to continue to
> lower shifts by variable amounts, but constant operands become RTL
> instructions, specified by define_insn_and_split that are triggered by
> x86_pre_reload_split.  The one minor complication is that logical shifts
> by multiples of eight, don't get split, but are handled by existing insn
> patterns, such as sse2_ashlv1ti3 and sse2_lshrv1ti3.  There should be no
> changes in generated code with this patch, which just adjusts the pass
> in which transformations get applied.
>
> This patch has been tested on x86_64-pc-linux-gnu with make bootstrap
> and make -k check, both with and without --target_board=unix{-m32}, with
> no new failures.  Ok for mainline?
>
>
>
> 2022-08-05  Roger Sayle  
>
> gcc/ChangeLog
> * config/i386/sse.md (ashlv1ti3): Delay lowering of logical left
> shifts by constant bit counts.
> (*ashlvti3_internal): New define_insn_and_split that lowers
> logical left shifts by constant bit counts, that aren't multiples
> of 8, before reload.
> (lshrv1ti3): Delay lowering of logical right shifts by constant.
> (*lshrv1ti3_internal): New define_insn_and_split that lowers
> logical right shifts by constant bit counts, that aren't multiples
> of 8, before reload.
> (ashrv1ti3): Delay lowering of arithmetic right shifts by
> constant bit counts.
> (*ashrv1ti3_internal): New define_insn_and_split that lowers
> arithmetic right shifts by constant bit counts before reload.
> (rotlv1ti3): Delay lowering of rotate left by constant.
> (*rotlv1ti3_internal): New define_insn_and_split that lowers
> rotate left by constant bit counts before reload.
> (rotrv1ti3): Delay lowering of rotate right by constant.
> (*rotrv1ti3_internal): New define_insn_and_split that lowers
> rotate right by constant bit counts before reload.

+(define_insn_and_split "*ashlv1ti3_internal"
+  [(set (match_operand:V1TI 0 "register_operand")
  (ashift:V1TI
  (match_operand:V1TI 1 "register_operand")
- (match_operand:QI 2 "general_operand")))]
-  "TARGET_SSE2 && TARGET_64BIT"
+ (match_operand:SI 2 "const_0_to_255_operand")))]
+  "TARGET_SSE2
+   && TARGET_64BIT
+   && (INTVAL (operands[2]) & 7) != 0

Please introduce a const_0_to_255_not_mul_8_operand predicate.
Alternatively, and preferably, you can use pattern shadowing, where
the preceding, more constrained pattern will match before the
following, broader pattern does.

Uros.
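
A small C sketch of the condition such a const_0_to_255_not_mul_8_operand
predicate would have to test on the constant shift count (the helper and
its name are illustrative only, not the committed GCC predicate).

#include <stdbool.h>

/* True for shift counts the new splitters handle: in [0, 255] and not
   a whole number of bytes.  */
static bool
const_0_to_255_not_mul_8_p (long val)
{
  return val >= 0 && val <= 255 && (val & 7) != 0;
}

int
main (void)
{
  /* 13 qualifies (split before reload); 16 does not (byte shift).  */
  return (const_0_to_255_not_mul_8_p (13)
          && !const_0_to_255_not_mul_8_p (16)) ? 0 : 1;
}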

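And, for readers following the thread, a GNU C illustration of the two
kinds of constant V1TI shift counts under discussion (illustrative only;
the function names are not from the patch or the testsuite).

typedef unsigned __int128 uv1ti __attribute__ ((__vector_size__ (16)));

/* Count not a multiple of 8: lowered by the new pre-reload splitter.  */
uv1ti shl13 (uv1ti x) { return x << 13; }

/* Count a multiple of 8: stays with the existing byte-shift insn
   (e.g. sse2_ashlv1ti3).  */
uv1ti shl16 (uv1ti x) { return x << 16; }
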

Re: [x86 PATCH take #2] Add peephole2 to reduce double word register shuffling

2022-08-08 Thread Uros Bizjak via Gcc-patches
On Sun, Aug 7, 2022 at 7:04 PM Roger Sayle  wrote:
>
>
> This is a resubmission of my patch from June to fix some forms of inefficient
> register allocation using an additional peephole2 in i386.md.
> https://gcc.gnu.org/pipermail/gcc-patches/2022-June/596064.html
>
> Since the original, a number of supporting patches/improvements have
> been reviewed and approved, making this peephole even more effective.
> Hence for the simple function:
> __int128 foo(__int128 x, __int128 y) { return x+y; }
>
> mainline GCC on x86_64 with -O2 currently generates:
> movq    %rsi, %rax
> movq    %rdi, %r8
> movq    %rax, %rdi
> movq    %rdx, %rax
> movq    %rcx, %rdx
> addq    %r8, %rax
> adcq    %rdi, %rdx
> ret
>
> with this patch we now generate (a three insn improvement):
> movq    %rdx, %rax
> movq    %rcx, %rdx
> addq    %rdi, %rax
> adcq    %rsi, %rdx
> ret
>
> Back in June the review of the original patch stalled, as peephole2
> isn't the ideal place to fix this (with which I fully agree), and this patch
> is really just a workaround for a deeper deficiency in reload/lra.
> To address this I've now filed a new enhancement PR in Bugzilla,
> PR rtl-optimization/106518, that describes that underlying issue,
> which might make an interesting (GSoC) project for anyone brave
> (foolhardy) enough to tweak GCC's register allocation.
>
> By comparison, this single peephole can't adversely affect other targets,
> and should the happy day come that it's no longer required, at worst
> would just become a harmless legacy transform that no longer triggers.
>
> I'm also investigating Uros' suggestion that it may be possible for RTL
> expansion to do a better job expanding the function prologue, but
> ultimately the hard register placement constraints are fixed by the
> target ABI, and poor allocation/assignment of hard registers is the
> responsibility/fault of the register allocation passes.
> But it may still be possible to reduce register pressure, by avoiding the
> use of SUBREGs (which keep the source and destination double words
> live during shuffling) along the lines of Richard's CONCAT suggestion.
>
> This patch has been retested again mainline using make bootstrap and
> make -k check, both with and without --target_board=unix{-m32},
> with no new failures.  Ok mainline?
>
>
> 2022-08-07  Roger Sayle  
>
> gcc/ChangeLog
> PR target/43644
> PR rtl-optimization/97756
> PR rtl-optimization/98438
> * config/i386/i386.md (define_peephole2): Recognize double word
> swap sequences, and replace them with more efficient idioms,
> including using xchg when optimizing for size.
>
> gcc/testsuite/ChangeLog
> PR target/43644
> * gcc.target/i386/pr43644.c: New test case.

+;; Replace a double word swap that requires 4 mov insns with a
+;; 3 mov insn implementation (or an xchg when optimizing for size).
+(define_peephole2
+  [(set (match_operand:DWIH 0 "general_reg_operand")
+ (match_operand:DWIH 1 "general_reg_operand"))
+   (set (match_operand:DWIH 2 "general_reg_operand")
+ (match_operand:DWIH 3 "general_reg_operand"))
+   (clobber (match_operand:<DWI> 4 "general_reg_operand"))
+   (set (match_dup 3) (match_dup 0))
+   (set (match_dup 1) (match_dup 2))]
+  "REGNO (operands[0]) != REGNO (operands[3])
+   && REGNO (operands[1]) != REGNO (operands[2])
+   && REGNO (operands[1]) != REGNO (operands[3])
+   && REGNO (operands[3]) == REGNO (operands[4])
+   && peep2_reg_dead_p (4, operands[0])
+   && peep2_reg_dead_p (5, operands[2])"
+  [(parallel [(set (match_dup 1) (match_dup 3))
+  (set (match_dup 3) (match_dup 1))])]

I'm not sure it is correct to remove the clobber here. Some RTL expert
should comment on this change.

+  if (!optimize_insn_for_size_p ())
+{
+  rtx tmp = REGNO (operands[0]) > REGNO (operands[2]) ? operands[0]
+  : operands[2];

Hm, this is a strange relation, and it is not obvious why it is done
in that way. Usually, REGNO (op1) != REGNO (op2) does the trick. At
least a comment should be added here.

Uros.

+  emit_move_insn (tmp, operands[1]);
+  emit_move_insn (operands[1], operands[3]);
+  emit_move_insn (operands[3], tmp);
+  DONE;
+}





>
> Thanks in advance,
> Roger
> --
>


[PATCH v2, rs6000] Add multiply-add expand pattern [PR103109]

2022-08-08 Thread HAO CHEN GUI via Gcc-patches
Hi,
  This patch adds an expand and several insns for multiply-add with three
64-bit operands.

  Compared with the last version, the main changes are:
1 The "maddld" pattern is reused for the low-part generation.
2 A runnable testcase replaces the original compile-only case.
3 Indentation problems are fixed.

  Bootstrapped and tested on powerpc64-linux BE and LE with no regressions.
Is this okay for trunk? Any recommendations? Thanks a lot.
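
A short C sketch of the semantics the new expander covers (names are
illustrative and 64-bit long is assumed, as on powerpc64; the real
coverage is the pr103109.c test below): maddld supplies the low
doubleword of a*b+c and maddhd/maddhdu the high doubleword.

unsigned __int128
madd_parts (unsigned long a, unsigned long b, unsigned long c)
{
  unsigned __int128 full = (unsigned __int128) a * b + c;
  unsigned long lo = (unsigned long) full;           /* maddld   */
  unsigned long hi = (unsigned long) (full >> 64);   /* maddhdu  */
  return ((unsigned __int128) hi << 64) | lo;
}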

ChangeLog
2022-08-08  Haochen Gui  

gcc/
PR target/103109
* config/rs6000/rs6000.md (maddditi4): New pattern for multiply-add.
(madddi4_highpart): New.
(madddi4_highpart_le): New.

gcc/testsuite/
PR target/103109
* gcc.target/powerpc/pr103109.c: New.



patch.diff
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index c55ee7e171a..4c58023490a 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -3217,7 +3217,7 @@ (define_expand "mul<mode>3"
   DONE;
 })

-(define_insn "*maddld<mode>4"
+(define_insn "maddld<mode>4"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(plus:GPR (mult:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
(match_operand:GPR 2 "gpc_reg_operand" "r"))
@@ -3226,6 +3226,52 @@ (define_insn "*maddld<mode>4"
   "maddld %0,%1,%2,%3"
   [(set_attr "type" "mul")])

+(define_expand "maddditi4"
+  [(set (match_operand:TI 0 "gpc_reg_operand")
+   (plus:TI
+ (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand"))
+  (any_extend:TI (match_operand:DI 2 "gpc_reg_operand")))
+ (any_extend:TI (match_operand:DI 3 "gpc_reg_operand"))))]
+  "TARGET_MADDLD && TARGET_POWERPC64"
+{
+  rtx op0_lo = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 8 : 0);
+  rtx op0_hi = gen_rtx_SUBREG (DImode, operands[0], BYTES_BIG_ENDIAN ? 0 : 8);
+
+  emit_insn (gen_maddlddi4 (op0_lo, operands[1], operands[2], operands[3]));
+
+  if (BYTES_BIG_ENDIAN)
+emit_insn (gen_madddi4_highpart (op0_hi, operands[1], operands[2],
+   operands[3]));
+  else
+emit_insn (gen_madddi4_highpart_le (op0_hi, operands[1], operands[2],
+  operands[3]));
+  DONE;
+})
+
+(define_insn "madddi4_highpart"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (subreg:DI
+ (plus:TI
+   (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+(any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+   (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+0))]
+  "TARGET_MADDLD && BYTES_BIG_ENDIAN && TARGET_POWERPC64"
+  "maddhd %0,%1,%2,%3"
+  [(set_attr "type" "mul")])
+
+(define_insn "madddi4_highpart_le"
+  [(set (match_operand:DI 0 "gpc_reg_operand" "=r")
+   (subreg:DI
+ (plus:TI
+   (mult:TI (any_extend:TI (match_operand:DI 1 "gpc_reg_operand" "r"))
+(any_extend:TI (match_operand:DI 2 "gpc_reg_operand" "r")))
+   (any_extend:TI (match_operand:DI 3 "gpc_reg_operand" "r")))
+8))]
+  "TARGET_MADDLD && !BYTES_BIG_ENDIAN && TARGET_POWERPC64"
+  "maddhd %0,%1,%2,%3"
+  [(set_attr "type" "mul")])
+
 (define_insn "udiv3"
   [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
 (udiv:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr103109.c b/gcc/testsuite/gcc.target/powerpc/pr103109.c
new file mode 100644
index 000..969b9751b21
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/pr103109.c
@@ -0,0 +1,110 @@
+/* { dg-do run { target { has_arch_ppc64 } } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power9 -save-temps" } */
+/* { dg-require-effective-target int128 } */
+/* { dg-require-effective-target p9modulo_hw } */
+/* { dg-final { scan-assembler-times {\mmaddld\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mmaddhd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mmaddhdu\M} 1 } } */
+
+union U {
+  __int128 i128;
+  struct {
+long l1;
+long l2;
+  } s;
+};
+
+__int128
+create_i128 (long most_sig, long least_sig)
+{
+  union U u;
+
+#if __LITTLE_ENDIAN__
+  u.s.l1 = least_sig;
+  u.s.l2 = most_sig;
+#else
+  u.s.l1 = most_sig;
+  u.s.l2 = least_sig;
+#endif
+  return u.i128;
+}
+
+
+#define DEBUG 0
+
+#if DEBUG
+#include <stdio.h>
+#include <stdlib.h>
+
+void print_i128(__int128 val, int unsignedp)
+{
+  if (unsignedp)
+printf(" %llu ", (unsigned long long)(val >> 64));
+  else
+printf(" %lld ", (signed long long)(val >> 64));
+
+  printf("%llu (0x%llx %llx)",
+ (unsigned long long)(val & 0xFFFFFFFFFFFFFFFFULL),
+ (unsigned long long)(val >> 64),
+ (unsigned long long)(val & 0xFFFFFFFFFFFFFFFFULL));
+}
+#endif
+
+void abort (void);
+
+__attribute__((noinline))
+__int128 multiply_add (long a, long b, long c)
+{
+  return (__int128) a * b + c;
+}
+
+__attribute__((noinline))
+unsigned __int128 multiply_addu (unsigned long a, unsigned long b,
+